MYSQL python列表索引超出范围
我正在编写一个网络抓取程序,从truecar.com收集数据 我的数据库有3列 当我运行程序时,我得到一个错误,它是这样的:list indext超出范围 以下是我迄今为止所做的工作:MYSQL python列表索引超出范围,python,mysql,web-scraping,Python,Mysql,Web Scraping,我正在编写一个网络抓取程序,从truecar.com收集数据 我的数据库有3列 当我运行程序时,我得到一个错误,它是这样的:list indext超出范围 以下是我迄今为止所做的工作: import mysql.connector from bs4 import BeautifulSoup import requests import re # take the car's name requested_car_name = input() # inject the car's name i
import mysql.connector
from bs4 import BeautifulSoup
import requests
import re
# take the car's name
requested_car_name = input()
# inject the car's name into the URL
my_request = requests.get('https://www.truecar.com/used-cars-for-sale/listings/' +
requested_car_name + '/location-holtsville-ny/?sort[]=best_match')
my_soup = BeautifulSoup(my_request.text, 'html.parser')
# ************ car_model column in database ******************
car_model = my_soup.find_all(
'span', attrs={'class': 'vehicle-header-make-model text-truncate'})
# we have a list of car models
car_list = []
for item in range(20):
# appends car_model to car_list
car_list.append(car_model[item].text)
car_string = ', '.join('?' * len(car_list))
# ************** price column in database *****************************
price = my_soup.find_all(
'div', attrs={'data-test': 'vehicleCardPricingBlockPrice'})
price_list = []
for item in range(20):
# appends price to price_list
price_list.append(price[item].text)
price_string = ', '.join('?' * len(price_list))
# ************** distance column in database ***************************
distance = my_soup.find_all('div', attrs={'data-test': 'vehicleMileage'})
distance_list = []
for item in range(20):
# appends distance to distance_list
distance_list.append(distance[item].text)
distance_string = ', '.join('?' * len(distance_list))
# check the connection
print('CONNECTING ...')
mydb = mysql.connector.connect(
host="xxxxx",
user="xxxxxx",
password="xxxxxx",
port='xxxxxx',
database='xxxxxx'
)
print('CONNECTED')
# checking the connection is done
my_cursor = mydb.cursor(buffered=True)
insert_command = 'INSERT INTO car_name (car_model, price, distance) VALUES (%s, %s, %s);' % (car_string, price_string, distance_string)
# values = (car_string, price_string, distance_string)
my_cursor.execute(insert_command, car_list, price_list, distance_list)
mydb.commit()
print(my_cursor.rowcount, "Record Inserted")
mydb.close()
我还有一个问题,我不能在我的列中插入列表,我尝试了很多方法,但不幸的是,我没能让它工作
我认为问题出在这方面:
IndexError Traceback (most recent call last)
<ipython-input-1-4a3930bf0f57> in <module>
23 for item in range(20):
24 # appends car_model to car_list
---> 25 car_list.append(car_model[item].text)
26
27 car_string = ', '.join('?' * len(car_list))
IndexError: list index out of range
索引器错误回溯(最近一次调用)
在里面
23对于范围内的项目(20):
24#将汽车模型附加到汽车列表
--->25车辆列表。追加(车辆模型[项目]。文本)
26
27 car_string=',join('?'*len(car_列表))
索引器:列表索引超出范围
我不希望它将整个列表插入数据库中的一行。我想在我的数据库中找到truecar.com中的前20辆车的价格、型号、里程数问题似乎是车型列表中的条目不到20个
for item in range(20):
car_list.append(car_model[item].text)
这总是试图将20个项目添加到汽车列表中。如果少于20个条目,则会出现错误,因为汽车模型[20]。只有10个条目时,文本不存在。你可以试试
for item in range(len(car_model)):
car_list.append(car_model[item].text)
你在硬编码长度。更改对soup元素进行迭代的方式。因此:
import mysql.connector
from bs4 import BeautifulSoup
import requests
# take the car's name
requested_car_name = input('Enter car name: ')
# inject the car's name into the URL
my_request = requests.get('https://www.truecar.com/used-cars-for-sale/listings/' +
requested_car_name + '/location-holtsville-ny/?sort[]=best_match')
my_soup = BeautifulSoup(my_request.text, 'html.parser')
# ************ car_model column in database ******************
car_model = my_soup.find_all(
'span', attrs={'class': 'vehicle-header-make-model text-truncate'})
# we have a list of car models
car_list = []
for item in car_model:
# appends car_model to car_list
car_list.append(item.text)
# ************** price column in database *****************************
price = my_soup.find_all(
'div', attrs={'data-test': 'vehicleCardPricingBlockPrice'})
price_list = []
for item in price:
# appends price to price_list
price_list.append(item.text)
# ************** distance column in database ***************************
distance = my_soup.find_all('div', attrs={'data-test': 'vehicleMileage'})
distance_list = []
for item in distance:
# appends distance to distance_list
distance_list.append(item.text)
# check the connection
print('CONNECTING ...')
mydb = mysql.connector.connect(
host="xxxxx",
user="xxxxxx",
password="xxxxxx",
port='xxxxxx',
database='xxxxxx'
)
print('CONNECTED')
# checking the connection is done
my_cursor = mydb.cursor(buffered=True)
insert_command = 'INSERT INTO car_name (car_model, price, distance) VALUES (%s, %s, %s)'
values = list(zip(car_list, price_list, distance_list))
my_cursor.executemany(insert_command, values)
mydb.commit()
print(my_cursor.rowcount, "Record Inserted")
mydb.close()
备选方案:
还有API,您可以在其中获取dat:
import mysql.connector
import requests
import math
# take the car's name
requested_car_name = input('Enter car name: ')
# inject the car's name into the URL
url = 'https://www.truecar.com/abp/api/vehicles/used/listings'
payload = {
'city': 'holtsville',
'collapse': 'true',
'fallback': 'true',
'include_incentives': 'true',
'include_targeted_incentives': 'true',
'make_slug': requested_car_name,
'new_or_used': 'u',
'per_page': '30',
'postal_code': '',
'search_event': 'true',
'sort[]': 'best_match',
'sponsored': 'true',
'state': 'ny',
'page':'1'}
jsonData = requests.get(url, params=payload).json()
total = jsonData['total']
total_pages = math.ceil(total/30)
total_pages_input = input('There are %s pages to iterate.\nEnter the number of pages to go through or type ALL: ' %total_pages)
if total_pages_input.upper() == 'ALL':
total_pages = total_pages
else:
total_pages = int(total_pages_input)
values = []
for page in range(1,total_pages+1):
if page == 1:
car_listings = jsonData['listings']
else:
payload.update({'page':'%s' %page})
jsonData = requests.get(url, params=payload).json()
car_listings = jsonData['listings']
for listing in car_listings:
vehicle = listing['vehicle']
ex_color = vehicle['exterior_color']
in_color = vehicle['interior_color']
location = vehicle['location']
price = vehicle['list_price']
make = vehicle['make']
model = vehicle['model']
mileage = vehicle['mileage']
style = vehicle['style']
year = vehicle['year']
engine = vehicle['engine']
accidentCount = vehicle['condition_history']['accidentCount']
ownerCount = vehicle['condition_history']['ownerCount']
isCleanTitle = vehicle['condition_history']['titleInfo']['isCleanTitle']
isFrameDamaged = vehicle['condition_history']['titleInfo']['isFrameDamaged']
isLemon = vehicle['condition_history']['titleInfo']['isLemon']
isSalvage = vehicle['condition_history']['titleInfo']['isSalvage']
isTheftRecovered = vehicle['condition_history']['titleInfo']['isTheftRecovered']
values.append((ex_color, in_color,location,price,make,model,mileage,
style,year,engine,accidentCount,ownerCount,isCleanTitle,isFrameDamaged,
isLemon, isSalvage,isTheftRecovered))
print('Completed: Page %s of %s' %(page,total_pages))
# check the connection
print('CONNECTING ...')
mydb = mysql.connector.connect(
host="xxxxx",
user="xxxxxx",
password="xxxxxx",
port='xxxxxx',
database='xxxxxx'
)
print('CONNECTED')
# checking the connection is done
my_cursor = mydb.cursor(buffered=True)
# create_command = ''' create table car_information (exterior_color varchar(255), interior_color varchar(255),location varchar(255),price varchar(255),make varchar(255),model varchar(255),mileage varchar(255),
# style varchar(255),year varchar(255),engine varchar(255),accidentCount varchar(255),ownerCount varchar(255),isCleanTitle varchar(255),isFrameDamaged varchar(255),
# isLemon varchar(255), isSalvage varchar(255),isTheftRecovered varchar(255))'''
# my_cursor.execute(create_command)
# print('created')
insert_command = '''INSERT INTO car_name (exterior_color, interior_color,location,price,make,model,mileage,
style,year,engine,accidentCount,ownerCount,isCleanTitle,isFrameDamaged,
isLemon, isSalvage,isTheftRecovered) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)'''
my_cursor.executemany(insert_command, values)
mydb.commit()
print(my_cursor.rowcount, "Record Inserted")
mydb.close()
你能通过完整的跟踪来显示到底是哪一行导致了问题吗?伙计,你能给我们回溯一下吗?很抱歉,我在internetchange中将第25行更改为范围内的项目(len(car_model)):你不能硬编码为20将第25行更改为什么?我在您的更改中遇到了这个错误:ResultSet对象没有属性“text”。您可能将元素列表视为单个元素。当您打算调用find()时,是否调用了find_all()?对不起。有个打字错误。修复了上面的问题它在数据库中给了我一个问号,与Raphael Eckert的答案相同。您将字符串与
car_string=','。连接('?'*len(car_list))
我在数据库第1行的'car_model'列中编辑了太长的解决方案数据,所有的问号都是这样的=>5?,从模型开始,你可以将它从右方拖动到右方,为了看看到底是什么吸引了我?我进入宝马,它给了我一条插入的记录,在数据库中我有这样的信息:'6','?,我不希望它将整个列表插入数据库中的一行。我想要truecar.com中的前20辆车的价格、型号、里程数在我的数据库中“你能在从数据库中提取后立即打印(汽车模型)以查看实际提取的内容吗?”你还能这样做吗。我说的不是插入,我说的是在追加之前这个安全变量的格式和内容。