Python Scrapy 不创建 JSON 文件
蜘蛛运行完成后,我无法从 API 下载到数据;错误日志中也没有显示任何报错。有人能提供一点线索吗?(标签:python, scrapy)以下是我的爬虫代码:
import json
import scrapy
class SpidyQuotesSpider(scrapy.Spider):
    """Crawl the OLX relevance-search API and yield one item per car listing.

    Fixes two defects in the original:

    * ``data['has_next']`` raised ``KeyError`` — the API response has no
      ``has_next`` key; the next page URL lives in ``metadata.next_page_url``.
    * ``item.get('price.value.currency.display')`` looked up the literal
      dotted string as a single dict key, which never exists in nested JSON,
      so ``price`` was always ``None``. The dotted path must be walked one
      level at a time.
    """

    name = 'spidyquotes'
    quotes_base_url = 'https://www.olx.co.id/api/relevance/search?category=198&facet_limit=100&location=1000001&location_facet_limit=20&page=%s'
    start_urls = [quotes_base_url % 1]
    download_delay = 1.5  # throttle: be polite to the public API

    @staticmethod
    def _dig(mapping, dotted_path):
        """Walk a dotted path ('a.b.c') through nested dicts.

        Returns the value at the end of the path, or None as soon as any
        segment is missing or the current value is not a dict.
        """
        current = mapping
        for part in dotted_path.split('.'):
            if not isinstance(current, dict):
                return None
            current = current.get(part)
        return current

    def parse(self, response):
        """Parse one API page: yield listing dicts, then follow pagination."""
        data = json.loads(response.body)
        for item in data.get('data', []):
            yield {
                'car_id': item.get('id'),
                'car_name': item.get('title'),
                # Walk the nested structure instead of using a literal
                # dotted key (which always returned None).
                # NOTE(review): path assumed from the original key — confirm
                # against the actual API payload.
                'price': self._dig(item, 'price.value.currency.display'),
                'user_id': item.get('user_id'),
                # 'user_name':
            }
        # The response carries no 'has_next' flag (KeyError in the original);
        # pagination is driven by metadata.next_page_url when present.
        next_url = self._dig(data, 'metadata.next_page_url')
        if next_url:
            yield scrapy.Request(next_url)
响应中并不存在 data['has_next'] 这个键,但存在 data['metadata']['next_page_url'],因此您可以改用:
# Pagination via the API's metadata block. Raises KeyError if 'metadata'
# or 'next_page_url' is absent — see the safer .get() variant below.
# NOTE(review): indentation was lost in extraction; this fragment belongs
# inside parse(), and the `yield` nests under the `if`.
url = data['metadata']['next_page_url']
if url:
yield scrapy.Request(url)
或者让它更安全
# Safer pagination: .get() avoids KeyError when either key is missing.
# NOTE(review): indentation was lost in extraction; each statement after an
# `if` nests one level deeper inside parse().
metadata = data.get('metadata')
if metadata:
url = metadata.get('next_page_url')
if url:
yield scrapy.Request(url)
或者您可以使用try/except
无需创建项目即可运行的完整代码
import json
import scrapy
class MySpider(scrapy.Spider):
    """Crawl the OLX relevance-search API, yielding one dict per listing.

    Pagination follows ``metadata.next_page_url`` (the response has no
    ``has_next`` key). Also fixes the original's price lookup:
    ``item.get('price.value.currency.display')`` used the literal dotted
    string as a single key, which never exists in nested JSON, so the
    field was always ``None`` — the path must be walked level by level.
    """

    name = 'spidyquotes'
    quotes_base_url = 'https://www.olx.co.id/api/relevance/search?category=198&facet_limit=100&location=1000001&location_facet_limit=20&page=%s'
    start_urls = [quotes_base_url % 1]
    download_delay = 1.5  # throttle: be polite to the public API

    @staticmethod
    def _dig(mapping, dotted_path):
        """Walk a dotted path ('a.b.c') through nested dicts; None on any miss."""
        current = mapping
        for part in dotted_path.split('.'):
            if not isinstance(current, dict):
                return None
            current = current.get(part)
        return current

    def parse(self, response):
        """Parse one API page: yield listing dicts, then follow pagination."""
        data = json.loads(response.body)
        for item in data.get('data', []):
            yield {
                'car_id': item.get('id'),
                'car_name': item.get('title'),
                # Walk the nested structure instead of using a literal
                # dotted key (which always returned None).
                # NOTE(review): path assumed from the original key — confirm
                # against the actual API payload.
                'price': self._dig(item, 'price.value.currency.display'),
                'user_id': item.get('user_id'),
                # 'user_name':
            }
        # .get() everywhere so a missing 'metadata' or 'next_page_url'
        # ends pagination cleanly instead of raising KeyError.
        metadata = data.get('metadata')
        if metadata:
            url = metadata.get('next_page_url')
            if url:
                yield scrapy.Request(url)
# --- it runs without project and saves in `output.csv` ---
from scrapy.crawler import CrawlerProcess

# Stand-alone runner: no Scrapy project scaffolding needed; the feed
# exporter writes every yielded item to output.csv.
settings = {
    'USER_AGENT': 'Mozilla/5.0',
    # save in file as CSV, JSON or XML
    'FEED_FORMAT': 'csv',  # csv, json, xml
    'FEED_URI': 'output.csv',  #
}

process = CrawlerProcess(settings)
process.crawl(MySpider)
process.start()  # blocks until the crawl finishes
我在 data['has_next'] 上得到了 KeyError。你确定这个键真的存在吗?下面是修正后的完整代码:
import json
import scrapy
class MySpider(scrapy.Spider):
    """Crawl the OLX relevance-search API, yielding one dict per listing.

    Pagination follows ``metadata.next_page_url`` (the response has no
    ``has_next`` key). Also fixes the price lookup:
    ``item.get('price.value.currency.display')`` treated the dotted string
    as one literal key, which never exists in nested JSON, so the field
    was always ``None`` — the path must be walked level by level.
    """

    name = 'spidyquotes'
    quotes_base_url = 'https://www.olx.co.id/api/relevance/search?category=198&facet_limit=100&location=1000001&location_facet_limit=20&page=%s'
    start_urls = [quotes_base_url % 1]
    download_delay = 1.5  # throttle: be polite to the public API

    @staticmethod
    def _dig(mapping, dotted_path):
        """Walk a dotted path ('a.b.c') through nested dicts; None on any miss."""
        current = mapping
        for part in dotted_path.split('.'):
            if not isinstance(current, dict):
                return None
            current = current.get(part)
        return current

    def parse(self, response):
        """Parse one API page: yield listing dicts, then follow pagination."""
        data = json.loads(response.body)
        for item in data.get('data', []):
            yield {
                'car_id': item.get('id'),
                'car_name': item.get('title'),
                # Walk the nested structure instead of using a literal
                # dotted key (which always returned None).
                # NOTE(review): path assumed from the original key — confirm
                # against the actual API payload.
                'price': self._dig(item, 'price.value.currency.display'),
                'user_id': item.get('user_id'),
                # 'user_name':
            }
        # .get() everywhere so a missing 'metadata' or 'next_page_url'
        # ends pagination cleanly instead of raising KeyError.
        metadata = data.get('metadata')
        if metadata:
            url = metadata.get('next_page_url')
            if url:
                yield scrapy.Request(url)
# --- it runs without project and saves in `output.csv` ---
from scrapy.crawler import CrawlerProcess

# Script-style entry point: build the crawler with inline settings so no
# scrapy.cfg / settings.py is required; items land in output.csv.
crawler_settings = {
    'USER_AGENT': 'Mozilla/5.0',
    # save in file as CSV, JSON or XML
    'FEED_FORMAT': 'csv',  # csv, json, xml
    'FEED_URI': 'output.csv',  #
}

runner = CrawlerProcess(crawler_settings)
runner.crawl(MySpider)
runner.start()  # blocks until the crawl finishes