Warning: file_get_contents(/data/phpspider/zhask/data//catemap/4/video/2.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181
Python 脏兮兮的空白断言错误?_Python_Scrapy - Fatal编程技术网

Python 脏兮兮的空白断言错误?

Python 脏兮兮的空白断言错误?,python,scrapy,Python,Scrapy,下面的代码针对发送到解析方法(Scrapy v0.24.4)的每个请求抛出以下错误: 我已尝试进行了大量更改,但仍然无法找出触发此错误的原因。您需要从parse()方法返回: if response.url.find('?start=') == -1: return self.createRestaurantPageLinks(response) 这个callback=“scrape_reviews”不应该是callback=self.scrape_reviews?这样,我得到了错误:

下面的代码针对发送到解析方法(Scrapy v0.24.4)的每个请求抛出以下错误:


我已尝试进行了大量更改,但仍然无法找出触发此错误的原因。

您需要从
parse()
方法返回:

if response.url.find('?start=') == -1:
    return self.createRestaurantPageLinks(response)

这个
callback=“scrape_reviews”
不应该是
callback=self.scrape_reviews
?这样,我得到了错误:SyntaxError:'return'和生成器中的参数
import scrapy
from scrapy import Request
import re

ROOT_URL = "http://www.yelp.com"

class YelpReview(scrapy.Item):
    zip_code = scrapy.Field()
    review_date = scrapy.Field()

class yelp_spider(scrapy.Spider):
    name = 'yelp_spider'
    allowed_domains = ['yelp.com']
    start_urls = ["http://www.yelp.com/search?find_desc=Restaurants&find_loc=02557&ns=1"]

    def parse(self, response):
        business_urls = [business_url.extract() for
                business_url in response.xpath('//a[@class="biz-name"]/@href')[1:]
                ]
        for business_url in business_urls:
            yield Request(url=ROOT_URL + business_url, callback="scrape_reviews")

        if response.url.find('?start=') == -1:
            self.createRestaurantPageLinks(response)

    def scrape_reviews(self, response):
        reviews = response.xpath('//meta[@itemprop="datePublished"]/@content')
        item = YelpReview()

        for review in reviews:
            item['zip_code'] = "02557"
            item['review_date'] = review.extract()
            yield item

        if response.url.find('?start=') == -1:
            self.createReviewPageLinks(response)

    def createRestaurantPageLinks(self, response):
        raw_num_results = response.xpath('//span[@class="pagination-results-window"]/text()').extract()[0]
        num_business_results = int(re.findall(" of (\d+)", raw_num_results)[0])
        BUSINESSES_PER_PAGE = 10
        restaurant_page_links = [Request(url=response.url + '?start=' + str(BUSINESSES_PER_PAGE*(n+1)),
            callback="parse") for n in range(num_business_results/BUSINESSES_PER_PAGE)]

        return restaurant_page_links

    def createReviewsPageLinks(self, response):
        REVIEWS_PER_PAGE = 40
        num_review_results = int(response.xpath('//span[@itemprop="reviewCount"]/text()').extract()[0])
        review_page_links = [Request(url=response.url + '?start=' + str(REVIEWS_PER_PAGE*(n+1)),
            callback="scrape_reviews") for n in range(num_review_results/REVIEWS_PER_PAGE)]

        return review_page_links
if response.url.find('?start=') == -1:
    return self.createRestaurantPageLinks(response)