Python Scrapy callback doesn't work

I'm new to Scrapy. I wrote the spider below, but I can't figure out why parse_item is never invoked as the callback from parse.

Any help is welcome. Thanks in advance.

import datetime
import socket
from urlparse import urljoin  # Python 3: from urllib.parse import urljoin

from scrapy import Request, Spider
from scrapy.loader import ItemLoader
from scrapy.loader.processors import MapCompose, Join

from myproject.items import StackItem  # adjust to your project's items module


class ManualSpider(Spider):
    name = "manual"
    allowed_domains = ["https://www.gumtree.com"]
    start_urls = ['https://www.gumtree.com/flats-houses/london']

    def parse_item(self, response):  
        # Create the loader using the response
        l = ItemLoader(item=StackItem(), response=response)

        l.add_xpath('title', '//main/div[2]/header/h1/text()', MapCompose(unicode.strip, unicode.title))
        l.add_xpath('price', '//header/span/strong/text()', MapCompose(lambda i: i.replace(',', ''), float),
                    re='[,.0-9]+', )
        l.add_xpath('description', '//p[@itemprop="description"]'
                                   '[1]/text()', Join(), MapCompose(unicode.strip))
        l.add_xpath('address', '//*[@itemtype="http://schema.org/'
                               'Place"][1]/text()', MapCompose(unicode.strip))
        l.add_xpath('location', '//header/strong/span/text()', MapCompose(unicode.strip))
        l.add_xpath('image_urls', '//*[@itemprop="image"][1]/@src', MapCompose(
            lambda i: urljoin(response.url, i)))

        l.add_value('url', response.url)
        l.add_value('project', "example")
        l.add_value('spider', self.name)
        l.add_value('server', socket.gethostname())
        l.add_value('date', datetime.datetime.now())

        yield l.load_item()

    def parse(self, response):

        # Get the next index URLs and yield Requests
        next_selector = response.xpath('//*[@class="pagination-next"]//@href')
        for url in next_selector.extract():
            yield Request(urljoin(response.url, url))

        # Get item URLs and yield Requests
        item_selector = response.xpath('//div[@id="srp-results"]//article//@href')
        for url in item_selector.extract():
            if url != "":
                print(urljoin(response.url, url))
                yield Request(urljoin(response.url, url), callback=self.parse_item)

Change

callback="parse_item"

to

callback=self.parse_item

callback="parse_item" doesn't work because you passed callback a string. You should give it the function itself:

callback=self.parse_item
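Scrapy expects callback to be a callable (typically a bound method of the spider) that it can invoke later with the response; a plain string cannot be called. A minimal sketch of the difference, in plain Python with no Scrapy required (the class and names here are illustrative):

```python
class DemoSpider:
    """Stand-in for a Scrapy spider; purely illustrative."""

    def parse_item(self, response):
        return "parsed: " + response


spider = DemoSpider()

# A bound method is callable, so it can be invoked later as a callback:
cb = spider.parse_item
print(callable(cb))         # True
print(cb("some-response"))  # parsed: some-response

# A bare string is not callable, so it can never run as a callback:
cb_str = "parse_item"
print(callable(cb_str))     # False
```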

Also, remove the "https://" from allowed_domains. That attribute takes bare domain names, not URLs:

allowed_domains = ["www.gumtree.com"]

With the scheme included, no request's hostname ever matches the entry, so Scrapy's offsite filtering drops the requests before parse_item can run.
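To see why the scheme matters: the offsite filter compares each request's hostname against the entries in allowed_domains. A simplified sketch of that comparison (not Scrapy's actual implementation) shows that "https://www.gumtree.com" can never match a hostname:

```python
from urllib.parse import urlparse


def is_allowed(url, allowed_domains):
    # Simplified offsite check: a request passes if its hostname equals
    # an allowed domain or is a subdomain of one.
    host = urlparse(url).hostname or ""
    return any(host == d or host.endswith("." + d) for d in allowed_domains)


url = "https://www.gumtree.com/flats-houses/london"

# With the scheme in allowed_domains, the hostname never matches:
print(is_allowed(url, ["https://www.gumtree.com"]))  # False

# With a bare domain name, it does:
print(is_allowed(url, ["www.gumtree.com"]))          # True
```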