Python scrapy请求不产生任何输出_Python_Scrapy

Python scrapy请求不产生任何输出

python scrapy

Python scrapy请求不产生任何输出,python,scrapy,Python,Scrapy,我正在尝试将“以下链接”示例改编为我自己的spider： import scrapy from scrapy.spiders import CrawlSpider, Rule from scrapy.linkextractors import LinkExtractor from funda.items import FundaItem class PropertyLinksSpider(CrawlSpider): name = "property_links" allowe

我正在尝试将“以下链接”示例改编为我自己的spider：

import scrapy
from scrapy.spiders import CrawlSpider, Rule
from scrapy.linkextractors import LinkExtractor
from funda.items import FundaItem

class PropertyLinksSpider(CrawlSpider):

    name = "property_links"
    allowed_domains = ["funda.nl"]

    def __init__(self, place='amsterdam', page='1'):
        self.start_urls = ["http://www.funda.nl/koop/%s/p%s/" % (place, page)]
        self.base_url = "http://www.funda.nl/koop/%s/" % place
        self.le1 = LinkExtractor(allow=r'%s+huis|appartement-\d{8}' % self.base_url)

    def parse(self, response):
        links = self.le1.extract_links(response)
        for link in links:
            if link.url.count('/') == 6 and link.url.endswith('/'):
                item = FundaItem()
                item['url'] = link.url
                yield scrapy.Request(link.url, callback=self.parse_dir_contents)

    def parse_dir_contents(self, response):
        item['title'] = response.xpath('//title').extract()
        yield item

但是，如果我尝试使用以下命令运行此命令

scrapy crawl property_links -a place=amsterdam -a page=1 -o property_links_test.json

我得到一个空的.json文件：

在这个爬行器的早期版本中，我使用了

parse

方法，只需

yield item

爬行器就会生成一个带有预期链接的.json文件。我还使用Scrapy shell检查了页面是否有标题。所以我不明白为什么我没有得到任何输出？

您没有将项目解析为第二个函数这段代码对我来说很好

import scrapy
from scrapy.spiders import CrawlSpider, Rule
from scrapy.linkextractors import LinkExtractor

class FundaItem(scrapy.Item):
    url = scrapy.Field()
    title = scrapy.Field()

class PropertyLinksSpider(CrawlSpider):

    name = "property_links"
    allowed_domains = ["funda.nl"]

    def __init__(self, place='amsterdam', page='1'):
        self.start_urls = ["http://www.funda.nl/koop/%s/p%s/" % (place, page)]
        self.base_url = "http://www.funda.nl/koop/%s/" % place
        self.le1 = LinkExtractor(allow=r'%s+huis|appartement-\d{8}' % self.base_url)

    def parse(self, response):
        links = self.le1.extract_links(response)
        for link in links:
            if link.url.count('/') == 6 and link.url.endswith('/'):
                item = FundaItem()
                item['url'] = link.url
                yield scrapy.Request(link.url, callback=self.parse_dir_contents, meta={'item': item})

    def parse_dir_contents(self, response):
        new_item = response.request.meta['item']
        new_item['title'] = response.xpath('//title').extract()
        yield new_item

您没有将项解析为第二个函数。这段代码对我来说很好

import scrapy
from scrapy.spiders import CrawlSpider, Rule
from scrapy.linkextractors import LinkExtractor

class FundaItem(scrapy.Item):
    url = scrapy.Field()
    title = scrapy.Field()

class PropertyLinksSpider(CrawlSpider):

    name = "property_links"
    allowed_domains = ["funda.nl"]

    def __init__(self, place='amsterdam', page='1'):
        self.start_urls = ["http://www.funda.nl/koop/%s/p%s/" % (place, page)]
        self.base_url = "http://www.funda.nl/koop/%s/" % place
        self.le1 = LinkExtractor(allow=r'%s+huis|appartement-\d{8}' % self.base_url)

    def parse(self, response):
        links = self.le1.extract_links(response)
        for link in links:
            if link.url.count('/') == 6 and link.url.endswith('/'):
                item = FundaItem()
                item['url'] = link.url
                yield scrapy.Request(link.url, callback=self.parse_dir_contents, meta={'item': item})

    def parse_dir_contents(self, response):
        new_item = response.request.meta['item']
        new_item['title'] = response.xpath('//title').extract()
        yield new_item