Warning: file_get_contents(/data/phpspider/zhask/data//catemap/2/python/301.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181
Python scrapy请求不产生任何输出_Python_Scrapy - Fatal编程技术网

Python scrapy请求不产生任何输出

Python scrapy请求不产生任何输出,python,scrapy,Python,Scrapy,我正在尝试将“以下链接”示例改编为我自己的spider: import scrapy from scrapy.spiders import CrawlSpider, Rule from scrapy.linkextractors import LinkExtractor from funda.items import FundaItem class PropertyLinksSpider(CrawlSpider): name = "property_links" allowe

我正在尝试将“以下链接”示例改编为我自己的spider:

import scrapy
from scrapy.spiders import CrawlSpider, Rule
from scrapy.linkextractors import LinkExtractor
from funda.items import FundaItem

class PropertyLinksSpider(CrawlSpider):

    name = "property_links"
    allowed_domains = ["funda.nl"]

    def __init__(self, place='amsterdam', page='1'):
        self.start_urls = ["http://www.funda.nl/koop/%s/p%s/" % (place, page)]
        self.base_url = "http://www.funda.nl/koop/%s/" % place
        self.le1 = LinkExtractor(allow=r'%s+huis|appartement-\d{8}' % self.base_url)

    def parse(self, response):
        links = self.le1.extract_links(response)
        for link in links:
            if link.url.count('/') == 6 and link.url.endswith('/'):
                item = FundaItem()
                item['url'] = link.url
                yield scrapy.Request(link.url, callback=self.parse_dir_contents)

    def parse_dir_contents(self, response):
        item['title'] = response.xpath('//title').extract()
        yield item
但是,如果我尝试使用以下命令运行此命令

scrapy crawl property_links -a place=amsterdam -a page=1 -o property_links_test.json
我得到一个空的.json文件:

[

在这个爬行器的早期版本中,我使用了
parse
方法,只需
yield item
爬行器就会生成一个带有预期链接的.json文件。我还使用Scrapy shell检查了页面是否有标题。所以我不明白为什么我没有得到任何输出?

您没有将项目解析为第二个函数这段代码对我来说很好

import scrapy
from scrapy.spiders import CrawlSpider, Rule
from scrapy.linkextractors import LinkExtractor

class FundaItem(scrapy.Item):
    url = scrapy.Field()
    title = scrapy.Field()

class PropertyLinksSpider(CrawlSpider):

    name = "property_links"
    allowed_domains = ["funda.nl"]

    def __init__(self, place='amsterdam', page='1'):
        self.start_urls = ["http://www.funda.nl/koop/%s/p%s/" % (place, page)]
        self.base_url = "http://www.funda.nl/koop/%s/" % place
        self.le1 = LinkExtractor(allow=r'%s+huis|appartement-\d{8}' % self.base_url)

    def parse(self, response):
        links = self.le1.extract_links(response)
        for link in links:
            if link.url.count('/') == 6 and link.url.endswith('/'):
                item = FundaItem()
                item['url'] = link.url
                yield scrapy.Request(link.url, callback=self.parse_dir_contents, meta={'item': item})

    def parse_dir_contents(self, response):
        new_item = response.request.meta['item']
        new_item['title'] = response.xpath('//title').extract()
        yield new_item

您没有将项解析为第二个函数。这段代码对我来说很好

import scrapy
from scrapy.spiders import CrawlSpider, Rule
from scrapy.linkextractors import LinkExtractor

class FundaItem(scrapy.Item):
    url = scrapy.Field()
    title = scrapy.Field()

class PropertyLinksSpider(CrawlSpider):

    name = "property_links"
    allowed_domains = ["funda.nl"]

    def __init__(self, place='amsterdam', page='1'):
        self.start_urls = ["http://www.funda.nl/koop/%s/p%s/" % (place, page)]
        self.base_url = "http://www.funda.nl/koop/%s/" % place
        self.le1 = LinkExtractor(allow=r'%s+huis|appartement-\d{8}' % self.base_url)

    def parse(self, response):
        links = self.le1.extract_links(response)
        for link in links:
            if link.url.count('/') == 6 and link.url.endswith('/'):
                item = FundaItem()
                item['url'] = link.url
                yield scrapy.Request(link.url, callback=self.parse_dir_contents, meta={'item': item})

    def parse_dir_contents(self, response):
        new_item = response.request.meta['item']
        new_item['title'] = response.xpath('//title').extract()
        yield new_item