Python:使用 XmlItemExporter 时 XML 文件中没有输出
标签:python, xml, scrapy。我是 Python 的初学者,正在使用 Scrapy。我已经使用 XmlItemExporter 将我收集的数据导出到 XML 文件中,但生成的 XML 文件里没有任何条目输出。(更新)好的!我发现了问题:我所做的只是在 spider.py 的最后一条语句前加上一个 “return”,如下所示:
return WorkwithitemsItem(title = title[2:], link = link[2:],
publish = publish, description = description[1:]
)
评论:请贴出您的 settings.py 和 spider。
评论:@alecxe 我已经编辑了问题。
评论:看起来您的 parse() 方法没有返回 item。
评论:@alecxe 我现在在这里添加了 settings.py。我不明白为什么 parse() 方法没有返回 item……
from scrapy import log
from scrapy.spider import BaseSpider
from scrapy.selector import Selector
from workwithitems.items import WorkwithitemsItem
class MySpider(BaseSpider):
    """Spider that fetches the ekantipur.com RSS feed and emits one item.

    The single item carries the lists of titles, links, publication dates
    and descriptions extracted from the feed.
    """

    name = 'spidey'
    allowed_domains = ['ekantipur.com']
    start_urls = [
        'http://www.ekantipur.com/en/rss',
    ]

    def parse(self, response):
        """Extract the feed fields from ``response`` and return them as an item.

        Args:
            response: the downloaded response for one of ``start_urls``.

        Returns:
            A ``WorkwithitemsItem`` whose fields hold lists of extracted
            strings.
        """
        self.log('A response from %s just arrived!' % response.url)
        sel = Selector(response)
        title = sel.xpath('//title/text()').extract()
        link = sel.xpath('//link/text()').extract()
        publish = sel.xpath('//pubDate/text()').extract()
        description = sel.xpath('//description/text()').extract()
        # The first two <title>/<link> matches and the first <description>
        # match are dropped -- presumably the feed-level (non-article)
        # entries; confirm against the feed layout.
        #
        # The item MUST be returned: an item that is only constructed is
        # discarded, so nothing reaches the item pipeline and the exporter
        # writes an XML file with no items in it.
        return WorkwithitemsItem(
            title=title[2:], link=link[2:],
            publish=publish, description=description[1:]
        )
from scrapy import signals
from scrapy.contrib.exporter import XmlItemExporter
class XmlExportPipeline(object):
    """Item pipeline that writes every scraped item to an XML file.

    The output file is opened when a spider starts and closed when it
    finishes; open file handles are tracked per spider in ``self.files``.
    """

    def __init__(self):
        # Maps each running spider to the file object it is exporting into.
        self.files = {}

    @classmethod
    def from_crawler(cls, crawler):
        """Build the pipeline and hook it to the spider open/close signals.

        Args:
            crawler: the crawler whose ``signals`` object is used to
                register the callbacks.

        Returns:
            The newly created pipeline instance.
        """
        pipeline = cls()
        crawler.signals.connect(pipeline.spider_opened, signals.spider_opened)
        crawler.signals.connect(pipeline.spider_closed, signals.spider_closed)
        return pipeline

    def spider_opened(self, spider):
        """Open ``<spider.name>_products.xml`` and start an XML export."""
        # Opened in binary mode and handed straight to the exporter.
        xml_file = open('%s_products.xml' % spider.name, 'w+b')
        self.files[spider] = xml_file
        self.exporter = XmlItemExporter(xml_file)
        self.exporter.start_exporting()

    def spider_closed(self, spider):
        """Finish the export and close the file belonging to ``spider``."""
        try:
            self.exporter.finish_exporting()
        finally:
            # Always release the handle, even if finishing the export
            # fails, so the file is not leaked and no stale entry is left
            # behind in ``self.files``.
            self.files.pop(spider).close()

    def process_item(self, item, spider):
        """Export ``item`` and return it unchanged."""
        self.exporter.export_item(item)
        return item
# Project settings for the ``workwithitems`` Scrapy project.

BOT_NAME = 'workwithitems'

# Where spiders are looked up, and where newly generated spiders are placed.
SPIDER_MODULES = ['workwithitems.spiders']
NEWSPIDER_MODULE = 'workwithitems.spiders'

# Feed export format name -> exporter class path.
FEED_EXPORTERS_BASE = {
    'xml': 'scrapy.contrib.exporter.XmlItemExporter',
}

# Enabled item pipelines, mapped to the integer used to order them.
ITEM_PIPELINES = {
    'workwithitems.pipelines.XmlExportPipeline': 800,
}
return WorkwithitemsItem(title = title[2:], link = link[2:],
publish = publish, description = description[1:]
)