Scrapy 输出 CSV 时的问题

Scrapy csv输出的刮擦问题,scrapy,Scrapy,这是我的蜘蛛: from scrapy.contrib.spiders import CrawlSpider,Rule from scrapy.contrib.linkextractors.sgml import SgmlLinkExtractor from scrapy.selector import HtmlXPathSelector from vrisko.items import VriskoItem from scrapy.http import

这是我的蜘蛛:

    from scrapy.contrib.spiders import CrawlSpider,Rule
    from scrapy.contrib.linkextractors.sgml import SgmlLinkExtractor
    from scrapy.selector import HtmlXPathSelector
    from vrisko.items import VriskoItem
    from scrapy.http import Request

    class vriskoSpider(CrawlSpider):
        """Crawl spider that collects business listings from vrisko.gr.

        Starting from a single search-results URL, it builds one VriskoItem
        per listing (name and address taken from the results page) and then
        requests each listing's detail page to fill in the category.
        """
        name = 'vrisko'
        allowed_domains = ['vrisko.gr']
        start_urls = ['http://www.vrisko.gr/search/%CE%B3%CE%B9%CE%B1%CF%84%CF%81%CE%BF%CF%82/%CE%BA%CE%BF%CF%81%CE%B4%CE%B5%CE%BB%CE%B9%CE%BF']
        # Links whose URL matches "?page=<digit>" are followed, and
        # 'parse_start_url' is named as the handler for those pages.
        rules = (Rule(SgmlLinkExtractor(allow=('\?page=\d')),'parse_start_url',follow=True),)

        def parse_start_url(self, response):
         """Yield one detail-page Request per listing found on a results page.

         Each request carries a partially filled VriskoItem in its ``meta``
         dict under the key 'vriskoit'; ``subPage`` completes it.
         """
         hxs = HtmlXPathSelector(response)
         # hrefs of the per-listing detail links
         subpages = hxs.select('//a[@class="detailsHyper_class"]/@href').extract()

         # Parallel lists of listing names and addresses on this page.
         ep = hxs.select('//a[@itemprop="name"]/text()').extract()
         ad = hxs.select('//div[@class="results_address_class"]/text()').extract()
         # zip() pairs the three lists element by element, so each item gets
         # a single name string and a single address string (not lists).
         for eponimia,address,subpage in zip(ep,ad,subpages):
          vriskoit = VriskoItem()
          vriskoit['eponimia'] = eponimia
          vriskoit['address'] = address 
          request = Request(subpage,callback = self.subPage)
          # Hand the half-built item to the detail-page callback.
          request.meta['vriskoit'] = vriskoit
          yield request

        def subPage(self,response):
         """Complete the item passed via ``response.meta`` and yield it.

         Sets 'category' to the result of ``extract()`` on the category
         spans, i.e. a list of strings rather than a single string.
         """
         vriskoit = response.meta['vriskoit']
         hxs = HtmlXPathSelector(response)
         vriskoit['category'] = hxs.select('//div[@class="category_class"]/span/text()').extract()
         yield vriskoit
这是我的 pipeline:

    import csv

    # Item pipeline that writes scraped items to 'brandCategoryTable.csv'.
    class myExporter(object):

    # NOTE(review): in this paste the two methods below sit at the same
    # indentation as the class statement; presumably they are meant to be
    # nested inside the class -- confirm against the real file.
    # Opens the CSV file for writing and emits the header row.
    def __init__(self):
        self.brandCategoryCsv = csv.writer(open('brandCategoryTable.csv', 'wb'))
        self.brandCategoryCsv.writerow(['eponimia', 'address','category'])

    # Writes the item's fields to the CSV file and returns the item.
    def process_item(self, item, spider):
            # NOTE: vriskoSpider.parse_start_url stores a single string in
            # item['eponimia'] and item['address'], so zip() here walks those
            # strings character by character. The return below is inside the
            # loop body, so the method exits after writing only the first
            # (e, a, c) triple -- i.e. the first character of each string.
            for e,a,c in zip(item['eponimia'],item['address'],item['category']):          
                 self.brandCategoryCsv.writerow([e.encode('utf-8'), a.encode('utf-8'), c.encode('utf-8')])
                 return item
我的问题是,对于前两个字段(eponimia,address),只有第一个字符写入输出csv文件,我找不到原因


如果有任何帮助,我将不胜感激。

从 `myExporter.process_item` 中删除 `zip` 函数:

    def process_item(self, item, spider):
        self.brandCategoryCsv.writerow([item['eponimia'].encode('utf-8'),
                                        item['address'].encode('utf-8'),
                                        item['category'].encode('utf-8')])
        return item
您已经在 `vriskoSpider.parse_start_url` 中把项目列表拆分成了单个项目,因此每个字段都是一个字符串,而不是列表。

对字符串使用 `zip` 时,它会逐个字符地进行迭代:

    In [1]: a = 'test1'
    In [2]: b = 'test2'
    In [3]: for x, y in zip(a, b):
       ...:     print x, y
       ...:
    t t
    e e
    s s
    t t
    1 2

谢谢你的回答。当我这样做时,我得到一个错误:“too many values to unpack”(要解包的值太多)。

@mindcast,我之前粘贴了错误的代码片段,请尝试答案中更新后的代码。

嗯,我不能使用 encode():“'list' object has no attribute 'encode'”('list' 对象没有属性 'encode')。

@mindcast,看起来 `item['category']` 是一个 list。请尝试:

    vriskoit['category'] = hxs.select('//div[@class="category_class"]/span/text()').extract()[0]