
Python Scrapy MySQL pipeline error

I'm using Scrapy and I'm trying to save the data collected by my spider to a MySQL database. I'm using a pipeline for this, but with no luck. Here is my pipeline code:

from scrapy import log
from scrapy.core.exceptions import DropItem
from twisted.enterprise import adbapi

import time
import MySQLdb.cursors


class FilterWordsPipeline(object):
    """A pipeline for filtering out items which contain certain words in their
    description"""

    # put all words in lowercase
    words_to_filter = ['politics', 'religion']

    def process_item(self, spider, item):
        print spider
        for word in self.words_to_filter:
            if word in unicode(item['description']).lower():
                raise DropItem("Contains forbidden word: %s" % word)
        else:
            return item


class MySQLStorePipeline(object):

    def __init__(self):
        # @@@ hardcoded db settings
        # TODO: make settings configurable through settings
        self.dbpool = adbapi.ConnectionPool('adress_to_db',
                db='my_db',
                user='my_user',
                passwd='my_pw',
                cursorclass=MySQLdb.cursors.DictCursor,
                charset='utf8',
                use_unicode=True
            )

    def process_item(self, spider, item):
        # run db query in thread pool
        query = self.dbpool.runInteraction(self._conditional_insert, item)
        query.addErrback(self.handle_error)

        return item

    def _conditional_insert(self, tx, item):
        # create record if doesn't exist.
        # all this block run on it's own thread
        tx.execute("select * from scrapytest where link = %s", (item['link'][0], ))
        result = tx.fetchone()
        if result:
            log.msg("Item already stored in db: %s" % item, level=log.DEBUG)
        else:
            tx.execute(\
                "insert into scrapytest (title, link, desc) "
                "values (%s, %s, %s)",
                (item['title'][0],
                 item['link'][0],
                 item['desc'][0]
            )
            log.msg("Item stored in db: %s" % item, level=log.DEBUG)

    def handle_error(self, e):
        log.err(e)
Here is the error message I get:

SyntaxError: invalid syntax
PS C:\Python27\testscrapy\tutorial> scrapy crawl dmoz
2012-05-03 16:03:11+0200 [scrapy] INFO: Scrapy 0.14.3 started (bot: tutorial)
2012-05-03 16:03:12+0200 [scrapy] DEBUG: Enabled extensions: LogStats, TelnetConsole, CloseSpider, WebService, CoreStats, SpiderState
2012-05-03 16:03:12+0200 [scrapy] DEBUG: Enabled downloader middlewares: HttpAuthMiddleware, DownloadTimeoutMiddleware, UserAgentMiddleware, RetryMiddleware, DefaultHeadersMiddleware, RedirectMiddleware, CookiesMiddleware, HttpCompressionMiddleware, ChunkedTransferMiddleware, DownloaderStats
2012-05-03 16:03:12+0200 [scrapy] DEBUG: Enabled spider middlewares: HttpErrorMiddleware, OffsiteMiddleware, RefererMiddleware, UrlLengthMiddleware, DepthMiddleware
Traceback (most recent call last):
  File "C:\Python27\Scripts\scrapy", line 5, in <module>
    pkg_resources.run_script('Scrapy==0.14.3', 'scrapy')
  File "C:\Python27\lib\site-packages\pkg_resources.py", line 489, in run_script
    self.require(requires)[0].run_script(script_name, ns)
  File "C:\Python27\lib\site-packages\pkg_resources.py", line 1207, in run_script
    execfile(script_filename, namespace, namespace)
  File "c:\python27\lib\site-packages\scrapy-0.14.3-py2.7-win32.egg\EGG-INFO\scripts\scrapy", line 4, in <module>
    execute()
  File "C:\Python27\lib\site-packages\scrapy-0.14.3-py2.7-win32.egg\scrapy\cmdline.py", line 132, in execute
    _run_print_help(parser, _run_command, cmd, args, opts)
  File "C:\Python27\lib\site-packages\scrapy-0.14.3-py2.7-win32.egg\scrapy\cmdline.py", line 97, in _run_print_help
    func(*a, **kw)
  File "C:\Python27\lib\site-packages\scrapy-0.14.3-py2.7-win32.egg\scrapy\cmdline.py", line 139, in _run_command
    cmd.run(args, opts)
  File "C:\Python27\lib\site-packages\scrapy-0.14.3-py2.7-win32.egg\scrapy\commands\crawl.py", line 43, in run
    spider = self.crawler.spiders.create(spname, **opts.spargs)
  File "C:\Python27\lib\site-packages\scrapy-0.14.3-py2.7-win32.egg\scrapy\command.py", line 34, in crawler
    self._crawler.configure()
  File "C:\Python27\lib\site-packages\scrapy-0.14.3-py2.7-win32.egg\scrapy\crawler.py", line 37, in configure
    self.engine = ExecutionEngine(self, self._spider_closed)
  File "C:\Python27\lib\site-packages\scrapy-0.14.3-py2.7-win32.egg\scrapy\core\engine.py", line 62, in __init__
    self.scraper = Scraper(crawler)
  File "C:\Python27\lib\site-packages\scrapy-0.14.3-py2.7-win32.egg\scrapy\core\scraper.py", line 68, in __init__
    self.itemproc = itemproc_cls.from_crawler(crawler)
  File "C:\Python27\lib\site-packages\scrapy-0.14.3-py2.7-win32.egg\scrapy\middleware.py", line 48, in from_crawler
    return cls.from_settings(crawler.settings, crawler)
  File "C:\Python27\lib\site-packages\scrapy-0.14.3-py2.7-win32.egg\scrapy\middleware.py", line 29, in from_settings
    mwcls = load_object(clspath)
  File "C:\Python27\lib\site-packages\scrapy-0.14.3-py2.7-win32.egg\scrapy\utils\misc.py", line 37, in load_object
    mod = __import__(module, {}, {}, [''])
  File "C:\Python27\testscrapy\tutorial\tutorial\pipelines.py", line 64
    log.msg("Item stored in db: %s" % item, level=log.DEBUG)
      ^
SyntaxError: invalid syntax

I have no idea where to start, so any help would be greatly appreciated.

tx.execute(
    "insert into scrapytest (title, link, desc) "
    "values (%s, %s, %s)",
    (item['title'][0],
     item['link'][0],
     item['desc'][0])
)

The closing parenthesis after item['desc'][0] is needed ^^
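
For context, a minimal sketch of the whole corrected _conditional_insert method, assuming the same scrapytest table as in the question (the backticks around desc are an extra assumption on my part, since desc is a reserved word in MySQL):

    def _conditional_insert(self, tx, item):
        # insert the record only if no row with this link exists yet;
        # this whole block runs in adbapi's thread pool
        tx.execute("select * from scrapytest where link = %s", (item['link'][0],))
        result = tx.fetchone()
        if result:
            log.msg("Item already stored in db: %s" % item, level=log.DEBUG)
        else:
            tx.execute(
                "insert into scrapytest (title, link, `desc`) "  # `desc` backtick-quoted: MySQL reserved word
                "values (%s, %s, %s)",
                (item['title'][0],
                 item['link'][0],
                 item['desc'][0]))  # <- the closing parenthesis that was missing
            log.msg("Item stored in db: %s" % item, level=log.DEBUG)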


A good starting point is usually the line the error points to, or the one just before it.

Thanks for the tip! That didn't solve the problem for me, though. Now I get the following error: File "C:\Python27\testscrapy\tutorial\tutorial\pipelines.py", line 64: message = "Item already stored in db: %s" % (item) ^ SyntaxError: invalid syntax

@user1009453 I was looking at the wrong log.msg since there was no line number, sorry! You are missing a parenthesis.

You are completely right, I was missing a parenthesis. Now I get the following error: "ImportError: Error loading object 'tutorial.pipelines.MySQLStorePipeline': No module named exceptions". I have checked, and the pipeline has the same name in settings.py. Does this mean I need to import a module named exceptions? Thanks for helping me out here! Do you know what this might be? @dm03514
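
For what it's worth, that ImportError is most likely triggered by the first import in pipelines.py rather than by settings.py: in Scrapy 0.14 the scrapy.core.exceptions module no longer exists, and DropItem is imported from scrapy.exceptions instead. A sketch of the likely fix:

    # DropItem moved from scrapy.core.exceptions to scrapy.exceptions;
    # the old path fails in Scrapy 0.14 with "No module named exceptions"
    from scrapy.exceptions import DropItem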