Python: loading a Heroku PG database with data scraped by a Scrapy spider

Tags: python, postgresql, heroku, scrapy, heroku-postgres

I am new to Heroku PG. What I have done here is write a Scrapy crawler that runs without any errors. The problem is that I want to get all the scraped data into my Heroku Postgres database; to do that I more or less just followed along.

When I run the crawler on my local machine with

scrapy crawl spidername

it runs successfully, but it neither inserts the scraped data nor creates any table in the Heroku database. I don't even get an error in the local terminal. This is my code.

settings.py

BOT_NAME = 'crawlerconnectdatabase'

SPIDER_MODULES = ['crawlerconnectdatabase.spiders']
NEWSPIDER_MODULE = 'crawlerconnectdatabase.spiders'

DATABASE = {'drivername': 'postgres',
        'host': 'ec2-54-235-250-41.compute-1.amazonaws.com',
        'port': '5432',
        'username': 'dtxwjcycsaweyu',
        'password': '***',
        'database': 'ddcir2p1u2vk07'}
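A side note for reference, not part of the question: models.py below feeds this DATABASE dict into SQLAlchemy's URL helper. A minimal sketch of what it resolves to, assuming an older SQLAlchemy where URL() is directly callable; on 1.4+ you would call URL.create(**DATABASE) instead, and 1.4+ also expects the drivername 'postgresql' rather than 'postgres':

from sqlalchemy.engine.url import URL

# Hypothetical check, not part of the question's code: print the connection
# URL that models.db_connect() builds from the DATABASE dict above.
print(str(URL(**DATABASE)))
# -> postgres://dtxwjcycsaweyu:***@ec2-54-235-250-41.compute-1.amazonaws.com:5432/ddcir2p1u2vk07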
items.py

from scrapy.item import Item, Field

class CrawlerconnectdatabaseItem(Item):
    name = Field()
    url = Field()
    title = Field()
    link = Field()
    page_title = Field()
    desc_link = Field()
    body = Field()
    news_headline = Field()
models.py

from sqlalchemy import create_engine, Column, Integer, String
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.engine.url import URL
import settings

DeclarativeBase = declarative_base()


def db_connect():

    return create_engine(URL(**settings.DATABASE))


def create_deals_table(engine):

    DeclarativeBase.metadata.create_all(engine)


class Deals(DeclarativeBase):
    """Sqlalchemy deals model"""
    __tablename__ = "news_data"

    id = Column(Integer, primary_key=True)
    body = Column('body', String)
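As a quick sanity check outside Scrapy (a sketch, not part of the question's code), you can run db_connect() and create_deals_table() from a Python shell in the project directory; if the credentials in settings.DATABASE are valid, the news_data table should appear:

# Hypothetical standalone check; run it from the Scrapy project directory so
# that `settings` and `models` are importable as top-level modules.
from sqlalchemy import inspect
from models import db_connect, create_deals_table

engine = db_connect()                      # engine built from settings.DATABASE
create_deals_table(engine)                 # CREATE TABLE news_data if it does not exist
print(inspect(engine).get_table_names())   # expect 'news_data' in the output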
pipelines.py

from sqlalchemy.orm import sessionmaker
from models import Deals, db_connect, create_deals_table

class CrawlerconnectdatabasePipeline(object):

    def __init__(self):
        engine = db_connect()
        create_deals_table(engine)
        self.Session = sessionmaker(bind=engine)

    def process_item(self, item, spider):
        session = self.Session()
        deal = Deals(**item)

        try:
            session.add(deal)
            session.commit()
        except:
            session.rollback()
            raise
        finally:
            session.close()

        return item
The spider


You will find the code for the Scrapy spider here.

You need to add ITEM_PIPELINES = {'crawlerconnectdatabase.pipelines.CrawlerconnectdatabasePipeline': 300,} to your settings.py.
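For completeness, a minimal sketch of that addition (the module path mirrors the project layout shown above; 300 is just the pipeline's order value, 0-1000):

# settings.py -- register the pipeline; without this entry Scrapy never calls
# CrawlerconnectdatabasePipeline.process_item, so nothing reaches Postgres.
ITEM_PIPELINES = {
    'crawlerconnectdatabase.pipelines.CrawlerconnectdatabasePipeline': 300,
}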