Postgresql Twisted(Scrapy)和 Postgres

Postgresql Twisted(Scrapy)和 Postgres,postgresql,twisted,psycopg2,scrapy-pipeline,Postgresql,Twisted,Psycopg2,Scrapy Pipeline,我使用Scrapy和Postgres作为数据库 运行一段时间之后,我的连接似乎被占满了,然后我的脚本就卡住了。我用这个查询检查过:select * from pg_stat_activity; 并读到这是因为Postgres没有连接池 我读过关于txpostgres和PGBouncer的文章,遗憾的是PGBouncer不是一个选项,我还能做些什么来避免这个问题 到目前为止,我使用以下方法: import psycopg2 from twisted.enterprise import adbapi import logging from datetime

我使用Scrapy和Postgres作为数据库

运行一段时间之后,我的连接似乎被占满了,然后我的脚本就卡住了。我用这个查询检查过:select * from pg_stat_activity; 并读到这是因为Postgres没有连接池

我读过关于txpostgres和PGBouncer的文章,遗憾的是Bouncer不是一个选项,我还能做些什么来避免这个问题

到目前为止,我使用以下方法:

import psycopg2
from twisted.enterprise import adbapi
import logging
from datetime import datetime
import scrapy
from scrapy.exceptions import DropItem


class PostgreSQLPipeline(object):
    """Scrapy item pipeline that writes items to PostgreSQL.

    Database work is handed to a ``twisted.enterprise.adbapi`` connection
    pool via ``runInteraction``, so ``process_item`` returns a Deferred
    instead of blocking on the query.
    """

    def __init__(self, dbpool):
        """Store the connection pool and set up a module-named logger.

        Args:
            dbpool: Pool object exposing ``runInteraction`` and ``close``
                (``from_settings`` passes an ``adbapi.ConnectionPool``).
        """
        self.logger = logging.getLogger(__name__)
        self.dbpool = dbpool

    @classmethod
    def from_settings(cls, settings):
        """Build the pipeline from the ``POSTGRESQL_*`` settings.

        Args:
            settings: Mapping providing ``POSTGRESQL_HOST``,
                ``POSTGRESQL_DATABASE``, ``POSTGRESQL_USER`` and
                ``POSTGRESQL_PASSWORD``.

        Returns:
            A pipeline instance backed by a psycopg2 connection pool.
        """
        dbargs = dict(
                host=settings['POSTGRESQL_HOST'],
                database=settings['POSTGRESQL_DATABASE'],
                user=settings['POSTGRESQL_USER'],
                password=settings['POSTGRESQL_PASSWORD'],
        )
        dbpool = adbapi.ConnectionPool('psycopg2', **dbargs)
        return cls(dbpool)

    def close_spider(self, spider):
        """Close the connection pool once the spider is done.

        Scrapy invokes this hook on pipelines that define it. Without it
        the pooled connections are only released when the process shuts
        down, which leaves idle sessions open on the server.
        """
        self.dbpool.close()

    def process_item(self, item, spider):
        """Schedule the insert/update for ``item`` on the pool.

        Returns:
            The Deferred from ``runInteraction``; it always fires with
            ``item`` (database failures are logged, not propagated), so
            later pipeline stages still receive the item.
        """
        d = self.dbpool.runInteraction(self._insert_item, item, spider)
        d.addErrback(self._handle_error, item, spider)
        # Whatever happened above, pass the item on to the next stage.
        d.addBoth(lambda _: item)

        return d

    def _insert_item(self, txn, item, spider):
        """Insert the item, or refresh it if its ``expose_id`` already exists.

        Runs inside ``runInteraction``; ``txn`` is the cursor-like object
        the pool passes in.

        Args:
            txn: Object providing ``execute`` and ``fetchone``.
            item: Mapping with ``expose_id``; for new rows also ``title``
                and ``images`` (an iterable of mappings with a ``path``).
            spider: Unused; kept for the callback signature.
        """
        # Naive UTC timestamp with second resolution, e.g. "2016-01-31 12:00:00".
        now = datetime.utcnow().replace(microsecond=0).isoformat(' ')

        txn.execute(
                """
                  SELECT EXISTS(
                    SELECT 1
                    FROM expose
                    WHERE expose_id = %s
                  )
                """, (
                    item['expose_id'],
                )
        )
        ret = txn.fetchone()[0]

        if ret:
            self.logger.info("Item already in db: %r", item)

            # Known expose: only bump last_seen and mark it online again.
            txn.execute(
                    """
                        UPDATE expose
                        SET last_seen=%s, offline=0
                        WHERE expose_id=%s
                    """, (
                        now,
                        item['expose_id']
                    )
            )
        else:
            self.logger.info("Item stored in db: %r", item)
            txn.execute("""
                          INSERT INTO expose (
                            expose_id,
                            title
                          ) VALUES (%s, %s)
                        """, (
                            item['expose_id'],
                            item['title']
                        )
            )

            # Write image info (path, original url, ...) to db, CONSTRAIN to expose.expose_id
            for image in item['images']:
                txn.execute(
                        """
                          INSERT INTO image (
                            expose_id,
                            name
                          ) VALUES (%s, %s)
                        """, (
                            item['expose_id'],
                            # Store the file name without the 'full/' directory part.
                            image['path'].replace('full/', '')
                        )
                )

    def _handle_error(self, failure, item, spider):
        """Log a failed database interaction and swallow the failure.

        Returning ``None`` marks the failure as handled, so the Deferred
        chain in ``process_item`` continues normally.

        Args:
            failure: Failure-like object providing ``getTraceback()``.
            item: The item whose interaction failed.
            spider: Unused; kept for the errback signature.
        """
        # getTraceback() returns the traceback as text; printTraceback()
        # writes to stderr and returns None, which must not be used as a
        # logging argument.
        self.logger.error(
            "Database interaction failed for item %r:\n%s",
            item,
            failure.getTraceback(),
        )