Python: Querying binary data with SQLAlchemy and PostgreSQL


I have a simple database that stores attachments as blobs:

CREATE TABLE public.attachment
(
  id integer NOT NULL,
  attachdata oid,
  CONSTRAINT attachment_pkey PRIMARY KEY (id)
)

-- Import a file
INSERT INTO attachment (id, attachdata) VALUES (1, lo_import('C:\\temp\blob_import.txt'))

-- Export back to a file.
SELECT lo_export(attachdata, 'C:\temp\blob_export_postgres.txt') FROM attachment WHERE id = 1

I can read this file back directly with psycopg2:

from psycopg2 import connect

con = connect(dbname="blobtest", user="postgres", password="postgres", host="localhost")
cur = con.cursor()
cur.execute("SELECT attachdata FROM attachment WHERE id = 1")
oid = cur.fetchone()[0]
obj = con.lobject(oid)
obj.export('C:\\temp\\blob_export_psycopg.txt')

When I try the same with SQLAlchemy, attachdata is a bytestring of zeroes. I have tested the following code with types such as BLOB, LargeBinary and BINARY. The size of the attachdata bytestring seems to be the OID value.

from sqlalchemy import create_engine
from sqlalchemy import Column, Integer, Binary
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker

Base = declarative_base()
Session = sessionmaker()

engine = create_engine('postgresql://postgres:postgres@localhost:5432/blobtest', echo=True)
Base.metadata.create_all(engine)
Session.configure(bind=engine)

class Attachment(Base):
    __tablename__ = "attachment"
    id = Column(Integer, primary_key=True)
    attachdata = Column(Binary)

session = Session()
attachment = session.query(Attachment).get(1)
with open('C:\\temp\\blob_export_sqlalchemy.txt', 'wb') as f:
    f.write(attachment.attachdata)
I have searched the SQLAlchemy documentation and various other sources, but could not find a solution for how to export binary data with SQLAlchemy.

I had the same problem. There seems to be no way to fetch large object data via the ORM, so I combined the ORM and the psycopg2 engine like this:

from sqlalchemy import create_engine
from sqlalchemy import Column, Integer
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker, scoped_session
from sqlalchemy.dialects.postgresql import OID

Base = declarative_base()
session_factory = sessionmaker()

engine = create_engine('postgresql+psycopg2://postgres:postgres@localhost:5432/postgres', echo=True)
Base.metadata.create_all(engine)
session_factory.configure(bind=engine)
Session = scoped_session(session_factory)


class Attachment(Base):
    __tablename__ = "attachment"
    id = Column(Integer, primary_key=True)
    oid = Column(OID)

    @classmethod
    def insert_file(cls, filename):
        conn = engine.raw_connection()
        l_obj = conn.lobject(0, 'wb', 0)  # oid=0 creates a new large object; the server assigns its OID
        with open(filename, 'rb') as f:
            l_obj.write(f.read())
        conn.commit()
        conn.close()
        session = Session()
        attachment = cls(oid=l_obj.oid)
        session.add(attachment)
        session.commit()
        return attachment.id

    @classmethod
    def get_file(cls, attachment_id, filename):
        session = Session()
        attachment = session.query(Attachment).get(attachment_id)
        conn = engine.raw_connection()
        l_obj = conn.lobject(attachment.oid, 'rb')
        with open(filename, 'wb') as f:
            f.write(l_obj.read())
        conn.close()


if __name__ == '__main__':
    my_id = Attachment.insert_file(r'C:\path\to\file')
    Attachment.get_file(my_id, r'C:\path\to\file_out')
Not very elegant, but it seems to work.
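
One caveat with this approach: deleting an attachment row does not remove the large object it points to, which stays orphaned in pg_largeobject. A minimal cleanup sketch in the same style (this delete_file helper is an addition for illustration, not part of the original answer; it could sit next to insert_file and get_file on the Attachment class):

    @classmethod
    def delete_file(cls, attachment_id):
        # Unlink the large object first, then delete the row referencing it.
        session = Session()
        attachment = session.query(Attachment).get(attachment_id)
        conn = engine.raw_connection()
        l_obj = conn.lobject(attachment.oid, 'n')  # 'n' mode: reference without opening
        l_obj.unlink()                             # remove it from pg_largeobject
        conn.commit()
        conn.close()
        session.delete(attachment)
        session.commit()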

Update:

I am now using events:

from sqlalchemy import create_engine, event
from sqlalchemy import Column, Integer
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker, scoped_session, object_session
from sqlalchemy.dialects.postgresql import OID

Base = declarative_base()
session_factory = sessionmaker()

engine = create_engine('postgresql+psycopg2://postgres:postgres@localhost:5432/postgres', echo=True)
Base.metadata.create_all(engine)
session_factory.configure(bind=engine)
Session = scoped_session(session_factory)

class Data(Base):
    __tablename__ = "attachment"
    id = Column(Integer, primary_key=True)
    oid = Column(OID)


@event.listens_for(Data, 'after_delete')
def remove_large_object_after_delete(_, connection, target):
    raw_connection = connection.connection
    l_obj = raw_connection.lobject(target.oid, 'n')  # 'n' mode: get a handle without opening the object
    l_obj.unlink()
    raw_connection.commit()


@event.listens_for(Data, 'before_insert')
def add_large_object_before_insert(_, connection, target):
    raw_connection = connection.connection
    l_obj = raw_connection.lobject(0, 'wb', 0)
    target.oid = l_obj.oid
    l_obj.write(target.ldata)
    raw_connection.commit()


@event.listens_for(Data, 'load')
def inject_large_object_after_load(target, _):
    session = object_session(target)
    conn = session.get_bind().raw_connection()
    l_obj = conn.lobject(target.oid, 'rb')
    target.ldata = l_obj.read()

if __name__ == '__main__':
    session = Session()

    # Put
    data = Data()
    data.ldata = b'your large data'  # the lobject is opened in binary mode, so this must be bytes
    session.add(data)
    session.commit()

    id = data.id

    # Get
    data2 = session.query(Data).get(id)  # returns the same identity-mapped instance as data
    print(data2.ldata)  # your large data is here

    # Delete
    session.delete(data2)
    session.commit()

    session.close()
So far this works well.
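
Note that both variants read the whole large object into memory at once (the load listener above does so on every query that returns a Data row). For files too big for that, psycopg2's lobject.read() accepts a size argument, so the object can be streamed to disk in chunks; a hypothetical streaming variant of the earlier get_file:

    @classmethod
    def get_file_chunked(cls, attachment_id, filename, chunk_size=1024 * 1024):
        # Stream the large object to disk in 1 MiB pieces (an arbitrary size)
        # instead of reading it into memory in one call.
        session = Session()
        attachment = session.query(Attachment).get(attachment_id)
        conn = engine.raw_connection()
        l_obj = conn.lobject(attachment.oid, 'rb')
        with open(filename, 'wb') as f:
            while True:
                chunk = l_obj.read(chunk_size)
                if not chunk:
                    break
                f.write(chunk)
        conn.close()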


I don't understand why Postgres large objects are so neglected these days. I use them all the time. Or rather, I would like to, but it is challenging, especially with asyncio...

Hey, great. Your answer helped me decide to use PSQL large objects in my project. It is a simple database that stores registration information from users, and it may need to store some arbitrary file uploads associated with a user. I am using FastAPI and asyncio. Do you have any particular remarks regarding asyncio and large files?
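
On the asyncio point, one possible workaround, sketched under the assumption that asyncpg is the driver (asyncpg is not used anywhere in this thread): async drivers do not expose psycopg2's client-side lobject API, but PostgreSQL also ships server-side functions (lo_from_bytea(), lo_get(), lo_unlink()) that are callable from plain SQL, so large objects stay usable from async code:

import asyncio
import asyncpg  # assumption: asyncpg as the async driver

async def main():
    conn = await asyncpg.connect('postgresql://postgres:postgres@localhost:5432/postgres')

    # Create a large object from bytes; OID 0 lets the server assign one.
    oid = await conn.fetchval("SELECT lo_from_bytea(0, $1)", b'your large data')

    # Read the whole large object back as bytea.
    data = await conn.fetchval("SELECT lo_get($1)", oid)
    print(data)

    # Remove the large object again.
    await conn.execute("SELECT lo_unlink($1)", oid)
    await conn.close()

asyncio.run(main())

The same in-memory caveat applies: lo_get(oid) returns the entire object; lo_get also takes offset and length arguments if you need to read it piecewise.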