Database 在python中用于创建数据库表比较工具的包

Database 在python中用于创建数据库表比较工具的包,database,python-3.x,tuples,comparison,Database,Python 3.x,Tuples,Comparison,我的任务是开发一个工具,该工具将接受一些参数,然后根据表列表查询2个数据库 有3种可能的数据库选项,一种是连接到Netezza,一种是连接到Oracle,另一种是连接到DB2大型机。理论上,他们会向我传递连接类型、主机名、端口、数据库名、用户名和密码 查询将从列表中获取一个表,查询两个数据库,并跨2个数据库比较表中的数据 对于到Netezza的连接,我使用pyodbc;对于到Oracle的连接,我使用cx_Oracle;对于到DB2的连接,我使用ibm_db 此时,我能够连接到每个数据库,并且能

我的任务是开发一个工具,该工具将接受一些参数,然后根据表列表查询2个数据库

有3种可能的数据库选项,一种是连接到Netezza,一种是连接到Oracle,另一种是连接到DB2大型机。理论上,他们会向我传递连接类型、主机名、端口、数据库名、用户名和密码

查询将从列表中获取一个表,查询两个数据库,并跨2个数据库比较表中的数据

对于到Netezza的连接,我使用pyodbc;对于到Oracle的连接,我使用cx_Oracle;对于到DB2的连接,我使用ibm_db

此时,我能够连接到每个数据库,并且能够返回每个数据库中表的列元数据以及每个数据库中的结果集

有几件事我正在努力完成

如果该列属于某些数据类型(例如 decimal、integer),我希望对表中该列的所有值求和(SUM);如果该列属于其他任何数据类型(例如 string、date),我希望对该列进行计数(COUNT)

我希望对两个DBs中的表执行此操作,然后对列计数/总计进行比较,并在excel中显示比较结果

最后,我想对两个DBs中表中的每一行进行逐列比较。如果每行的字段值存在任何差异,则整行将显示在excel电子表格中

我想知道的是,Python 中是否有现成的包可以用来执行这类针对表的操作(按列汇总、逐行比较)

请参阅下面的代码,了解我到目前为止的情况

import pyodbc
import ibm_db
import cx_Oracle
import collections

class DatabaseConnection(object):
    """Context manager that opens a single Netezza, Oracle or DB2 connection.

    Usage::

        with DatabaseConnection('Oracle', host, port, sid, user, pwd) as conn:
            cursor = conn.cursor()

    ``connection_type`` selects the driver: ``"Netezza"`` (pyodbc),
    ``"Oracle"`` (cx_Oracle) or ``"DB2"`` (ibm_db). The object returned by
    ``__enter__`` is the raw driver connection, not this wrapper.
    """

    def __init__(self, connection_type, hostname_or_ip, port, database_or_sid, username, password):
        """Store the connection parameters; no connection is opened here.

        ``port`` may be an int or a string; it is converted with ``str()``
        wherever it is embedded in a connection string.
        """
        self.port = port
        self.connection_type = connection_type
        self.hostname_or_ip = hostname_or_ip
        self.database_or_sid = database_or_sid
        self.username = username
        self.password = password
        # Hard-coded DSN used by the DB2 branch of __enter__ and by __repr__.
        self.dsn = "GEMPROD"
        self.connection_string = ""
        # Placeholder until __enter__ assigns the real driver connection.
        self.conn = ""

    def __enter__(self):
        """Open the connection for ``connection_type`` and return it.

        Raises:
            ValueError: if ``connection_type`` is not one of the supported
                values, instead of silently handing ``None`` to the caller.
        """
        if self.connection_type == "Netezza":
            self.connection_string = (
                "DRIVER={NetezzaSQL};SERVER=" + self.hostname_or_ip
                + ";PORT=" + str(self.port)
                + ";DATABASE=" + self.database_or_sid
                + ";UID=" + self.username
                + ";PWD=" + self.password
            )
            self.conn = pyodbc.connect(self.connection_string)
        elif self.connection_type == "Oracle":
            dsn_tns = cx_Oracle.makedsn(self.hostname_or_ip, self.port, self.database_or_sid)
            self.conn = cx_Oracle.connect(user=self.username, password=self.password, dsn=dsn_tns)
        elif self.connection_type == "DB2":
            self.connection_string = (
                "Database=" + self.database_or_sid
                + ";HOSTNAME=" + self.hostname_or_ip
                + ";PORT=" + str(self.port)
                + ";PROTOCOL=TCPIP;UID=" + self.username
                + ";PWD=" + self.password + ";"
            )
            # NOTE: the full connection string above is recorded on the
            # instance, but the connection itself is made through the
            # catalogued DSN in self.dsn.
            self.conn = ibm_db.connect('DSN=' + self.dsn, self.username, self.password)
        else:
            raise ValueError(
                "Unsupported connection_type %r; expected 'Netezza', 'Oracle' or 'DB2'"
                % (self.connection_type,)
            )
        return self.conn

    def __exit__(self, type, value, traceback):
        """Close the connection opened by ``__enter__``.

        Returns ``None``, so an exception raised inside the ``with`` body is
        not suppressed.
        """
        if self.connection_type == "DB2":
            # ibm_db exposes close as a module-level function.
            ibm_db.close(self.conn)
        elif self.connection_type in ("Netezza", "Oracle"):
            # close() must actually be called: a bare attribute reference
            # leaves the connection open.
            self.conn.close()

    def __repr__(self):
        """Return the class name immediately followed by the DSN."""
        return '%s%s' % (self.__class__.__name__, self.dsn)

    def query(self, query, params):
        """Placeholder for running a query; not implemented yet."""
        pass


#database_column_metadata = collections.namedtuple('DatabaseColumnMetadata','index column_name data_type')
#database_field = collections.namedtuple('', '')

# Tables to inspect in each database. NOTE(review): `table2`, `ip`, `port`,
# `database`, `username` and `pwd` are not defined in this listing and must be
# supplied before the script can run.
table_list = ['BNR_CIF_25DAY_RPT', table2]
sort_column = None
with DatabaseConnection('Netezza', ip, port, database, username, pwd) as connection_one:
    print('Netezza Query:')
    for table in table_list:
        cursor = connection_one.cursor()
        try:
            # Fetch a single row just to obtain the column metadata.
            netezza_rows = cursor.execute("SELECT * FROM {} LIMIT 1".format(table))
            column_list = netezza_rows.description
            # Order by the first column so the sample rows are deterministic.
            sort_column = str(column_list[0][0])
            netezza_query = "SELECT * FROM {} ORDER BY {} ASC LIMIT 10".format(table, sort_column)
            netezza_rows = cursor.execute(netezza_query)
            print(column_list)
            # Keep (position, name, type) from each description entry; the
            # first two items are the column name and type code.
            netezza_column_list = [
                (idx, column[0], column[1]) for idx, column in enumerate(column_list)
            ]
            for row in netezza_rows:
                print(row, end='\n')
            for tup in netezza_column_list:
                print(tup, end='\n')
            print('Netezza row count:', str(netezza_rows.rowcount) + '\n')
        finally:
            # Release the cursor even when a query fails.
            cursor.close()

# NOTE(review): `hostname`, `port`, `SID`, `username` and `pwd` are not defined
# in this listing and must be supplied before the script can run.
with DatabaseConnection('Oracle', hostname, port, SID, username, pwd) as connection_two:
    print('Oracle Query:')
    for table in table_list:
        # Reset per iteration so `finally` never closes a stale cursor left
        # over from an earlier iteration or block.
        cursor = None
        try:
            cursor = connection_two.cursor()
            # Fetch a single row just to obtain the column metadata.
            oracle_rows = cursor.execute(
                "SELECT * FROM {} WHERE ROWNUM <= 1".format(table)
            )
            column_list = oracle_rows.description
            # Order by the first column so the sample rows are deterministic.
            sort_column = column_list[0][0]
            # ROWNUM is applied outside the ordered subquery so the limit is
            # taken after sorting.
            oracle_query = (
                "SELECT * FROM (SELECT * FROM {} ORDER BY {} ASC) WHERE ROWNUM <=10"
                .format(table, sort_column)
            )
            oracle_rows = cursor.execute(oracle_query)
            print(column_list)
            # Keep (position, name, type) from each description entry; the
            # first two items are the column name and type code.
            oracle_column_list = [
                (idx, column[0], column[1]) for idx, column in enumerate(column_list)
            ]
            for row in oracle_rows:
                print(row, end='\n')
            for tup in oracle_column_list:
                print(tup, end='\n')
            print('Oracle row count:', str(oracle_rows.rowcount) + '\n')
        except cx_Oracle.DatabaseError as e:
            # Best-effort: report the failure and move on to the next table.
            print(str(e))
        finally:
            if cursor is not None:
                cursor.close()
导入pyodbc
导入ibm_数据库
导入cx_Oracle
导入集合
类数据库连接(对象):
def uuu init uuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuu
self.port=端口
self.connection\u type=连接类型
self.hostname\u或\u ip=主机名\u或\u ip
self.database_或_sid=数据库_或_sid
self.username=用户名
self.password=密码
self.dsn=“GEMPROD”
self.connection_string=“”
self.conn=“”
定义输入(自我):
如果self.connection_type==“Netezza”:
self.connection_string=“DRIVER={NetezzaSQL};SERVER=“+self.hostname_或_ip+”;PORT=“+self.PORT+\
“DATABASE=“+self.DATABASE”或“sid+”UID=“+self.username+”PWD=“+self.password
self.conn=pyodbc.connect(self.connection\u字符串)
返回自我控制
elif self.connection_type==“Oracle”:
dsn\u tns=cx\u Oracle.makedsn(self.hostname\u或\u ip、self.port、self.database\u或\u sid)
self.conn=cx\u Oracle.connect(user=self.username,password=self.password,dsn=dsn\u tns)
返回自我控制
elif self.connection_type==“DB2”:
self.connection_string=“Database=“+self.Database_或_sid+”;HOSTNAME=“+self.HOSTNAME_或_ip+\
“PORT=“+self.PORT+”;协议=TCPIP;UID=“+self.username+”PWD=“+\
self.password+“;”
#self.conn=ibm_db.connect(self.connection_字符串,“,”)
self.conn=ibm_db.connect('DSN='+self.DSN,self.username,self.password)
返回自我控制
通过
定义退出(自身、类型、值、回溯):
如果self.connection_type==“Netezza”:
self.conn.close()
elif self.connection_type==“DB2”:
ibm_db.close(self.conn)
elif self.connection_type==“Oracle”:
自我控制关闭
通过
定义报告(自我):
返回“%s%s%”(self.\u class.\u.\u名称\u,self.dsn)
def查询(自我、查询、参数):
通过
#database\u column\u metadata=collections.namedtuple('DatabaseColumnMetadata','index column\u name data\u type')
#database_field=collections.namedtuple(“”,“”)
表1=['BNR\U CIF\U 25天,表2]
排序列=无
数据库连接('Netezza',ip,端口,数据库,用户名,pwd)作为连接:
打印('Netezza查询:')
对于表_列表中的表:
cursor=connection\u one.cursor()
netezza_rows=cursor.execute(“从BNR_CIF_25DAY_RPT LIMIT 1中选择*)
column_list=netezza_rows.description
排序列=str(列列表[0][0])
netezza_query=“选择*从BNR\u CIF\u 25天\u RPT订单,按“+排序\u列+”ASC限制10”
netezza_rows=cursor.execute(netezza_查询)
打印(列列表)
netezza_列_列表=[]
对于idx,枚举中的列(列列表):
列名称,数据类型,*rest=column
netezza_column_list.append((idx,column_name,data_type))
对于netezza_行中的行:
打印(行,结束=“\n”)
对于netezza_列_列表中的tup:
打印(tup,end='\n')
打印('Netezza行计数:',str(Netezza_rows.rowcount)+'\n')
cursor.close()
数据库连接('Oracle',主机名,端口,SID,用户名,pwd)作为连接\u 2:
打印('Oracle查询:')
对于表_列表中的表:
尝试:
cursor=connection\u two.cursor()

oracle_rows=cursor.execute("选择*FROM BNR_CIF_25DAY_RPT,其中ROWNUM

这不完全是基于 Python 的解决方案,但我们团队曾使用 Fluid Query 来比较 Netezza 和 Oracle 中的数据。

感谢您的回复。Fluid Query 看起来很有用,但不确定它能否用于自动化监控的场景。我想我要找的是类似 pandas 这样的库,只是不确定它是不是最佳选择。