Python与sqlalchemy |批量插入错误

Python与sqlalchemy |批量插入错误,python,mysql,csv,pandas,sqlalchemy,Python,Mysql,Csv,Pandas,Sqlalchemy,我试图通过使用Python|Pandas和sqlalchemy将我的数百万条记录从CSV文件插入MySQL数据库。有时,此插入会在完成之前中断,甚至不会向数据库中插入单行 我的代码是: import pandas as pd from sqlalchemy import create_engine df = pd.read_csv('/home/shankar/LAB/Python/Rough/*******.csv') # 2nd argument replaces where condi

我正在尝试使用 Python、Pandas 和 sqlalchemy 将数百万条记录从 CSV 文件插入 MySQL 数据库。有时,插入会在完成之前中断,连一行数据都没有写入数据库。

我的代码是:

# Load a CSV file into a DataFrame and append its rows to a MySQL table with DataFrame.to_sql.
import pandas as pd 
from sqlalchemy import create_engine

df = pd.read_csv('/home/shankar/LAB/Python/Rough/*******.csv')
# Swap every null cell for None: where() substitutes its 2nd argument wherever the condition is False
df = df.where(pd.notnull(df), None) 
df.head()
# SQLAlchemy URL for the pymysql driver (MY_PASS / MY_DB appear to be placeholders)
conn_str = "mysql+pymysql://root:MY_PASS@localhost/MY_DB?charset=utf8&use_unicode=0"
engine = create_engine(conn_str)
# NOTE(review): to_sql is handed a raw DBAPI (pymysql) connection here rather than the
# SQLAlchemy engine. The traceback in this post shows pandas then sending its SQLite
# table probe ("SELECT name FROM sqlite_master ... name=?") through pymysql, which is
# where "not all arguments converted during string formatting" is raised. Passing
# con=engine looks like the fix; confirm against the pandas to_sql documentation.
conn = engine.raw_connection()
df.to_sql(name='table_name', con=conn, 
      if_exists='append')
conn.close()
错误:

---------------------------------------------------------------------------
 TypeError                                 Traceback (most recent call last)
 /home/shankar/.local/lib/python3.5/site-packages/pandas/io/sql.py in execute(self, *args, **kwargs)
 1563             else:
 -> 1564                 cur.execute(*args)
 1565             return cur

 /home/shankar/.local/lib/python3.5/site-packages/pymysql/cursors.py in execute(self, query, args)
 164 
 --> 165         query = self.mogrify(query, args)
 166 

 /home/shankar/.local/lib/python3.5/site-packages/pymysql/cursors.py in mogrify(self, query, args)
 143         if args is not None:
 --> 144             query = query % self._escape_args(args, conn)
 145 

 TypeError: not all arguments converted during string formatting

 During handling of the above exception, another exception occurred:

 DatabaseError                             Traceback (most recent call last)
 <ipython-input-6-bb91db9eb97e> in <module>()
 11 df.to_sql(name='company', con=conn, 
 12           if_exists='append',
 ---> 13           chunksize=10000)
 14 conn.close()

 /home/shankar/.local/lib/python3.5/site-packages/pandas/core/generic.py in to_sql(self, name, con, flavor, schema, if_exists, index, index_label, chunksize, dtype)
 1163         sql.to_sql(self, name, con, flavor=flavor, schema=schema,
 1164                    if_exists=if_exists, index=index, index_label=index_label,
 -> 1165                    chunksize=chunksize, dtype=dtype)
 1166 
 1167     def to_pickle(self, path):

 /home/shankar/.local/lib/python3.5/site-packages/pandas/io/sql.py in to_sql(frame, name, con, flavor, schema, if_exists, index, index_label, chunksize, dtype)
 569     pandas_sql.to_sql(frame, name, if_exists=if_exists, index=index,
 570                       index_label=index_label, schema=schema,
 --> 571                       chunksize=chunksize, dtype=dtype)
 572 
 573 

 /home/shankar/.local/lib/python3.5/site-packages/pandas/io/sql.py in to_sql(self, frame, name, if_exists, index, index_label, schema, chunksize, dtype)
1659                             if_exists=if_exists, index_label=index_label,
1660                             dtype=dtype)
-> 1661         table.create()
1662         table.insert(chunksize)
1663 

/home/shankar/.local/lib/python3.5/site-packages/pandas/io/sql.py in create(self)
 688 
 689     def create(self):
 --> 690         if self.exists():
 691             if self.if_exists == 'fail':
 692                 raise ValueError("Table '%s' already exists." % self.name)

 /home/shankar/.local/lib/python3.5/site-packages/pandas/io/sql.py in exists(self)
 676 
 677     def exists(self):
 --> 678         return self.pd_sql.has_table(self.name, self.schema)
 679 
 680     def sql_schema(self):

 /home/shankar/.local/lib/python3.5/site-packages/pandas/io/sql.py in has_table(self, name, schema)
1674         query = flavor_map.get(self.flavor)
1675 
-> 1676         return len(self.execute(query, [name, ]).fetchall()) > 0
1677 
1678     def get_table(self, table_name, schema=None):

/home/shankar/.local/lib/python3.5/site-packages/pandas/io/sql.py in execute(self, *args, **kwargs)
1574             ex = DatabaseError(
1575                 "Execution failed on sql '%s': %s" % (args[0], exc))
-> 1576             raise_with_traceback(ex)
1577 
1578     @staticmethod

/home/shankar/.local/lib/python3.5/site-packages/pandas/compat/__init__.py in raise_with_traceback(exc, traceback)
 331         if traceback == Ellipsis:
 332             _, _, traceback = sys.exc_info()
 --> 333         raise exc.with_traceback(traceback)
 334 else:
 335     # this version of raise is a syntax error in Python 3

 /home/shankar/.local/lib/python3.5/site-packages/pandas/io/sql.py in execute(self, *args, **kwargs)
1562                 cur.execute(*args, **kwargs)
1563             else:
-> 1564                 cur.execute(*args)
1565             return cur
1566         except Exception as exc:

 /home/shankar/.local/lib/python3.5/site-packages/pymysql/cursors.py in execute(self, query, args)
 163             pass
 164 
 --> 165         query = self.mogrify(query, args)
 166 
 167         result = self._query(query)

 /home/shankar/.local/lib/python3.5/site-packages/pymysql/cursors.py in mogrify(self, query, args)
 142 
 143         if args is not None:
 --> 144             query = query % self._escape_args(args, conn)
 145 
 146         return query

  DatabaseError: Execution failed on sql 'SELECT name FROM sqlite_master WHERE type='table' AND name=?;': not all arguments converted during string formatting
---------------------------------------------------------------------------
 TypeError                                 Traceback (most recent call last)
 /home/shankar/.local/lib/python3.5/site-packages/pandas/io/sql.py in execute(self, *args, **kwargs)
 1563             else:
 -> 1564                 cur.execute(*args)
 1565             return cur

 /home/shankar/.local/lib/python3.5/site-packages/pymysql/cursors.py in execute(self, query, args)
 164 
 --> 165         query = self.mogrify(query, args)
 166 

 /home/shankar/.local/lib/python3.5/site-packages/pymysql/cursors.py in mogrify(self, query, args)
 143         if args is not None:
 --> 144             query = query % self._escape_args(args, conn)
 145 

 TypeError: not all arguments converted during string formatting

 During handling of the above exception, another exception occurred:

 DatabaseError                             Traceback (most recent call last)
 <ipython-input-6-bb91db9eb97e> in <module>()
 11 df.to_sql(name='company', con=conn, 
 12           if_exists='append',
 ---> 13           chunksize=10000)
 14 conn.close()

 /home/shankar/.local/lib/python3.5/site-packages/pandas/core/generic.py in to_sql(self, name, con, flavor, schema, if_exists, index, index_label, chunksize, dtype)
 1163         sql.to_sql(self, name, con, flavor=flavor, schema=schema,
 1164                    if_exists=if_exists, index=index, index_label=index_label,
 -> 1165                    chunksize=chunksize, dtype=dtype)
 1166 
 1167     def to_pickle(self, path):

 /home/shankar/.local/lib/python3.5/site-packages/pandas/io/sql.py in to_sql(frame, name, con, flavor, schema, if_exists, index, index_label, chunksize, dtype)
 569     pandas_sql.to_sql(frame, name, if_exists=if_exists, index=index,
 570                       index_label=index_label, schema=schema,
 --> 571                       chunksize=chunksize, dtype=dtype)
 572 
 573 

 /home/shankar/.local/lib/python3.5/site-packages/pandas/io/sql.py in to_sql(self, frame, name, if_exists, index, index_label, schema, chunksize, dtype)
1659                             if_exists=if_exists, index_label=index_label,
1660                             dtype=dtype)
-> 1661         table.create()
1662         table.insert(chunksize)
1663 

/home/shankar/.local/lib/python3.5/site-packages/pandas/io/sql.py in create(self)
 688 
 689     def create(self):
 --> 690         if self.exists():
 691             if self.if_exists == 'fail':
 692                 raise ValueError("Table '%s' already exists." % self.name)

 /home/shankar/.local/lib/python3.5/site-packages/pandas/io/sql.py in exists(self)
 676 
 677     def exists(self):
 --> 678         return self.pd_sql.has_table(self.name, self.schema)
 679 
 680     def sql_schema(self):

 /home/shankar/.local/lib/python3.5/site-packages/pandas/io/sql.py in has_table(self, name, schema)
1674         query = flavor_map.get(self.flavor)
1675 
-> 1676         return len(self.execute(query, [name, ]).fetchall()) > 0
1677 
1678     def get_table(self, table_name, schema=None):

/home/shankar/.local/lib/python3.5/site-packages/pandas/io/sql.py in execute(self, *args, **kwargs)
1574             ex = DatabaseError(
1575                 "Execution failed on sql '%s': %s" % (args[0], exc))
-> 1576             raise_with_traceback(ex)
1577 
1578     @staticmethod

/home/shankar/.local/lib/python3.5/site-packages/pandas/compat/__init__.py in raise_with_traceback(exc, traceback)
 331         if traceback == Ellipsis:
 332             _, _, traceback = sys.exc_info()
 --> 333         raise exc.with_traceback(traceback)
 334 else:
 335     # this version of raise is a syntax error in Python 3

 /home/shankar/.local/lib/python3.5/site-packages/pandas/io/sql.py in execute(self, *args, **kwargs)
1562                 cur.execute(*args, **kwargs)
1563             else:
-> 1564                 cur.execute(*args)
1565             return cur
1566         except Exception as exc:

 /home/shankar/.local/lib/python3.5/site-packages/pymysql/cursors.py in execute(self, query, args)
 163             pass
 164 
 --> 165         query = self.mogrify(query, args)
 166 
 167         result = self._query(query)

 /home/shankar/.local/lib/python3.5/site-packages/pymysql/cursors.py in mogrify(self, query, args)
 142 
 143         if args is not None:
 --> 144             query = query % self._escape_args(args, conn)
 145 
 146         return query

  DatabaseError: Execution failed on sql 'SELECT name FROM sqlite_master WHERE type='table' AND name=?;': not all arguments converted during string formatting
此错误只在某些 CSV 文件上出现。请告诉我问题出在哪里。


提前感谢。

从抛出的错误来看,查询中的参数并非都是字符串,因此需要先把数据帧中的每个元素都转换为字符串:
# Cast every value in the DataFrame to str
df=df.astype(str)

从抛出的错误来看,查询中的参数并非都是字符串,因此需要先把数据帧中的每个元素都转换为字符串:
# Cast every value in the DataFrame to str
df=df.astype(str)

我遇到了错误“name 'creaqte_engine' is not defined”: NameError Traceback (most recent call last) in <module>() 7 df.head() 8 conn_str = "mysql+pymysql://root:root@localhost/globalTracker?charset=utf8&use_unicode=0" --> 9 engine = creaqte_engine(conn_str) 10 conn = engine.raw_connection() 11 # df = df.astype(str) NameError: name 'creaqte_engine' is not defined

只需修正拼写错误——删除多余的字母 q。