如何修复Python中MySql查询的方面
我试图编写以下脚本,从远程服务器调用大约30万个文件。 它通常工作正常,但最多只能工作65到70个文件。在这之后,它只是 打印文件名而不处理任何内容。如果有人能告诉我我是什么 做错了什么如何修复Python中MySql查询的方面,python,mysql,windows,python-2.7,Python,Mysql,Windows,Python 2.7,我试图编写以下脚本,从远程服务器调用大约30万个文件。 它通常工作正常,但最多只能工作65到70个文件。在这之后,它只是 打印文件名而不处理任何内容。如果有人能告诉我我是什么 做错了什么 import pymysql import pymysql.cursors import os import win32com.client from gensim.models import Word2Vec import nltk from nltk.corpus import stopwords impo
import pymysql
import pymysql.cursors
import os
import win32com.client
from gensim.models import Word2Vec
import nltk
from nltk.corpus import stopwords
import pyPdf
from pyth.plugins.rtf15.reader import Rtf15Reader
from pyth.plugins.plaintext.writer import PlaintextWriter
import nltk
import zipfile, re
import time
#READING DOC FILE FROM REMOTE LOCATION
def _resume_unc_path(resume_path):
    """Translate a ``ResumePath`` query value into a path on the resume share.

    Everything before the first "/" of ``resume_path`` is discarded and the
    remainder is appended, with backslashes, to the hard-coded share root.

    Raises:
        ValueError: if ``resume_path`` contains no "/".
    """
    share_root = '\\\\xxx.xxx.x.xxx\\Resumes\\Cand_Res'
    # Slice from the position of the first "/" (an integer index, not the
    # string itself) so only the org/date/candidate/file tail is kept.
    tail = resume_path[resume_path.index("/"):]
    # Python 2 semantics: encode() yields a byte ``str`` that concatenates
    # directly with the share root.
    tail = tail.encode('ascii', 'ignore').replace("/", "\\")
    return share_root + tail


def _read_doc_words(path):
    """Return the whitespace-separated ASCII words of a legacy .doc file.

    The document is opened through COM and is always closed again without
    saving, so handles do not pile up while many files are processed.
    """
    doc = win32com.client.GetObject(path)
    try:
        text = doc.Range().Text
    finally:
        # False == do not save changes; we only read the document.
        doc.Close(False)
    return text.encode('ascii', 'ignore').split()


def _read_docx_words(path):
    """Return the whitespace-separated ASCII words of a .docx file.

    Reads ``word/document.xml`` from the archive and strips every XML tag.
    """
    with zipfile.ZipFile(path) as archive:
        content = archive.read('word/document.xml').decode('utf-8')
    cleaned = re.sub('<(.|\n)*?>', '', content).encode('ascii', 'ignore')
    return cleaned.split()


def readfilesq9(n):
    """Fetch resume paths from MySQL and extract the words of each Word file.

    Args:
        n: Not used by this function; kept so existing callers keep working.

    Returns:
        A list with one entry per successfully read .doc/.docx file; each
        entry is the list of ASCII words extracted from that file.

    Failures on individual files are reported on stdout and skipped so one
    unreadable resume does not abort the whole run.
    """
    connection = pymysql.connect(host='xxx.xxx.x.xxx',
                                 user='abcd',
                                 passwd='pwd1',
                                 db='rep_db',
                                 charset='utf8mb4',
                                 cursorclass=pymysql.cursors.DictCursor)
    # The backslashes before "File" and "Cand_Res" are written doubled so the
    # literal contains a single backslash there without relying on Python
    # passing unknown escape sequences through unchanged.
    sql = ("SELECT candidateid,cnd.FirstName, cnd.LastName,"
           "Concat('\\xxx.xxx.x.xxx\\File\\Cand_Res/',orgguid,'/',"
           "DATE_FORMAT(cnd.createddate,'%Y%m'),'/',candidateguid,'/',Resume) "
           "as ResumePath "
           "from candidate cnd join mstorganization org "
           "on cnd.orgid = org.OrgId "
           "where Resume <> '' and Resume is not null "
           "order by cnd.modifieddate limit 100000")
    try:
        with connection.cursor() as cursor:
            cursor.execute(sql)
            result = cursor.fetchall()
    finally:
        # All rows are already fetched; release the connection before the
        # long-running file processing starts.
        connection.close()

    list1 = []
    for row in result:
        try:
            # Look the column up by name: with DictCursor each row is a dict,
            # and positional access into its items depends on dict ordering.
            resume_path = row['ResumePath']
            print(resume_path)
            file_full = _resume_unc_path(resume_path)
            # NOTE(review): presumably throttles access to the file share;
            # confirm the one-second pause per file is still wanted.
            time.sleep(1)
            base_dir = os.path.abspath(os.curdir)
            file_name1 = os.path.join(base_dir, file_full)
            print("Path1: %s" % file_name1)
            print("Path: %s" % file_name1)
            # ".docx" also contains ".doc", so test the longer marker first.
            if ".docx" in file_name1:
                list1.append(_read_docx_words(file_name1))
            elif ".doc" in file_name1:
                try:
                    list1.append(_read_doc_words(file_name1))
                except Exception as exc:
                    print("DOC ISSUE: %r" % (exc,))
            else:
                print("It is not a Doc file")
        except Exception as exc:
            # Best effort per file: report what went wrong and move on.
            print("OOPS1: %r" % (exc,))
    return list1
我使用的是 Enthought Canopy 附带的 Python 2.7.6。它不是我的默认 Python;我的默认 Python 位于 C:\Python27。
我使用的是 MySQL 和 Windows 7 专业版。如有缩进错误,敬请谅解。尝试在 while 循环中使用 cursor.fetchone() 而不是 cursor.fetchall(),这样可以一次只提取一行,并在没有更多结果时跳出循环。我没有查看 pymysql 的文档,但我假设 cursor.fetchone() 在没有更多结果时会返回 None。
如果这不起作用,请直接在MySQL中运行查询并验证输出