Python 从文件夹中提取多个xml文件
我正在处理ncbi pubmed ftp数据,我必须解析xml 我在文件夹中存储了xml文件大约1500(250 gb)文件存储在文件夹中我正在从文件夹中读取文件,但在我的代码中,当我运行它时,我的系统被冻结。如何解决它Python 从文件夹中提取多个xml文件,python,xml,sqlite,Python,Xml,Sqlite,我正在处理ncbi pubmed ftp数据,我必须解析xml 我在文件夹中存储了xml文件大约1500(250 gb)文件存储在文件夹中我正在从文件夹中读取文件,但在我的代码中,当我运行它时,我的系统被冻结。如何解决它 import sqlite3 import os import lxml.etree as ET path = '/home/shayez/Desktop/Ex' listfile = [] files = os.listdir(path) for name in
import sqlite3
import os
import lxml.etree as ET
path = '/home/shayez/Desktop/Ex'
listfile = []
files = os.listdir(path)
for name in files:
listfile.append(name)
pmdata = []
for name2 in listfile:
full_file = os.path.abspath(os.path.join('Ex',name2))
dom = ET.parse(full_file)
pmdat = dom.findall('PubmedArticle')
pmdata.append(pmdat)
conn = sqlite3.connect('/home/shayez/Desktop/NewNCBI')
co = conn.cursor()
print ("Opened database successfully")
for d in pmdata:
for c in d :
PMID = c.find('MedlineCitation/PMID').text
title = c.find('MedlineCitation/Article/ArticleTitle').text
Date =
c.find('MedlineCitation/Article/Journal/JournalIssue/PubDate/Year')
Date2 =
c.find('MedlineCitation/Article/Journal/JournalIssue/PubDate/MedlineDate')
Abstract = c.find('MedlineCitation/Article/Abstract/AbstractText')
if Abstract is None :
pass
elif Date is None:
example =[(PMID,Date2.text,Abstract.text)]
co.executemany('INSERT INTO pubdata(PMID,Date,Abstract) VALUES (?,?,?)',example)
else:
example =[(PMID,Date.text,Abstract.text)]
co.executemany('INSERT INTO pubdata(PMID,Date,Abstract) VALUES (?,?,?)',example)
conn.commit()
print("Records Save Successfully")
conn.close()