Python Openpyxl将新行追加到Excel
我使用 ElementTree 在 XML 文档中查找标签的值,然后想把这个值追加到 Excel 工作表中。我还有一些用来查找值的正则表达式,也想把它们的结果追加到同一张工作表中。另外,我还想把文件名追加到工作表中。下面是我用来追加文件名的代码(至少是一次尝试)。标签:python, xml, excel, openpyxl
此代码使用elementTree从标记中获取值:
from openpyxl import Workbook
import xml.etree.ElementTree as ET
import os
# Collect the <title> text of every file in the "xml" directory and write
# one worksheet row per file to sample.xlsx.
xml_dir = 'xml'

# Create the workbook once, before the loop, so that every file adds a row
# to the same sheet instead of each iteration starting a new workbook.
wb = Workbook()
# Grab the active worksheet.
ws = wb.active

for filename in os.listdir(xml_dir):
    # os.listdir() yields bare file names; join them with the directory so
    # the file is found regardless of the current working directory.
    element_tree = ET.parse(os.path.join(xml_dir, filename))
    root = element_tree.getroot()
    title = root.find(".//title")
    # find() returns None when the document has no <title> element; record
    # an empty cell for that file instead of crashing on `.text`.
    agreement = title.text if title is not None else None
    print(agreement)
    # Rows are appended below the last used row of the sheet.
    ws.append([agreement])

# Save the file once, after all rows have been added.
wb.save("sample.xlsx")
这是我用正则表达式查找值的代码:
import re
import os
import openpyxl
# Regular expressions for the <content> entries we want to count.
# Raw strings keep the regex escapes (\s, \d) from being interpreted as
# (invalid) string escapes; the pattern text itself is unchanged.
regexPattern1 = r">Data\s+\d*\s*\=*\s*</content>"
regexPattern2 = r">Some Data\s+\d*\s*\=*\s*</content>"
regexPattern3 = r">More Data\s+\d*\s*\=*\s*</content>"
regexPattern4 = r">Data More\s+\d*\s*\=*\s*</content>"
regexPattern5 = r">Some More Data\s+\d*\s*\=*\s*</content>"


def get_values(filepath):
    """Count, for each pattern, the lines of *filepath* that match it.

    The file is read as UTF-8 text, one line at a time. A line contributes
    at most one to a pattern's count, no matter how many times the pattern
    occurs on that line.

    Args:
        filepath: Path of the text/XML file to scan.

    Returns:
        A 5-tuple of ints: the number of matching lines for
        regexPattern1 .. regexPattern5, in that order. Every count is 0
        when nothing matches.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    patterns = (
        regexPattern1,
        regexPattern2,
        regexPattern3,
        regexPattern4,
        regexPattern5,
    )
    counts = [0] * len(patterns)
    # The context manager closes the file even if decoding fails part-way.
    with open(filepath, encoding="utf8") as handle:
        for line in handle:
            for index, pattern in enumerate(patterns):
                if re.search(pattern, line):
                    counts[index] += 1
    return tuple(counts)
def process_folder(folder, output_xls_path):
    """Write the pattern counts of every XML file in *folder* to a workbook.

    The first row of the sheet is a header; each following row holds the
    five counts returned by ``get_values`` for one file, stored as text.

    Args:
        folder: Directory whose ``*.xml`` files are scanned (not recursive).
        output_xls_path: Path of the ``.xlsx`` file to create or overwrite.
    """
    # Match on the extension rather than on ".xml" anywhere in the name, so
    # files such as "a.xml.bak" are skipped. Sorting makes the row order
    # independent of the order os.listdir() happens to return.
    xml_paths = [
        os.path.join(folder, name)
        for name in sorted(os.listdir(folder))
        if name.endswith(".xml")
    ]

    wb = openpyxl.Workbook()
    sheet = wb.active
    # Header row; on a fresh sheet append() starts at row 1.
    sheet.append(("Data1", "Data2", "Data3", "Data4", "Data5"))
    for path in xml_paths:
        # Counts are written as strings, as the original cell writes did.
        sheet.append([str(count) for count in get_values(path)])
    wb.save(output_xls_path)


if __name__ == '__main__':
    process_folder("xmls", "xml.xlsx")
重新导入
导入操作系统
导入openpyxl
#正则表达式来查找要计数的数据
regexpater1=“>数据\s+\d*\s*\=*\s*”
regexpater2=“>某些数据\s+\d*\s*\=*\s*”
regexpater3=“>更多数据\s+\d*\s*\=*\s*”
regexpater4=“>数据更多\s+\d*\s*\=*\s*”
regexpater5=“>更多数据\s+\d*\s*\=*\s*”
#函数获取上述各种正则表达式的值
def get_值(文件路径):
#空列表保存我们找到的值。
值=[]
另一个=[]
更多=[]
mores=[]
smore=[]
对于打开的行(filepath,encoding=“utf8”)。readlines():
matchValue1=重新搜索(reg1,第行)
matchValue2=重新搜索(第2行)
matchValue3=重新搜索(第3行)
matchValue4=重新搜索(第4行)
matchValue5=重新搜索(第5行)
如果匹配值1:
values.append(matchValue1)
如果匹配值2:
另一个.append(matchValue2)
如果匹配值3:
更多。追加(matchValue3)
如果匹配值4:
附加说明(matchValue4)
如果匹配值为5:
smore.append(匹配值5)
#现在我们要计算所有列表中的最高数字。
尝试:
maxVal=len(值)
除:
maxVal=''#如果根本没有值,则处理此情况
尝试:
maxAnother=len(另一个)
除:
MaxOther=''
尝试:
maxmore=len(更多)
除:
maxmore=''
尝试:
maxmores=len(mores)
除:
maxmores=''
尝试:
maxsmore=len(smore)
除:
maxsmore=''
返回maxVal、MaxOther、maxmore、maxmores、maxsmore
def进程文件夹(文件夹、输出路径):
files=[folder+'/'+f表示os.listdir(folder)中的f,如果f中的“.xml”]
可写_行=[]
可写_行。在excel中追加((“Data1”、“Data1”、“Data3”、“Data4”、“Data5”))#标题
对于文件中的文件:
值=获取值(文件)
可写_行。追加((str(值[0])、str(值[1])、str(值[2])、str(值[3])、str(值[4]))
wb=openpyxl.Workbook()
工作表=wb.active
对于范围内的i(len(可写_行)):
活页['A'+str(i+1)]。值=可写_行[i][0]
活页['B'+str(i+1)]。值=可写_行[i][1]
活页['C'+str(i+1)]。值=可写_行[i][2]
活页['D'+str(i+1)]。值=可写_行[i][3]
活页['E'+str(i+1)]。值=可写行[i][4]
wb.save(输出路径)
如果uuuu name uuuuuu='\uuuuuuu main\uuuuuuu':
process_文件夹(“xmls”、“xml.xlsx”)
您遇到了什么问题?提示:如果要以编程方式添加单元格,请使用 sheet.cell(row=i+1, column=1)。
import re
import os
import openpyxl
# Regular expressions for the <content> entries we want to count.
# Raw strings keep the regex escapes (\s, \d) from being interpreted as
# (invalid) string escapes; the pattern text itself is unchanged.
regexPattern1 = r">Data\s+\d*\s*\=*\s*</content>"
regexPattern2 = r">Some Data\s+\d*\s*\=*\s*</content>"
regexPattern3 = r">More Data\s+\d*\s*\=*\s*</content>"
regexPattern4 = r">Data More\s+\d*\s*\=*\s*</content>"
regexPattern5 = r">Some More Data\s+\d*\s*\=*\s*</content>"


def get_values(filepath):
    """Count, for each pattern, the lines of *filepath* that match it.

    The file is read as UTF-8 text, one line at a time. A line contributes
    at most one to a pattern's count, no matter how many times the pattern
    occurs on that line.

    Args:
        filepath: Path of the text/XML file to scan.

    Returns:
        A 5-tuple of ints: the number of matching lines for
        regexPattern1 .. regexPattern5, in that order. Every count is 0
        when nothing matches.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    patterns = (
        regexPattern1,
        regexPattern2,
        regexPattern3,
        regexPattern4,
        regexPattern5,
    )
    counts = [0] * len(patterns)
    # The context manager closes the file even if decoding fails part-way.
    with open(filepath, encoding="utf8") as handle:
        for line in handle:
            for index, pattern in enumerate(patterns):
                if re.search(pattern, line):
                    counts[index] += 1
    return tuple(counts)
def process_folder(folder, output_xls_path):
    """Write the pattern counts of every XML file in *folder* to a workbook.

    The first row of the sheet is a header; each following row holds the
    five counts returned by ``get_values`` for one file, stored as text.

    Args:
        folder: Directory whose ``*.xml`` files are scanned (not recursive).
        output_xls_path: Path of the ``.xlsx`` file to create or overwrite.
    """
    # Match on the extension rather than on ".xml" anywhere in the name, so
    # files such as "a.xml.bak" are skipped. Sorting makes the row order
    # independent of the order os.listdir() happens to return.
    xml_paths = [
        os.path.join(folder, name)
        for name in sorted(os.listdir(folder))
        if name.endswith(".xml")
    ]

    wb = openpyxl.Workbook()
    sheet = wb.active
    # Header row; on a fresh sheet append() starts at row 1.
    sheet.append(("Data1", "Data2", "Data3", "Data4", "Data5"))
    for path in xml_paths:
        # Counts are written as strings, as the original cell writes did.
        sheet.append([str(count) for count in get_values(path)])
    wb.save(output_xls_path)


if __name__ == '__main__':
    process_folder("xmls", "xml.xlsx")