Python Openpyxl将新行追加到Excel_Python_Xml_Excel_Openpyxl

Python Openpyxl将新行追加到Excel

python xml excel

Python Openpyxl将新行追加到Excel,python,xml,excel,openpyxl,Python,Xml,Excel,Openpyxl,我使用元素树在XML文档中查找标记的值。然后我想将其附加到Excel工作表中。我还有一些寻找值的正则表达式。我想把它附加到同一张纸上。我还想将文件名附加到工作表中。下面是我附加文件名的代码（请至少尝试）此代码使用elementTree从标记中获取值： from openpyxl import Workbook import xml.etree.ElementTree as ET import os for filename in os.listdir('xml'): element_tre

我使用 ElementTree 在 XML 文档中查找标签的值，然后想把这些值追加到 Excel 工作表中。我还有一些用于查找数据的正则表达式，想把它们的结果追加到同一张工作表上。此外，我还想把文件名也追加到工作表中。下面是我用来追加文件名的代码（至少是我的一次尝试）。

此代码使用 ElementTree 从标签中获取值：

from openpyxl import Workbook
import xml.etree.ElementTree as ET
import os

# Collect the <title> text of every file in the "xml" folder and write one
# row per file (file name, title) into a single worksheet.
wb = Workbook()

# grab the active worksheet
ws = wb.active

# sorted() makes the row order independent of the file system's listing order.
for filename in sorted(os.listdir('xml')):
    # os.listdir() returns bare names, so join them with the folder before
    # parsing; otherwise the file is looked up in the current directory.
    element_tree = ET.parse(os.path.join('xml', filename))
    root = element_tree.getroot()

    # find() returns None when the document has no <title> element; leave the
    # title cell empty in that case instead of failing on ".text".
    title = root.find(".//title")
    agreement = title.text if title is not None else None
    print(agreement)

    # Append inside the loop so that every file gets its own row, with the
    # file name in the first column and the title in the second.
    ws.append([filename, agreement])

# Save the file
wb.save("sample.xlsx")

这是我用正则表达式查找数据的代码：

import re
import os
import openpyxl

# Regular expressions for the <content> entries we want to count.
# Raw strings keep the regex escapes (\s, \d, \=) away from Python's own
# string-escape processing, which warns about unknown escapes in plain
# literals. The pattern text itself is unchanged.
regexPattern1 = r">Data\s+\d*\s*\=*\s*</content>"
regexPattern2 = r">Some Data\s+\d*\s*\=*\s*</content>"
regexPattern3 = r">More Data\s+\d*\s*\=*\s*</content>"
regexPattern4 = r">Data More\s+\d*\s*\=*\s*</content>"
regexPattern5 = r">Some More Data\s+\d*\s*\=*\s*</content>"


#function to get the values of the various regular expressions above
def get_values(filepath):
    """Count the lines of a file that match each of the five patterns.

    The file is read as UTF-8 text, line by line. Every line is searched with
    regexPattern1 .. regexPattern5; a line may count towards several patterns.

    Args:
        filepath: Path of the file to scan.

    Returns:
        A 5-tuple of ints: the number of matching lines for regexPattern1,
        regexPattern2, regexPattern3, regexPattern4 and regexPattern5, in
        that order. A pattern that never matches yields 0.
    """
    patterns = (
        regexPattern1,
        regexPattern2,
        regexPattern3,
        regexPattern4,
        regexPattern5,
    )
    counts = [0] * len(patterns)

    # The context manager closes the file even if reading or decoding fails.
    with open(filepath, encoding="utf8") as handle:
        for line in handle:
            for index, pattern in enumerate(patterns):
                if re.search(pattern, line):
                    counts[index] += 1

    return tuple(counts)

def process_folder(folder, output_xls_path):
    """Count pattern matches in every XML file of a folder and save them to Excel.

    The first worksheet row is a header. Each following row holds the five
    counts returned by ``get_values`` for one file, stored as text in
    columns A-E.

    Args:
        folder: Directory that holds the XML files to scan.
        output_xls_path: Path the resulting workbook is saved to.
    """
    # Match on the extension only; a plain substring test would also pick up
    # names such as "notes.xml.bak". Sorting keeps the row order stable.
    files = [
        os.path.join(folder, name)
        for name in sorted(os.listdir(folder))
        if name.endswith(".xml")
    ]

    wb = openpyxl.Workbook()
    sheet = wb.active

    # Header in the excel (the second column used to repeat "Data1").
    sheet.append(("Data1", "Data2", "Data3", "Data4", "Data5"))

    for path in files:
        # append() writes the values into the next free row, starting at
        # column A, so no manual cell addressing is needed.
        sheet.append([str(count) for count in get_values(path)])

    wb.save(output_xls_path)


# Run the export only when this file is executed as a script, not on import.
if __name__ == "__main__":
    process_folder(folder="xmls", output_xls_path="xml.xlsx")

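import re
import os
import openpyxl

#regular expressions to find the data we want to count
regexPattern1 = ">Data\s+\d*\s*\=*\s*</content>"
regexPattern2 = ">Some Data\s+\d*\s*\=*\s*</content>"
regexPattern3 = ">More Data\s+\d*\s*\=*\s*</content>"
regexPattern4 = ">Data More\s+\d*\s*\=*\s*</content>"
regexPattern5 = ">Some More Data\s+\d*\s*\=*\s*</content>"

#function to get the values of the various regular expressions above
def get_values(filepath):
    #empty list to hold the values we find.
    values = []
    another = []
    more = []
    mores = []
    smore = []

    for line in open(filepath, encoding="utf8").readlines():
        matchValue1 = re.search(regexPattern1, line)
        matchValue2 = re.search(regexPattern2, line)
        matchValue3 = re.search(regexPattern3, line)
        matchValue4 = re.search(regexPattern4, line)
        matchValue5 = re.search(regexPattern5, line)

        if matchValue1:
            values.append(matchValue1)
        if matchValue2:
            another.append(matchValue2)
        if matchValue3:
            more.append(matchValue3)
        if matchValue4:
            mores.append(matchValue4)
        if matchValue5:
            smore.append(matchValue5)

    # Now we want to calculate highest number in all the lists.
    try:
        maxVal = len(values)
    except:
        maxVal = '' # This case will handle if there are NO values at all
    try:
        maxAnother = len(another)
    except:
        maxAnother = ''
    try:
        maxmore = len(more)
    except:
        maxmore = ''
    try:
        maxmores = len(mores)
    except:
        maxmores = ''
    try:
        maxsmore = len(smore)
    except:
        maxsmore = ''

    return maxVal, maxAnother, maxmore, maxmores, maxsmore

def process_folder(folder, output_xls_path):
    files = [folder+'/'+f for f in os.listdir(folder) if ".xml" in f]
    writable_lines = []
    writable_lines.append(("Data1","Data1", "Data3", "Data4", "Data5")) # Header in the excel

    for file in files:
        values = get_values(file)
        writable_lines.append((str(values[0]),str(values[1]), str(values[2]), str(values[3]), str(values[4])))

    wb = openpyxl.Workbook()
    sheet = wb.active
    for i in range(len(writable_lines)):
        sheet['A' + str(i+1)].value = writable_lines[i][0]
        sheet['B' + str(i+1)].value = writable_lines[i][1]
        sheet['C' + str(i+1)].value = writable_lines[i][2]
        sheet['D' + str(i+1)].value = writable_lines[i][3]
        sheet['E' + str(i+1)].value = writable_lines[i][4]

    wb.save(output_xls_path)

if __name__ == '__main__':
    process_folder("xmls", "xml.xlsx")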

您遇到了什么问题？提示：如果要以编程方式添加单元格，请使用

sheet.cell(row=i+1, column=1)

。

import re
import os
import openpyxl

# Regular expressions for the <content> entries we want to count.
# Raw strings keep the regex escapes (\s, \d, \=) away from Python's own
# string-escape processing, which warns about unknown escapes in plain
# literals. The pattern text itself is unchanged.
regexPattern1 = r">Data\s+\d*\s*\=*\s*</content>"
regexPattern2 = r">Some Data\s+\d*\s*\=*\s*</content>"
regexPattern3 = r">More Data\s+\d*\s*\=*\s*</content>"
regexPattern4 = r">Data More\s+\d*\s*\=*\s*</content>"
regexPattern5 = r">Some More Data\s+\d*\s*\=*\s*</content>"


#function to get the values of the various regular expressions above
def get_values(filepath):
    """Count the lines of a file that match each of the five patterns.

    The file is read as UTF-8 text, line by line. Every line is searched with
    regexPattern1 .. regexPattern5; a line may count towards several patterns.

    Args:
        filepath: Path of the file to scan.

    Returns:
        A 5-tuple of ints: the number of matching lines for regexPattern1,
        regexPattern2, regexPattern3, regexPattern4 and regexPattern5, in
        that order. A pattern that never matches yields 0.
    """
    patterns = (
        regexPattern1,
        regexPattern2,
        regexPattern3,
        regexPattern4,
        regexPattern5,
    )
    counts = [0] * len(patterns)

    # The context manager closes the file even if reading or decoding fails.
    with open(filepath, encoding="utf8") as handle:
        for line in handle:
            for index, pattern in enumerate(patterns):
                if re.search(pattern, line):
                    counts[index] += 1

    return tuple(counts)

def process_folder(folder, output_xls_path):
    """Count pattern matches in every XML file of a folder and save them to Excel.

    The first worksheet row is a header. Each following row holds the five
    counts returned by ``get_values`` for one file, stored as text in
    columns A-E.

    Args:
        folder: Directory that holds the XML files to scan.
        output_xls_path: Path the resulting workbook is saved to.
    """
    # Match on the extension only; a plain substring test would also pick up
    # names such as "notes.xml.bak". Sorting keeps the row order stable.
    files = [
        os.path.join(folder, name)
        for name in sorted(os.listdir(folder))
        if name.endswith(".xml")
    ]

    wb = openpyxl.Workbook()
    sheet = wb.active

    # Header in the excel (the second column used to repeat "Data1").
    sheet.append(("Data1", "Data2", "Data3", "Data4", "Data5"))

    for path in files:
        # append() writes the values into the next free row, starting at
        # column A, so no manual cell addressing is needed.
        sheet.append([str(count) for count in get_values(path)])

    wb.save(output_xls_path)


# Run the export only when this file is executed as a script, not on import.
if __name__ == "__main__":
    process_folder(folder="xmls", output_xls_path="xml.xlsx")