如何使用python拆分excel工作簿中的合并单元格_Python

如何使用python拆分excel工作簿中的合并单元格

python

如何使用python拆分excel工作簿中的合并单元格,python,Python,是否有任何方法可以使用python拆分/取消合并excel工作簿中的单元格？我想要的解释如下- 结果应该是一个包含以下条目的新excel文件- 我使用xlrd为所有合并列复制相同字符串的解决方案如下所示- [注意：“formatted_info=True”标志尚未在我使用的xlrd中实现，因此我无法直接获取合并单元格的列表。我不应该在安装程序中升级xlrd。] def xlsx_to_dict(): workbook = xlrd.open_workbook(xlsfile)

是否有任何方法可以使用python拆分/取消合并excel工作簿中的单元格？我想要的解释如下-

结果应该是一个包含以下条目的新excel文件-

我使用xlrd为所有合并列复制相同字符串的解决方案如下所示-

[注意：“formatting_info=True”标志在我使用的xlrd版本中尚未实现，因此我无法直接获取合并单元格的列表。我也不能在当前的安装环境中升级xlrd。]

def xlsx_to_dict():
    """Collect phone values per city and name from every worksheet.

    Opens the workbook named by the module-level ``xlsfile`` with xlrd.  In
    each sheet, column 0 is read as the city, column 1 as the name and every
    further column as a phone value.  A blank city or name cell (the
    continuation of a vertically merged cell) inherits the value from the
    rows above it.

    Returns:
        dict: ``{worksheet_name: {city: {name: {'Phone': [values]}}}}``.
        A sheet with no city or no name in its data rows maps to ``{}``.
    """
    workbook = xlrd.open_workbook(xlsfile)
    results = {}
    for worksheet_name in workbook.sheet_names():
        worksheet = workbook.sheet_by_name(worksheet_name)
        # Row 0 is the header.  NOTE(review): the original upper bound
        # (nrows - 1) is kept, so the last row of the sheet is never read --
        # confirm whether that is intentional.
        data_rows = range(1, worksheet.nrows - 1)

        cities = [worksheet.cell_value(r, 0) for r in data_rows]
        cities = [value for value in cities if value != ""]
        names = [worksheet.cell_value(r, 1) for r in data_rows]
        names = [value for value in names if value != ""]

        result_dict = {}
        results[worksheet_name] = result_dict
        if not cities or not names:
            # Nothing to key the phone values by.
            continue

        # Seed with the first known values so leading blank cells still have
        # a city/name to attach to; both are then carried from row to row.
        current_city = cities[0]
        curr_name = names[0]
        for curr_row in data_rows:
            row_values = worksheet.row_values(curr_row)
            for curr_cell, cell_value in enumerate(row_values):
                if curr_cell == 0:
                    # A non-blank city cell starts a new city group.
                    if cell_value != "":
                        current_city = cell_value
                        result_dict.setdefault(current_city, {})
                    continue
                if curr_cell == 1:
                    # A non-blank name cell starts a new name group.
                    if cell_value != "":
                        curr_name = cell_value
                        by_name = result_dict.setdefault(current_city, {})
                        by_name.setdefault(curr_name, {})
                    continue
                # Every remaining column is appended to the phone list.
                by_name = result_dict.setdefault(current_city, {})
                entry = by_name.setdefault(curr_name, {})
                entry.setdefault('Phone', []).append(cell_value)
    return results

然后我将遍历这个字典并写出新的excel文件

但是，我希望有一些通用的分割合并单元格的方法。

如果您的文件中间没有真正的空单元格，下面的函数可以帮助您读取文件、进行一些处理，然后重新写出。

def read_merged_xls(file_contents):
    """Read the first sheet into a list of rows, filling blanks from above.

    Each blank cell (xlrd reports these as the empty string) is replaced by
    the already-filled value in the same column of the previous row, which
    repeats the value of a vertically merged cell on every row it spans.
    Blank cells in the first row stay ``''``.  Genuinely empty cells in the
    middle of the data are filled the same way, so the result is only
    faithful for sheets without such gaps.

    Args:
        file_contents: Raw bytes of the workbook, passed to
            ``xlrd.open_workbook(file_contents=...)``.

    Returns:
        list: One list of cell values per sheet row.
    """
    book = xlrd.open_workbook(file_contents=file_contents)
    sheet = book.sheet_by_index(0)
    data = []
    for rx in range(sheet.nrows):
        line = []
        for ry in range(sheet.ncols):
            cell = sheet.cell_value(rx, ry)
            # Compare against '' explicitly: a plain truthiness test would
            # also overwrite legitimate zero / False cell values.
            if cell == '':
                cell = data[-1][ry] if data else ''
            line.append(cell)
        data.append(line)
    return data

此函数获取“真实”的单元格值：如果给定坐标位于某个合并单元格内的任意位置，则返回该合并单元格的值。

def unmergedValue(rowx,colx,thesheet):
    """Return the value shown at (rowx, colx), resolving merged cells.

    If the coordinate lies inside one of ``thesheet.merged_cells``, the value
    of that range's top-left cell is returned; otherwise the cell's own value
    is returned.

    Args:
        rowx: Zero-based row index.
        colx: Zero-based column index.
        thesheet: Sheet object exposing ``merged_cells`` (an iterable of
            ``(rlo, rhi, clo, chi)`` tuples with exclusive upper bounds) and
            ``cell_value(rowx, colx)``.  Per the xlrd documentation,
            ``merged_cells`` is only populated when the workbook was opened
            with ``formatting_info=True``.

    Returns:
        The value of the merged range's top-left cell, or of the cell itself.
    """
    for rlo, rhi, clo, chi in thesheet.merged_cells:
        # Half-open bounds check; works on Python 2 and 3 and avoids building
        # or scanning a range object for every merged region.
        if rlo <= rowx < rhi and clo <= colx < chi:
            return thesheet.cell_value(rlo, clo)
    # Not inside any merged region.
    return thesheet.cell_value(rowx, colx)

松散地基于

非常不高效，但对于小型电子表格来说应该是可以接受的。

请分享您到目前为止所做的尝试，否则人们会继续投反对票。欢迎使用StackOverflow。当发布代码作为答案时，最好附带简短的解释。这里有一份网站导览（tour）指南。

import xlrd
import xlsxwriter
import numpy as np
import pandas as pd
def rep(l,i):
    """Return the nearest non-empty value that precedes index ``i`` in ``l``.

    Scans backwards from ``l[i-1]`` down to ``l[0]`` and returns the first
    entry that is not the empty string.  The scan stops at index 0, so it
    never wraps around to the end of the list.

    Args:
        l: Sequence of cell values, with ``u''`` marking blanks.
        i: Index of the blank entry to find a replacement for.

    Returns:
        The closest earlier non-empty value, or ``None`` when there is none
        (including ``i <= 0``).
    """
    for j in range(i - 1, -1, -1):
        if l[j] != u'':
            return l[j]
    return None
def write_df2xlsx(df,filename):
    """Write ``df`` to ``filename`` as sheet 'Sheet1', without the index.

    Args:
        df: The pandas DataFrame to export.
        filename: Path of the .xlsx file to create.
    """
    # Create a Pandas Excel writer using XlsxWriter as the engine.  The
    # context manager saves and closes the file on exit, even if to_excel
    # raises, and does not rely on the ExcelWriter.save() method that newer
    # pandas releases no longer provide.
    with pd.ExcelWriter(filename, engine='xlsxwriter') as writer:
        # Convert the dataframe to an XlsxWriter Excel object.
        df.to_excel(writer, sheet_name='Sheet1', index=False)

def csv_from_excel(filename):
    """Fill down blank cells in each worksheet and write 'ResultFile.xlsx'.

    For every worksheet the header is taken from the first row that has no
    empty cell (row 0 if no such row exists); the rows below it become a
    DataFrame.  In each column containing blanks, every blank is replaced via
    ``rep`` with the nearest earlier non-empty value, and the frame is written
    out with ``write_df2xlsx``.  Worksheets without any rows are skipped.

    Args:
        filename: Path of the workbook to read with xlrd.
    """
    wb = xlrd.open_workbook(filename)
    for worksheet_name in wb.sheet_names():
        sh = wb.sheet_by_name(worksheet_name)
        if sh.nrows == 0:
            # No row to take the headers from.
            continue

        # To find the headers/column names of the xlsx file: the first row in
        # which every cell is populated.  header_index stays 0 if none is, so
        # a header from a previous worksheet is never reused.
        header_index = 0
        for i in range(sh.nrows):
            if all(c.value != xlrd.empty_cell.value for c in sh.row(i)):
                header_index = i
                break
        columns = sh.row_values(header_index)
        rows = [sh.row_values(rownum)
                for rownum in range(header_index + 1, sh.nrows)]
        data = pd.DataFrame(rows, columns=columns)

        for col in data.columns:
            # Materialise the column once instead of once per blank cell.
            values = list(data[col])
            if u'' not in values:
                continue
            filled = [rep(values, idx) if value == u'' else value
                      for idx, value in enumerate(values)]
            data[col] = pd.Series(filled)

        # NOTE(review): every worksheet is written to the same file name, so
        # only the last worksheet's data survives -- confirm this is intended.
        write_df2xlsx(data, 'ResultFile.xlsx')

import xlrd
import xlsxwriter
import numpy as np
import pandas as pd
def rep(l,i):
    """Return the nearest non-empty value that precedes index ``i`` in ``l``.

    Scans backwards from ``l[i-1]`` down to ``l[0]`` and returns the first
    entry that is not the empty string.  The scan stops at index 0, so it
    never wraps around to the end of the list.

    Args:
        l: Sequence of cell values, with ``u''`` marking blanks.
        i: Index of the blank entry to find a replacement for.

    Returns:
        The closest earlier non-empty value, or ``None`` when there is none
        (including ``i <= 0``).
    """
    for j in range(i - 1, -1, -1):
        if l[j] != u'':
            return l[j]
    return None
def write_df2xlsx(df,filename):
    """Write ``df`` to ``filename`` as sheet 'Sheet1', without the index.

    Args:
        df: The pandas DataFrame to export.
        filename: Path of the .xlsx file to create.
    """
    # Create a Pandas Excel writer using XlsxWriter as the engine.  The
    # context manager saves and closes the file on exit, even if to_excel
    # raises, and does not rely on the ExcelWriter.save() method that newer
    # pandas releases no longer provide.
    with pd.ExcelWriter(filename, engine='xlsxwriter') as writer:
        # Convert the dataframe to an XlsxWriter Excel object.
        df.to_excel(writer, sheet_name='Sheet1', index=False)

def csv_from_excel(filename):
    """Fill down blank cells in each worksheet and write 'ResultFile.xlsx'.

    For every worksheet the header is taken from the first row that has no
    empty cell (row 0 if no such row exists); the rows below it become a
    DataFrame.  In each column containing blanks, every blank is replaced via
    ``rep`` with the nearest earlier non-empty value, and the frame is written
    out with ``write_df2xlsx``.  Worksheets without any rows are skipped.

    Args:
        filename: Path of the workbook to read with xlrd.
    """
    wb = xlrd.open_workbook(filename)
    for worksheet_name in wb.sheet_names():
        sh = wb.sheet_by_name(worksheet_name)
        if sh.nrows == 0:
            # No row to take the headers from.
            continue

        # To find the headers/column names of the xlsx file: the first row in
        # which every cell is populated.  header_index stays 0 if none is, so
        # a header from a previous worksheet is never reused.
        header_index = 0
        for i in range(sh.nrows):
            if all(c.value != xlrd.empty_cell.value for c in sh.row(i)):
                header_index = i
                break
        columns = sh.row_values(header_index)
        rows = [sh.row_values(rownum)
                for rownum in range(header_index + 1, sh.nrows)]
        data = pd.DataFrame(rows, columns=columns)

        for col in data.columns:
            # Materialise the column once instead of once per blank cell.
            values = list(data[col])
            if u'' not in values:
                continue
            filled = [rep(values, idx) if value == u'' else value
                      for idx, value in enumerate(values)]
            data[col] = pd.Series(filled)

        # NOTE(review): every worksheet is written to the same file name, so
        # only the last worksheet's data survives -- confirm this is intended.
        write_df2xlsx(data, 'ResultFile.xlsx')