Python 将DataFrame写入Excel:如何自动调整列宽
我正在尝试将一系列数据框写入Excel工作表,以便:Python 将DataFrame写入Excel:如何自动调整列宽,python,excel,pandas,dataframe,openpyxl,Python,Excel,Pandas,Dataframe,Openpyxl,我正在尝试将一系列数据框写入Excel工作表,以便: 工作表的现有内容不会被覆盖或删除,以及 Excel列宽会根据列条目的长度进行调整(这样我就不必在Excel中手动执行此操作) 对于1),我找到了一个极好的解决方案,它是@MaxU编写的helper函数。例如,我找到了一个看起来不错的解决方案。但是,当我尝试将这些解决方案放在一起时,列宽根本不会改变。以下是我的完整代码: import pandas as pd import os from openpyxl import load_workbo
import pandas as pd
import os
from openpyxl import load_workbook
def append_df_to_excel(filename, df, sheet_name='Sheet1', startrow=None,
truncate_sheet=False,
**to_excel_kwargs):
"""
Append a DataFrame [df] to existing Excel file [filename]
into [sheet_name] Sheet.
If [filename] doesn't exist, then this function will create it.
@param filename: File path or existing ExcelWriter
(Example: '/path/to/file.xlsx')
@param df: DataFrame to save to workbook
@param sheet_name: Name of sheet which will contain DataFrame.
(default: 'Sheet1')
@param startrow: upper left cell row to dump data frame.
Per default (startrow=None) calculate the last row
in the existing DF and write to the next row...
@param truncate_sheet: truncate (remove and recreate) [sheet_name]
before writing DataFrame to Excel file
@param to_excel_kwargs: arguments which will be passed to `DataFrame.to_excel()`
[can be a dictionary]
@return: None
Usage examples:
>>> append_df_to_excel('d:/temp/test.xlsx', df)
>>> append_df_to_excel('d:/temp/test.xlsx', df, header=None, index=False)
>>> append_df_to_excel('d:/temp/test.xlsx', df, sheet_name='Sheet2',
index=False)
>>> append_df_to_excel('d:/temp/test.xlsx', df, sheet_name='Sheet2',
index=False, startrow=25)
(c) [MaxU](https://stackoverflow.com/users/5741205/maxu?tab=profile)
"""
# Excel file doesn't exist - saving and exiting
if not os.path.isfile(filename):
df.to_excel(
filename,
sheet_name=sheet_name,
startrow=startrow if startrow is not None else 0,
**to_excel_kwargs)
return
# ignore [engine] parameter if it was passed
if 'engine' in to_excel_kwargs:
to_excel_kwargs.pop('engine')
writer = pd.ExcelWriter(filename, engine='openpyxl', mode='a')
# try to open an existing workbook
writer.book = load_workbook(filename)
# get the last row in the existing Excel sheet
# if it was not specified explicitly
if startrow is None and sheet_name in writer.book.sheetnames:
startrow = writer.book[sheet_name].max_row
# truncate sheet
if truncate_sheet and sheet_name in writer.book.sheetnames:
# index of [sheet_name] sheet
idx = writer.book.sheetnames.index(sheet_name)
# remove [sheet_name]
writer.book.remove(writer.book.worksheets[idx])
# create an empty sheet [sheet_name] using old index
writer.book.create_sheet(sheet_name, idx)
# copy existing sheets
writer.sheets = {ws.title:ws for ws in writer.book.worksheets}
if startrow is None:
startrow = 0
# write out the new sheet
df.to_excel(writer, sheet_name, startrow=startrow, **to_excel_kwargs)
"""
Now attempt to adjust the column widths as necessary so that all the cell contents are visible
in Excel. The code below is taken from https://towardsdatascience.com/how-to-auto-adjust-the-width-of-excel-columns-with-pandas-excelwriter-60cee36e175e.
"""
for column in df:
column_width = max(df[column].astype(str).map(len).max(), len(column))
col_idx = df.columns.get_loc(column)
writer.sheets[sheet_name].set_column(col_idx, col_idx, column_width)
writer.save()
现在我尝试测试该函数:
df = pd.DataFrame({'A_Very_Long_Column_Name': [10, 20, 30, 20, 15, 30, 45]})
append_df_to_excel("C:/Users/Leonidas/Documents/test.xlsx", df, "Sheet1")
import numpy as np
import pandas as pd
from pathlib import Path
from typing import Union, Optional, List, Tuple
from openpyxl import load_workbook
from openpyxl.utils import get_column_letter
def append_df_to_excel(
filename: Union[str, Path],
df: pd.DataFrame,
sheet_name: str = 'Sheet1',
startrow: int = None,
max_col_width: int = 40,
autofilter: bool = False,
fmt_int: str = "#,##0",
fmt_float: str = "#,##0.00",
fmt_date: str = "yyyy-mm-dd",
fmt_datetime: str = "yyyy-mm-dd hh:mm",
truncate_sheet: bool = False,
**to_excel_kwargs
) -> None:
"""
Append a DataFrame [df] to existing Excel file [filename]
into [sheet_name] Sheet.
If [filename] doesn't exist, then this function will create it.
@param filename: File path or existing ExcelWriter
(Example: '/path/to/file.xlsx')
@param df: DataFrame to save to workbook
@param sheet_name: Name of sheet which will contain DataFrame.
(default: 'Sheet1')
@param startrow: upper left cell row to dump data frame.
Per default (startrow=None) calculate the last row
in the existing DF and write to the next row...
@param max_col_width: maximum column width in Excel. Default: 30
@param autofilter: boolean - whether add Excel autofilter or not. Default: True
@param fmt_int: Excel format for integer numbers
@param fmt_float: Excel format for float numbers
@param fmt_date: Excel format for dates
@param fmt_datetime: Excel format for datetime's
@param truncate_sheet: truncate (remove and recreate) [sheet_name]
before writing DataFrame to Excel file
@param to_excel_kwargs: arguments which will be passed to `DataFrame.to_excel()`
[can be a dictionary]
@return: None
Usage examples:
>>> append_df_to_excel('d:/temp/test.xlsx', df, autofilter=True,
freeze_panes=(1,0))
>>> append_df_to_excel('d:/temp/test.xlsx', df, header=None, index=False)
>>> append_df_to_excel('d:/temp/test.xlsx', df, sheet_name='Sheet2',
index=False)
>>> append_df_to_excel('d:/temp/test.xlsx', df, sheet_name='Sheet2',
index=False, startrow=25)
>>> append_df_to_excel('d:/temp/test.xlsx', df, index=False,
fmt_datetime="dd.mm.yyyy hh:mm")
(c) [MaxU](https://stackoverflow.com/users/5741205/maxu?tab=profile)
"""
def set_column_format(ws, column_letter, fmt):
for cell in ws[column_letter]:
cell.number_format = fmt
filename = Path(filename)
file_exists = filename.is_file()
# process parameters
first_col = int(to_excel_kwargs.get("index", True)) + 1
sheet_name = to_excel_kwargs.get("sheet_name", "Sheet1")
# ignore [engine] parameter if it was passed
if 'engine' in to_excel_kwargs:
to_excel_kwargs.pop('engine')
with pd.ExcelWriter(
filename.with_suffix(".xlsx"),
engine="openpyxl",
mode="a" if file_exists else "w",
date_format=fmt_date,
datetime_format=fmt_datetime,
**to_excel_kwargs
) as writer:
if file_exists:
# try to open an existing workbook
writer.book = load_workbook(filename)
# get the last row in the existing Excel sheet
# if it was not specified explicitly
if startrow is None and sheet_name in writer.book.sheetnames:
startrow = writer.book[sheet_name].max_row
# truncate sheet
if truncate_sheet and sheet_name in writer.book.sheetnames:
# index of [sheet_name] sheet
idx = writer.book.sheetnames.index(sheet_name)
# remove [sheet_name]
writer.book.remove(writer.book.worksheets[idx])
# create an empty sheet [sheet_name] using old index
writer.book.create_sheet(sheet_name, idx)
# copy existing sheets
writer.sheets = {ws.title:ws for ws in writer.book.worksheets}
else:
# file doesn't exist, we are creating a new one
startrow = 0
# write out the DataFrame to an ExcelWriter
df.to_excel(writer, sheet_name=sheet_name, startrow=startrow,
**to_excel_kwargs)
# automatically set columns' width
worksheet = writer.sheets[sheet_name]
for xl_col_no, dtyp in enumerate(df.dtypes, first_col):
col_no = xl_col_no - first_col
width = max(df.iloc[:, col_no].astype(str).str.len().max(),
len(df.columns[col_no]) + 6)
width = min(max_col_width, width)
# print(f"column: [{df.columns[col_no]} ({dtyp.name})]\twidth:\t[{width}]")
column_letter = get_column_letter(xl_col_no)
worksheet.column_dimensions[column_letter].width = width
if np.issubdtype(dtyp, np.integer):
set_column_format(worksheet, column_letter, fmt_int)
if np.issubdtype(dtyp, np.floating):
set_column_format(worksheet, column_letter, fmt_float)
if autofilter:
worksheet.auto_filter.ref = worksheet.dimensions
将创建名为test.xlsx的新Excel工作簿以及名为Sheet1的工作表,并将df
的内容写入Sheet1,但列宽完全不受影响:
奇怪的是,当我第二次尝试执行函数时(不更改参数),我得到了一个错误:
runcell(2, 'C:/Users/Leonidas/Documents/write_to_excel2.py')
Traceback (most recent call last):
File "C:\Users\Leonidas\Documents\write_to_excel2.py", line 125, in <module>
append_df_to_excel("C:/Users/Leonidas/Documents/test.xlsx", df,
File "C:\Users\Leonidas\Documents\write_to_excel2.py", line 100, in append_df_to_excel
writer.sheets[sheet_name].set_column(col_idx, col_idx, column_width)
AttributeError: 'Worksheet' object has no attribute 'set_column'
runcell(2,'C:/Users/Leonidas/Documents/write_to_excel2.py')
回溯(最近一次呼叫最后一次):
文件“C:\Users\Leonidas\Documents\write_to_excel2.py”,第125行,在
将_df_添加到_excel(“C:/Users/Leonidas/Documents/test.xlsx”,df,
文件“C:\Users\Leonidas\Documents\write_to_excel2.py”,第100行,在附录_df_to_excel中
writer.sheets[sheet\u name].set\u列(列idx、列idx、列宽度)
AttributeError:“工作表”对象没有属性“set\u column”
我现在很困惑…任何关于如何修复代码的建议都将不胜感激。尝试使用此帮助函数:
df = pd.DataFrame({'A_Very_Long_Column_Name': [10, 20, 30, 20, 15, 30, 45]})
append_df_to_excel("C:/Users/Leonidas/Documents/test.xlsx", df, "Sheet1")
import numpy as np
import pandas as pd
from pathlib import Path
from typing import Union, Optional, List, Tuple
from openpyxl import load_workbook
from openpyxl.utils import get_column_letter
def append_df_to_excel(
filename: Union[str, Path],
df: pd.DataFrame,
sheet_name: str = 'Sheet1',
startrow: int = None,
max_col_width: int = 40,
autofilter: bool = False,
fmt_int: str = "#,##0",
fmt_float: str = "#,##0.00",
fmt_date: str = "yyyy-mm-dd",
fmt_datetime: str = "yyyy-mm-dd hh:mm",
truncate_sheet: bool = False,
**to_excel_kwargs
) -> None:
"""
Append a DataFrame [df] to existing Excel file [filename]
into [sheet_name] Sheet.
If [filename] doesn't exist, then this function will create it.
@param filename: File path or existing ExcelWriter
(Example: '/path/to/file.xlsx')
@param df: DataFrame to save to workbook
@param sheet_name: Name of sheet which will contain DataFrame.
(default: 'Sheet1')
@param startrow: upper left cell row to dump data frame.
Per default (startrow=None) calculate the last row
in the existing DF and write to the next row...
@param max_col_width: maximum column width in Excel. Default: 30
@param autofilter: boolean - whether add Excel autofilter or not. Default: True
@param fmt_int: Excel format for integer numbers
@param fmt_float: Excel format for float numbers
@param fmt_date: Excel format for dates
@param fmt_datetime: Excel format for datetime's
@param truncate_sheet: truncate (remove and recreate) [sheet_name]
before writing DataFrame to Excel file
@param to_excel_kwargs: arguments which will be passed to `DataFrame.to_excel()`
[can be a dictionary]
@return: None
Usage examples:
>>> append_df_to_excel('d:/temp/test.xlsx', df, autofilter=True,
freeze_panes=(1,0))
>>> append_df_to_excel('d:/temp/test.xlsx', df, header=None, index=False)
>>> append_df_to_excel('d:/temp/test.xlsx', df, sheet_name='Sheet2',
index=False)
>>> append_df_to_excel('d:/temp/test.xlsx', df, sheet_name='Sheet2',
index=False, startrow=25)
>>> append_df_to_excel('d:/temp/test.xlsx', df, index=False,
fmt_datetime="dd.mm.yyyy hh:mm")
(c) [MaxU](https://stackoverflow.com/users/5741205/maxu?tab=profile)
"""
def set_column_format(ws, column_letter, fmt):
for cell in ws[column_letter]:
cell.number_format = fmt
filename = Path(filename)
file_exists = filename.is_file()
# process parameters
first_col = int(to_excel_kwargs.get("index", True)) + 1
sheet_name = to_excel_kwargs.get("sheet_name", "Sheet1")
# ignore [engine] parameter if it was passed
if 'engine' in to_excel_kwargs:
to_excel_kwargs.pop('engine')
with pd.ExcelWriter(
filename.with_suffix(".xlsx"),
engine="openpyxl",
mode="a" if file_exists else "w",
date_format=fmt_date,
datetime_format=fmt_datetime,
**to_excel_kwargs
) as writer:
if file_exists:
# try to open an existing workbook
writer.book = load_workbook(filename)
# get the last row in the existing Excel sheet
# if it was not specified explicitly
if startrow is None and sheet_name in writer.book.sheetnames:
startrow = writer.book[sheet_name].max_row
# truncate sheet
if truncate_sheet and sheet_name in writer.book.sheetnames:
# index of [sheet_name] sheet
idx = writer.book.sheetnames.index(sheet_name)
# remove [sheet_name]
writer.book.remove(writer.book.worksheets[idx])
# create an empty sheet [sheet_name] using old index
writer.book.create_sheet(sheet_name, idx)
# copy existing sheets
writer.sheets = {ws.title:ws for ws in writer.book.worksheets}
else:
# file doesn't exist, we are creating a new one
startrow = 0
# write out the DataFrame to an ExcelWriter
df.to_excel(writer, sheet_name=sheet_name, startrow=startrow,
**to_excel_kwargs)
# automatically set columns' width
worksheet = writer.sheets[sheet_name]
for xl_col_no, dtyp in enumerate(df.dtypes, first_col):
col_no = xl_col_no - first_col
width = max(df.iloc[:, col_no].astype(str).str.len().max(),
len(df.columns[col_no]) + 6)
width = min(max_col_width, width)
# print(f"column: [{df.columns[col_no]} ({dtyp.name})]\twidth:\t[{width}]")
column_letter = get_column_letter(xl_col_no)
worksheet.column_dimensions[column_letter].width = width
if np.issubdtype(dtyp, np.integer):
set_column_format(worksheet, column_letter, fmt_int)
if np.issubdtype(dtyp, np.floating):
set_column_format(worksheet, column_letter, fmt_float)
if autofilter:
worksheet.auto_filter.ref = worksheet.dimensions
您还可以尝试使用openpyxl bestFit属性,该属性将列宽设置为双击列边框时的宽度。它应该可以做到这一点。请尝试执行以下操作:
for column in df:
ws.column_dimensions[column].bestFit = True
根据导出到Excel的原因,您还可以查看许多不同的基于Python的电子表格。我是一个名为的电子表格的作者。它可以让您将pandas dataframe显示为交互式电子表格。谢谢您的帮助!这几乎满足了我的要求:当我尝试并排写入dataframes时,列宽将调整为ent似乎对第一个df非常有效,但似乎对第二个df没有影响。我将尝试调整代码,看看是否可以修复它……根据你提到的文章,你是通过
pip安装xlsxwriter
来安装模块xlsxwriter,还是如果你使用Windows并在其中安装了Office,你可以使用AutoFi来尝试win32com
t
,有关详细信息,请参阅。