Python 迭代我的目录中的多个文件
目前,我使用下面的 Python 代码从文件夹中读取一个 Excel 文件,并通过 Selenium 将其内容提交到 Web 表单。但是,我想修改这段代码,使其能够依次处理目录中的多个文件(我的“目录”或“文件夹”中会有许多 Excel 文件)。标签:python、pandas。
相关文件:main.py、config.py、find_pending_records.py
一种方法如下(伪代码)。你的 main 部分应该是:
if __name__ == "__main__":
    # Script entry point: walk through everything get_excel_data() provides,
    # one item after another, in order.
    try:
        for pending_records in FindPendingRecords().get_excel_data():
            # Do operations on pending_records
            print(pending_records)
        print("All done, Bill")
    except Exception as exc:
        # Top-level boundary: report the failure message instead of a traceback.
        print(exc)
您的 find_file 方法应改为:
@classmethod
def find_file(cls):
    """Extract every Excel file to process from the configured archive.

    Opens the zip archive at ``config.FILE_LOCATION`` and extracts each
    member whose name contains ``'Horrible Data Log '`` into
    ``config.UNZIP_LOCATION``.

    Returns:
        A list with the value returned by ``ZipFile.extract`` for each
        matching member, in archive order. The list is empty when no
        member matches.
    """
    # The context manager closes the archive even if an extraction fails.
    with ZipFile(config.FILE_LOCATION) as archive:
        return [
            archive.extract(member.filename, config.UNZIP_LOCATION)
            for member in archive.filelist
            if 'Horrible Data Log ' in member.filename
        ]
FindPendingRecords().get_excel_data() 的作用是什么?每次调用都会返回不同文件的数据吗?我只添加了 FindPendingRecords() 的代码——所有文件都是格式相同的 Excel 文件,只是单元格/数据值不同。谢谢!它们会以这种方式一个接一个地处理吗?我不想让它们同时运行并发生冲突。@PeterGibbons 当然,它们是按顺序一个接一个地处理的。哦,太好了,谢谢!我必须像这样在数组中定义文件的数量吗?return ["file1", "file2", "file3"]?不必,请查看我提供的 find_file。您两次提到了 find_file 方法?关于第二个建议,我收到错误“无效的文件路径或缓冲区对象类型:”
"""Module used to find records that need to be inserted into Horrible website"""
from zipfile import ZipFile
import math
import pandas
import config
class FindPendingRecords:
    """Class used to find records that need to be inserted into Site"""

    @classmethod
    def find_file(cls):
        """Find and extract the Excel file to process.

        Opens the zip archive at ``config.FILE_LOCATION`` and extracts the
        first member whose name contains ``'Horrible Data Log '`` into
        ``config.UNZIP_LOCATION``.

        Returns:
            The value returned by ``ZipFile.extract`` for the matching member.

        Raises:
            FileNotFoundError: If no member of the archive matches.
        """
        # The context manager closes the archive on every exit path.
        with ZipFile(config.FILE_LOCATION) as archive:
            for member in archive.filelist:
                if 'Horrible Data Log ' in member.filename:
                    return archive.extract(member.filename, config.UNZIP_LOCATION)
        # Raise (rather than return) the exception so callers never hand an
        # exception class to pandas.read_excel as if it were a path.
        raise FileNotFoundError(
            f"No 'Horrible Data Log ' file found in {config.FILE_LOCATION}"
        )

    @staticmethod
    def _is_valid_person(person):
        """Return True when *person* is neither blank nor a missing value."""
        if isinstance(person, str):
            return bool(person.strip())
        return not pandas.isna(person)

    def get_excel_data(self):
        """Places excel data into pandas dataframe

        Reads the file returned by ``find_file``, normalizes the column
        names (surrounding whitespace stripped, ``/`` and spaces replaced by
        ``_``), prints and discards every row whose ``PERSON`` value is
        blank or missing, and passes the remaining rows through
        ``clean_data_frame``.

        Returns:
            The cleaned DataFrame containing only the valid records.
        """
        excel_data = pandas.read_excel(self.find_file())
        # NOTE(review): concatenating an empty frame with the same columns
        # looks like a no-op; kept as in the original -- confirm before removing.
        columns = pandas.DataFrame(columns=excel_data.columns.tolist())
        excel_data = pandas.concat([excel_data, columns])
        excel_data.columns = excel_data.columns.str.strip()
        excel_data.columns = excel_data.columns.str.replace("/", "_")
        excel_data.columns = excel_data.columns.str.replace(" ", "_")

        # Build the validity mask in one pass instead of dropping rows by
        # position while iterating, which shifts positions after each drop.
        valid_mask = excel_data["PERSON"].map(self._is_valid_person).astype(bool)
        for row in excel_data[~valid_mask].itertuples():
            print(f"Invalid record: {row}")
        num_valid_records = int(valid_mask.sum())
        print(f"Processing #{num_valid_records} records")

        # Copy so the column assignments in clean_data_frame act on an
        # independent frame rather than a slice of the original.
        return self.clean_data_frame(excel_data[valid_mask].copy())

    def clean_data_frame(self, data_frame):
        """Cleans up dataframes

        Converts every column whose name contains ``date`` (case-insensitive)
        to ``datetime.date`` values, turning unparseable entries into missing
        values, and rewrites ``PERSON`` as the string form of its integer
        value. The frame is modified in place and also returned.

        Args:
            data_frame: DataFrame with a ``PERSON`` column.

        Returns:
            The same DataFrame object, after cleaning.
        """
        for col in data_frame.columns:
            if "date" in col.lower():
                # infer_datetime_format is deprecated since pandas 2.0 (strict
                # inference is the default there), so it is no longer passed.
                data_frame[col] = pandas.to_datetime(data_frame[col], errors='coerce')
                data_frame[col] = data_frame[col].dt.date
        data_frame['PERSON'] = data_frame['PERSON'].astype(int).astype(str)
        return data_frame

    def get_mapping_data(self):
        """Read the ``main`` sheet of ``config.MAPPING_DOC`` into a DataFrame.

        Returns:
            The sheet contents as a DataFrame.
        """
        map_data = pandas.read_excel(config.MAPPING_DOC, sheet_name='main')
        # NOTE(review): same empty-frame concat as in get_excel_data; kept
        # as in the original -- confirm before removing.
        columns = pandas.DataFrame(columns=map_data.columns.tolist())
        return pandas.concat([map_data, columns])
class FindPendingRecords:
    """Sketch of a record finder that hands out one file's data at a time."""

    @classmethod
    def find_file(cls):
        """Return the list of files to process (placeholder names here)."""
        return ["file1", "file2", "file3"]

    def __init__(self):
        """Collect the files to process once, when the finder is created."""
        self.files = self.find_file()

    def get_excel_data(self):
        """Yield the data for each entry of ``self.files``, in order.

        This is a generator: the caller receives one item per loop
        iteration instead of a single combined result.
        """
        for excel_data in self.files:
            # process your excel_data
            yield excel_data
if __name__ == "__main__":
    # Script entry point: walk through everything get_excel_data() provides,
    # one item after another, in order.
    try:
        for pending_records in FindPendingRecords().get_excel_data():
            # Do operations on pending_records
            print(pending_records)
        print("All done, Bill")
    except Exception as exc:
        # Top-level boundary: report the failure message instead of a traceback.
        print(exc)
@classmethod
def find_file(cls):
    """Extract every Excel file to process from the configured archive.

    Opens the zip archive at ``config.FILE_LOCATION`` and extracts each
    member whose name contains ``'Horrible Data Log '`` into
    ``config.UNZIP_LOCATION``.

    Returns:
        A list with the value returned by ``ZipFile.extract`` for each
        matching member, in archive order. The list is empty when no
        member matches.
    """
    # The context manager closes the archive even if an extraction fails.
    with ZipFile(config.FILE_LOCATION) as archive:
        return [
            archive.extract(member.filename, config.UNZIP_LOCATION)
            for member in archive.filelist
            if 'Horrible Data Log ' in member.filename
        ]