Python 将 txt 文件转换为 DataFrame

Python 将txt文件转换为数据帧,python,pandas,Python,Pandas,我有一个包含以下日志项的txt文件: -------------------> 2020-03-04 14:41:11.578 Unable to process update. Multiple Entries <------------------- 2020-03-04 14:41:16.000 我尝试过以下代码: log_list = [] with open(path_to_file) as file_object: for line in file_objec

我有一个包含以下日志项的txt文件:

-------------------> 2020-03-04 14:41:11.578 
Unable to process update. Multiple Entries
<------------------- 2020-03-04 14:41:16.000
我尝试过以下代码:

# Read the raw log file; every physical line becomes one row of the frame.
# `path_to_file` is expected to be defined by the surrounding script.
with open(path_to_file) as file_object:
    log_list = list(file_object)
df_log = pd.DataFrame(log_list, columns=['log_entries'])

# Timestamp following the opening arrow (19 dashes + '>').
# A named group must be written `(?P<name>...)`; the previous `(P<start_time>...)`
# matched the literal text "P<start_time>" and therefore never extracted anything.
df_log['start_time'] = df_log['log_entries'].str.extract(
    r'(?<=^\-{19}\>)\s(?P<start_time>\d{4}\-\d{2}\-\d{2}\s\d{2}\:\d{2}\:\d{2}\.\d{3})',
    expand=False,
)

# Description lines are the ones that start with a word character.
df_log['event_desc'] = df_log['log_entries'].str.extract(r'(^\w.+)', expand=False)

# Timestamp following the closing arrow ('<' + 19 dashes).
df_log['end_datetime'] = df_log['log_entries'].str.extract(
    r'(?<=^\<\-{19})\s(\d{4}\-\d{2}\-\d{2}\s\d{2}\:\d{2}\:\d{2}\.\d{3})',
    expand=False,
)
log_list = []
with open(path_to_file) as file_object:
    for line in file_object:
        log_list.append(line)
df_log = pd.DataFrame(log_list, columns=['log_entries'])
df_log['start_time'] = df_log['log_entries'].str.extract(r'(?<=^\-{19}\>)\s(P<start_time>\d{4}\-\d{2}\-\d{2}\s\d{2}\:\d{2}\:\d{2}\.\d{3})')
df_log['event_desc'] = df_log['log_entries'].str.extract(r'(^\w.+)')

df_log['end_datetime'] = df_log['log_entries'].str.extract(r'(?<=^\<\-{19})\s(\d{4}\-\d{2}\-\d{2}\s\d{2}\:\d{2}\:\d{2}\.\d{3})')

我会在解析时逐行拆分文件,而不是使用 read_csv,因为该文件不是 CSV 格式:

# Patterns for the three kinds of line in the log. All are raw strings so the
# regex escapes (\-, \<, \s, \d, \w) are not interpreted as string escapes.
start = re.compile(r'(?<=^\-{19}\>)\s(?P<start_time>\d{4}\-\d{2}\-\d{2}\s\d{2}\:\d{2}\:\d{2}\.\d{3})')
end = re.compile(r'(?<=^\<\-{19})\s(\d{4}\-\d{2}\-\d{2}\s\d{2}\:\d{2}\:\d{2}\.\d{3})')
word = re.compile(r'(^\w.+)')
data = []
# Entry currently being filled; None while we are outside a start/end pair,
# so stray lines before the first start marker (or after an end marker)
# are skipped instead of raising NameError or overwriting a finished entry.
row = None

# `t` is expected to hold the full log text (defined by the surrounding script).
for line in io.StringIO(t):
    match = start.search(line)
    if match:
        # A start marker opens a new entry; it is appended immediately and
        # filled in place as the following lines are read.
        row = {'start_time': match.group('start_time')}
        data.append(row)
        continue
    if row is None:
        continue
    match = end.search(line)
    if match:
        row['end_time'] = match.group(1)
        row = None  # the entry is complete
        continue
    match = word.search(line)
    if match:
        row['event_desc'] = match.group(1)

# One row per log entry; keys missing from an entry become NaN.
df = pd.DataFrame(data, columns=['start_time', 'event_desc', 'end_time'])
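start = re.compile(r'(?<=^\-{19}\>)\s(?P<start_time>\d{4}\-\d{2}\-\d{2}\s\d{2}\:\d{2}\:\d{2}\.\d{3})')

end = re.compile('(?<=^\<\-{19})\s(\d{4}\-\d{2}\-\d{2}\s\d{2}\:\d{2}\:\d{2}\.\d{3})')

评论:
- 从您的正则表达式来看应该是这样,但条目是否总是位于箭头之间?即 --> [your_data] <--
- 是的,条目总是位于箭头之间。
- 开始标记和结束标记之间是否只有一行描述?
- 没错,开始标记和结束标记之间始终只有一行描述。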
# Patterns for the three kinds of line in the log. All are raw strings so the
# regex escapes (\-, \<, \s, \d, \w) are not interpreted as string escapes.
start = re.compile(r'(?<=^\-{19}\>)\s(?P<start_time>\d{4}\-\d{2}\-\d{2}\s\d{2}\:\d{2}\:\d{2}\.\d{3})')
end = re.compile(r'(?<=^\<\-{19})\s(\d{4}\-\d{2}\-\d{2}\s\d{2}\:\d{2}\:\d{2}\.\d{3})')
word = re.compile(r'(^\w.+)')
data = []
# Entry currently being filled; None while we are outside a start/end pair,
# so stray lines before the first start marker (or after an end marker)
# are skipped instead of raising NameError or overwriting a finished entry.
row = None

# `t` is expected to hold the full log text (defined by the surrounding script).
for line in io.StringIO(t):
    match = start.search(line)
    if match:
        # A start marker opens a new entry; it is appended immediately and
        # filled in place as the following lines are read.
        row = {'start_time': match.group('start_time')}
        data.append(row)
        continue
    if row is None:
        continue
    match = end.search(line)
    if match:
        row['end_time'] = match.group(1)
        row = None  # the entry is complete
        continue
    match = word.search(line)
    if match:
        row['event_desc'] = match.group(1)

# One row per log entry; keys missing from an entry become NaN.
df = pd.DataFrame(data, columns=['start_time', 'event_desc', 'end_time'])