用Python3概括正则表达式的模式
我尝试从文本文件中提取数据点,示例行如下所示:用Python3概括正则表达式的模式,python,regex,Python,Regex,我尝试从文本文件中提取数据点,示例行如下所示: Feb 12 10:49:40 UTC 2020 # 1 # S0=22.97 S1=28.47 S2=29.12 S3=31.50 S4=32.57 S5=30.03 S6=24.62 S7=28.21 S8=25.41 import re def parse_data(filename): pattern = r'.*\s(\d*:\d*:\d*).*S0=(\d*\.\d*)\sS1=(\d*\.\d*)\sS2=(\d*\.\
Feb 12 10:49:40 UTC 2020 # 1 # S0=22.97 S1=28.47 S2=29.12 S3=31.50 S4=32.57 S5=30.03 S6=24.62 S7=28.21 S8=25.41
import re
def parse_data(filename):
    """Extract the timestamp and all ``S<n>=<value>`` readings from each line.

    A line contributes a record when it contains an ``HH:MM:SS``-style time
    preceded by whitespace and, after it, at least one ``S<n>=<decimal>``
    reading. The number of readings per line is not fixed, so lines with
    S0..S1 or S0..S12 are handled by the same code.

    Args:
        filename: Path of the text file to read.

    Returns:
        A list of ``(time, values)`` tuples in file order, where ``time`` is
        the matched time string and ``values`` is the list of reading values
        (as strings) in the order they appear on the line.
    """
    # Compile once, outside the loop; the leading \s mirrors the original
    # requirement that the time is preceded by whitespace.
    time_pattern = re.compile(r'\s(\d+:\d+:\d+)')
    # One reading: label "S<n>", "=", then a decimal number (captured).
    value_pattern = re.compile(r'\bS\d+=(\d+\.\d+)')

    records = []
    with open(filename, 'r') as read_file:
        # Iterate the file lazily instead of loading every line at once.
        for line in read_file:
            time_match = time_pattern.search(line)
            if not time_match:
                continue
            # Only look for readings after the timestamp, as the original
            # pattern did.
            values = value_pattern.findall(line, time_match.end())
            if values:
                records.append((time_match.group(1), values))
    return records
def main():
    """Parse the sample log file ``PythonTest.txt`` with ``parse_data``.

    The parsed records are kept in a local variable only; nothing is
    printed or returned.
    """
    file1 = 'PythonTest.txt'
    list1 = parse_data(file1)
# Run main() only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    main()
我提出的(非常)混乱的模式如下所示:
Feb 12 10:49:40 UTC 2020 # 1 # S0=22.97 S1=28.47 S2=29.12 S3=31.50 S4=32.57 S5=30.03 S6=24.62 S7=28.21 S8=25.41
import re
def parse_data(filename):
    """Extract the timestamp and all ``S<n>=<value>`` readings from each line.

    A line contributes a record when it contains an ``HH:MM:SS``-style time
    preceded by whitespace and, after it, at least one ``S<n>=<decimal>``
    reading. The number of readings per line is not fixed, so lines with
    S0..S1 or S0..S12 are handled by the same code.

    Args:
        filename: Path of the text file to read.

    Returns:
        A list of ``(time, values)`` tuples in file order, where ``time`` is
        the matched time string and ``values`` is the list of reading values
        (as strings) in the order they appear on the line.
    """
    # Compile once, outside the loop; the leading \s mirrors the original
    # requirement that the time is preceded by whitespace.
    time_pattern = re.compile(r'\s(\d+:\d+:\d+)')
    # One reading: label "S<n>", "=", then a decimal number (captured).
    value_pattern = re.compile(r'\bS\d+=(\d+\.\d+)')

    records = []
    with open(filename, 'r') as read_file:
        # Iterate the file lazily instead of loading every line at once.
        for line in read_file:
            time_match = time_pattern.search(line)
            if not time_match:
                continue
            # Only look for readings after the timestamp, as the original
            # pattern did.
            values = value_pattern.findall(line, time_match.end())
            if values:
                records.append((time_match.group(1), values))
    return records
def main():
    """Parse the sample log file ``PythonTest.txt`` with ``parse_data``.

    The parsed records are kept in a local variable only; nothing is
    printed or returned.
    """
    file1 = 'PythonTest.txt'
    list1 = parse_data(file1)
# Run main() only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    main()
但是,如果感兴趣的数据点数量发生变化,我希望不必专门为每种情况重写模式。例如,可能只有 S0 和 S1,也可能一直到 S12。这超出了我的能力范围:有可能写出一个更通用的模式吗?如果可以,各个分组该如何索引?
您可以使用下面这个模式,它对任意数量的数据点都适用:
import re

# Sample input: a line holding any number of "S<n>=<value>" readings.
txt = """
S0=22.97 S1=28.47 S2=29.12 S3=31.50 S4=32.57 S5=30.03 S6=24.62 S7=28.21 S8=25.41
"""

# Group 1 captures the label (e.g. "S0"); group 2 captures its decimal value.
# The pattern matches one reading at a time, so the count per line is free.
pattern = re.compile(r'(S\d+)=(\d+\.\d+)')

# finditer yields one match object per reading, left to right.
for match in pattern.finditer(txt):
    print(match.group(1), "=", match.group(2))
>>>S0=22.97
>>>S1=28.47
>>>S2=29.12
>>>S3=31.50
>>>S4=32.57
>>>S5=30.03
>>>S6=24.62
>>>S7=28.21
>>>S8=25.41
您可以根据需要使用此模式:
import re

# Sample input: a line holding any number of "S<n>=<value>" readings.
txt = """
S0=22.97 S1=28.47 S2=29.12 S3=31.50 S4=32.57 S5=30.03 S6=24.62 S7=28.21 S8=25.41
"""

# Group 1 captures the label (e.g. "S0"); group 2 captures its decimal value.
# The pattern matches one reading at a time, so the count per line is free.
pattern = re.compile(r'(S\d+)=(\d+\.\d+)')

# finditer yields one match object per reading, left to right.
for match in pattern.finditer(txt):
    print(match.group(1), "=", match.group(2))
>>>S0=22.97
>>>S1=28.47
>>>S2=29.12
>>>S3=31.50
>>>S4=32.57
>>>S5=30.03
>>>S6=24.62
>>>S7=28.21
>>>S8=25.41