Python 如何将数据与正则表达式匹配
标签:python, python-3.x, algorithm, split, match
我有一个数组列表,如:
data = ['- TEST BEGA','R8=11K(10,15A)B','R9=1K(0,3A)B','R10_R84=13MEG(7,14K)R','R85_R84<100K(970,1000K)R',
'R85_R86=10K(9,11K)R']
我相信其他人会想出一个更好的解决方案,但下面是我的做法:
from collections import defaultdict
def get_operator(string):
    """Return the comparison operator that separates a measurement line.

    The text is scanned from left to right and the first ``=``, ``>`` or
    ``<`` encountered is returned. Scanning by position (rather than testing
    each operator in a fixed order) means an operator character that appears
    later, inside the measurement text, is never mistaken for the separator.

    Args:
        string: The text to inspect.

    Returns:
        The operator character, or ``None`` when the text contains none.
    """
    operators = '=><'
    return next((char for char in string if char in operators), None)
def createTreeStandardBloc(data):
    """Format measurement lines into a SCN / STEP / CHILD report.

    ``data[0]`` is the scenario title. Every later entry has the form
    ``STEP[_CHILD]<operator><measure>`` where the operator is one of ``=``,
    ``>`` or ``<``. Entries that share the same step prefix are collected
    under a single STEP. A step that has exactly one entry of the form
    ``STEP_CHILD`` keeps its joined name for both STEP and CHILD.

    Args:
        data: Sequence of strings; the title followed by measurement lines.
            Blank lines are ignored.

    Returns:
        A list of output lines. Empty when ``data`` is empty.

    Raises:
        ValueError: If a non-blank measurement line contains no operator.
    """
    if not data:
        return []

    # step prefix -> list of (child, operator, measure) in input order
    groups = defaultdict(list)
    for line in data[1:]:
        if not line.strip():
            continue
        oper = get_operator(line)
        if oper is None:
            raise ValueError(
                'no operator (=, > or <) found in line: %r' % (line,))
        # partition() cuts at the first occurrence only, so operator or
        # underscore characters further right stay in the right-hand part.
        name, _, measure = line.partition(oper)
        step, underscore, child = name.partition('_')
        if not underscore:
            # no explicit child: the step is its own child
            child = step
        groups[step].append((child, oper, measure))

    formatted = ['SCN: ' + data[0].strip('- ')]
    for step, children in groups.items():
        formatted.append(' ')
        if len(children) == 1 and children[0][0] != step:
            # a lone STEP_CHILD entry is reported under its joined name
            only_child, oper, measure = children[0]
            step = step + '_' + only_child
            children = [(step, oper, measure)]
        formatted.append('STEP ' + step)
        for child, oper, measure in children:
            formatted.append('CHILD ' + child)
            formatted.append('Operator ' + oper)
            formatted.append('MEASURE_CHILD ' + measure)
    return formatted
# Sample input: the scenario title followed by the measurement lines.
data = [
    '- TEST BEGA',
    'R8=11K(10,15A)B',
    'R9=1K(0,3A)B',
    'R10_R84=13MEG(7,14K)R',
    'R85_R84<100K(970,1000K)R',
    'R85_R86=10K(9,11K)R',
]

# Build the report, then write it to stdout one entry per line.
new_data = createTreeStandardBloc(data)
for entry in new_data:
    print(entry)
我相信其他人会想出更好的解决方案,但下面是我的做法:
from collections import defaultdict
def get_operator(string):
    """Return the comparison operator that separates a measurement line.

    The text is scanned from left to right and the first ``=``, ``>`` or
    ``<`` encountered is returned. Scanning by position (rather than testing
    each operator in a fixed order) means an operator character that appears
    later, inside the measurement text, is never mistaken for the separator.

    Args:
        string: The text to inspect.

    Returns:
        The operator character, or ``None`` when the text contains none.
    """
    operators = '=><'
    return next((char for char in string if char in operators), None)
def createTreeStandardBloc(data):
    """Format measurement lines into a SCN / STEP / CHILD report.

    ``data[0]`` is the scenario title. Every later entry has the form
    ``STEP[_CHILD]<operator><measure>`` where the operator is one of ``=``,
    ``>`` or ``<``. Entries that share the same step prefix are collected
    under a single STEP. A step that has exactly one entry of the form
    ``STEP_CHILD`` keeps its joined name for both STEP and CHILD.

    Args:
        data: Sequence of strings; the title followed by measurement lines.
            Blank lines are ignored.

    Returns:
        A list of output lines. Empty when ``data`` is empty.

    Raises:
        ValueError: If a non-blank measurement line contains no operator.
    """
    if not data:
        return []

    # step prefix -> list of (child, operator, measure) in input order
    groups = defaultdict(list)
    for line in data[1:]:
        if not line.strip():
            continue
        oper = get_operator(line)
        if oper is None:
            raise ValueError(
                'no operator (=, > or <) found in line: %r' % (line,))
        # partition() cuts at the first occurrence only, so operator or
        # underscore characters further right stay in the right-hand part.
        name, _, measure = line.partition(oper)
        step, underscore, child = name.partition('_')
        if not underscore:
            # no explicit child: the step is its own child
            child = step
        groups[step].append((child, oper, measure))

    formatted = ['SCN: ' + data[0].strip('- ')]
    for step, children in groups.items():
        formatted.append(' ')
        if len(children) == 1 and children[0][0] != step:
            # a lone STEP_CHILD entry is reported under its joined name
            only_child, oper, measure = children[0]
            step = step + '_' + only_child
            children = [(step, oper, measure)]
        formatted.append('STEP ' + step)
        for child, oper, measure in children:
            formatted.append('CHILD ' + child)
            formatted.append('Operator ' + oper)
            formatted.append('MEASURE_CHILD ' + measure)
    return formatted
# Sample input: the scenario title followed by the measurement lines.
data = [
    '- TEST BEGA',
    'R8=11K(10,15A)B',
    'R9=1K(0,3A)B',
    'R10_R84=13MEG(7,14K)R',
    'R85_R84<100K(970,1000K)R',
    'R85_R86=10K(9,11K)R',
]

# Build the report, then write it to stdout one entry per line.
new_data = createTreeStandardBloc(data)
for entry in new_data:
    print(entry)
我稍微修改了您的解决方案,按 `_` 拆分出 s 和 c。
以下是我提出的解决方案:
def createTreeStandardBloc(data=None):
    """Print measurement lines as a SCN / STEP / CHILD listing.

    The first entry is printed as the ``SCN:`` title. Each later non-blank
    entry is split at its first ``=``, ``<`` or ``>`` into a name and a
    measurement; the name is split at its first ``_`` into step ``s`` and
    child ``c`` (``c`` equals ``s`` when there is no underscore). ``STEP`` is
    printed only when the step differs from the previous line's step.

    Args:
        data: Optional sequence of strings (title first). When omitted, the
            built-in sample list is used.

    Raises:
        ValueError: If a non-blank measurement line contains no operator.
    """
    # Imported locally because the surrounding snippet has no import for it.
    import re

    if data is None:
        data = ['- TEST BEGA', 'R8=11K(10,15A)B', 'R9=1K(0,3A)B',
                'R10_R84=13MEG(7,14K)R', 'R85_R84<100K(970,1000K)R',
                'R85_R85=10K(9,11K)R', 'R85_R86=10K(9,11K)R']

    # Non-greedy first group: cut at the FIRST operator so operator
    # characters inside the measurement text are left untouched.
    pattern = re.compile(r"(.*?)([=<>])(.*)")
    last_s = None
    for i, line in enumerate(data):
        if i == 0:
            print("SCN:", line.strip("- "))
            continue
        if not line.strip():
            continue
        match = pattern.match(line)
        if match is None:
            raise ValueError(
                "no operator (=, < or >) found in line: %r" % (line,))
        s_c, op, mc = match.groups()
        s, underscore, c = s_c.partition('_')
        if not underscore:
            c = s
        if s != last_s:
            print("STEP", s)
        print("CHILD", c)
        print("Operator", op)
        print("MEASURE_CHILD", mc)
        last_s = s
我稍微修改了您的解决方案,按 `_` 拆分出 s 和 c。
以下是我提出的解决方案:
def createTreeStandardBloc(data=None):
    """Print measurement lines as a SCN / STEP / CHILD listing.

    The first entry is printed as the ``SCN:`` title. Each later non-blank
    entry is split at its first ``=``, ``<`` or ``>`` into a name and a
    measurement; the name is split at its first ``_`` into step ``s`` and
    child ``c`` (``c`` equals ``s`` when there is no underscore). ``STEP`` is
    printed only when the step differs from the previous line's step.

    Args:
        data: Optional sequence of strings (title first). When omitted, the
            built-in sample list is used.

    Raises:
        ValueError: If a non-blank measurement line contains no operator.
    """
    # Imported locally because the surrounding snippet has no import for it.
    import re

    if data is None:
        data = ['- TEST BEGA', 'R8=11K(10,15A)B', 'R9=1K(0,3A)B',
                'R10_R84=13MEG(7,14K)R', 'R85_R84<100K(970,1000K)R',
                'R85_R85=10K(9,11K)R', 'R85_R86=10K(9,11K)R']

    # Non-greedy first group: cut at the FIRST operator so operator
    # characters inside the measurement text are left untouched.
    pattern = re.compile(r"(.*?)([=<>])(.*)")
    last_s = None
    for i, line in enumerate(data):
        if i == 0:
            print("SCN:", line.strip("- "))
            continue
        if not line.strip():
            continue
        match = pattern.match(line)
        if match is None:
            raise ValueError(
                "no operator (=, < or >) found in line: %r" % (line,))
        s_c, op, mc = match.groups()
        s, underscore, c = s_c.partition('_')
        if not underscore:
            c = s
        if s != last_s:
            print("STEP", s)
        print("CHILD", c)
        print("Operator", op)
        print("MEASURE_CHILD", mc)
        last_s = s
已更改代码以给出正确答案:
import re
from itertools import groupby

data = ['- TEST BEGA', 'R8=11K(10,15A)B', 'R9=1K(0,3A)B', 'R10_R84=13MEG(7,14K)R',
        'R85_R84<100K(970,1000K)R', 'R85_R85=10K(9,11K)R', 'R92_R86=10K(9,12K)R']

# One measurement line: step name, optional "_child" suffix, a comparison
# operator, then the measurement text (surrounding whitespace dropped).
LINE_PATTERN = re.compile(r"^\s*([^_<>=]+)(?:_(\w+))?([<>=])(.*?)\s*$")


def parse_line(line):
    """Split one measurement line into ``(step, child, op, measure)``.

    ``child`` is ``None`` when the name has no ``_child`` suffix.

    Raises:
        ValueError: If the line does not look like a measurement line.
    """
    match = LINE_PATTERN.match(line)
    if match is None:
        raise ValueError("cannot parse measurement line: %r" % (line,))
    return match.groups()


def build_report(lines):
    """Return the SCN / STEP / CHILD report for ``lines`` as a list of strings.

    ``lines[0]`` is the scenario title; blank entries after it are skipped.
    Consecutive entries with the same step form one group:

    * a group of one entry is reported under its full name
      (``STEP R10_R84`` / ``CHILD R10_R84``);
    * a larger group is reported as ``STEP <step>`` followed by one
      CHILD / Operator / MEASURE_CHILD triple per entry, each triple using
      that entry's own operator and measurement.

    An empty string precedes every STEP, matching the blank line the script
    prints between steps. An empty ``lines`` yields an empty report.
    """
    if not lines:
        return []
    report = ["SCN: " + lines[0].strip("- ")]
    records = [parse_line(line) for line in lines[1:] if line.strip()]
    for step, members in groupby(records, key=lambda record: record[0]):
        members = list(members)
        report.append("")
        if len(members) == 1:
            _, child, op, measure = members[0]
            name = step if child is None else step + "_" + child
            report.append("STEP " + name)
            report.append("CHILD " + name)
            report.append("Operator " + op)
            report.append("MEASURE_CHILD " + measure)
            continue
        report.append("STEP " + step)
        for _, child, op, measure in members:
            # an entry without a child suffix is its own child
            report.append("CHILD " + (child if child is not None else step))
            report.append("Operator " + op)
            report.append("MEASURE_CHILD " + measure)
    return report


for report_line in build_report(data):
    print(report_line)
已更改代码以给出正确答案
import re
from itertools import groupby

data = ['- TEST BEGA', 'R8=11K(10,15A)B', 'R9=1K(0,3A)B', 'R10_R84=13MEG(7,14K)R',
        'R85_R84<100K(970,1000K)R', 'R85_R85=10K(9,11K)R', 'R92_R86=10K(9,12K)R']

# One measurement line: step name, optional "_child" suffix, a comparison
# operator, then the measurement text (surrounding whitespace dropped).
LINE_PATTERN = re.compile(r"^\s*([^_<>=]+)(?:_(\w+))?([<>=])(.*?)\s*$")


def parse_line(line):
    """Split one measurement line into ``(step, child, op, measure)``.

    ``child`` is ``None`` when the name has no ``_child`` suffix.

    Raises:
        ValueError: If the line does not look like a measurement line.
    """
    match = LINE_PATTERN.match(line)
    if match is None:
        raise ValueError("cannot parse measurement line: %r" % (line,))
    return match.groups()


def build_report(lines):
    """Return the SCN / STEP / CHILD report for ``lines`` as a list of strings.

    ``lines[0]`` is the scenario title; blank entries after it are skipped.
    Consecutive entries with the same step form one group:

    * a group of one entry is reported under its full name
      (``STEP R10_R84`` / ``CHILD R10_R84``);
    * a larger group is reported as ``STEP <step>`` followed by one
      CHILD / Operator / MEASURE_CHILD triple per entry, each triple using
      that entry's own operator and measurement.

    An empty string precedes every STEP, matching the blank line the script
    prints between steps. An empty ``lines`` yields an empty report.
    """
    if not lines:
        return []
    report = ["SCN: " + lines[0].strip("- ")]
    records = [parse_line(line) for line in lines[1:] if line.strip()]
    for step, members in groupby(records, key=lambda record: record[0]):
        members = list(members)
        report.append("")
        if len(members) == 1:
            _, child, op, measure = members[0]
            name = step if child is None else step + "_" + child
            report.append("STEP " + name)
            report.append("CHILD " + name)
            report.append("Operator " + op)
            report.append("MEASURE_CHILD " + measure)
            continue
        report.append("STEP " + step)
        for _, child, op, measure in members:
            # an entry without a child suffix is its own child
            report.append("CHILD " + (child if child is not None else step))
            report.append("Operator " + op)
            report.append("MEASURE_CHILD " + measure)
    return report


for report_line in build_report(data):
    print(report_line)
评论:
- 你必须使用正则表达式吗?我觉得编写一些代码来判断前缀是否重复要容易得多。
- 请说明你希望在前缀重复时如何拆分数据。前缀是指 R8 吗?另外,对于数组中的最后一个元素,你的预期输出中只有一个子元素,这有什么原因吗?
- 如果前缀指的是 R85,并且它们是根据是否存在其他相同的"步骤"值来分组的,那我同意:这里不适合使用正则表达式。
- @Axe319,是的,我的意思是 R10_R84 的处理方式与 R9 和 R8 类似。对于最后一个元素,R85 被拆分,是因为前缀 R85 在 R85_R84 和 R85_R86 中重复出现了两次。
- 运算符有问题:R85_R84 的运算符是 <,而在你的结果中是 =。print("Operator", op) 和 print("MEASURE_CHILD", mc) 应改为 print("Operator", last_op) 和 print("MEASURE_CHILD", last_mc)。
- 不完全正确:因为 R10_R84 后面的行没有重复该前缀,所以应输出 STEP R10_R84 和 CHILD R10_R84。请检查我的输出。
- 它在做任何事情之前会先尝试对数据进行排序。是否可以禁用此行为?
SCN: TEST BEGA
STEP R8
CHILD R8
Operator =
MEASURE_CHILD 11K(10,15A)B
STEP R9
CHILD R9
Operator =
MEASURE_CHILD 1K(0,3A)B
STEP R10_R84
CHILD R10_R84
Operator =
MEASURE_CHILD 13MEG(7,14K)R
STEP R85
CHILD R84
Operator <
MEASURE_CHILD 100K(970,1000K)R
CHILD R85
Operator =
MEASURE_CHILD 10K(9,11K)R
STEP R92_R86
CHILD R92_R86
Operator =
MEASURE_CHILD 10K(9,12K)R