如何在python中查找和替换区分大小写的整词
考虑以下 MCVE(标签:python、regex、replace):
import re
import textwrap
import traceback
import unittest
def replace_words(content, replacements):
    """Replace identifier-like words in *content* using *replacements*.

    A word is a run matching ``[A-Za-z_]\\w*``. Each word is looked up in
    *replacements* exactly as written (case-sensitive); words without an
    entry are left unchanged.

    Args:
        content: The text to process.
        replacements: Mapping from a word to its replacement string.

    Returns:
        A copy of *content* with every mapped word substituted.
    """
    word_re = re.compile(r"[A-Za-z_]\w*")

    def translate(match):
        word = match.group(0)
        return replacements.get(word, word)

    # The third positional parameter of Pattern.sub() is `count`, not `flags`.
    # Passing re.IGNORECASE | re.MULTILINE there (integer value 10) silently
    # limited the substitution to the first ten words. Flags can only be given
    # to re.compile(), and none are wanted for a case-sensitive match.
    return word_re.sub(translate, content)
class class_name(unittest.TestCase):
    """Fixture-driven checks for a ``f(text, replacements)`` word replacer."""

    def setUp(self):
        """Create three parallel lists: replacements, texts, expected results.

        The multi-line fixtures are written starting at column 0, so
        ``textwrap.dedent`` returns them unchanged.
        """
        grammar_tokens = {
            'PLUS': '"+"',
            'DASH': '"-"',
            'BANG': '"!"',
            'TILDE': '"~"',
            'STAR': '"*"',
            'SLASH': '"/"',
            'PERCENT': '"%"',
            'LEFT_PAREN': '"("',
            'RIGHT_PAREN': '")"'
        }
        keyword_tokens = {
            "IF": "fi",
            "FOO": "oof",
            "BAR": "rab",
            "OP_FOO": "oof_op"
        }
        self.replacements = [grammar_tokens, keyword_tokens]

        grammar_text = textwrap.dedent("""\
variable_identifier :
IDENTIFIER
primary_expression :
foo1
foo2
foo3
LEFT_PAREN expression RIGHT_PAREN
unary_operator :
PLUS
DASH
BANG
TILDE
multiplicative_expression :
unary_expression
multiplicative_expression STAR unary_expression
multiplicative_expression SLASH unary_expression
multiplicative_expression PERCENT unary_expression\
""")
        keyword_text = textwrap.dedent("""\
IF identifier IDENTIFIER FOO BAR BARycentric
OP_FOO
""")
        self.texts = [grammar_text, keyword_text]

        grammar_expected = textwrap.dedent("""\
variable_identifier :
IDENTIFIER
primary_expression :
foo1
foo2
foo3
"(" expression ")"
unary_operator :
"+"
"-"
"!"
"~"
multiplicative_expression :
unary_expression
multiplicative_expression "*" unary_expression
multiplicative_expression "/" unary_expression
multiplicative_expression "%" unary_expression\
""")
        keyword_expected = textwrap.dedent("""\
fi identifier IDENTIFIER oof rab BARycentric
oof_op
""")
        self.expected_results = [grammar_expected, keyword_expected]

    def _tester(self, f):
        """Assert that ``f(text, replacements)`` yields the expected output.

        The fixture lists are walked in order, so the first case is checked
        before the second.
        """
        cases = zip(self.texts, self.replacements, self.expected_results)
        for text, mapping, expected in cases:
            self.assertEqual(f(text, mapping), expected)

    def test_replace_words(self):
        """Run the shared fixture cases against ``replace_words``."""
        self._tester(replace_words)
# Run the unittest test runner only when this file is executed as a script.
if __name__ == "__main__":
    unittest.main()
`replace_words` 函数试图使用上述代码中的替换字典,在给定文本中查找并替换区分大小写的整词,但在 `self.assertEqual(f(texts[0], replacements[0]), expected_results[0])` 这一行断言失败,我不知道原因。
因此,问题是:如何在 Python 中使用替换字典查找并替换区分大小写的整词?

您可以使用 `re.sub` 和 `re.findall`:
import re
def regex_string(d, to_lower = False):
    """Build an alternation pattern that matches each key of *d* as a whole word.

    Every key is wrapped in ``\\b...\\b`` and passed through ``re.escape`` so
    that regex metacharacters inside a key are matched literally instead of
    being interpreted as pattern syntax.

    Args:
        d: Mapping (or any iterable of strings) whose keys are the words to match.
        to_lower: When true, each key contributes two alternatives: its
            lower-cased form followed by the key as written.

    Returns:
        The alternatives joined with ``|``. An empty *d* yields ``''``.
    """
    alternatives = []
    for key in d:
        if to_lower:
            alternatives.append(r'\b{}\b'.format(re.escape(key.lower())))
        alternatives.append(r'\b{}\b'.format(re.escape(key)))
    return '|'.join(alternatives)
# Maps each token name to the quoted literal that replaces it.
replacements = {
    'PLUS': '"+"',
    'DASH': '"-"',
    'BANG': '"!"',
    'TILDE': '"~"',
    'STAR': '"*"',
    'SLASH': '"/"',
    'PERCENT': '"%"',
    'LEFT_PAREN': '"("',
    'RIGHT_PAREN': '")"'
}

# Build the alternation once; it is used for both the templating pass and
# the pass that collects the matched words.
_word_pattern = regex_string(replacements, True)

# Pass 1: turn every matched word into a '{}' placeholder. Braces already
# present in `content` are doubled first, otherwise str.format() below would
# treat them as replacement fields and raise or substitute the wrong text.
replaced = re.sub(
    _word_pattern,
    '{}',
    content.replace('{', '{{').replace('}', '}}'),
)

# Pass 2: fill the placeholders, in order, with the replacement for each
# matched word. Words without an entry (e.g. lower-cased matches) are kept.
final_result = replaced.format(
    *[replacements.get(i, i) for i in re.findall(_word_pattern, content)]
)
输出(案例 1):
输出(案例 2):
或者更简短地说:
# Single-pass variant: the callback looks up each matched word and falls
# back to the matched text itself when the word has no entry.
replaced = re.sub(
    regex_string(replacements, True),
    lambda match: replacements.get(match.group(), match.group()),
    content,
)
您用来“测试”该解决方案的输入已经不在帖子里了,请记得提供一个 MCVE。如果您是在答案发布之后才用许多不同的输入去测试它,而事先并没有明确说明所有要求,那么不能因此就说答案“有 bug”。同样,发帖时请先确保给出正确的 MCVE。就目前而言,我下面的回答返回了所需的输出;如果我对输出的理解有误,请指正。
fi identifier IDENTIFIER oof rab BARycentric
oof_op
# Substitute every matched word in one pass; unmapped matches are returned
# unchanged by the callback.
replaced = re.sub(
    regex_string(replacements, True),
    lambda found: replacements.get(found.group(), found.group()),
    content,
)