如何在python中查找和替换区分大小写的整词_Python_Regex_Replace

如何在python中查找和替换区分大小写的整词

python regex replace

如何在python中查找和替换区分大小写的整词,python,regex,replace,Python,Regex,Replace,考虑以下mcve： import re import textwrap import traceback import unittest def replace_words(content, replacements): rc = re.compile(r"[A-Za-z_]\w*") def translate(match): word = match.group(0) return replacements.get(word, word

考虑以下mcve：

import re
import textwrap

import traceback
import unittest


def replace_words(content, replacements):
    """Replace identifier-like tokens in *content* via a case-sensitive lookup.

    Every token matching ``[A-Za-z_]\\w*`` is looked up in *replacements*
    exactly as written; tokens that are not keys are left untouched.

    Args:
        content: Text to scan.
        replacements: Mapping from a token to its replacement string.

    Returns:
        A copy of *content* with every token that is a key of
        *replacements* swapped for the mapped value.
    """
    rc = re.compile(r"[A-Za-z_]\w*")

    def translate(match):
        word = match.group(0)
        return replacements.get(word, word)

    # The third positional parameter of Pattern.sub() is `count`, not `flags`.
    # Passing `re.IGNORECASE | re.MULTILINE` there capped the number of
    # substitutions at 10. Flags belong on re.compile(), and none are needed
    # here because the lookup is meant to be case-sensitive.
    return rc.sub(translate, content)


class class_name(unittest.TestCase):
    """Fixture-driven checks for dictionary-based word replacement functions."""

    def setUp(self):
        """Create parallel lists of replacement tables, inputs and expected outputs."""
        operator_tokens = {
            'PLUS': '"+"',
            'DASH': '"-"',
            'BANG': '"!"',
            'TILDE': '"~"',
            'STAR': '"*"',
            'SLASH': '"/"',
            'PERCENT': '"%"',
            'LEFT_PAREN': '"("',
            'RIGHT_PAREN': '")"'
        }
        keyword_tokens = {
            "IF": "fi",
            "FOO": "oof",
            "BAR": "rab",
            "OP_FOO": "oof_op"
        }
        # Index i of each list below belongs to the same test case.
        self.replacements = [operator_tokens, keyword_tokens]
        self.texts = [
            textwrap.dedent("""\
                variable_identifier :
                    IDENTIFIER
                primary_expression :
                    foo1
                    foo2
                    foo3
                    LEFT_PAREN expression RIGHT_PAREN
                unary_operator :
                    PLUS
                    DASH
                    BANG
                    TILDE
                multiplicative_expression :
                    unary_expression
                    multiplicative_expression STAR unary_expression
                    multiplicative_expression SLASH unary_expression
                    multiplicative_expression PERCENT unary_expression\
            """),
            textwrap.dedent("""\
                IF identifier IDENTIFIER FOO BAR BARycentric
                OP_FOO
            """)
        ]
        self.expected_results = [
            textwrap.dedent("""\
                variable_identifier :
                    IDENTIFIER
                primary_expression :
                    foo1
                    foo2
                    foo3
                    "(" expression ")"
                unary_operator :
                    "+"
                    "-"
                    "!"
                    "~"
                multiplicative_expression :
                    unary_expression
                    multiplicative_expression "*" unary_expression
                    multiplicative_expression "/" unary_expression
                    multiplicative_expression "%" unary_expression\
            """),
            textwrap.dedent("""\
                fi identifier IDENTIFIER oof rab BARycentric
                oof_op
            """)
        ]

    def _tester(self, f):
        """Apply *f* to both fixture cases and compare with the expected text.

        *f* is called as ``f(text, replacements)``; the first mismatch fails
        the test.
        """
        for index in (0, 1):
            actual = f(self.texts[index], self.replacements[index])
            self.assertEqual(actual, self.expected_results[index])

    def test_replace_words(self):
        """Run the shared fixture cases against ``replace_words``."""
        self._tester(replace_words)


# Run the unittest test cases in this module only when executed as a script.
if __name__ == "__main__":
    unittest.main()

replace_words

函数试图使用上述代码中的替换字典，在给定文本中查找并替换区分大小写的整词，但在 self.assertEqual(f(texts[0], replacements[0]), expected_results[0]) 这一行失败了，我不知道原因。

因此，问题是，如何使用python中的替换词典查找和替换区分大小写的整词？

您可以使用

re.sub

和

re.findall

：

import re
def regex_string(d, to_lower=False):
    r"""Build an alternation pattern that matches each key of *d* as a whole word.

    Keys are passed through ``re.escape`` so that regex metacharacters in a
    key (for example ``.`` or ``+``) are matched literally instead of being
    interpreted as pattern syntax.

    Args:
        d: Mapping whose keys are the literal words to match.
        to_lower: When true, each key contributes two alternatives: its
            lower-cased form followed by the key as written.

    Returns:
        A regex source string made of ``\b<word>\b`` alternatives joined by
        ``|``. An empty mapping yields an empty string.
    """
    if not to_lower:
        return '|'.join(r'\b{}\b'.format(re.escape(key)) for key in d)
    return '|'.join(
        r'\b{}\b'.format(re.escape(form))
        for key in d
        for form in (key.lower(), key)
    )

# Token -> replacement text; lookups against this table are case-sensitive.
replacements = {
    'PLUS': '"+"',
    'DASH': '"-"',
    'BANG': '"!"',
    'TILDE': '"~"',
    'STAR': '"*"',
    'SLASH': '"/"',
    'PERCENT': '"%"',
    'LEFT_PAREN': '"("',
    'RIGHT_PAREN': '")"'
}
# Build the pattern once instead of once per regex call.
_pattern = regex_string(replacements, True)
# Kept for backward compatibility: `content` with every match swapped for a
# literal '{}' placeholder.
replaced = re.sub(_pattern, '{}', content)
# Substitute in a single pass with a callable. Filling the '{}' placeholders
# through str.format() would misbehave whenever `content` itself contains
# '{' or '}' characters.
final_result = re.sub(
    _pattern,
    lambda match: replacements.get(match.group(), match.group()),
    content,
)

输出（

案例1

）：

输出（

案例2

）：

或者更简短地说：

# One-pass variant: each match is looked up in `replacements` and left as-is
# when it is not a key.
replaced = re.sub(
    regex_string(replacements, True),
    lambda match: replacements.get(match.group(), match.group()),
    content,
)

您用来“测试”解决方案的输入已经不在帖子中了。请记得提供一个最小可复现示例（MCVE）。只有在您事先没有明确说明所有要求、却在答案发布后用许多不同的输入去测试它时，答案才会显得“有 bug”。同样，发帖时请务必先给出正确的 MCVE。就目前情况而言，我下面的回答返回了所需的输出。但是，如果我对输出的理解有误，请指正。

fi identifier IDENTIFIER oof rab BARycentric
oof_op

# Replace every matched word with its mapped value; words that are not keys
# of `replacements` are returned unchanged by the callback.
replaced = re.sub(
    regex_string(replacements, True),
    lambda found: replacements.get(found.group(), found.group()),
    content,
)