Python 正则表达式 sub 函数的困惑
有四个关键词:Python正则表达式子类混淆,python,regex,Python,Regex,有四个关键词:标题,博客,标签,状态 将从其各自的匹配项中删除多余的关键字 示例: blog:blog状态标题标签和返回状态标题标签和而不是 博客状态标题标签和 sub函数在看到blog:后应该与+匹配,所以我不知道为什么它将blog视为+的例外 Regex: re.sub(r'((^|\n|\s|\b)(title|blog|tags|state)(\:\s).+(\n|$))', matcher, a) def n15(): import re a = """blog: b
有四个关键词:title、blog、tags、state。
多余的关键词会从各自的匹配结果中被删除。示例:
`blog: blog state title tags and`
返回 `state title tags and`,
而不是 `blog state title tags and`。
sub 函数在遇到 `blog:` 之后,本应由 `.+` 匹配其余内容,所以我不明白为什么它把 blog 当作 `.+` 的例外。
Regex:
re.sub(r'((^|\n|\s|\b)(title|blog|tags|state)(\:\s).+(\n|$))', matcher, a)
def n15():
    """Extract ``keyword: value`` headers from a sample post and print them.

    Every span matched by the regex (keywords ``title``, ``blog``, ``tags``,
    ``state`` followed by ``": "`` and the rest of the line) is removed from
    the sample text and its value stored in a dict. The remaining text is
    wrapped as an HTML paragraph under the ``body`` key. The dict is printed;
    nothing is returned.
    """
    import re
    a = """blog: blog: fooblog
state: private
title: this is atitle bun
and text"""
    kwargs = {}

    def matcher(string):
        """Record the value of one matched header and delete it from the text."""
        # NOTE: this replace chain is the behaviour the question is about.
        # It strips EVERY occurrence of the keyword (group 3) and of ": "
        # (group 4) from the whole match, not just the leading prefix, so
        # "blog: blog: fooblog" becomes "foo" and "atitle" loses "title".
        v = string.group(1).replace(string.group(2), '').replace(string.group(3), '').replace(string.group(4), '').replace(string.group(5), '')
        if string.group(3) == 'title':
            kwargs['title'] = v
        elif string.group(3) == 'blog':
            kwargs['blog_url'] = v
        elif string.group(3) == 'tags':
            kwargs['comma_separated_tags'] = v
        elif string.group(3) == 'state':
            kwargs['post_state'] = v
        # Returning '' removes the matched header from the text.
        return ''

    a = re.sub(r'((^|\n|\s|\b)(title|blog|tags|state)(\:\s).+(\n|$))', matcher, a)
    a = a.replace('\n', '<br />')
    a = a.replace('\r', '')
    a = a.replace('"', r'\"')
    a = '<p>' + a + '</p>'
    kwargs['body'] = a
    # Single-argument print() gives the same output on Python 2 and 3.
    print(kwargs)
{'body': '<p>and text</p>', 'post_state': 'private', 'blog_url': 'foo', 'title': 'this is a bun'}
{'body': '<p>and text</p>', 'post_state': 'private', 'blog_url': 'fooblog', 'title': 'this is atitle bun'}
代码:
re.sub(r'((^|\n|\s|\b)(title|blog|tags|state)(\:\s).+(\n|$))', matcher, a)
def n15():
    """Extract ``keyword: value`` headers from a sample post and print them.

    Every span matched by the regex (keywords ``title``, ``blog``, ``tags``,
    ``state`` followed by ``": "`` and the rest of the line) is removed from
    the sample text and its value stored in a dict. The remaining text is
    wrapped as an HTML paragraph under the ``body`` key. The dict is printed;
    nothing is returned.
    """
    import re
    a = """blog: blog: fooblog
state: private
title: this is atitle bun
and text"""
    kwargs = {}

    def matcher(string):
        """Record the value of one matched header and delete it from the text."""
        # NOTE: this replace chain is the behaviour the question is about.
        # It strips EVERY occurrence of the keyword (group 3) and of ": "
        # (group 4) from the whole match, not just the leading prefix, so
        # "blog: blog: fooblog" becomes "foo" and "atitle" loses "title".
        v = string.group(1).replace(string.group(2), '').replace(string.group(3), '').replace(string.group(4), '').replace(string.group(5), '')
        if string.group(3) == 'title':
            kwargs['title'] = v
        elif string.group(3) == 'blog':
            kwargs['blog_url'] = v
        elif string.group(3) == 'tags':
            kwargs['comma_separated_tags'] = v
        elif string.group(3) == 'state':
            kwargs['post_state'] = v
        # Returning '' removes the matched header from the text.
        return ''

    a = re.sub(r'((^|\n|\s|\b)(title|blog|tags|state)(\:\s).+(\n|$))', matcher, a)
    a = a.replace('\n', '<br />')
    a = a.replace('\r', '')
    a = a.replace('"', r'\"')
    a = '<p>' + a + '</p>'
    kwargs['body'] = a
    # Single-argument print() gives the same output on Python 2 and 3.
    print(kwargs)
{'body': '<p>and text</p>', 'post_state': 'private', 'blog_url': 'foo', 'title': 'this is a bun'}
{'body': '<p>and text</p>', 'post_state': 'private', 'blog_url': 'fooblog', 'title': 'this is atitle bun'}
def n15():
    """Extract ``keyword: value`` headers from a sample post and print them.

    Every span matched by the regex (keywords ``title``, ``blog``, ``tags``,
    ``state`` followed by ``": "`` and the rest of the line) is removed from
    the sample text and its value stored in a dict. The remaining text is
    wrapped as an HTML paragraph under the ``body`` key. The dict is printed;
    nothing is returned.
    """
    import re
    a = """blog: blog: fooblog
state: private
title: this is atitle bun
and text"""
    kwargs = {}

    def matcher(string):
        """Record the value of one matched header and delete it from the text."""
        # NOTE: this replace chain is the behaviour the question is about.
        # It strips EVERY occurrence of the keyword (group 3) and of ": "
        # (group 4) from the whole match, not just the leading prefix, so
        # "blog: blog: fooblog" becomes "foo" and "atitle" loses "title".
        v = string.group(1).replace(string.group(2), '').replace(string.group(3), '').replace(string.group(4), '').replace(string.group(5), '')
        if string.group(3) == 'title':
            kwargs['title'] = v
        elif string.group(3) == 'blog':
            kwargs['blog_url'] = v
        elif string.group(3) == 'tags':
            kwargs['comma_separated_tags'] = v
        elif string.group(3) == 'state':
            kwargs['post_state'] = v
        # Returning '' removes the matched header from the text.
        return ''

    a = re.sub(r'((^|\n|\s|\b)(title|blog|tags|state)(\:\s).+(\n|$))', matcher, a)
    a = a.replace('\n', '<br />')
    a = a.replace('\r', '')
    a = a.replace('"', r'\"')
    a = '<p>' + a + '</p>'
    kwargs['body'] = a
    # Single-argument print() gives the same output on Python 2 and 3.
    print(kwargs)
输出:
re.sub(r'((^|\n|\s|\b)(title|blog|tags|state)(\:\s).+(\n|$))', matcher, a)
def n15():
    """Extract ``keyword: value`` headers from a sample post and print them.

    Every span matched by the regex (keywords ``title``, ``blog``, ``tags``,
    ``state`` followed by ``": "`` and the rest of the line) is removed from
    the sample text and its value stored in a dict. The remaining text is
    wrapped as an HTML paragraph under the ``body`` key. The dict is printed;
    nothing is returned.
    """
    import re
    a = """blog: blog: fooblog
state: private
title: this is atitle bun
and text"""
    kwargs = {}

    def matcher(string):
        """Record the value of one matched header and delete it from the text."""
        # NOTE: this replace chain is the behaviour the question is about.
        # It strips EVERY occurrence of the keyword (group 3) and of ": "
        # (group 4) from the whole match, not just the leading prefix, so
        # "blog: blog: fooblog" becomes "foo" and "atitle" loses "title".
        v = string.group(1).replace(string.group(2), '').replace(string.group(3), '').replace(string.group(4), '').replace(string.group(5), '')
        if string.group(3) == 'title':
            kwargs['title'] = v
        elif string.group(3) == 'blog':
            kwargs['blog_url'] = v
        elif string.group(3) == 'tags':
            kwargs['comma_separated_tags'] = v
        elif string.group(3) == 'state':
            kwargs['post_state'] = v
        # Returning '' removes the matched header from the text.
        return ''

    a = re.sub(r'((^|\n|\s|\b)(title|blog|tags|state)(\:\s).+(\n|$))', matcher, a)
    a = a.replace('\n', '<br />')
    a = a.replace('\r', '')
    a = a.replace('"', r'\"')
    a = '<p>' + a + '</p>'
    kwargs['body'] = a
    # Single-argument print() gives the same output on Python 2 and 3.
    print(kwargs)
{'body': '<p>and text</p>', 'post_state': 'private', 'blog_url': 'foo', 'title': 'this is a bun'}
{'body': '<p>and text</p>', 'post_state': 'private', 'blog_url': 'fooblog', 'title': 'this is atitle bun'}
{'body': '<p>and text</p>', 'post_state': 'private', 'blog_url': 'foo', 'title': 'this is a bun'}
编辑:所需输出:
re.sub(r'((^|\n|\s|\b)(title|blog|tags|state)(\:\s).+(\n|$))', matcher, a)
def n15():
    """Extract ``keyword: value`` headers from a sample post and print them.

    Every span matched by the regex (keywords ``title``, ``blog``, ``tags``,
    ``state`` followed by ``": "`` and the rest of the line) is removed from
    the sample text and its value stored in a dict. The remaining text is
    wrapped as an HTML paragraph under the ``body`` key. The dict is printed;
    nothing is returned.
    """
    import re
    a = """blog: blog: fooblog
state: private
title: this is atitle bun
and text"""
    kwargs = {}

    def matcher(string):
        """Record the value of one matched header and delete it from the text."""
        # NOTE: this replace chain is the behaviour the question is about.
        # It strips EVERY occurrence of the keyword (group 3) and of ": "
        # (group 4) from the whole match, not just the leading prefix, so
        # "blog: blog: fooblog" becomes "foo" and "atitle" loses "title".
        v = string.group(1).replace(string.group(2), '').replace(string.group(3), '').replace(string.group(4), '').replace(string.group(5), '')
        if string.group(3) == 'title':
            kwargs['title'] = v
        elif string.group(3) == 'blog':
            kwargs['blog_url'] = v
        elif string.group(3) == 'tags':
            kwargs['comma_separated_tags'] = v
        elif string.group(3) == 'state':
            kwargs['post_state'] = v
        # Returning '' removes the matched header from the text.
        return ''

    a = re.sub(r'((^|\n|\s|\b)(title|blog|tags|state)(\:\s).+(\n|$))', matcher, a)
    a = a.replace('\n', '<br />')
    a = a.replace('\r', '')
    a = a.replace('"', r'\"')
    a = '<p>' + a + '</p>'
    kwargs['body'] = a
    # Single-argument print() gives the same output on Python 2 and 3.
    print(kwargs)
{'body': '<p>and text</p>', 'post_state': 'private', 'blog_url': 'foo', 'title': 'this is a bun'}
{'body': '<p>and text</p>', 'post_state': 'private', 'blog_url': 'fooblog', 'title': 'this is atitle bun'}
{'body': '<p>and text</p>', 'post_state': 'private', 'blog_url': 'fooblog', 'title': 'this is atitle bun'}
`.replace(string.group(3), '')` 会把匹配内容中所有出现的 `blog` 都替换为 `''`,而不仅仅是开头的关键词。
与其尝试替换匹配字符串的所有其他部分(这将很难获得正确的结果),我建议捕获原始匹配中实际需要的字符串
r'((^|\n|\s|\b)(title|blog|tags|state)(\:\s)(.+)(\n|$))'
在 `.+` 两侧加上 `()` 来捕获字符串的该部分,然后
v = match.group(5)
放在 `matcher` 函数的开头。
它应该跨多行查找,还是对每一行重复匹配?您期望的输出是什么?