Python 在 BeautifulSoup 中按文本搜索并返回直接包含该文本的标签_Python_Beautifulsoup

Python 在 BeautifulSoup 中按文本搜索并返回直接包含该文本的标签

python

Python 用于在Beautifulsoup中搜索文本的即时标记,python,beautifulsoup,Python,Beautifulsoup,我正在搜索一个特定的字符串，它应该与标记的文本值完全匹配。如何仅使用术语“RESULTS”进行搜索并将标签“h4”返回给我 soup = BeautifulSoup('<table><tbody><tr><td class="fulltext-body-paragraph"><a name="44"></a><div class="fulltext-LEVEL1"><h4>RESULTS</h4

我想搜索一个特定的字符串，它必须与某个标签的文本完全匹配。如何只用 “RESULTS” 这个词进行搜索，并得到直接包含它的标签 “h4”？

# Build the tree from an HTML fragment whose only text, "RESULTS", sits inside <h4>.
soup = BeautifulSoup('<table><tbody><tr><td class="fulltext-body-paragraph"><a name="44"></a><div class="fulltext-LEVEL1"><h4>RESULTS</h4></div></td></tr></tbody></table>')

# NOTE: `.text` is the concatenation of all text beneath an element, so every
# ancestor of <h4> also compares equal to 'RESULTS'. find() returns the first
# match in document order, which is why the outermost element is reported below
# (presumably the <html> wrapper added by the parser -- depends on the parser used).
soup.find(lambda el: el.text == 'RESULTS').name
Out: 'html' # I would like it to return 'h4'

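soup = BeautifulSoup('<table><tbody><tr><td class="fulltext-body-paragraph"><a name="44"></a><div class="fulltext-LEVEL1"><h4>RESULTS</h4></div></td></tr></tbody></table>')
soup.find(lambda el: el.text == 'RESULTS').name
Out: 'html'  # 我希望它返回 'h4'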

下面的示例能解决您的问题吗？

from bs4 import BeautifulSoup
from pprint import pprint
import re

# Demo: locate a text node by its content, then step up to the tag that
# directly contains it. Written with single-argument print() calls so it runs
# unchanged on Python 2 and Python 3.

# Sample document: four sibling <h2> headings; only the first contains "cool".
html_text = """
<h2>this is cool #12345678901</h2>
<h2>this is nothing</h2>
<h2>this is interesting #126666678901</h2>
<h2>this is blah #124445678901</h2>
"""

# Name the parser explicitly: otherwise bs4 picks whichever parser happens to
# be installed (and warns), and different parsers can build different trees.
soup = BeautifulSoup(html_text, "html.parser")

# Even though the OP was not looking for 'cool', it's more understandable to work with item zero.
pattern = re.compile(r'cool')

# Searching by string alone returns the matching text node (a NavigableString),
# not the tag around it. `string=` is the current name of the older `text=`.
text_node = soup.find(string=pattern)

# How the text node is linked into the tree (documented bs4 attribute names).
pprint({
    'next_element': text_node.next_element,
    'next_sibling': text_node.next_sibling,
    'parent': text_node.parent,
    'previous_element': text_node.previous_element,
    'previous_sibling': text_node.previous_sibling,
})
#>> {'next_element': '\n',
#>>  'next_sibling': None,
#>>  'parent': <h2>this is cool #12345678901</h2>,
#>>  'previous_element': <h2>this is cool #12345678901</h2>,
#>>  'previous_sibling': None}

print(soup.find('h2'))
#>> <h2>this is cool #12345678901</h2>
print(text_node)
#>> this is cool #12345678901
print(text_node.parent)
#>> <h2>this is cool #12345678901</h2>
# The tag that directly holds the matched text -- what the question asks for.
print(text_node.parent.name)
#>> h2
print(text_node == soup.find('h2'))
#>> False
print(text_node == soup.find('h2').text)
#>> True
print(text_node.parent == soup.find('h2'))
#>> True

# NOTE: in bs4, combining a tag name with string= returns the matching Tag
# itself (not its text node), so no `.parent` hop is needed in that form.
print(soup.find('h2', string=pattern))
#>> <h2>this is cool #12345678901</h2>

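from bs4 import BeautifulSoup
from pprint import pprint
import re
html_text = """
<h2>this is cool #12345678901</h2>
<h2>this is nothing</h2>
<h2>this is interesting #126666678901</h2>
<h2>this is blah #124445678901</h2>
"""
soup = BeautifulSoup(html_text)
# 尽管提问者要找的并不是 'cool'，但用第一项（索引 0）来演示更容易理解。
pattern = re.compile(r'cool')
pprint(soup.find(text=pattern).__dict__)
#>> {'next': u'\n',
#>>  'nextSibling': None,
#>>  'parent': <h2>this is cool #12345678901</h2>,
#>>  'previous': <h2>this is cool #12345678901</h2>,
#>>  'previousSibling': None}
print soup.find('h2')
#>> <h2>this is cool #12345678901</h2>
print soup.find('h2', text=pattern)
#>> this is cool #12345678901
print soup.find('h2', text=pattern).parent
#>> <h2>this is cool #12345678901</h2>
print soup.find('h2', text=pattern) == soup.find('h2')
#>> False
print soup.find('h2', text=pattern) == soup.find('h2').text
#>> True
print soup.find('h2', text=pattern).parent == soup.find('h2')
#>> True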

下面的示例能解决您的问题吗？

from bs4 import BeautifulSoup
from pprint import pprint
import re

# Demo: locate a text node by its content, then step up to the tag that
# directly contains it. Written with single-argument print() calls so it runs
# unchanged on Python 2 and Python 3.

# Sample document: four sibling <h2> headings; only the first contains "cool".
html_text = """
<h2>this is cool #12345678901</h2>
<h2>this is nothing</h2>
<h2>this is interesting #126666678901</h2>
<h2>this is blah #124445678901</h2>
"""

# Name the parser explicitly: otherwise bs4 picks whichever parser happens to
# be installed (and warns), and different parsers can build different trees.
soup = BeautifulSoup(html_text, "html.parser")

# Even though the OP was not looking for 'cool', it's more understandable to work with item zero.
pattern = re.compile(r'cool')

# Searching by string alone returns the matching text node (a NavigableString),
# not the tag around it. `string=` is the current name of the older `text=`.
text_node = soup.find(string=pattern)

# How the text node is linked into the tree (documented bs4 attribute names).
pprint({
    'next_element': text_node.next_element,
    'next_sibling': text_node.next_sibling,
    'parent': text_node.parent,
    'previous_element': text_node.previous_element,
    'previous_sibling': text_node.previous_sibling,
})
#>> {'next_element': '\n',
#>>  'next_sibling': None,
#>>  'parent': <h2>this is cool #12345678901</h2>,
#>>  'previous_element': <h2>this is cool #12345678901</h2>,
#>>  'previous_sibling': None}

print(soup.find('h2'))
#>> <h2>this is cool #12345678901</h2>
print(text_node)
#>> this is cool #12345678901
print(text_node.parent)
#>> <h2>this is cool #12345678901</h2>
# The tag that directly holds the matched text -- what the question asks for.
print(text_node.parent.name)
#>> h2
print(text_node == soup.find('h2'))
#>> False
print(text_node == soup.find('h2').text)
#>> True
print(text_node.parent == soup.find('h2'))
#>> True

# NOTE: in bs4, combining a tag name with string= returns the matching Tag
# itself (not its text node), so no `.parent` hop is needed in that form.
print(soup.find('h2', string=pattern))
#>> <h2>this is cool #12345678901</h2>

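from bs4 import BeautifulSoup
from pprint import pprint
import re
html_text = """
<h2>this is cool #12345678901</h2>
<h2>this is nothing</h2>
<h2>this is interesting #126666678901</h2>
<h2>this is blah #124445678901</h2>
"""
soup = BeautifulSoup(html_text)
# 尽管提问者要找的并不是 'cool'，但用第一项（索引 0）来演示更容易理解。
pattern = re.compile(r'cool')
pprint(soup.find(text=pattern).__dict__)
#>> {'next': u'\n',
#>>  'nextSibling': None,
#>>  'parent': <h2>this is cool #12345678901</h2>,
#>>  'previous': <h2>this is cool #12345678901</h2>,
#>>  'previousSibling': None}
print soup.find('h2')
#>> <h2>this is cool #12345678901</h2>
print soup.find('h2', text=pattern)
#>> this is cool #12345678901
print soup.find('h2', text=pattern).parent
#>> <h2>this is cool #12345678901</h2>
print soup.find('h2', text=pattern) == soup.find('h2')
#>> False
print soup.find('h2', text=pattern) == soup.find('h2').text
#>> True
print soup.find('h2', text=pattern).parent == soup.find('h2')
#>> True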