Python:在 BeautifulSoup 中查找直接包含所搜索文本的标签

Python 用于在Beautifulsoup中搜索文本的即时标记,python,beautifulsoup,Python,Beautifulsoup,我正在搜索一个特定的字符串,它应该与标记的文本值完全匹配。如何仅使用术语“RESULTS”进行搜索并将标签“h4”返回给我 soup = BeautifulSoup('<table><tbody><tr><td class="fulltext-body-paragraph"><a name="44"></a><div class="fulltext-LEVEL1"><h4>RESULTS</h4

我想搜索一个特定的字符串,它应当与某个标签的文本值完全匹配。如何只用“RESULTS”这个词进行搜索,并让它返回标签“h4”?

# Reproduction from the question: the fragment's only text, 'RESULTS', sits
# inside the innermost <h4> element.
soup = BeautifulSoup('<table><tbody><tr><td class="fulltext-body-paragraph"><a name="44"></a><div class="fulltext-LEVEL1"><h4>RESULTS</h4></div></td></tr></tbody></table>')

# The predicate compares each tag's whole `.text` to 'RESULTS'.
# NOTE(review): `.text` presumably also covers descendant text, so an outer
# wrapper tag satisfies the test before <h4> is reached -- consistent with
# the 'html' result recorded below.
soup.find(lambda el: el.text == 'RESULTS').name
Out: 'html' # I would like it to return 'h4'
# Reproduction from the question: the fragment's only text, 'RESULTS', sits
# inside the innermost <h4> element.
soup = BeautifulSoup('<table><tbody><tr><td class="fulltext-body-paragraph"><a name="44"></a><div class="fulltext-LEVEL1"><h4>RESULTS</h4></div></td></tr></tbody></table>')
# The predicate compares each tag's whole `.text` to 'RESULTS'; the recorded
# result below shows an outer wrapper tag being returned instead of <h4>.
soup.find(lambda el: el.text == 'RESULTS').name
Out: 'html'  # I would like it to return 'h4'
下面这个示例可以解决您的问题吗?

from bs4 import BeautifulSoup
from pprint import pprint
import re

# Four sibling <h2> headings; only the first one contains the word "cool".
html_text = """
<h2>this is cool #12345678901</h2>
<h2>this is nothing</h2>
<h2>this is interesting #126666678901</h2>
<h2>this is blah #124445678901</h2>
"""

# Name the parser explicitly so the resulting tree does not depend on which
# optional parsers happen to be installed.
soup = BeautifulSoup(html_text, 'html.parser')

# Even though the OP was not looking for 'cool', it's more understandable to work with item zero.
pattern = re.compile(r'cool')

# NOTE(review): the "#>>" lines below are the outputs recorded by the original
# author; they may differ between BeautifulSoup releases -- confirm against
# the installed version before relying on them.

# A text-only search yields the matching text node; its attributes show that
# 'parent' is the enclosing <h2>.
pprint(soup.find(text=pattern).__dict__)
#>> {'next': u'\n',
#>>  'nextSibling': None,
#>>  'parent': <h2>this is cool #12345678901</h2>,
#>>  'previous': <h2>this is cool #12345678901</h2>,
#>>  'previousSibling': None}

# Single-argument print(...) behaves the same on Python 2 and Python 3.
print(soup.find('h2'))
#>> <h2>this is cool #12345678901</h2>
print(soup.find('h2', text=pattern))
#>> this is cool #12345678901
print(soup.find('h2', text=pattern).parent)
#>> <h2>this is cool #12345678901</h2>
print(soup.find('h2', text=pattern) == soup.find('h2'))
#>> False
print(soup.find('h2', text=pattern) == soup.find('h2').text)
#>> True
print(soup.find('h2', text=pattern).parent == soup.find('h2'))
#>> True
from bs4 import BeautifulSoup
from pprint import pprint
import re

# Four sibling <h2> headings; only the first one contains the word "cool".
html_text = """
<h2>this is cool #12345678901</h2>
<h2>this is nothing</h2>
<h2>this is interesting #126666678901</h2>
<h2>this is blah #124445678901</h2>
"""

# Name the parser explicitly so the resulting tree does not depend on which
# optional parsers happen to be installed.
soup = BeautifulSoup(html_text, 'html.parser')

# Even though the OP was not looking for 'cool', it's more understandable to work with item zero.
pattern = re.compile(r'cool')

# NOTE(review): the "#>>" lines below are the outputs recorded by the original
# author; they may differ between BeautifulSoup releases -- confirm against
# the installed version before relying on them.

# A text-only search yields the matching text node; its attributes show that
# 'parent' is the enclosing <h2>.
pprint(soup.find(text=pattern).__dict__)
#>> {'next': u'\n',
#>>  'nextSibling': None,
#>>  'parent': <h2>this is cool #12345678901</h2>,
#>>  'previous': <h2>this is cool #12345678901</h2>,
#>>  'previousSibling': None}

# Single-argument print(...) behaves the same on Python 2 and Python 3.
print(soup.find('h2'))
#>> <h2>this is cool #12345678901</h2>
print(soup.find('h2', text=pattern))
#>> this is cool #12345678901
print(soup.find('h2', text=pattern).parent)
#>> <h2>this is cool #12345678901</h2>
print(soup.find('h2', text=pattern) == soup.find('h2'))
#>> False
print(soup.find('h2', text=pattern) == soup.find('h2').text)
#>> True
print(soup.find('h2', text=pattern).parent == soup.find('h2'))
#>> True
下面这个示例可以解决您的问题吗?

from bs4 import BeautifulSoup
from pprint import pprint
import re

# Four sibling <h2> headings; only the first one contains the word "cool".
html_text = """
<h2>this is cool #12345678901</h2>
<h2>this is nothing</h2>
<h2>this is interesting #126666678901</h2>
<h2>this is blah #124445678901</h2>
"""

# Name the parser explicitly so the resulting tree does not depend on which
# optional parsers happen to be installed.
soup = BeautifulSoup(html_text, 'html.parser')

# Even though the OP was not looking for 'cool', it's more understandable to work with item zero.
pattern = re.compile(r'cool')

# NOTE(review): the "#>>" lines below are the outputs recorded by the original
# author; they may differ between BeautifulSoup releases -- confirm against
# the installed version before relying on them.

# A text-only search yields the matching text node; its attributes show that
# 'parent' is the enclosing <h2>.
pprint(soup.find(text=pattern).__dict__)
#>> {'next': u'\n',
#>>  'nextSibling': None,
#>>  'parent': <h2>this is cool #12345678901</h2>,
#>>  'previous': <h2>this is cool #12345678901</h2>,
#>>  'previousSibling': None}

# Single-argument print(...) behaves the same on Python 2 and Python 3.
print(soup.find('h2'))
#>> <h2>this is cool #12345678901</h2>
print(soup.find('h2', text=pattern))
#>> this is cool #12345678901
print(soup.find('h2', text=pattern).parent)
#>> <h2>this is cool #12345678901</h2>
print(soup.find('h2', text=pattern) == soup.find('h2'))
#>> False
print(soup.find('h2', text=pattern) == soup.find('h2').text)
#>> True
print(soup.find('h2', text=pattern).parent == soup.find('h2'))
#>> True
from bs4 import BeautifulSoup
from pprint import pprint
import re

# Four sibling <h2> headings; only the first one contains the word "cool".
html_text = """
<h2>this is cool #12345678901</h2>
<h2>this is nothing</h2>
<h2>this is interesting #126666678901</h2>
<h2>this is blah #124445678901</h2>
"""

# Name the parser explicitly so the resulting tree does not depend on which
# optional parsers happen to be installed.
soup = BeautifulSoup(html_text, 'html.parser')

# Even though the OP was not looking for 'cool', it's more understandable to work with item zero.
pattern = re.compile(r'cool')

# NOTE(review): the "#>>" lines below are the outputs recorded by the original
# author; they may differ between BeautifulSoup releases -- confirm against
# the installed version before relying on them.

# A text-only search yields the matching text node; its attributes show that
# 'parent' is the enclosing <h2>.
pprint(soup.find(text=pattern).__dict__)
#>> {'next': u'\n',
#>>  'nextSibling': None,
#>>  'parent': <h2>this is cool #12345678901</h2>,
#>>  'previous': <h2>this is cool #12345678901</h2>,
#>>  'previousSibling': None}

# Single-argument print(...) behaves the same on Python 2 and Python 3.
print(soup.find('h2'))
#>> <h2>this is cool #12345678901</h2>
print(soup.find('h2', text=pattern))
#>> this is cool #12345678901
print(soup.find('h2', text=pattern).parent)
#>> <h2>this is cool #12345678901</h2>
print(soup.find('h2', text=pattern) == soup.find('h2'))
#>> False
print(soup.find('h2', text=pattern) == soup.find('h2').text)
#>> True
print(soup.find('h2', text=pattern).parent == soup.find('h2'))
#>> True