Python 按分支和祖先对同一类中的元素进行排序
我得到了以下html(所有元素名称*、名称**、名称***都是未知的):Python 按分支和祖先对同一类中的元素进行排序,python,python-3.x,beautifulsoup,html-parsing,Python,Python 3.x,Beautifulsoup,Html Parsing,我得到了以下html(所有元素名称*、名称**、名称***都是未知的): 你能帮我把这两个列表链接到字典里吗?你可以试试这个脚本。它利用itertools.groupby()将元素分组为键、值: data = '''<a class="one">nameA</a> <a class="two">nameAA</a> <a class="three">nameAAA</a> <a
你能帮我把这两个列表关联成一个字典吗?
你可以试试这个脚本。它利用 itertools.groupby() 将元素分组为键和值:
# Sample markup: a flat sequence of <a> tags whose class attribute
# ("one", "two" or "three") is the only marker distinguishing them.
data = '''<a class="one">nameA</a>
<a class="two">nameAA</a>
<a class="three">nameAAA</a>
<a class="three">nameAAB</a>
<a class="two">nameAB</a>
<a class="three">nameABA</a>
<a class="three">nameABB</a>
<a class="one">nameB</a>
<a class="two">nameBA</a>
<a class="three">nameBAA</a>
<a class="three">nameBAB</a>
<a class="two">nameBB</a>
<a class="three">nameBBA</a>
<a class="three">nameBBB</a>'''
from bs4 import BeautifulSoup
from itertools import groupby
# Parse the sample markup, asking BeautifulSoup for the 'html.parser' parser.
soup = BeautifulSoup(data, 'html.parser')
def get_key_values(soup):
    """Yield ``(key, values)`` pairs linking ``.one`` elements to ``.three`` elements.

    The elements matched by the selector ``'.one, .three'`` are walked in the
    order ``soup.select`` returns them and split into alternating runs with
    ``itertools.groupby``: runs whose ``class`` contains ``'one'`` (keys) and
    runs of everything else matched (values).

    Args:
        soup: Any object with a ``select(selector)`` method returning elements
            that support ``element['class']`` and expose a ``.text`` attribute
            (e.g. a ``BeautifulSoup`` document).

    Yields:
        Tuples ``(key_text, [value_text, ...])``, one per run of value
        elements. ``key_text`` is ``None`` when value elements appear before
        any key element. A key element that is not followed by any value
        element produces no pair.
    """
    current_key = None
    for is_key, group in groupby(soup.select('.one, .three'), lambda k: 'one' in k['class']):
        if is_key:
            # Several key elements may be adjacent.  The values that follow
            # belong to the nearest preceding key, i.e. the last element of
            # this run, so walk the whole run instead of taking the first.
            for element in group:
                current_key = element.text
        else:
            yield current_key, [i.text for i in group]
# Collect the yielded (key, values) pairs into a dictionary.
out = dict(get_key_values(soup))
from pprint import pprint
# Pretty-print the resulting mapping.
pprint(out)
你可以试试这个脚本。它利用 itertools.groupby() 将元素分组为键和值:
# Sample markup: a flat sequence of <a> tags whose class attribute
# ("one", "two" or "three") is the only marker distinguishing them.
data = '''<a class="one">nameA</a>
<a class="two">nameAA</a>
<a class="three">nameAAA</a>
<a class="three">nameAAB</a>
<a class="two">nameAB</a>
<a class="three">nameABA</a>
<a class="three">nameABB</a>
<a class="one">nameB</a>
<a class="two">nameBA</a>
<a class="three">nameBAA</a>
<a class="three">nameBAB</a>
<a class="two">nameBB</a>
<a class="three">nameBBA</a>
<a class="three">nameBBB</a>'''
from bs4 import BeautifulSoup
from itertools import groupby
# Parse the sample markup, asking BeautifulSoup for the 'html.parser' parser.
soup = BeautifulSoup(data, 'html.parser')
def get_key_values(soup):
    """Yield ``(key, values)`` pairs linking ``.one`` elements to ``.three`` elements.

    The elements matched by the selector ``'.one, .three'`` are walked in the
    order ``soup.select`` returns them and split into alternating runs with
    ``itertools.groupby``: runs whose ``class`` contains ``'one'`` (keys) and
    runs of everything else matched (values).

    Args:
        soup: Any object with a ``select(selector)`` method returning elements
            that support ``element['class']`` and expose a ``.text`` attribute
            (e.g. a ``BeautifulSoup`` document).

    Yields:
        Tuples ``(key_text, [value_text, ...])``, one per run of value
        elements. ``key_text`` is ``None`` when value elements appear before
        any key element. A key element that is not followed by any value
        element produces no pair.
    """
    current_key = None
    for is_key, group in groupby(soup.select('.one, .three'), lambda k: 'one' in k['class']):
        if is_key:
            # Several key elements may be adjacent.  The values that follow
            # belong to the nearest preceding key, i.e. the last element of
            # this run, so walk the whole run instead of taking the first.
            for element in group:
                current_key = element.text
        else:
            yield current_key, [i.text for i in group]
# Collect the yielded (key, values) pairs into a dictionary.
out = dict(get_key_values(soup))
from pprint import pprint
# Pretty-print the resulting mapping.
pprint(out)
请尝试以下代码
# Build a mapping from the leading text of every ``.one`` element to the
# texts of the ``.three`` elements selected beneath that element.
# NOTE(review): ``data`` is not defined in this snippet; presumably it holds
# markup in which the ``.three`` elements are nested inside the ``.one``
# elements -- confirm against the markup actually being parsed.
itemdict = {}
soup = BeautifulSoup(data, 'lxml')
for item in soup.select('.one'):
    # The key is the element's first child with surrounding whitespace
    # removed; this assumes that first child is a text node.
    name = item.contents[0].strip()
    itemdict[name] = [child.text for child in item.select('.three')]
print(itemdict)
这应该打印出来
{'nameA': ['nameAAA', 'nameAAB', 'nameABA', 'nameABB'], 'nameB': ['nameBAA', 'nameBAB', 'nameBBA', 'nameBBB']}
请尝试以下代码
# Build a mapping from the leading text of every ``.one`` element to the
# texts of the ``.three`` elements selected beneath that element.
# NOTE(review): ``data`` is not defined in this snippet; presumably it holds
# markup in which the ``.three`` elements are nested inside the ``.one``
# elements -- confirm against the markup actually being parsed.
itemdict = {}
soup = BeautifulSoup(data, 'lxml')
for item in soup.select('.one'):
    # The key is the element's first child with surrounding whitespace
    # removed; this assumes that first child is a text node.
    name = item.contents[0].strip()
    itemdict[name] = [child.text for child in item.select('.three')]
print(itemdict)
这应该打印出来
{'nameA': ['nameAAA', 'nameAAB', 'nameABA', 'nameABB'], 'nameB': ['nameBAA', 'nameBAB', 'nameBBA', 'nameBBB']}
@Mohammadreza 您需要导入 pprint 模块:from pprint import pprint
@Mohammadreza 您需要导入 pprint 模块:from pprint import pprint