Python:使用 Beautiful Soup 爬取网页并提取 <br> 标签之间的文本
我的html代码如下所示:Python 漂亮的汤爬行和从中间提取文本<;br>;,python,html,python-2.7,beautifulsoup,Python,Html,Python 2.7,Beautifulsoup,我的html代码如下所示: <br><a href="/drink12xy569.html">Alien Suicide</a> <br><a href="/drink792.html">All Jacked Up</a> <br><a href="/drink3805.html">All Night Hunter</a> <br><a href="/drink7
<br><a href="/drink12xy569.html">Alien Suicide</a>
<br><a href="/drink792.html">All Jacked Up</a>
<br><a href="/drink3805.html">All Night Hunter</a>
<br><a href="/drink796.html">Alley Shooter</a>
<br><a href="/drink10013.html">Alligator Sperm</a>
<br><a href="/drink804.html">Almond Delight</a>
<br><a href="/drink11135.html">Almond Gravy</a>
<br><a href="/drink7519.html">Almond Joy #2</a>
<br><a href="/drinks1r2563.html">Almond Kiss</a>
<br><a href="/drink12xy578.html">Amaretto Pie</a>
<br><a href="/drink11144.html">Amaretto Sourball</a>
<br><a href="/drinkp15q144.html">Ambuco Cinnamon Shooter</a>
<br><a href="/drink835.html">Amenie Mama</a>
<br><a href="/drink7521.html">American Death</a>
import urllib2
from bs4 import BeautifulSoup
# Scrape every drink recipe linked from the category page and print the
# text of each recipe's ingredients block.
#
# The original wrapped this in ``for i in range(28)`` but never used ``i``:
# every pass requested the same category URL, so the loop (and the unused
# ``url = []`` placeholder) is dropped.
# NOTE(review): if the 28 was meant to walk paginated listings, the
# per-page URL scheme has to be confirmed against the site first.
CATEGORY_URL = "http://www.drinksmixer.com/cat/3/"
SITE_ROOT = "http://drinksmixer.com"

page = urllib2.urlopen(CATEGORY_URL)
soup = BeautifulSoup(page.read())

for link in soup.find_all('a'):
    # Anchors without an href would raise KeyError on link['href'];
    # .get() lets them fall through the filter instead.
    href = link.get('href', '')
    # Only site-relative paths can be appended to SITE_ROOT. Without the
    # leading-slash check, any absolute link to the site would match too,
    # because the hostname itself contains "drink".
    if not href.startswith('/') or "drink" not in href:
        continue

    print(href)
    print("****\n\n")

    url = SITE_ROOT + href
    page1 = urllib2.urlopen(url)
    soup1 = BeautifulSoup(page1.read())
    divs = soup1.find('div', {"class": "ingredients"})
    # find() returns None when the page has no ingredients block; skip it
    # rather than crash on divs.text.
    if divs is None:
        continue
    print(divs.text.encode("utf-8"))
import Tkinter
from Tkinter import *
def show_entry_fields():
    """Print whatever is currently typed into the shot-name entry box."""
    shot_name = e1.get()
    print("Shot Name: %s" % shot_name)


# Build a small search form: a caption, a text entry, and a Search button
# wired to show_entry_fields, then hand control to the Tk event loop.
master = Tk()

caption = Label(master, text="Shot Name")
caption.grid(row=0)

e1 = Entry(master)
e1.grid(row=0, column=1)

search_button = Button(master, text='Search', command=show_entry_fields)
search_button.grid(row=3, column=1, sticky=W, pady=4)

mainloop()
我的gui界面如下所示:
<br><a href="/drink12xy569.html">Alien Suicide</a>
<br><a href="/drink792.html">All Jacked Up</a>
<br><a href="/drink3805.html">All Night Hunter</a>
<br><a href="/drink796.html">Alley Shooter</a>
<br><a href="/drink10013.html">Alligator Sperm</a>
<br><a href="/drink804.html">Almond Delight</a>
<br><a href="/drink11135.html">Almond Gravy</a>
<br><a href="/drink7519.html">Almond Joy #2</a>
<br><a href="/drinks1r2563.html">Almond Kiss</a>
<br><a href="/drink12xy578.html">Amaretto Pie</a>
<br><a href="/drink11144.html">Amaretto Sourball</a>
<br><a href="/drinkp15q144.html">Ambuco Cinnamon Shooter</a>
<br><a href="/drink835.html">Amenie Mama</a>
<br><a href="/drink7521.html">American Death</a>
import urllib2
from bs4 import BeautifulSoup
# Scrape every drink recipe linked from the category page and print the
# text of each recipe's ingredients block.
#
# The original wrapped this in ``for i in range(28)`` but never used ``i``:
# every pass requested the same category URL, so the loop (and the unused
# ``url = []`` placeholder) is dropped.
# NOTE(review): if the 28 was meant to walk paginated listings, the
# per-page URL scheme has to be confirmed against the site first.
CATEGORY_URL = "http://www.drinksmixer.com/cat/3/"
SITE_ROOT = "http://drinksmixer.com"

page = urllib2.urlopen(CATEGORY_URL)
soup = BeautifulSoup(page.read())

for link in soup.find_all('a'):
    # Anchors without an href would raise KeyError on link['href'];
    # .get() lets them fall through the filter instead.
    href = link.get('href', '')
    # Only site-relative paths can be appended to SITE_ROOT. Without the
    # leading-slash check, any absolute link to the site would match too,
    # because the hostname itself contains "drink".
    if not href.startswith('/') or "drink" not in href:
        continue

    print(href)
    print("****\n\n")

    url = SITE_ROOT + href
    page1 = urllib2.urlopen(url)
    soup1 = BeautifulSoup(page1.read())
    divs = soup1.find('div', {"class": "ingredients"})
    # find() returns None when the page has no ingredients block; skip it
    # rather than crash on divs.text.
    if divs is None:
        continue
    print(divs.text.encode("utf-8"))
import Tkinter
from Tkinter import *
def show_entry_fields():
    """Print whatever is currently typed into the shot-name entry box."""
    shot_name = e1.get()
    print("Shot Name: %s" % shot_name)


# Build a small search form: a caption, a text entry, and a Search button
# wired to show_entry_fields, then hand control to the Tk event loop.
master = Tk()

caption = Label(master, text="Shot Name")
caption.grid(row=0)

e1 = Entry(master)
e1.grid(row=0, column=1)

search_button = Button(master, text='Search', command=show_entry_fields)
search_button.grid(row=3, column=1, sticky=W, pady=4)

mainloop()
我只需要在我提取的信息中实现搜索的帮助。设计UI并不容易。你的代码几乎没问题。我将它分为多个函数,并添加了您要求的基本搜索
import urllib2
from bs4 import BeautifulSoup
import Tkinter
from Tkinter import *
# Entry widget holding the typed shot name; assigned in main().
e1 = None
# Flat list of <a> tags gathered from the category pages by get_drinks().
links = []


def get_drinks():
    """Download the category listing pages and collect their anchor tags.

    Every ``<a>`` tag found on each fetched page is added to the
    module-level ``links`` list, which ``get_recipe`` searches afterwards.
    Network errors from ``urllib2.urlopen`` propagate to the caller.
    """
    # NOTE(review): pages are requested as .../cat/3/0 through .../cat/3/27;
    # confirm the site's real pagination scheme and first page number.
    for i in range(28):
        # str(i): concatenating the int directly raised TypeError on the
        # very first iteration.
        url = "http://www.drinksmixer.com/cat/3/" + str(i)
        page = urllib2.urlopen(url)
        soup = BeautifulSoup(page.read())
        # extend, not append: get_recipe iterates ``links`` expecting
        # individual tags, not one nested result list per page.
        links.extend(soup.find_all('a'))
def get_recipe(drink_name):
    """Return the ingredients text for the drink called *drink_name*.

    Scans the module-level ``links`` list for the first anchor whose href
    is a site-relative path containing "drink" and whose contents include
    *drink_name*, downloads that page, and returns the text of its
    ``<div class="ingredients">`` encoded as UTF-8.

    Returns None when no link matches or the matched page has no
    ingredients block (the original hit an unbound ``recipe`` in the
    first case and an AttributeError in the second).
    """
    print(drink_name)
    for link in links:
        # .get() tolerates anchors that carry no href attribute at all.
        href = link.get('href', '')
        # Require a leading slash: the href is appended to the site root
        # below, and the bare substring test would otherwise also match
        # absolute URLs whose hostname contains "drink".
        if not href.startswith('/') or "drink" not in href:
            continue
        if drink_name not in link.contents:
            continue

        print("****\n\n")
        url = "http://drinksmixer.com" + href
        page1 = urllib2.urlopen(url)
        soup1 = BeautifulSoup(page1.read())
        divs = soup1.find('div', {"class": "ingredients"})
        if divs is None:
            return None
        # Stop at the first match so no further pages are downloaded.
        return divs.text.encode("utf-8")
    return None
def show_entry_fields():
    """Search for the typed shot name and print its recipe.

    Reads the name from the ``e1`` entry widget, echoes it, then prints
    whatever ``get_recipe`` returns for it.
    """
    drink_name = e1.get()
    print("Shot Name: %s" % (drink_name,))

    recipe = get_recipe(drink_name)
    print(recipe)
    # A popup would be friendlier than stdout; to switch, use:
    # tkMessageBox.showinfo(drink_name, recipe)
def main():
    """Build the search window and enter the Tk event loop.

    Stores the entry widget in the module-level ``e1`` so that
    ``show_entry_fields`` can read the typed name when Search is clicked.
    """
    global e1
    master = Tk()

    caption = Label(master, text="Shot Name")
    caption.grid(row=0)

    e1 = Entry(master)
    e1.grid(row=0, column=1)

    search_button = Button(master, text='Search', command=show_entry_fields)
    search_button.grid(row=3, column=1, sticky=W, pady=4)

    mainloop()


if __name__ == "__main__":
    # Collect the link list before the window opens, then start the GUI.
    get_drinks()
    main()
用
try:except:
从1开始(http://www.drinksmixer.com/drink####.html
)如何在gui中(而不是在python控制台中)获得弹出响应?取消注释这一行:tkMessageBox.showinfo(drink_name, recipe)
这应该可以。@robersam它能工作吗?如果是这样的话,那么你可能想关闭/接受这个答案。确实如此,但我仍然没有得到关于如何查看网站不同页面的帮助。在这里,我编辑了答案,以显示一种可能,即遍历多个页面。如果有很多页面,您应该检查Python的生成器,以便在应用程序运行时在后台加载更多页面/饮料。