Python 需要一些帮助来查看从网站拉链接的脚本吗_Python_Web Scraping_Beautifulsoup_Python Requests_Urllib

Python：需要帮助检查一个从网站提取链接的脚本

python web-scraping

Python 需要一些帮助来查看从网站拉链接的脚本吗,python,web-scraping,beautifulsoup,python-requests,urllib,Python,Web Scraping,Beautifulsoup,Python Requests,Urllib,我正在使用一个来自Kodi插件的Python脚本，我已经测试成功地提取了音频，并且想让它与另一个网站一起工作，但不确定为什么它不工作我需要从以下网站为每集提取的内容示例如下（需要4个字段，以粗体突出显示）： & 我遇到的问题是，上面的网站对标题等的使用非常糟糕，而且混乱不堪。我看到的所有指南都提到了或class=headers，但这似乎不是这个网站上的一件事。任何帮助都将不胜感激我在这里使用的代码：导入操作系统导入系统导入URL库导入URL解析 #导入xbmcaddon #导入XB

我正在使用一个来自Kodi插件的Python脚本，我已经测试成功地提取了音频，并且想让它与另一个网站一起工作，但不确定为什么它不工作

我需要从以下网站为每集提取的内容示例如下（需要4个字段，以粗体突出显示）：
&
我遇到的问题是，上面的网站对标题等的使用非常糟糕，而且混乱不堪。我看到的所有指南都提到了或class=headers，但这似乎不是这个网站上的一件事。任何帮助都将不胜感激
我在这里使用的代码：

导入操作系统导入系统导入URL库导入URL解析 #导入xbmcaddon #导入XBMGUI #进口XBMPLUGIN 导入请求从bs4导入BeautifulSoup def生成url（查询）： base_url=sys.argv[0] 返回base_url+'？'+urllib.urlencode（查询） def get_页面（url）： #使用请求下载页面的源HTML #并使用BeautifulSoup解析页面返回BeautifulSoup（requests.get（url.text，'html.parser'） def解析_页面（第页）：歌曲={} 索引=1 #下面的示例是针对我们正在抓取的页面的 #您需要查看您正在访问的页面的来源 #计划刮取以查找要显示的内容 #这将返回所有对于第页中的项目。查找所有（'image'）： #该项包含指向相册封面的链接如果项['href'].find（'.jpg'）>1： #格式化相册封面的url，以包括站点url和对任何空间进行url编码相册封面='{0}{1}'。格式（示例页面，项目['href']。替换（''%20'）） #该项包含指向包含“.mp3”的歌曲的链接如果项['href'].find（'.mp3'）>1： #使用唱片封面url、歌曲文件名和歌曲url更新字典歌曲。更新（{index:{'album_cover'：album_cover，'title'：item['href']，'url'：{0}{1}）。格式（示例页面，item['href']）}）指数+=1 回音 def生成歌曲列表（歌曲）：歌曲列表=[] #迭代字典歌曲的内容以构建列表对于歌曲中的歌曲： #使用标签的歌曲文件名创建列表项 li=xbmgui.ListItem（label=songs[song]['title'，thumbnailImage=songs[song]['album\u cover']）） #将fanart设置为相册封面 li.setProperty（'fanart_image'，songs[song]['album_cover']）） #将列表项设置为可播放 li.setProperty（'IsPlayable'，'true'） #为Kodi构建插件url #例如：plugin://plugin.audio.example/?url=http%3A%2F%2Fwww.theaudiodb.com%2Ftestfiles%2F01-pablo_perez-your_ad_here.mp3&mode=stream&title=01-pablo_perez-your_ad_here.mp3 url=build_url（{'mode'：'stream'，'url'：songs[song]['url']，'title'：songs[song]['title']}） #将当前列表项添加到列表中 song_list.append（（url，li，False）） #根据Martijn向Kodi添加列表 # http://forum.kodi.tv/showthread.php?tid=209948&pid=2094170#pid2094170 xbmplugin.addDirectoryItems（加载项句柄、歌曲列表、len（歌曲列表）） #设置目录的内容 xbmplugin.setContent（插件句柄'songs'） Xbmplugin.endOfDirectory（加载项句柄） def播放歌曲（url）： #将歌曲的路径设置为列表项 play_item=xbmgui.ListItem（路径=url） #列表项已准备好由Kodi播放 XBMPlugin.setResolvedUrl（加载项句柄，True，列表项=播放项） def main（）： args=urlparse.parse_qs（sys.argv[2][1:] mode=args.get（'mode'，None） #附加组件的首次发布如果模式为无： #获取的HTMLhttp://www.theaudiodb.com/testfiles/ 页面=获取页面（示例页面） #从页面获取所需的内容内容=解析页面（第页） #以Kodi格式显示歌曲列表构建歌曲列表（内容） #已从列表中选择一首歌曲 elif模式[0]=“流”： #传递歌曲的url以播放歌曲播放歌曲（args['url'][0]）如果uuuu name uuuuuu='\uuuuuuu main\uuuuuuu'：样本(附页)https://thisiscriminal.com/wp-json/criminal/v1/episodes?posts=1000000&page=1' 
addon_handle=int（sys.argv[1]） main（）
您试图从中提取数据的页面返回的是 JSON，而不是 HTML。使用像 BeautifulSoup 这样的 HTML 解析器，您将一无所获。
"""Kodi audio add-on script: scrape a page for song links and list them.

On first launch the add-on downloads ``sample_page``, collects the album
cover (.jpg) and song (.mp3) links found in it, and shows them as a
playable directory.  When an entry is selected Kodi re-invokes the script
with ``mode=stream`` and the song URL, which is then resolved for playback.
"""
import os
import sys

try:
    # Python 3 location of the query-string helpers
    from urllib.parse import parse_qs, urlencode
except ImportError:
    # Python 2 fallback
    from urllib import urlencode
    from urlparse import parse_qs

#import xbmcaddon
# xbmcgui / xbmcplugin are used below; leaving their imports commented out
# makes build_song_list() and play_song() fail with NameError.
import xbmcgui
import xbmcplugin
import requests
from bs4 import BeautifulSoup


def build_url(query):
    """Return this plugin's base URL with ``query`` appended as a query string.

    query -- dict of parameters to url-encode.
    """
    base_url = sys.argv[0]
    return base_url + '?' + urlencode(query)


def get_page(url):
    """Download ``url`` and return its body parsed as HTML by BeautifulSoup."""
    # download the source HTML for the page using requests
    # and parse the page using BeautifulSoup
    return BeautifulSoup(requests.get(url).text, 'html.parser')


def parse_page(page):
    """Collect song entries from a parsed page.

    page -- BeautifulSoup document returned by get_page().

    Returns a dict keyed by a running 1-based index; each value is a dict
    with the keys 'album_cover', 'title' and 'url'.  Relies on the
    module-level ``sample_page`` as the prefix for every link.
    """
    songs = {}
    index = 1
    # A song link can appear before any cover link; start with an empty
    # cover so the lookup below cannot raise UnboundLocalError.
    album_cover = ''
    # the sample below is specific for the page we are scraping
    # you will need to view the source of the page(s) you are
    # planning to scrape to find the content you want to display
    # this returns every <image> element on the page
    # NOTE(review): the original comment described <a href="..."> elements;
    # confirm which tag the target page really uses.
    for item in page.find_all('image'):
        # tags without an href attribute carry no link; skip them instead
        # of raising KeyError
        href = item.get('href')
        if not href:
            continue
        # the item contains a link to an album cover
        if href.find('.jpg') > 1:
            # format the url for the album cover to include the site url
            # and url encode any spaces
            album_cover = '{0}{1}'.format(sample_page, href.replace(' ', '%20'))
        # the item contains a link to a song containing '.mp3'
        if href.find('.mp3') > 1:
            # store the album cover url, song filename, and song url
            songs[index] = {
                'album_cover': album_cover,
                'title': href,
                'url': '{0}{1}'.format(sample_page, href),
            }
            index += 1
    return songs


def build_song_list(songs):
    """Add one playable Kodi list item per entry in ``songs``.

    songs -- dict as returned by parse_page().
    """
    song_list = []
    # iterate over the contents of the dictionary songs to build the list
    for song in songs.values():
        # create a list item using the song filename for the label
        li = xbmcgui.ListItem(label=song['title'])
        # set the thumbnail through setArt(); the thumbnailImage constructor
        # argument is not accepted by newer Kodi releases
        li.setArt({'thumb': song['album_cover']})
        # set the fanart to the album cover
        li.setProperty('fanart_image', song['album_cover'])
        # set the list item to playable
        li.setProperty('IsPlayable', 'true')
        # build the plugin url for Kodi
        # Example: plugin://plugin.audio.example/?url=http%3A%2F%2Fwww.theaudiodb.com%2Ftestfiles%2F01-pablo_perez-your_ad_here.mp3&mode=stream&title=01-pablo_perez-your_ad_here.mp3
        url = build_url({'mode': 'stream', 'url': song['url'], 'title': song['title']})
        # add the current list item to a list
        song_list.append((url, li, False))
    # add list to Kodi per Martijn
    # http://forum.kodi.tv/showthread.php?tid=209948&pid=2094170#pid2094170
    xbmcplugin.addDirectoryItems(addon_handle, song_list, len(song_list))
    # set the content of the directory
    xbmcplugin.setContent(addon_handle, 'songs')
    xbmcplugin.endOfDirectory(addon_handle)


def play_song(url):
    """Hand ``url`` to Kodi as the resolved, playable item."""
    # set the path of the song to a list item
    play_item = xbmcgui.ListItem(path=url)
    # the list item is ready to be played by Kodi
    xbmcplugin.setResolvedUrl(addon_handle, True, listitem=play_item)


def main():
    """Dispatch on the ``mode`` query parameter passed in sys.argv[2]."""
    # sys.argv[2] is the query string including its leading '?'
    args = parse_qs(sys.argv[2][1:])
    mode = args.get('mode', None)
    # initial launch of add-on
    if mode is None:
        # get the HTML for sample_page
        page = get_page(sample_page)
        # get the content needed from the page
        content = parse_page(page)
        # display the list of songs in Kodi
        build_song_list(content)
    # a song from the list has been selected
    elif mode[0] == 'stream':
        # pass the url of the song to play_song
        play_song(args['url'][0])


if __name__ == '__main__':
    # NOTE(review): this is a wp-json endpoint. If it serves JSON rather than
    # HTML, BeautifulSoup finds no tags and the song list stays empty; in that
    # case fetch it with requests.get(url).json() and read the fields from the
    # decoded data. Confirm the response schema before changing parse_page().
    sample_page = 'https://thisiscriminal.com/wp-json/criminal/v1/episodes?posts=1000000&page=1'
    addon_handle = int(sys.argv[1])
    main()