Python:如何使用 urllib.request.urlopen 从多个 URL 获取所有图像 URL?
问题:运行下面的代码时出现 AttributeError: 'list' object has no attribute 'timeout'(“list”对象没有属性“timeout”)。原因是不能把 URL 列表直接传给 urllib.request.urlopen,它每次只接受一个 URL。标签:python、web-scraping、beautifulsoup。
from bs4 import BeautifulSoup
import urllib.request
# Pages whose <img> URLs should be collected.
urls = [
    "https://archillect.com/1",
    "https://archillect.com/2",
    "https://archillect.com/3",
]

# NOTE: this is the call the question is about, kept as asked. urlopen()
# takes one URL string (or a Request object), so handing it the whole list
# raises: AttributeError: 'list' object has no attribute 'timeout'.
soup = BeautifulSoup(urllib.request.urlopen(urls))
for u in urls:
    # `u` is never used: every pass re-reads the single `soup` built above,
    # so no per-URL fetch actually happens inside this loop.
    for img in soup.find_all("img", src=True):
        print(img["src"])
@奎师那 已经给出了答案。我再提供另一种解决方案,仅供参考:
for url in urls:
    # urlopen() accepts a single URL, so fetch the pages one at a time; the
    # with-block closes each HTTP response once it has been parsed.
    with urllib.request.urlopen(url) as response:
        # Name the parser explicitly so the result does not depend on which
        # optional parsers happen to be installed alongside bs4.
        soup = BeautifulSoup(response, "html.parser")
    # Use this page's soup before the next iteration replaces it.
    for img in soup.find_all("img", src=True):
        print(img["src"])
以下是更多的例子:
from simplified_scrapy import Spider, SimplifiedDoc, SimplifiedMain, utils
class ImageSpider(Spider):
    """Spider for the archillect pages that saves images and lists image URLs.

    ``afterResponse`` tries to store each response as an image file under a
    ``data``-prefixed name; ``extract`` returns the image URLs found in a page
    under the ``'Urls'`` key.
    NOTE(review): presumably the framework queues the returned ``'Urls'`` for
    download -- confirm against the simplified_scrapy documentation.
    """

    name = 'archillect'
    start_urls = [
        "https://archillect.com/1",
        "https://archillect.com/2",
        "https://archillect.com/3",
    ]

    def afterResponse(self, response, url, error=None, extra=None):
        """Save the response as an image file, or defer to the base class.

        The file name is ``'data'`` plus the last path segment of ``url``
        (leading ``/`` included), with any ``?query`` part cut off; for
        ``https://archillect.com/1`` that is ``data/1``.

        Returns ``None`` when ``utils.saveResponseAsFile`` reports success,
        otherwise whatever ``Spider.afterResponse`` returns. Any exception is
        printed and the method then returns ``None`` as well.
        """
        try:
            # Create file name: stop at the query string when there is one.
            query_at = url.find('?')
            end = query_at if query_at > 0 else len(url)
            name = 'data' + url[url.rindex('/', 0, end):end]
            # Save image. Presumably truthy means the response was an image
            # and was written to disk -- confirm against simplified_scrapy.
            if utils.saveResponseAsFile(response, name, 'image'):
                return None
            else:
                # `extra` is not forwarded here, matching the original call.
                return Spider.afterResponse(self, response, url, error)
        except Exception as err:
            # Best effort: report the problem and let the crawl continue.
            print(err)

    def extract(self, url, html, models, modelNames):
        """Return the image URLs found in ``html`` as ``{'Urls': [...]}``.

        ``url.url`` is passed to ``listImg`` as its ``url`` argument.
        NOTE(review): presumably used to resolve relative image paths; confirm.
        """
        doc = SimplifiedDoc(html)
        urls = doc.listImg(url=url.url)
        return {'Urls': urls}
if __name__ == "__main__":
    # Start the crawl only when this file is run as a script, so importing
    # the module does not kick off network activity as a side effect.
    SimplifiedMain.startThread(ImageSpider())  # Start