如何用Python从带有下载等待计时器的网站下载文件
我今天遇到了一个有点奇怪的问题。我正试图遍历这个网站,并下载所有与特定模式匹配的ROM(模式匹配部分尚未包含在我的代码中): 我这样做的原因是,我有一台运行RetroPie的RPi 4,想往里面放一大堆ROM。文件夹整理等事情我稍后再处理。 我有一个Python脚本,可以遍历所有网页内容,并能拿到进入下载页之前的链接。但是,在让Python等待下载倒计时结束、获取下载链接、然后下载文件这一步上,我遇到了极大的困难: 我看到很多人建议从网站下载时使用以下代码:如何用Python从带有下载等待计时器的网站下载文件,python,web,download,Python,Web,Download,我今天遇到了一个有点奇怪的问题。我正试图遍历这个网站,并下载所有与特定模式匹配的ROM(模式匹配部分尚未包含在我的代码中): 我这样做的原因是,我有一台运行RetroPie的RPi 4,想往里面放一大堆ROM。文件夹整理等事情我稍后再处理。 我有一个Python脚本,可以遍历所有网页内容,并能拿到进入下载页之前的链接。但是,在让Python等待下载倒计时结束、获取下载链接、然后下载文件这一步上,我遇到了极大的困难: 我看到很多人建议从网站下载时使用以下代码: import requests url = "https://
import requests

# Example of downloading a file when a direct (static) download URL is known.
url = "https://romhustler.org/download/122039/RFloRzkzYjBxeUpmSXhmczJndVZvVXViV3d2bjExMUcwRmdhQzltaU1USXlNRE01ZkRJeE1TNHlOaTR4TVRFdU1qVXdmREUyTURJek9USXdOVFo4Wkc5M2JteHZZV1JmY0dGblpRPT0="
filename = "dummy.txt"
r = requests.get(url, allow_redirects=True)
# Stop here on an HTTP error status instead of saving an error page to disk.
r.raise_for_status()
with open(filename, 'wb') as f:
    # The file is opened in binary mode, so write the response body bytes.
    # Writing `url` (a str) here would raise TypeError and save nothing.
    f.write(r.content)
但是,这种做法假定您已经有一个静态下载链接。问题在于,像这样的网站会先让你等待一段时间,然后才给出下载链接(可以理解,这样可以防止有人滥用下载)。我计划让我的代码通宵运行,所以每次下载等待9秒不是问题。我遇到的唯一问题其实是如何获取下载链接。我的代码如下:
import requests,time,urllib
#https://raspberrytips.com/download-retropie-roms/#Where_to_download_Retropie_ROMs
#https://raspberrytips.com/add-games-raspberry-pi/
#print("Site to download from: \"https://cvaddict.com/list.php\"")
#print("Site to download from: \"https://coolrom.com.au/roms/\"")
#print("Site to download from: \"https://www.freeroms.com/\"")
def splitTextToLines(text):
    """Split *text* into a list of lines, breaking on newline characters.

    Only the line-feed character is treated as a separator, so a carriage
    return that precedes it stays at the end of the line. The result always
    contains at least one element: an empty string yields ``[""]`` and a
    trailing newline yields a final empty string.

    Args:
        text: The string to split.

    Returns:
        A list of the substrings between newline characters.
    """
    # str.split with an explicit separator matches the original
    # character-by-character loop without rebuilding each line one
    # character at a time.
    return text.split("\n")
def getValues(valIn, key, endPoint="</div"):
    """Collect blocks of lines that start at a line containing *key*.

    Scans *valIn* line by line. Once a line containing *key* is seen, every
    line containing ``"<div"`` raises a nesting counter and every line
    containing *endPoint* lowers it. Lines are accumulated while the counter
    is above zero; when it returns to zero the accumulated text is stored as
    one result and scanning for *key* starts again.

    At most one opening and one closing marker are counted per line, and the
    line that brings the counter back to zero is not included in the block.

    Args:
        valIn: Iterable of lines (strings) to scan.
        key: Substring that marks the first line of a block.
        endPoint: Substring that marks a closing tag.

    Returns:
        A list of strings, one per completed block, each made of the block's
        lines joined with a trailing newline after every line.
    """
    depth = 0
    capturing = False
    current = []
    results = []
    for line in valIn:
        if key in line:
            capturing = True
        if capturing and "<div" in line:
            depth += 1
        if depth > 0 and endPoint in line:
            depth -= 1
            if depth == 0:
                # Outermost block closed: emit it and wait for the next key.
                capturing = False
                results.append("".join(current))
                current = []
        if depth > 0:
            current.append(line + "\n")
    return results
def getLineOfValue(data, key):
    """Return the index of the first item in *data* that contains *key*.

    Args:
        data: Iterable of lines (strings) to search.
        key: Substring to look for.

    Returns:
        The zero-based position of the first matching item, or ``None`` when
        no item contains *key*.
    """
    for index, line in enumerate(data):
        if key in line:
            return index
    # No match: keep the original fall-through result, but make it explicit.
    return None
def getListSubstring(data, start, end):
    """Return the items of *data* whose index lies strictly between the bounds.

    Both bounds are exclusive: the items at positions *start* and *end* are
    left out. Bounds are compared against positions counted from zero, so a
    negative *start* keeps everything from the beginning and a negative
    *end* yields an empty list.

    Args:
        data: Iterable of items.
        start: Exclusive lower index bound.
        end: Exclusive upper index bound.

    Returns:
        A new list with the selected items in their original order.
    """
    return [item for index, item in enumerate(data) if start < index < end]
# Top-level scraping script: walks the romhustler.org ROM index pages, follows
# each listed ROM to its detail page, then to its "/download/" page, and prints
# any line of that page containing class="downloadLink".
# NOTE(review): leading indentation is missing in this copy of the code, so the
# exact nesting of the statements below cannot be confirmed from here.
# Wait for the user to press Enter before any request is made.
input("ready?")
# Index pages 1 through 401.
for i in range(1,402):
print("Now scouring page:",i)
#print("Site to download from: \"https://romhustler.org/roms/index/page:" + str(i) + "\"")
# Fetch the index page and split its HTML into lines.
rawSiteData = splitTextToLines(requests.get("https://romhustler.org/roms/index/page:" + str(i)).text)
# Line index of the listing container.
# NOTE(review): getLineOfValue falls through without a return value when the
# marker is absent; the comparison inside getListSubstring would then receive
# None - confirm how a page without the marker should be handled.
scrapedSiteData = getLineOfValue(rawSiteData,"<div class=\"roms-listing w-console\"")
# Take the lines after the container marker (excluding the last line) and
# group them into one text block per <div class="row...> element.
rowList = getValues(getListSubstring(rawSiteData,scrapedSiteData,len(rawSiteData) - 1),"<div class=\"row")
for rowSegment in rowList:
# Row type detected so far for this block; empty until a known marker is seen.
state = ""
for line in splitTextToLines(rowSegment):
#Get the class type
if ("<div class=\"row extend\">" in line):
state = "extended row"
if ("<div class=\"row \">" in line):
state = "standard row"
#If we have the class type, then get the download link
# Extended rows are recognised but nothing is done with them.
if (state == "extended row"):
pass
if (state == "standard row"):
# Re-scan the whole row block for the ROM link. This loop reuses the name
# `line`, overwriting the loop variable of the loop over the same block above.
for line in splitTextToLines(rowSegment):
if (("href=\"" in line) and ("/rom/" in line)):
# Extract the href value, from just after <a href=" up to the next ">.
startIndex = line.index("<a href=\"")+len("<a href=\"")
url = "https://romhustler.org" + line[startIndex : line.index("\">",startIndex)]
#We now have the download url.
# Fetch the ROM detail page and keep the lines after its download-list marker.
siteData_2 = splitTextToLines(requests.get(url).text)
scrapedSiteData_2 = getLineOfValue(siteData_2,"<div class=\"overview info download_list")
rowList_2 = getListSubstring(siteData_2,scrapedSiteData_2,len(siteData_2) - 1)
# Flag cleared at the first href line, so only that line is processed.
running_2 = True
# This loop reuses the name `i`, which also holds the page number above.
for i in rowList_2:
if (("href=\"" in i) and running_2):
running_2 = False
url_2 = "https://romhustler.org"
# Append the href value, from just after <a href=" up to the next double quote.
startIndex = i.index("<a href=\"") + len("<a href=\"")
url_2 += i[startIndex : i.index("\"",startIndex+1)]
if ("/download/" in url_2):
#We now have the download url.
# Fetch the download page and print every line that mentions the
# downloadLink class. `i` is reused once more as the loop variable.
siteData_3 = splitTextToLines(requests.get(url_2).text)
print(url_2)
for i in siteData_3:
if ("class=\"downloadLink\"" in i):
print(i)
#This is just here to stop it spamming console and pause the code
input()
# Print the row block when a row type was detected and `line` is non-empty.
if ((state != "") and (line != "")):
print(rowSegment)
导入请求、时间、URL库
#https://raspberrytips.com/download-retropie-roms/#Where_to_download_Retropie_ROMs
#https://raspberrytips.com/add-games-raspberry-pi/
#打印(“要下载的站点:\”https://cvaddict.com/list.php\"")
#打印(“要下载的站点:\”https://coolrom.com.au/roms/\"")
#打印(“要下载的站点:\”https://www.freeroms.com/\"")
def splitTextToLines(文本):
结果=[“”]
pos=0
对于文本中的i:
如果(i!='\n'):
结果[pos]+=i
其他:
结果。追加(“”)
pos+=1
返回结果
def getValues(有效值、密钥、端点=”