在 download.file 中……无法打开 URL……HTTP 状态为 "404 Not Found"
多亏了 StackOverflow,我能够使用以下代码在公共网站上下载一系列照片:
# The five RAPEX product pages, reference numbers A12/0090/13 down to A12/0086/13.
# Built with a vectorized sprintf() instead of listing each literal URL.
urls <- sprintf(
  "https://ec.europa.eu/consumers/consumers_safety/safety_products/rapex/alerts/?event=viewProduct&reference=A12/00%d/13",
  90:86
)
# Download every <img> found on each product page.
# Fixes over the original:
#   * seq_along(urls) instead of 1:length(urls) (safe for empty vectors);
#   * download.file() is wrapped in tryCatch(), so a single 404
#     ("cannot open URL ... HTTP status '404 Not Found'") no longer
#     aborts the entire loop — it is reported as a warning instead.
# NOTE(review): html_session() is deprecated in rvest >= 1.0 in favour of
# session(); kept here because the installed rvest version is unknown.
for (i in seq_along(urls)) {
  print(i)
  webpage <- html_session(urls[i])
  link.titles <- webpage %>% html_nodes("img")
  img.url <- link.titles %>% html_attr("src")
  # NOTE(review): assumes the src attributes are absolute URLs — confirm,
  # relative paths would need to be resolved against the page URL first.
  for (j in seq_along(img.url)) {
    tryCatch(
      download.file(img.url[j], paste0(i, ".", j, ".jpg"), mode = "wb"),
      error = function(e) {
        warning("Failed to download ", img.url[j], ": ",
                conditionMessage(e), call. = FALSE)
      }
    )
  }
}
下面的方案使用 purrr、rvest 和 httr:提取 img 标记,展平为简单列表,提取 src URL,然后对每个 URL 安全地尝试下载(完整代码见下文)。
只需使用函数 try():
查看此链接。谢谢!这可能就可以了!感谢您的分享!它对"更大的集合"不起作用。但是,我肯定会将此作为参考 :) 到底什么对"更大的集合"不起作用?这是一个非常健壮的解决方案,所以我很好奇具体的错误情况是什么。是的,当然,这是一个非常好的解决方案,我感谢您的时间和努力。但是,它不适用于我正在处理的列表,它包含大约 10000 个 URL。我正在重新运行您的脚本,但需要一段时间,因此一旦它完成运行,我会在这里发布错误。下载这么多图像时,最好使用另一种习惯用法:先测试 URL 是否存在,或者使用 curl::curl_fetch_multi()。
library(purrr)
library(rvest)
library(httr)
# URLs for the five RAPEX product pages (references A12/0090/13 .. A12/0086/13),
# generated from the numeric reference instead of repeating each literal.
urls <- sprintf(
  "https://ec.europa.eu/consumers/consumers_safety/safety_products/rapex/alerts/?event=viewProduct&reference=A12/%04d/13",
  90:86
)
# Scrape every <img> src from each page and download each image, skipping
# any URL that errors or does not return HTTP 200 (avoids the 404 crash).
sGET <- safely(GET) # wrap httr::GET: returns list(result=, error=) instead of throwing
map(urls, read_html) %>% # fetch and parse each product page
map(html_nodes, "img") %>% # extract the <img> nodes of each page
flatten() %>% # one flat list of nodes across all pages
map_chr(html_attr, "src") %>% # pull out each image URL
# NOTE(review): assumes src attributes are absolute URLs — relative paths
# would need url_absolute() against the page URL; confirm with the site.
walk(~{ # for each image URL (side effects only)
res <- sGET(.x) # attempt the request; never aborts the walk
if (!is.null(res$result)) { # request completed without a fatal error
if (status_code(res$result) == 200) { # and the image actually exists
# NOTE(review): images sharing a basename overwrite each other on disk —
# consider prefixing with a page/sequence index for large runs.
writeBin(content(res$result, as="raw"), basename(.x)) # save raw bytes to disk
}
}
})
# Same five RAPEX product URLs, assembled from the shared prefix plus the
# per-product reference codes rather than five full literals.
urls <- paste0(
  "https://ec.europa.eu/consumers/consumers_safety/safety_products/",
  "rapex/alerts/?event=viewProduct&reference=",
  c("A12/0090/13", "A12/0089/13", "A12/0088/13", "A12/0087/13", "A12/0086/13")
)
# Download every image from each page; try(..., silent = TRUE) keeps a
# single failure (e.g. a 404) from aborting the whole loop.
# Fixes over the original:
#   * the outer for-loop's closing brace was missing (unbalanced braces);
#   * seq_along() instead of 1:length() (safe when the vector is empty).
for (i in seq_along(urls)) {
  print(i)
  webpage <- html_session(urls[i])
  link.titles <- webpage %>% html_nodes("img")
  img.url <- link.titles %>% html_attr("src")
  for (j in seq_along(img.url)) {
    try(
      download.file(img.url[j], paste0(i, ".", j, ".jpg"), mode = "wb"),
      silent = TRUE
    )
  }
}
# Download every image from each page, logging whether each download
# succeeded or failed instead of skipping failures silently.
# Fixes over the original:
#   * the outer for-loop's closing brace was missing (unbalanced braces);
#   * inherits(x, "try-error") instead of is(x, "try-error") (idiomatic S3 check);
#   * seq_along() instead of 1:length() (safe when the vector is empty).
for (i in seq_along(urls)) {
  print(i)
  webpage <- html_session(urls[i])
  link.titles <- webpage %>% html_nodes("img")
  img.url <- link.titles %>% html_attr("src")
  for (j in seq_along(img.url)) {
    try_download <- try(
      download.file(img.url[j], paste0(i, ".", j, ".jpg"), mode = "wb"),
      silent = TRUE
    )
    if (inherits(try_download, "try-error")) {
      print(paste0("ERROR: ", img.url[j]))
    } else {
      print(paste0("Downloaded: ", img.url[j]))
    }
  }
}