Java Youtube提取URL仅适用于某些视频
我这里的代码有点问题。我所做的基本上是解析视频页面的 HTML,并从中取得一个经过编码的 MP4 URL。接着我从这个字符串中收集键值对,并用正确的键值对和格式重新拼出一个 URL。这段代码确实有效,但似乎只适用于不含授权(版权)内容的视频。我比较了可用视频和不可用视频生成的 URL,两者的参数和结构完全相同,我看不出有什么区别。MP4 字符串中我唯一没有用到的数据是 quality= 和 type=,而我发现它们都不应该出现在实际的 URL 中。我有点不知所措,因为这种做法对某些视频有效,对另一些视频却无效,而且 URL 中也没有其他数据可以利用。我错过了什么?
标签:java, parsing, youtube
/**
 * Downloads a YouTube video page and rebuilds a direct MP4 stream URL from
 * the {@code url_encoded_fmt_stream_map} value embedded in that page.
 *
 * <p>The map is a comma-separated list of {@code key=value&key=value} entries.
 * The last entry whose decoded text contains {@code type=video/mp4} is used.
 * Its {@code url} value becomes the base of the result; every other parameter
 * of the entry except {@code quality} and {@code type} is appended to it, with
 * the {@code s} key renamed to {@code signature}.
 *
 * <p>NOTE(review): the {@code s} value is forwarded unchanged. It may be an
 * enciphered signature that must first be transformed by the routine in the
 * player script; that would explain URLs that look well-formed but are
 * rejected for some videos. Confirm against the player script.
 *
 * @param myURL address of the video page to download
 * @return the reconstructed stream URL
 * @throws IOException if the page cannot be downloaded, or if it contains no
 *         stream map, no MP4 entry, or an MP4 entry without a {@code url}
 */
public static String getActualYTURL(String myURL) throws IOException {
    String html = fetchPage(myURL);
    String mp4Entry = findMp4Entry(html);
    return buildStreamUrl(mp4Entry);
}

/** Downloads the page and returns it as one string with {@code \u0026} escapes turned into {@code &}. */
private static String fetchPage(String pageUrl) throws IOException {
    CloseableHttpClient httpclient = HttpClients.createDefault();
    try {
        CloseableHttpResponse response = httpclient.execute(new HttpGet(pageUrl));
        try {
            if (response.getEntity() == null) {
                throw new IOException("Empty response for " + pageUrl);
            }
            // Explicit charset: the platform default must not decide how the page is read.
            BufferedReader reader = new BufferedReader(
                    new InputStreamReader(response.getEntity().getContent(), "UTF-8"));
            StringBuilder page = new StringBuilder();
            String line;
            while ((line = reader.readLine()) != null) {
                // The map separates parameters with the escape \u0026; turn it into a real '&'.
                page.append(line.replace("\\u0026", "&"));
            }
            return page.toString();
        } finally {
            // Closing the response releases the underlying connection.
            response.close();
        }
    } finally {
        httpclient.close();
    }
}

/** Returns the still-encoded stream-map entry describing an MP4 stream (the last one listed). */
private static String findMp4Entry(String html) throws IOException {
    Matcher mapMatcher = Pattern.compile("url_encoded_fmt_stream_map\":\"(.*?)?\"").matcher(html);
    if (!mapMatcher.find()) {
        throw new IOException("No url_encoded_fmt_stream_map found in page");
    }
    // group(1) is only the quoted value; group() would also carry the key and the quotes.
    String streamMap = mapMatcher.group(1);
    if (streamMap == null || streamMap.length() == 0) {
        throw new IOException("url_encoded_fmt_stream_map is empty");
    }
    String mp4Entry = null;
    for (String entry : streamMap.split(",")) {
        if (URLDecoder.decode(entry, "UTF-8").contains("type=video/mp4")) {
            mp4Entry = entry;
        }
    }
    if (mp4Entry == null) {
        throw new IOException("No video/mp4 entry in url_encoded_fmt_stream_map");
    }
    return mp4Entry;
}

/** Turns one encoded stream-map entry into the final stream URL. */
private static String buildStreamUrl(String rawEntry) throws IOException {
    HashMap<String, String> pairs = new HashMap<String, String>();
    // Split while the entry is still encoded, then decode each value on its own,
    // so an encoded '&' or '=' inside a value cannot be mistaken for a separator.
    for (String rawPair : rawEntry.split("&")) {
        int eq = rawPair.indexOf('=');
        if (eq <= 0) {
            continue; // no key, or no '=' at all
        }
        pairs.put(rawPair.substring(0, eq),
                URLDecoder.decode(rawPair.substring(eq + 1), "UTF-8"));
    }

    String baseUrl = pairs.remove("url");
    if (baseUrl == null || baseUrl.length() == 0) {
        throw new IOException("MP4 entry has no url parameter");
    }
    // These two describe the entry and are not part of the stream URL.
    pairs.remove("quality");
    pairs.remove("type");

    StringBuilder realURL = new StringBuilder(baseUrl);
    for (String key : pairs.keySet()) {
        // The "s" key has to be sent as "signature" in the actual URL.
        String name = key.equals("s") ? "signature" : key;
        if (baseUrl.contains("?" + name + "=") || baseUrl.contains("&" + name + "=")) {
            continue; // already carried by the url value itself
        }
        realURL.append(realURL.indexOf("?") < 0 ? '?' : '&')
                .append(name)
                .append('=')
                .append(java.net.URLEncoder.encode(pairs.get(key), "UTF-8"));
    }
    return realURL.toString();
}
public静态字符串getActualYTURL(字符串myURL)引发IOException{
CloseableHttpClient httpclient=HttpClients.createDefault();
HttpGet HttpGet=新的HttpGet(myURL);
CloseableHttpResponse response=httpclient.execute(httpget);
//建立联系
字符串html=“”;
InputStream in=response.getEntity().getContent();
BufferedReader reader=新的BufferedReader(新的InputStreamReader(in));
StringBuilder str=新的StringBuilder();
字符串行=null;
而((line=reader.readLine())!=null){
str.append(第行替换(\\u0026“,”和“);
}
in.close();
html=str.toString();
//获取Youtube页面的HTML
Pattern p=Pattern.compile(“url\u-encoded\u-fmt\u-stream\u-map\”:\”(.*);
Matcher m=p.Matcher(html);
ArrayList matches=新的ArrayList();
m、 查找();
字符串URL[]=m.group().split(“,”);
//获取编码URL的映射
字符串encodedMP4URL=null;
for(字符串ppUrl:url){
字符串url=URLDecover.decode(ppUrl,“UTF-8”);
模式p1=Pattern.compile(“type=video/mp4”);
匹配器m1=p1.匹配器(url);
if(m1.find()){
encodedMP4URL=url;
}
}
//获取MP4编码的URL
HashMap pairs=新的HashMap();
字符串[]temp=encodedMP4URL.split(&);
对于(int i=0;i
示例URL输出:https://r16---sn-ab5l6nll.googlevideo.com/videoplayback?dur=298.608&mime=video%2Fmp4&source=youtube&ratebypass=yes&gir=yes&lmt=1479243873107622&id=o-AFZWFgdwCg66TqdZ2ZY823besbDXiB37zBB9ZwzPLwKe&key=yt6&itag=18&mm=31&mn=sn-ab5l6nll&ei=-uStWICxJ4TK8gT_xoLwDw&ms=au&ip=47.19.92.83&mt=1487791178&initcwndbps=922500&ipbits=0&mv=m&sparams=clen%2Cdur%2Cei%2Cgir%2Cid%2Cinitcwndbps%2Cip%2Cipbits%2Citag%2Clmt%2Cmime%2Cmm%2Cmn%2Cms%2Cpl%2Cratebypass%2Crequiressl%2Csource%2Cupn%2Cexpire&upn=MylZrcryNC&requiressl=yes&signature=12A12AC76CD7E14F402CC9EBE879103B2C5870C.D866D5D5D99C0732DEC66722E9378&expire=1299417