Java Youtube提取URL仅适用于某些视频

Java Youtube提取URL仅适用于某些视频,java,parsing,youtube,Java,Parsing,Youtube,我这里的代码有点问题。我所做的基本上是解析视频页面的HTML,并获得一个编码的MP4URL。从那里我收集了键值对,并使用正确的键值对/格式重新创建了一个URL。此代码确实有效,但似乎只适用于未经许可的视频。我比较了工作地址和非工作地址的URL,两者的参数、结构完全相同,我看不出有什么不同。我没有使用的MP4字符串中的唯一数据是quality=和type=,我发现它们都不应该出现在实际的url中。我有点不知所措,因为这似乎适用于某些视频,而不适用于其他视频,而且URL中也没有其他数据可供使用。我错

我这里的代码有点问题。我的做法基本上是解析视频页面的 HTML,从中取得一个经过编码的 MP4 URL;然后把其中的键值对提取出来,再按正确的键值对格式重新拼出一个 URL。这段代码确实能用,但似乎只对没有版权保护(未授权)的视频有效。我比较了可用视频和不可用视频生成的 URL,两者的参数和结构完全相同,看不出有什么区别。MP4 字符串中我唯一没有用到的数据是 quality= 和 type=,而据我了解,它们都不应该出现在最终的 URL 中。我有点不知所措,因为这段代码对某些视频有效、对另一些视频无效,而 URL 中也没有其他可以利用的数据了。我遗漏了什么?

/**
 * Downloads a video page and rebuilds a direct MP4 stream URL from the
 * {@code url_encoded_fmt_stream_map} entry embedded in its HTML.
 *
 * <p>Steps: fetch the page, locate the stream map, pick the entry whose decoded
 * text contains {@code type=video/mp4} (when several match, the last one in
 * the map wins), split that entry into key/value pairs, drop {@code quality}
 * and {@code type}, and append the remaining pairs to the base {@code url}.
 *
 * <p>NOTE(review): an {@code s} parameter is emitted as {@code signature} with
 * its value copied verbatim. Presumably protected videos deliver {@code s} in a
 * scrambled form that has to be transformed by the player script before the
 * server accepts it, which would explain why the returned URL only works for
 * some videos - confirm before relying on this path.
 *
 * @param myURL address of the video page to download
 * @return the reconstructed stream URL
 * @throws IOException if the page cannot be downloaded, or if it contains no
 *         stream map, no MP4 entry, or an MP4 entry without a {@code url} field
 */
public static String getActualYTURL(String myURL) throws IOException {
    String html = fetchPage(myURL);

    // Capture only the quoted value (group 1); the whole match would drag the
    // JSON key and the closing quote into the first and last entries.
    Matcher mapMatcher =
            Pattern.compile("url_encoded_fmt_stream_map\":\"(.*?)\"").matcher(html);
    if (!mapMatcher.find()) {
        throw new IOException("No url_encoded_fmt_stream_map found in page: " + myURL);
    }

    // Entries are comma separated and still percent-encoded at this point, so
    // splitting on ',' before decoding cannot cut through a decoded value.
    String encodedMP4URL = null;
    for (String entry : mapMatcher.group(1).split(",")) {
        String decoded = URLDecoder.decode(entry, "UTF-8");
        if (decoded.contains("type=video/mp4")) {
            encodedMP4URL = decoded; // keep overwriting: last MP4 entry wins
        }
    }
    if (encodedMP4URL == null) {
        throw new IOException("No video/mp4 stream listed in page: " + myURL);
    }

    // The decoded entry mixes the entry's own fields with the query string of
    // the nested url, so everything is collected into one parameter map.
    HashMap<String, String> pairs = new HashMap<>();
    String baseUrl = null;
    for (String part : encodedMP4URL.split("&")) {
        if (part.startsWith("url=")) {
            String target = part.substring("url=".length());
            int queryStart = target.indexOf('?');
            if (queryStart < 0) {
                baseUrl = target;
            } else {
                baseUrl = target.substring(0, queryStart);
                // The first query parameter is still glued to the url field.
                putPair(pairs, target.substring(queryStart + 1));
            }
        } else {
            putPair(pairs, part);
        }
    }
    if (baseUrl == null) {
        throw new IOException("MP4 stream entry has no url field in page: " + myURL);
    }

    // These two describe the stream but are not part of the request itself.
    pairs.remove("quality");
    pairs.remove("type");

    StringBuilder realURL = new StringBuilder(baseUrl);
    char separator = '?';
    for (String key : pairs.keySet()) {
        // The stream map calls the field "s"; the request expects "signature".
        String name = key.equals("s") ? "signature" : key;
        realURL.append(separator).append(name).append('=').append(pairs.get(key));
        separator = '&'; // no dangling '&' after the final parameter
    }
    return realURL.toString();
}

/** Downloads {@code pageUrl} and returns its body as one string with every literal {@code \u0026} turned into {@code &}. */
private static String fetchPage(String pageUrl) throws IOException {
    StringBuilder page = new StringBuilder();
    // try-with-resources releases the connection and the client on every path.
    try (CloseableHttpClient httpclient = HttpClients.createDefault();
         CloseableHttpResponse response = httpclient.execute(new HttpGet(pageUrl))) {
        if (response.getEntity() == null) {
            throw new IOException("Empty response body for " + pageUrl);
        }
        // Decode explicitly as UTF-8 instead of the platform default charset.
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(response.getEntity().getContent(), "UTF-8"))) {
            String line;
            while ((line = reader.readLine()) != null) {
                page.append(line.replace("\\u0026", "&"));
            }
        }
    }
    return page.toString();
}

/** Stores one {@code key=value} token, splitting at the first '=' only so values containing '=' stay intact. */
private static void putPair(HashMap<String, String> pairs, String token) {
    if (token.isEmpty()) {
        return;
    }
    int equalsAt = token.indexOf('=');
    if (equalsAt < 0) {
        pairs.put(token, "");
    } else {
        pairs.put(token.substring(0, equalsAt), token.substring(equalsAt + 1));
    }
}
public静态字符串getActualYTURL(字符串myURL)引发IOException{
CloseableHttpClient httpclient=HttpClients.createDefault();
HttpGet HttpGet=新的HttpGet(myURL);
CloseableHttpResponse response=httpclient.execute(httpget);
//建立联系
字符串html=“”;
InputStream in=response.getEntity().getContent();
BufferedReader reader=新的BufferedReader(新的InputStreamReader(in));
StringBuilder str=新的StringBuilder();
字符串行=null;
而((line=reader.readLine())!=null){
str.append(第行替换(\\u0026“,”和“);
}
in.close();
html=str.toString();
//获取Youtube页面的HTML
Pattern p=Pattern.compile(“url\u-encoded\u-fmt\u-stream\u-map\”:\”(.*);
Matcher m=p.Matcher(html);
ArrayList matches=新的ArrayList();
m、 查找();
字符串URL[]=m.group().split(“,”);
//获取编码URL的映射
字符串encodedMP4URL=null;
for(字符串ppUrl:url){
字符串url=URLDecover.decode(ppUrl,“UTF-8”);
模式p1=Pattern.compile(“type=video/mp4”);
匹配器m1=p1.匹配器(url);
if(m1.find()){
encodedMP4URL=url;
}
}
//获取MP4编码的URL
HashMap pairs=新的HashMap();
字符串[]temp=encodedMP4URL.split(&);
对于(int i=0;i
示例URL输出:
https://r16---sn-ab5l6nll.googlevideo.com/videoplayback?dur=298.608&mime=video%2Fmp4&source=youtube&ratebypass=yes&gir=yes&lmt=1479243873107622&id=o-AFZWFgdwCg66TqdZ2ZY823besbDXiB37zBB9ZwzPLwKe&key=yt6&itag=18&mm=31&mn=sn-ab5l6nll&ei=-uStWICxJ4TK8gT_xoLwDw&ms=au&ip=47.19.92.83&mt=1487791178&initcwndbps=922500&ipbits=0&mv=m&sparams=clen%2Cdur%2Cei%2Cgir%2Cid%2Cinitcwndbps%2Cip%2Cipbits%2Citag%2Clmt%2Cmime%2Cmm%2Cmn%2Cms%2Cpl%2Cratebypass%2Crequiressl%2Csource%2Cupn%2Cexpire&upn=MylZrcryNC&requiressl=yes&signature=12A12AC76CD7E14F402CC9EBE879103B2C5870C.D866D5D5D99C0732DEC66722E9378&expire=1299417