Java 使用应用引擎url获取发布用户凭据时出现问题

Java 使用应用引擎url获取发布用户凭据时出现问题,java,google-app-engine,cookies,Java,Google App Engine,Cookies,我正在开发一个应用程序,它需要能够代表用户登录到一个网站,并进行一些html抓取。和许多其他开发者一样,AppEngine在cookie管理方面给我带来了麻烦。我登录的服务器在初始POST之后发送一个重定向,然后再发送另一个重定向到最终登录页。据我所知,其目的是让服务器验证cookie是否正常工作。我已经从其他答案中拼凑了下面的助手类 public class Utilities { public static String smartPost(String url, String d

我正在开发一个应用程序,它需要能够代表用户登录到一个网站,并进行一些html抓取。和许多其他开发者一样,AppEngine在cookie管理方面给我带来了麻烦。我登录的服务器在初始POST之后发送一个重定向,然后再发送另一个重定向到最终登录页。据我所知,其目的是让服务器验证cookie是否正常工作。我已经从其他答案中拼凑了下面的助手类

public class Utilities {

    public static String smartPost(String url, String data) throws IOException {
        // storage for cookies between redirects
        Map<String, String> cookies = new HashMap<String, String>();

        HttpURLConnection connection;
        StringBuilder response = new StringBuilder();
        response.append(url);
        URL resource = new URL(url);
        connection = (HttpURLConnection) resource.openConnection();
        connection.setRequestMethod("POST");
        connection.setRequestProperty("Content-Type",
                "application/x-www-form-urlencoded");

        connection.setRequestProperty("Content-Length",
                "" + Integer.toString(data.getBytes().length));
        connection.setRequestProperty("Content-Language", "en-US");

        connection.setUseCaches(false);
        connection.setInstanceFollowRedirects(false);
        connection.setDoInput(true);
        connection.setDoOutput(true);

        // Send request
        DataOutputStream wr = new DataOutputStream(connection.getOutputStream());
        wr.writeBytes(data);
        wr.flush();
        wr.close();

        url = connection.getHeaderField("location");

        while (url != null) {
            // Get Cookies
            getCookiesFromConnection(connection, cookies);
            URL redirectResource = new URL(url);
            response.append(url);
            connection = (HttpURLConnection) redirectResource.openConnection();
            connection.setRequestMethod("GET");
            addCookiesToConnection(connection, cookies);
            connection.setInstanceFollowRedirects(false);

            connection.setUseCaches(false);
            connection.setDoInput(true);
            url = connection.getHeaderField("location");
            connection.disconnect();
        }

        // Arrived at final location
        InputStream is = connection.getInputStream();
        BufferedReader rd = new BufferedReader(new InputStreamReader(is));
        String line;
        while ((line = rd.readLine()) != null) {
            response.append(line);
            response.append('\r');
        }
        rd.close();

        return response.toString();
    }

    static void addCookiesToConnection(HttpURLConnection c,
            Map<String, String> storage) {
        StringBuilder cookieStringBuilder = new StringBuilder();
        for (Entry<String, String> e : storage.entrySet()) {
            cookieStringBuilder.append(e.getKey());
            cookieStringBuilder.append("=");
            cookieStringBuilder.append(e.getValue());
            cookieStringBuilder.append(";");
        }
        c.setRequestProperty("Cookies", cookieStringBuilder.toString());
    }

    static void getCookiesFromConnection(HttpURLConnection c,
            Map<String, String> storage) {
        Map<String, List<String>> headers = c.getHeaderFields();
        for (Entry<String, List<String>> e : headers.entrySet()) {
            if (e.getKey().equalsIgnoreCase("Set-Cookie")) {
                for (String cookieHeader : e.getValue()) {
                    String cookie = cookieHeader.substring(0,
                            cookieHeader.indexOf(";"));
                    String key = cookie.substring(0, cookie.indexOf("="));
                    String value = cookie.substring(cookie.indexOf("=") + 1);
                    storage.put(key, value);
                }
            }
        }
    }
}
公共类实用程序{
公共静态字符串smartPost(字符串url、字符串数据)引发IOException{
//重定向之间cookie的存储
Map cookies=新HashMap();
httpurl连接;
StringBuilder响应=新建StringBuilder();
append(url);
URL资源=新URL(URL);
connection=(HttpURLConnection)resource.openConnection();
connection.setRequestMethod(“POST”);
connection.setRequestProperty(“内容类型”,
“application/x-www-form-urlencoded”);
connection.setRequestProperty(“内容长度”,
“”+Integer.toString(data.getBytes().length));
connection.setRequestProperty(“内容语言”、“en-US”);
connection.setUseCaches(false);
connection.setInstanceFollowDirections(false);
connection.setDoInput(true);
connection.setDoOutput(真);
//发送请求
DataOutputStream wr=新的DataOutputStream(connection.getOutputStream());
wr.writeBytes(数据);
wr.flush();
wr.close();
url=connection.getHeaderField(“位置”);
while(url!=null){
//吃饼干
getCookiesFromConnection(连接、cookies);
URL重定向资源=新URL(URL);
append(url);
connection=(HttpURLConnection)重定向资源。openConnection();
connection.setRequestMethod(“GET”);
将cookies添加到连接(连接、cookies);
connection.setInstanceFollowDirections(false);
connection.setUseCaches(false);
connection.setDoInput(true);
url=connection.getHeaderField(“位置”);
连接断开();
}
//到达最终地点
InputStream is=connection.getInputStream();
BufferedReader rd=新的BufferedReader(新的InputStreamReader(is));
弦线;
而((line=rd.readLine())!=null){
响应。追加(行);
append('\r');
}
rd.close();
返回response.toString();
}
静态void addCookiesToConnection(HttpURLConnection,
地图存储){
StringBuilder cookieStringBuilder=新StringBuilder();
对于(条目e:storage.entrySet()){
append(e.getKey());
cookieStringBuilder.append(“”);
append(e.getValue());
cookieStringBuilder.append(“;”);
}
c、 setRequestProperty(“Cookies”,cookieStringBuilder.toString());
}
静态void getCookiesFromConnection(HttpURLConnection,
地图存储){
Map headers=c.getHeaderFields();
对于(条目e:headers.entrySet()){
if(例如getKey().equalsIgnoreCase(“设置Cookie”)){
for(字符串cookieHeader:e.getValue()){
字符串cookie=cookieHeader.substring(0,
cookieHeader.indexOf(“;”);
字符串key=cookie.substring(0,cookie.indexOf(“=”);
字符串值=cookie.substring(cookie.indexOf(“=”)+1);
存储。放置(键、值);
}
}
}
}
}
我的目标是手动处理重定向,并将cookies传递到最终页面。它在开发服务器上运行良好,但我认为这不是我的代码在起作用,而是本地服务器上的默认行为。有人有在生产服务器上实现此类功能的经验吗?我对java.net软件包非常缺乏经验,因此我可能离解决方案还很远


我最初尝试在围棋中实现这一点,但我得到了相同的结果,并认为这只是我对围棋完全缺乏经验。由于Jsoup,Java将更容易抓取html,但我并不反对使用python或继续使用它,如果这会使它变得更容易的话。这是一个大项目中相当小的一部分,我还没到可以转换的程度。

经过几天的努力,我发现了这个 这正是我想用python做的。我已经决定在这个项目中使用python,我将使用BeautifulSoup进行html抓取。仍然不确定最初我的代码出了什么问题