Android 从网页获取文本到字符串

Android 从网页获取文本到字符串,android,string,text,webpage,Android,String,Text,Webpage,我是Android新手,我想把网页上的全部文本转换成字符串。我发现了很多类似的问题,但正如我所说,我是Android新手,不知道如何在我的应用程序中使用它们。我有错误。我只有一种方法能够让它工作,它使用WebView和JavaScript,速度非常慢。有人能告诉我一些其他的方法来做到这一点,或者如何加快网络视图,因为我根本不使用它来查看内容。 顺便说一句,我添加了以下代码来加速WebView webView.getSettings().setJavaScriptEnabled(true);

我是 Android 新手,想把网页上的全部文本读取到一个字符串中。我找到了很多类似的问题,但正如我所说,我是新手,不知道如何在自己的应用程序中使用那些答案,尝试时总是出错。目前我只找到一种可行的方法:使用 WebView 和 JavaScript,但速度非常慢。有人能告诉我其他实现方法,或者如何加快 WebView 的速度吗?我根本不用它来显示内容。顺便说一句,我添加了以下代码来加速 WebView:

webView.getSettings().setJavaScriptEnabled(true); 
    webView.getSettings().setBlockNetworkImage(true);
    webView.getSettings().setJavaScriptCanOpenWindowsAutomatically(false);
    webView.getSettings().setPluginsEnabled(false);
    webView.getSettings().setSupportMultipleWindows(false);
    webView.getSettings().setSupportZoom(false);
    webView.getSettings().setSavePassword(false);
    webView.setVerticalScrollBarEnabled(false);
    webView.setHorizontalScrollBarEnabled(false);
    webView.getSettings().setAppCacheEnabled(false);
    webView.getSettings().setCacheMode(WebSettings.LOAD_NO_CACHE);

如果您知道其他比使用WebView更好更快的解决方案,请向我提供主要活动的全部源代码,或者解释我应该在哪里编写,这样我就不会出错。

鉴于您对查看内容根本不感兴趣,请尝试使用以下方法:

为了从URL获取源代码,您可以使用:

// Downloads the body of a URL with a blocking HTTP GET and collects it into resString.
HttpClient httpclient = new DefaultHttpClient(); // Create HTTP Client
HttpGet httpget = new HttpGet("http://yoururl.com"); // Set the action you want to do
HttpResponse response = httpclient.execute(httpget); // Execute it
HttpEntity entity = response.getEntity();
InputStream is = entity.getContent(); // Create an InputStream with the response
// Use the default buffer size: an 8-char buffer makes the BufferedReader pointless.
BufferedReader reader = new BufferedReader(new InputStreamReader(is, "iso-8859-1"));
StringBuilder sb = new StringBuilder();
String line = null;
try {
    while ((line = reader.readLine()) != null) { // Read line by line
        // Two appends avoid building a temporary String for every line.
        sb.append(line).append('\n');
    }
} finally {
    // Closing the reader also closes the wrapped stream, even when reading fails.
    reader.close();
}

String resString = sb.toString(); // Result is here

请确保不要在主 UI 线程上运行此代码,而应在 AsyncTask 或 Thread 中运行。

这是我通常用于从internet下载字符串的代码

class RequestTask extends AsyncTask<String, String, String>{

@Override
// username, password, message, mobile
protected String doInBackground(String... url) {
    // constants
    int timeoutSocket = 5000;
    int timeoutConnection = 5000;

    HttpParams httpParameters = new BasicHttpParams();
    HttpConnectionParams.setConnectionTimeout(httpParameters, timeoutConnection);
    HttpConnectionParams.setSoTimeout(httpParameters, timeoutSocket);
    HttpClient client = new DefaultHttpClient(httpParameters);

    HttpGet httpget = new HttpGet(url[0]);

    try {
        HttpResponse getResponse = client.execute(httpget);
        final int statusCode = getResponse.getStatusLine().getStatusCode();

        if(statusCode != HttpStatus.SC_OK) {
            Log.w("MyApp", "Download Error: " + statusCode + "| for URL: " + url);
            return null;
        }

        String line = "";
        StringBuilder total = new StringBuilder();

        HttpEntity getResponseEntity = getResponse.getEntity();

        BufferedReader reader = new BufferedReader(new InputStreamReader(getResponseEntity.getContent()));  

        while((line = reader.readLine()) != null) {
            total.append(line);
        }

        line = total.toString();
        return line;
    } catch (Exception e) {
        Log.w("MyApp", "Download Exception : " + e.toString());
    }
    return null;
}

@Override
protected void onPostExecute(String result) {
    // do something with result
}
}
class RequestTask扩展了AsyncTask{
@凌驾
//用户名、密码、消息、手机
受保护的字符串doInBackground(字符串…url){
//常数
int timeoutSocket=5000;
int timeoutConnection=5000;
HttpParams httpParameters=新的BasicHttpParams();
HttpConnectionParams.setConnectionTimeout(httpParameters,timeoutConnection);
HttpConnectionParams.setSoTimeout(httpParameters,timeoutSocket);
HttpClient=新的默认HttpClient(httpParameters);
HttpGet HttpGet=新的HttpGet(url[0]);
试一试{
HttpResponse getResponse=client.execute(httpget);
final int statusCode=getResponse.getStatusLine().getStatusCode();
if(statusCode!=HttpStatus.SC\u OK){
Log.w(“MyApp”,“下载错误:+statusCode+”|表示URL:+URL);
返回null;
}
字符串行=”;
StringBuilder总计=新StringBuilder();
HttpEntity getResponseEntity=getResponse.getEntity();
BufferedReader reader=新的BufferedReader(新的InputStreamReader(getResponseEntity.getContent());
而((line=reader.readLine())!=null){
合计.追加(行);
}
行=总计.toString();
回流线;
}捕获(例外e){
w(“MyApp”,“下载异常:+e.toString());
}
返回null;
}
@凌驾
受保护的void onPostExecute(字符串结果){
//做一些有结果的事情
}
}
您可以这样使用它:

new RequestTask().execute("http://www.your-get-url.com/");

使用以下代码:

/**
 * Activity that downloads a web page in the background when {@link #readWebpage(View)}
 * is invoked and shows the result in a {@link TextView}.
 */
public class ReadWebpageAsyncTask extends Activity {
    private TextView textView;

    /** Called when the activity is first created. */
    @Override
    public void onCreate(Bundle savedInstanceState) {
        super.onCreate(savedInstanceState);
        setContentView(R.layout.main);
        textView = (TextView) findViewById(R.id.TextView01);
    }

    /**
     * Downloads every URL it is given and concatenates the response bodies (with line
     * terminators removed) into one String, which is then rendered into the text view.
     */
    private class DownloadWebPageTask extends AsyncTask<String, Void, String> {

        /**
         * @param urls the URLs to download, in order
         * @return the concatenated bodies; whatever was read before a failure is kept
         */
        @Override
        protected String doInBackground(String... urls) {
            // StringBuilder avoids the quadratic cost of String += inside the read loop.
            StringBuilder response = new StringBuilder();
            for (String url : urls) {
                DefaultHttpClient client = new DefaultHttpClient();
                HttpGet httpGet = new HttpGet(url);
                try {
                    HttpResponse execute = client.execute(httpGet);
                    InputStream content = execute.getEntity().getContent();

                    BufferedReader buffer = new BufferedReader(
                            new InputStreamReader(content));
                    try {
                        String s;
                        while ((s = buffer.readLine()) != null) {
                            response.append(s);
                        }
                    } finally {
                        // Closes the content stream too; a failure here is reported
                        // by the catch block below.
                        buffer.close();
                    }
                } catch (Exception e) {
                    // Best effort: report the failure and continue with the next URL.
                    e.printStackTrace();
                }
            }
            return response.toString();
        }

        /** Interprets the downloaded text as HTML and displays it in the text view. */
        @Override
        protected void onPostExecute(String result) {
            textView.setText(Html.fromHtml(result));
        }
    }

    /**
     * Starts a background download of http://www.google.com.
     *
     * @param view the view passed in by the caller (unused)
     */
    public void readWebpage(View view) {
        DownloadWebPageTask task = new DownloadWebPageTask();
        task.execute("http://www.google.com");
    }
}
公共类ReadWebpageAsyncTask扩展活动{
私有文本视图文本视图;
/**在首次创建活动时调用*/
@凌驾
创建时的公共void(Bundle savedInstanceState){
super.onCreate(savedInstanceState);
setContentView(R.layout.main);
textView=(textView)findViewById(R.id.TextView01);
}
私有类下载WebPagetTask扩展异步任务{
@凌驾
受保护的字符串doInBackground(字符串…URL){
字符串响应=”;
for(字符串url:url){
DefaultHttpClient=新的DefaultHttpClient();
HttpGet HttpGet=新的HttpGet(url);
试一试{
HttpResponse execute=client.execute(httpGet);
InputStream内容=execute.getEntity().getContent();
BufferedReader buffer=新的BufferedReader(
新的InputStreamReader(内容));
字符串s=“”;
而((s=buffer.readLine())!=null){
响应+=s;
}
}捕获(例外e){
e、 printStackTrace();
}
}
返回响应;
}
@凌驾
受保护的void onPostExecute(字符串结果){
setText(Html.fromHtml(result));
}
}
公共作废阅读网页(查看){
DownloadWebPagetTask=新建DownloadWebPagetTask();
task.execute(新字符串[]{”http://www.google.com" });
}
}
main.xml

<?xml version="1.0" encoding="utf-8"?>
<!-- Vertical layout holding a button and a text view. -->
<LinearLayout
    xmlns:android="http://schemas.android.com/apk/res/android"
    android:layout_width="match_parent"
    android:layout_height="match_parent"
    android:orientation="vertical">

    <!-- android:onClick names the handler method "readWebpage". -->
    <Button
        android:id="@+id/readWebpage"
        android:layout_width="match_parent"
        android:layout_height="wrap_content"
        android:onClick="readWebpage"
        android:text="Load Webpage" />

    <!-- Starts out showing "Example Text". -->
    <TextView
        android:id="@+id/TextView01"
        android:layout_width="match_parent"
        android:layout_height="match_parent"
        android:text="Example Text" />
</LinearLayout>


据我所知,我应该把 new RequestTask() 放在 onCreate() 中,而把其他代码放在 onCreate() 之外,对吗?如果是这样,我在 TAG 上遇到了错误:TAG cannot be resolved to a variable(TAG 无法解析为变量)。如何解决这个问题?字符串 line 就是页面的全部文本,对吗?是的,字符串 line 就是以字符串形式返回的整个响应。TAG 只是我之前定义的一个字符串变量,把它换成你自己选择的字符串即可。我已经修改了我的答案。是的,当你想开始下载时,可以在任何地方调用 new RequestTask()。如果你只想从特定的 HTML 元素中提取文本,你可以看看。如何读取 @K_Anas 的内容并在某个 TextView 中显示?– nulll