Java JSoup-提取表数据错误

Java JSoup-提取表数据错误,java,html,arrays,parsing,jsoup,Java,Html,Arrays,Parsing,Jsoup,我刚刚开始了一个小项目,我需要收集全球货币对的历史数据。根据这个问题的答案,我将代码粘贴到下面 然而,尽管“数据”元素数组的大小为7,我还是不断收到一个IndexOutOfBoundException 我挠头已经快一个小时了,如果有人能指出我哪里出了问题,我将不胜感激 主类 import java.util.ArrayList; import java.util.List; import java.io.IOException; import org.jsoup.*; import org.j

我刚刚开始了一个小项目,我需要收集全球货币对的历史数据。根据这个问题的答案,我将代码粘贴到下面

然而,尽管“数据”元素数组的大小为7,我还是不断收到一个
IndexOutOfBoundException

我挠头已经快一个小时了,如果有人能指出我哪里出了问题,我将不胜感激

主类

import java.util.ArrayList;
import java.util.List;
import java.io.IOException;

import org.jsoup.*;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;


public class MainClass {


public static void main(String[] args) throws IOException{

    Document doc = Jsoup.connect("http://www.myfxbook.com/forex-market/currencies/GBPUSD-historical-data").get();

    Element table = doc.getElementById("symbolMarket");

    List<Entry> entries = new ArrayList<Entry>();

    for(Element row : table.select("tr")){

        int index = 0;
        Entry tableEntry = new Entry();
        Elements data = row.select("td");

        tableEntry.setDate(data.get(index++).text());
        tableEntry.setOpen(data.get(index++).text());
        tableEntry.setHigh(data.get(index++).text());
        tableEntry.setLow(data.get(index++).text());
        tableEntry.setClose(data.get(index++).text());
        tableEntry.setChangePips(data.get(index++).text());
        tableEntry.setChangePercent(data.get(index++).text());

        entries.add(tableEntry);

    }

}

}
public class Entry {

private String date;
private String open;
private String high;
private String low;
private String close;
private String changePips;
private String changePercent;

public String getDate() {
    return date;
}
public void setDate(String date) {
    this.date = date;
}
public String getOpen() {
    return open;
}
public void setOpen(String open) {
    this.open = open;
}
public String getHigh() {
    return high;
}
public void setHigh(String high) {
    this.high = high;
}
public String getLow() {
    return low;
}
public void setLow(String low) {
    this.low = low;
}
public String getClose() {
    return close;
}
public void setClose(String close) {
    this.close = close;
}
public String getChangePips() {
    return changePips;
}
public void setChangePips(String changePips) {
    this.changePips = changePips;
}
public String getChangePercent() {
    return changePercent;
}
public void setChangePercent(String changePercent) {
    this.changePercent = changePercent;
}



}
然而,尽管“数据”元素数组的大小为7,我还是不断收到一个
IndexOutOfBoundException

如果这是真的,你就不会看到这个例外

问题是第一行没有任何
td
,而是
th
(表头),所以对于这一行
行,select(“td”)
0
个匹配
td
选择器的元素,并通过异常信息通知您

java.lang.IndexOutOfBoundsException:索引:0,大小:0

要解决这个问题,您可以忽略第一行,或者显式地选择
tr
哪个
至少有一个
td
作为子元素

for(Element row : table.select("tr:has(td)")){
    //                            ^^^^^^^^
    ...
}
您还可以在对其应用任何操作之前手动测试
数据的大小
存储
td

for(Element row : table.select("tr")){
    Elements data = row.select("td");

    if(data.size()==7){

        int index = 0;
        Entry tableEntry = new Entry();

        tableEntry.setDate(data.get(index++).text());
        tableEntry.setOpen(data.get(index++).text());
        tableEntry.setHigh(data.get(index++).text());
        tableEntry.setLow(data.get(index++).text());
        tableEntry.setClose(data.get(index++).text());
        tableEntry.setChangePips(data.get(index++).text());
        tableEntry.setChangePercent(data.get(index++).text());

        entries.add(tableEntry);
    }
}

您正在尝试从表标题获取数据。。。你必须跳过它

public static void main(String[] args) throws IOException {
        Document doc = Jsoup.connect("http://www.myfxbook.com/forex-market/currencies/GBPUSD-historical-data").get();

        Element table = doc.getElementById("symbolMarket");

        List<Entry> entries = new ArrayList<Entry>();

        Elements elements = table.select("tr");
        Iterator<Element> itr = elements.iterator();
        itr.next(); //skip header data

        while ( itr.hasNext() ) {
            int index = 0;
            Entry tableEntry = new Entry();
            Elements data = itr.next().select("td");

            tableEntry.setDate(data.get(index++).text());
            tableEntry.setOpen(data.get(index++).text());
            tableEntry.setHigh(data.get(index++).text());
            tableEntry.setLow(data.get(index++).text());
            tableEntry.setClose(data.get(index++).text());
            tableEntry.setChangePips(data.get(index++).text());
            tableEntry.setChangePercent(data.get(index++).text());
            entries.add(tableEntry);

        }       




    }
publicstaticvoidmain(字符串[]args)引发IOException{
Document doc=Jsoup.connect(“http://www.myfxbook.com/forex-market/currencies/GBPUSD-historical-data).get();
元素表=doc.getElementById(“符号市场”);
列表项=新的ArrayList();
元素=表格。选择(“tr”);
迭代器itr=elements.Iterator();
itr.next();//跳过标题数据
while(itr.hasNext()){
int指数=0;
Entry tableEntry=新条目();
元素数据=itr.next()。选择(“td”);
tableEntry.setDate(data.get(index++).text());
tableEntry.setOpen(data.get(index++).text());
tableEntry.setHigh(data.get(index++).text());
tableEntry.setLow(data.get(index++).text());
tableEntry.setClose(data.get(index++).text());
tableEntry.setChangePips(data.get(index++.text());
tableEntry.setChangePercent(data.get(index++).text());
条目。添加(表格条目);
}       
}