Java 使用Hadoop MapReduce的ArrayIndexOutofBoundsException_Java_Hadoop_Mapreduce_Indexoutofboundsexception

Java 使用Hadoop MapReduce的ArrayIndexOutofBoundsException
java hadoop mapreduce
Java 使用Hadoop MapReduce的ArrayIndexOutofBoundsException,java,hadoop,mapreduce,indexoutofboundsexception,Java,Hadoop,Mapreduce,Indexoutofboundsexception,我在String temp=word[5]旁边获取ArrayIndexOutofBoundsException在我的映射器中我对此进行了研究，我知道错误来自何处（当输入数据为空或长度小于或大于代码中指定的索引时。我的数据有一些空单元格值）我尝试使用以下代码捕获数组索引错误，但它仍然给我错误 import java.io.IOException; import java.util.*; import org.apache.hadoop.io.*; import org.apache.hado
我的映射器（Mapper）在 String temp = word[5]; 这一行抛出了 ArrayIndexOutOfBoundsException。
我查过资料，知道这个错误的来源：当输入行为空，或者按逗号拆分后的字段数少于代码中使用的下标时，就会发生这个异常（我的数据中有一些空的单元格）。
我尝试用下面的代码来避免数组下标越界，但它仍然报同样的错误：
import java.io.IOException;
import java.util.*;

import org.apache.hadoop.io.*;
import org.apache.hadoop.mapred.*;

/**
 * Mapper from the question: splits each input line on commas, takes element 3 as
 * the month and element 5 as the temperature, and collects (month, temperature).
 *
 * NOTE(review): this is the code that fails with ArrayIndexOutOfBoundsException;
 * the comments below mark the places where it goes wrong.
 */
public class AvgMaxTempMapper extends MapReduceBase implements Mapper<LongWritable, Text, Text, DoubleWritable> {

  /**
   * Parses one line of text and collects a (month, temperature) pair.
   *
   * @param key      not used in this method
   * @param value    the line of text to parse
   * @param output   receives the (month, temperature) pair
   * @param reporter not used in this method
   * @throws IOException declared in the signature; presumably raised by
   *                     output.collect - confirm against the Hadoop API
   */
  public void map(LongWritable key, Text value, OutputCollector<Text, DoubleWritable> output, Reporter reporter) throws IOException {


    String line = value.toString();

    // NOTE(review): the trailing ';' gives this if statement an empty body, so the
    // check guards nothing and the statements below run for every line.
    // `str` is not declared anywhere in the visible code.
    if(line != null && !line.isEmpty() && str.matches(".*\\d+.*"));
        String [] word = line.split(",");
        // No length check: when the split yields fewer than 6 elements, word[3] or
        // word[5] throws ArrayIndexOutOfBoundsException. String.split(",") also
        // drops trailing empty strings, so trailing empty cells shorten the array.
        String month = word[3];
        String temp = word[5];
        // Only temperature strings of 2 to 4 characters are parsed and emitted.
        if (temp.length() > 1 && temp.length() < 5){
            // Double.parseDouble throws NumberFormatException for non-numeric text.
            Double avgtemp = Double.parseDouble(temp);


        // Despite its indentation, this call is still inside the if block above,
        // which is closed by the brace on the next line.
        output.collect(new Text(month),  new DoubleWritable(avgtemp));
    }
  }
}       

import java.io.IOException；
导入java.util.*；
导入org.apache.hadoop.io.*；
导入org.apache.hadoop.mapred.*；
公共类AvgMaxTempMapper扩展MapReduceBase实现Mapper{
公共void映射（LongWritable键、文本值、OutputCollector输出、Reporter报告器）引发IOException{
字符串行=value.toString（）；
如果（line！=null&&！line.isEmpty（）&&str.matches（“.\\d+.”）；
String[]word=line.split（“，”）；
字符串月份=字[3]；
字符串温度=字[5]；
如果（温度长度（）>1和温度长度（）<5）{
Double avgtemp=Double.parseDouble（temp）；
collect（新文本（月），新双写（avgtemp））；
}
}
}       

如果有人能给我一些提示，告诉我问题是出在这段代码里，还是我应该去别的地方排查，那会帮我省去很多麻烦。
方法签名中声明了抛出异常，这基本上意味着只要遇到一行“坏”数据，整个 Mapper 就会停止。实际上，您应该让 Mapper 忽略这一行数据，继续处理其余的行。
您应该在 split() 之后立即检查 word[] 的长度；如果长度不够，就停止处理该行。取出 month 和 temp 之后，也需要检查它们是否有效。例如：
// Fragment intended for the body of map(): validates one CSV record and emits
// (month, temperature) only when the record is usable. Bad records are skipped
// with `return`, which ends processing of this line only; the framework then
// calls map() again for the next line.
String[] word = line.split(",");
// At least six fields are needed so that word[3] and word[5] both exist.
// String.split drops trailing empty strings, so trailing empty cells
// produce a shorter array.
if (word == null || word.length < 6) {
    return;
}

String month = word[3];
// Skip records WITHOUT a month (missing or empty cell).
if (month == null || month.isEmpty()) {
    return;
}

String temp = word[5];

if (temp != null && temp.length() > 1 && temp.length() < 5) {
    // Declared outside the try block so it is still in scope for collect().
    double avgtemp;
    try {
        avgtemp = Double.parseDouble(temp);
    } catch (NumberFormatException ex) {
        //Log that you've seen a dodgy temperature
        return;
    }
    output.collect(new Text(month), new DoubleWritable(avgtemp));
}

String[]word=line.split（“，”）；
if（word==null | | word.length<6）{
打破
}
字符串月份=字[3]；
如果（月！=null）{
打破
}
字符串温度=字[5]；
如果（temp！=null&&temp.length（）>1&&temp.length（）<5）{
试一试{
Double avgtemp=Double.parseDouble（temp）；
}捕获（NumberFormatException ex）{
//记录你已经看到了不稳定的温度
打破
}
collect（新文本（月），新双写（avgtemp））；
}

在 MapReduce 作业中验证数据非常重要，因为您永远无法保证输入会是什么样子。
您可能还想看看 Apache Commons 的 StringUtils 和 ArrayUtils 类——它们提供了 StringUtils.isEmpty(temp) 和 ArrayUtils.isEmpty(word) 等方法，可以让上面的检查代码更简洁。
方法签名中声明了抛出异常，这基本上意味着只要遇到一行“坏”数据，整个 Mapper 就会停止。实际上，您应该让 Mapper 忽略这一行数据，继续处理其余的行。
您应该在 split() 之后立即检查 word[] 的长度；如果长度不够，就停止处理该行。取出 month 和 temp 之后，也需要检查它们是否有效。例如：
// Fragment intended for the body of map(): validates one CSV record and emits
// (month, temperature) only when the record is usable. Bad records are skipped
// with `return`, which ends processing of this line only; the framework then
// calls map() again for the next line.
String[] word = line.split(",");
// At least six fields are needed so that word[3] and word[5] both exist.
// String.split drops trailing empty strings, so trailing empty cells
// produce a shorter array.
if (word == null || word.length < 6) {
    return;
}

String month = word[3];
// Skip records WITHOUT a month (missing or empty cell).
if (month == null || month.isEmpty()) {
    return;
}

String temp = word[5];

if (temp != null && temp.length() > 1 && temp.length() < 5) {
    // Declared outside the try block so it is still in scope for collect().
    double avgtemp;
    try {
        avgtemp = Double.parseDouble(temp);
    } catch (NumberFormatException ex) {
        //Log that you've seen a dodgy temperature
        return;
    }
    output.collect(new Text(month), new DoubleWritable(avgtemp));
}

String[]word=line.split（“，”）；
if（word==null | | word.length<6）{
打破
}
字符串月份=字[3]；
如果（月！=null）{
打破
}
字符串温度=字[5]；
如果（temp！=null&&temp.length（）>1&&temp.length（）<5）{
试一试{
Double avgtemp=Double.parseDouble（temp）；
}捕获（NumberFormatException ex）{
//记录你已经看到了不稳定的温度
打破
}
collect（新文本（月），新双写（avgtemp））；
}

在 MapReduce 作业中验证数据非常重要，因为您永远无法保证输入会是什么样子。
您可能还想看看 Apache Commons 的 StringUtils 和 ArrayUtils 类——它们提供了 StringUtils.isEmpty(temp) 和 ArrayUtils.isEmpty(word) 等方法，可以让上面的检查代码更简洁。
我建议使用自定义计数器：每发现一个空单元格，就让计数器加一。这样您就能了解数据中有多少这样的行。
除了其他一些效率上的改进之外，我的建议如下：
import java.io.IOException;  //do you still need this?
import java.util.*;

import org.apache.hadoop.io.*;
import org.apache.hadoop.mapred.*;

public class AvgMaxTempMapper extends MapReduceBase implements Mapper<LongWritable, Text, Text, DoubleWritable> {

  public static enum STATS {MISSING_VALUE};
  private Text outKey = new Text();
  private DoubleWritable outValue = new DoubleWritable();      

  public void map(LongWritable key, Text value, OutputCollector<Text, DoubleWritable> output, Reporter reporter) throws IOException {


    String line = value.toString();

    if(line.matches(".*\\d+.*"));
        String [] word = line.split(",");
        if (word.length < 6) { //or whatever else you consider expected
            reporter.incrCounter(STATS.MISSING_VALUE,1); //you can also print/log an error message if you like                
            return;
        }
        String month = word[3];
        String temp = word[5];
        if (temp.length() > 1 && temp.length() < 5){
            Double avgtemp = Double.parseDouble(temp);                  
            outKey.set(month);
            outValue.set(avgtemp);
            output.collect(outKey, outValue);
        } //you were missing this '}'
    }
  }

}

import java.io.IOException//你还需要这个吗？
导入java.util.*；
导入org.apache.hadoop.io.*；
导入org.apache.hadoop.mapred.*；
公共类AvgMaxTempMapper扩展MapReduceBase实现Mapper{
公共静态枚举统计数据{缺少_值}；
私有文本输出键=新文本（）；
private DoubleWritable outValue=新的DoubleWritable（）；
公共void映射（LongWritable键、文本值、OutputCollector输出、Reporter报告器）引发IOException{
字符串行=value.toString（）；
if（line.matches（“.\\d+.”）；
String[]word=line.split（“，”）；
如果（Word.Load＜6）{//或你想得到的任何其他东西
reporter.incrCounter（STATS.MISSING_VALUE，1）；//如果愿意，还可以打印/记录错误消息
返回；
}
字符串月份=字[3]；
字符串温度=字[5]；
如果（温度长度（）>1和温度长度（）<5）{
Double avgtemp=Double.parseDouble（temp）；
outKey.set（月）；
输出值设置（avgtemp）；
输出.收集（输出键，输出值）；
}//您缺少这个“}”
}
}
}
我建议使用自定义计数器：每发现一个空单元格，就让计数器加一。这样您就能了解数据中有多少这样的行。
除了其他一些效率上的改进之外，我的建议如下：
import java.io.IOException;  //do you still need this?
import java.util.*;

import org.apache.hadoop.io.*;
import org.apache.hadoop.mapred.*;

public class AvgMaxTempMapper extends MapReduceBase implements Mapper<LongWritable, Text, Text, DoubleWritable> {

  public static enum STATS {MISSING_VALUE};
  private Text outKey = new Text();
  private DoubleWritable outValue = new DoubleWritable();      

  public void map(LongWritable key, Text value, OutputCollector<Text, DoubleWritable> output, Reporter reporter) throws IOException {


    String line = value.toString();

    if(line.matches(".*\\d+.*"));
        String [] word = line.split(",");
        if (word.length < 6) { //or whatever else you consider expected
            reporter.incrCounter(STATS.MISSING_VALUE,1); //you can also print/log an error message if you like                
            return;
        }
        String month = word[3];
        String temp = word[5];
        if (temp.length() > 1 && temp.length() < 5){
            Double avgtemp = Double.parseDouble(temp);                  
            outKey.set(month);
            outValue.set(avgtemp);
            output.collect(outKey, outValue);
        } //you were missing this '}'
    }
  }

}

import java.io.IOException//你还需要这个吗？
导入java.util.*；
导入org.apache.hadoop.io.*；
导入org.apache.hadoop.mapre