java中map reduce作业的列表索引超出范围

java中map reduce作业的列表索引超出范围,java,apache,hadoop,mapreduce,hortonworks-data-platform,Java,Apache,Hadoop,Mapreduce,Hortonworks Data Platform,我刚刚开始学习hadoop,并编写了一个map reduce作业,当我在hortonworks sandbox上运行该作业时,我得到一个错误,如下所示: IndexError at /jobbrowser/jobs/job_1415606658676_0002/single_logs list index out of range Request Method: GET Request URL: http://localhost/jobbrowser/jobs/job_1415606658

我刚开始学习 Hadoop,并编写了一个 MapReduce 作业。在 Hortonworks Sandbox 上运行该作业时,出现了如下错误:

IndexError at /jobbrowser/jobs/job_1415606658676_0002/single_logs
list index out of range
Request Method: GET
Request URL:    http://localhost/jobbrowser/jobs/job_1415606658676_0002/single_logs
Django Version: 1.2.3
Exception Type: IndexError
Exception Value:    
list index out of range
Exception Location: /usr/lib/hue/apps/jobbrowser/src/jobbrowser/views.py in job_single_logs, line 266
Python Executable:  /usr/bin/python2.6
Python Version: 2.6.6
Python Path:    ['', '/usr/lib/hue/build/env/lib/python2.6/site-packages/setuptools-0.6c11-py2.6.egg', '/usr/lib/hue/build/env/lib/python2.6/site-packages/pip-0.6.3-py2.6.egg', '/usr/lib/hue/build/env/lib/python2.6/site-packages/Babel-0.9.6-py2.6.egg', '/usr/lib/hue/build/env/lib/python2.6/site-packages/BabelDjango-0.2.2-py2.6.egg', '/usr/lib/hue/build/env/lib/python2.6/site-packages/Django-1.2.3-py2.6.egg', '/usr/lib/hue/build/env/lib/python2.6/site-packages/Mako-0.7.2-py2.6.egg', '/usr/lib/hue/build/env/lib/python2.6/site-packages/Markdown-2.0.3-py2.6.egg', '/usr/lib/hue/build/env/lib/python2.6/site-packages/MarkupSafe-0.9.3-py2.6-linux-x86_64.egg', '/usr/lib/hue/build/env/lib/python2.6/site-packages/MySQL_python-1.2.3c1-py2.6-linux-x86_64.egg', '/usr/lib/hue/build/env/lib/python2.6/site-packages/Paste-1.7.2-py2.6.egg', '/usr/lib/hue/build/env/lib/python2.6/site-packages/PyYAML-3.09-py2.6-linux-x86_64.egg', '/usr/lib/hue/build/env/lib/python2.6/site-packages/Pygments-1.3.1-py2.6.egg', '/usr/lib/hue/build/env/lib/python2.6/site-packages/South-0.7-py2.6.egg', '/usr/lib/hue/build/env/lib/python2.6/site-packages/Spawning-0.9.6-py2.6.egg', '/usr/lib/hue/build/env/lib/python2.6/site-packages/avro-1.5.0-py2.6.egg', '/usr/lib/hue/build/env/lib/python2.6/site-packages/configobj-4.6.0-py2.6.egg', '/usr/lib/hue/build/env/lib/python2.6/site-packages/django_auth_ldap-1.0.7-py2.6.egg', '/usr/lib/hue/build/env/lib/python2.6/site-packages/django_extensions-0.5-py2.6.egg', '/usr/lib/hue/build/env/lib/python2.6/site-packages/django_nose-0.5-py2.6.egg', '/usr/lib/hue/build/env/lib/python2.6/site-packages/elementtree-1.2.6_20050316-py2.6.egg', '/usr/lib/hue/build/env/lib/python2.6/site-packages/enum-0.4.4-py2.6.egg', '/usr/lib/hue/build/env/lib/python2.6/site-packages/eventlet-0.9.14-py2.6.egg', '/usr/lib/hue/build/env/lib/python2.6/site-packages/greenlet-0.3.1-py2.6-linux-x86_64.egg', '/usr/lib/hue/build/env/lib/python2.6/site-packages/happybase-0.6-py2.6.egg', 
'/usr/lib/hue/build/env/lib/python2.6/site-packages/kerberos-1.1.1-py2.6-linux-x86_64.egg', '/usr/lib/hue/build/env/lib/python2.6/site-packages/lockfile-0.8-py2.6.egg', '/usr/lib/hue/build/env/lib/python2.6/site-packages/lxml-2.2.2-py2.6-linux-x86_64.egg', '/usr/lib/hue/build/env/lib/python2.6/site-packages/moxy-1.0.0-py2.6.egg', '/usr/lib/hue/build/env/lib/python2.6/site-packages/pam-0.1.3-py2.6.egg', '/usr/lib/hue/build/env/lib/python2.6/site-packages/pyOpenSSL-0.13-py2.6-linux-x86_64.egg', '/usr/lib/hue/build/env/lib/python2.6/site-packages/pycrypto-2.6-py2.6-linux-x86_64.egg', '/usr/lib/hue/build/env/lib/python2.6/site-packages/pysqlite-2.5.5-py2.6-linux-x86_64.egg', '/usr/lib/hue/build/env/lib/python2.6/site-packages/python_daemon-1.5.1-py2.6.egg', '/usr/lib/hue/build/env/lib/python2.6/site-packages/python_ldap-2.3.13-py2.6-linux-x86_64.egg', '/usr/lib/hue/build/env/lib/python2.6/site-packages/pytidylib-0.2.1-py2.6.egg', '/usr/lib/hue/build/env/lib/python2.6/site-packages/sasl-0.1.1-py2.6-linux-x86_64.egg', '/usr/lib/hue/build/env/lib/python2.6/site-packages/sh-1.08-py2.6.egg', '/usr/lib/hue/build/env/lib/python2.6/site-packages/simplejson-2.0.9-py2.6-linux-x86_64.egg', '/usr/lib/hue/build/env/lib/python2.6/site-packages/threadframe-0.2-py2.6-linux-x86_64.egg', '/usr/lib/hue/build/env/lib/python2.6/site-packages/thrift-0.9.0-py2.6-linux-x86_64.egg', '/usr/lib/hue/build/env/lib/python2.6/site-packages/urllib2_kerberos-0.1.6-py2.6.egg', '/usr/lib/hue/build/env/lib/python2.6/site-packages/xlrd-0.9.0-py2.6.egg', '/usr/lib/hue/desktop/core/src', '/usr/lib/hue/desktop/libs/hadoop/src', '/usr/lib/hue/desktop/libs/liboozie/src', '/usr/lib/hue/build/env/lib/python2.6/site-packages', '/usr/lib/hue/apps/about/src', '/usr/lib/hue/apps/beeswax/src', '/usr/lib/hue/apps/filebrowser/src', '/usr/lib/hue/apps/hcatalog/src', '/usr/lib/hue/apps/help/src', '/usr/lib/hue/apps/jobbrowser/src', '/usr/lib/hue/apps/jobsub/src', '/usr/lib/hue/apps/oozie/src', 
'/usr/lib/hue/apps/pig/src', '/usr/lib/hue/apps/proxy/src', '/usr/lib/hue/apps/shell/src', '/usr/lib/hue/apps/useradmin/src', '/usr/lib/hue/build/env/bin', '/usr/lib64/python2.6', '/usr/lib64/python2.6/plat-linux2', '/usr/lib64/python2.6/lib-dynload', '/usr/lib64/python2.6/site-packages', '/usr/lib/python2.6/site-packages', '/usr/lib/python2.6/site-packages/setuptools-0.6c11-py2.6.egg-info', '/usr/lib/hue/apps/beeswax/gen-py', '/usr/lib/hue', '/usr/lib64/python26.zip', '/usr/lib64/python2.6/lib-tk', '/usr/lib64/python2.6/lib-old', '/usr/lib/python2.6/site-packages/setuptools-0.6c11-py2.6.egg-info', '/usr/lib/python2.6/site-packages/setuptools-0.6c11-py2.6.egg-info', '/usr/lib/hue/apps/beeswax/src/beeswax/../../gen-py', '/usr/lib/hue/apps/jobbrowser/src/jobbrowser/../../gen-py', '/usr/lib/hue/apps/proxy/src/proxy/../../gen-py']
映射器类如下所示:

/*
 * To change this license header, choose License Headers in Project Properties.
 * To change this template file, choose Tools | Templates
 * and open the template in the editor.
 */

 package electionfraudhadoopvotechange;

 /**
  *
  * @author Rohan
  */
  import java.io.IOException;
  import org.apache.hadoop.io.LongWritable;
  import org.apache.hadoop.io.Text;
  import org.apache.hadoop.mapred.OutputCollector;
  import org.apache.hadoop.mapred.Reporter;
  import org.apache.hadoop.mapreduce.Mapper;

  public class VoteChangeMapper extends Mapper<Object, Text, Text, Text> {

  public void map(Object key, Text value,
                    OutputCollector<Text, Text> outputCollector, Reporter reporter) throws        IOException,
        InterruptedException {

    String[] words = value.toString().split("\t");
    outputCollector.collect(new Text(words[0]), new Text(words[2]));
    }
}
    /*
 * To change this license header, choose License Headers in Project Properties.
 * To change this template file, choose Tools | Templates
 * and open the template in the editor.
 */

package electionfraudhadoopvotechange;

import java.io.IOException;
import java.util.Iterator;
import java.util.Vector;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapreduce.Reducer;

/**
 *
 * @author Rohan
 */
public class VoteChangeReducer extends Reducer<Text, Text, Text, Text> 
{

    public void reduce(Text key, Iterator<Text> values,
                        OutputCollector<Text, Text> results, Reporter reporter)
            throws IOException, InterruptedException 
    {
        Vector<Text> votes = new Vector<Text>();
        while(values.hasNext())
        {
            votes.add(values.next());
        }
        if(votes.size() == 2)
        {
            results.collect(key, new Text(votes.elementAt(0).toString()+"->"+votes.elementAt(1).toString()));
        }

        votes.clear();
    }
}
package electionfraudhadoopvotechange;

/*
 * To change this license header, choose License Headers in Project Properties.
 * To change this template file, choose Tools | Templates
 * and open the template in the editor.
 */

import electionfraudhadoopvotechange.VoteChangeMapper;
import electionfraudhadoopvotechange.VoteChangeReducer;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;


/**
 *
 * @author Rohan
 */
public class ElectionFraudHadoopVoteChange extends Configured implements Tool{

    public static void main(String[] args) throws Exception {
        int res = ToolRunner.run(new Configuration(), new ElectionFraudHadoopVoteChange(), args);
        System.exit(res);       
    }

    public int run(String[] args) throws Exception {
        if (args.length != 3) {
            System.out.println("usage: [2006 input] [2008 input] [output]");
            System.exit(-1);
        }

        Job job = Job.getInstance(new Configuration());
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        job.setMapperClass(VoteChangeMapper.class);
        job.setReducerClass(VoteChangeReducer.class);

        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);

        FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileInputFormat.addInputPath(job, new Path(args[1]));

        FileOutputFormat.setOutputPath(job, new Path(args[2]));

        job.setJarByClass(ElectionFraudHadoopVoteChange.class);

        job.submit();
        return 0;
    }
}
我的输入是两个只包含整数的文本文件,我是在 Hortonworks Sandbox 的单节点集群上运行的。我多次尝试调试,但都没有成功。非常感谢您的帮助。

是否尝试删除

job.setInputFormatClass(TextInputFormat.class);
job.setOutputFormatClass(TextOutputFormat.class); 

(即从您的代码中删除上面这两行)?如果显式设置了 format 类,还应创建 RecordReader,以便它知道如何读取文本。如果不设置 format 类,Hadoop 会将每一行作为字符串读取,我认为这正是您想要的。

您能给我看一下您的输入示例吗?是类似 int1\t int2\t int3 这样的格式吗?