Hadoop MapReduce旧API-将命令行参数传递到映射

Hadoop MapReduce旧API-将命令行参数传递到映射,hadoop,mapreduce,Hadoop,Mapreduce,我正在编写一个MapReduce作业,用于在使用旧API存储在HDFS中的输入文件中查找搜索字符串(通过命令行参数传递)的出现情况 下面是我的驾驶课- public class StringSearchDriver { public static void main(String[] args) throws IOException { JobConf jc = new JobConf(StringSearchDriver.class); jc.

我正在编写一个MapReduce作业,用于在使用旧API存储在HDFS中的输入文件中查找搜索字符串(通过命令行参数传递)的出现情况

下面是我的Driver（驱动程序）类——

public class StringSearchDriver
{
    /**
     * Configures and submits the old-API (mapred) string-search job.
     * Expected arguments: &lt;input dir&gt; &lt;output dir&gt; &lt;search word&gt;.
     *
     * @param args command-line arguments; args[2] is the search word
     * @throws IOException if job submission fails
     */
    public static void main(String[] args) throws IOException
    {
        // Fail fast with a usage message instead of an opaque
        // ArrayIndexOutOfBoundsException when arguments are missing.
        if (args.length != 3)
        {
            System.err.println("Usage: StringSearchDriver <input dir> <output dir> <search word>");
            System.exit(-1);
        }
        JobConf jc = new JobConf(StringSearchDriver.class);
        // The mapper retrieves this value in configure() via jc.get("SearchWord").
        jc.set("SearchWord", args[2]);
        jc.setJobName("String Search");
        FileInputFormat.addInputPath(jc, new Path(args[0]));
        FileOutputFormat.setOutputPath(jc, new Path(args[1]));
        jc.setMapperClass(StringSearchMap.class);
        jc.setReducerClass(StringSearchReduce.class);
        // NOTE(review): Text must be org.apache.hadoop.io.Text, NOT javax.xml.soap.Text.
        // An auto-imported javax.xml.soap.Text is exactly what causes the
        // ClassCastException in getOutputKeyComparator seen in this question.
        jc.setOutputKeyClass(Text.class);
        jc.setOutputValueClass(IntWritable.class);
        JobClient.runJob(jc);
    }
}
下面是我的Mapper类-

/**
 * Old-API mapper that emits (word, 1) for every input token equal
 * (case-insensitively) to the search word stored in the job configuration.
 */
public class StringSearchMap extends MapReduceBase implements
        Mapper<LongWritable, Text, Text, IntWritable>
{
    // Search term passed by the driver via jc.set("SearchWord", ...).
    String searchWord;

    // Reused output objects — avoids allocating a new Text/IntWritable per match.
    private final Text outKey = new Text();
    private final IntWritable one = new IntWritable(1);

    @Override
    public void configure(JobConf jc)
    {
        searchWord = jc.get("SearchWord");
    }

    /**
     * Tokenizes the input line and emits each token matching the search word.
     *
     * @param key   byte offset of the line (unused)
     * @param value the input line
     * @param out   collector receiving (word, 1) pairs
     */
    @Override
    public void map(LongWritable key, Text value,
            OutputCollector<Text, IntWritable> out, Reporter reporter)
            throws IOException
    {
        // BUG FIX: the original split("") broke the line into SINGLE CHARACTERS,
        // so a multi-character search word (e.g. "hi") could never match.
        // Split on runs of whitespace to get actual words.
        String[] words = value.toString().split("\\s+");

        for (String word : words)
        {
            if (word.equalsIgnoreCase(searchWord))
            {
                outKey.set(word);
                out.collect(outKey, one);
            }
        }
    }
}
公共类StringSearchMap扩展了MapReduceBase实现
制图员
{
字符串搜索词;
公共void配置(JobConf jc)
{
searchWord=jc.get(“searchWord”);
}
@凌驾
公共无效映射(可长写键、文本值、,
输出收集器输出,报告器)
抛出IOException
{
字符串[]输入=value.toString().split(“”);
for(字符串字:输入)
{
if(word.equalsIgnoreCase(searchWord))
out.collect(新文本(word)、新可写(1));
}
}
}
在运行作业时(传递的命令行字符串为“hi”),我得到以下错误-

14/09/21 22:35:41 INFO mapred.JobClient: Task Id : attempt_201409212134_0005_m_000001_2, Status : FAILED
java.lang.ClassCastException: interface javax.xml.soap.Text
    at java.lang.Class.asSubclass(Class.java:3129)
    at org.apache.hadoop.mapred.JobConf.getOutputKeyComparator(JobConf.java:795)
    at org.apache.hadoop.mapred.MapTask$MapOutputBuffer.<init>(MapTask.java:964)
    at org.apache.hadoop.mapred.MapTask.runOldMapper(MapTask.java:422)
    at org.apache.hadoop.mapred.MapTask.run(MapTask.java:366)
    at org.apache.hadoop.mapred.Child$4.run(Child.java:255)
    at java.security.AccessController.doPrivileged(Native Method)
    at javax.security.auth.Subject.doAs(Subject.java:416)
    at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1190)
    at org.apache.hadoop.mapred.Child.main(Child.java:249)
14/09/21 22:35:41信息映射。作业客户端:任务Id:尝试\u 201409212134\u 0005\u m\u000001\u 2,状态:失败
java.lang.ClassCastException:接口javax.xml.soap.Text
在java.lang.Class.asSubclass(Class.java:3129)中
位于org.apache.hadoop.mapred.JobConf.getOutputKeyComparator(JobConf.java:795)
位于org.apache.hadoop.mapred.MapTask$MapOutputBuffer。(MapTask.java:964)
位于org.apache.hadoop.mapred.MapTask.runOldMapper(MapTask.java:422)
位于org.apache.hadoop.mapred.MapTask.run(MapTask.java:366)
位于org.apache.hadoop.mapred.Child$4.run(Child.java:255)
位于java.security.AccessController.doPrivileged(本机方法)
位于javax.security.auth.Subject.doAs(Subject.java:416)
位于org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1190)
位于org.apache.hadoop.mapred.Child.main(Child.java:249)

请建议。

Text类所在的Hadoop包应该是org.apache.hadoop.io。请检查您的import语句。

import java.io.IOException;
import java.util.*;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.conf.*;
import org.apache.hadoop.io.*;
import org.apache.hadoop.mapred.*;
import org.apache.hadoop.util.*;

您的IDE自动导入了错误的类：导入了javax.xml.soap.Text，而不是org.apache.hadoop.io.Text。

您可以在这个示例中找到错误的导入

有一点,最好采用新API

编辑

我使用了新的Api

import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

/**
 * @author Unmesha sreeveni
 * @Date 23 sep 2014
 */
/**
 * New-API (mapreduce) driver that counts occurrences of a search word,
 * passed on the command line, in the input files.
 *
 * Usage: StringSearchDriver &lt;input dir&gt; &lt;output dir&gt; &lt;search word&gt;
 *
 * @author Unmesha sreeveni
 * @Date 23 sep 2014
 */
public class StringSearchDriver extends Configured implements Tool {

    /** Mapper: emits (token, 1) for every token equal to the configured search word. */
    public static class Map extends
            Mapper<LongWritable, Text, Text, IntWritable> {

        private final static IntWritable one = new IntWritable(1);
        private final Text word = new Text();

        @Override
        public void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            Configuration conf = context.getConfiguration();
            String line = value.toString();
            // The driver stores the search term under the "word" key.
            String searchString = conf.get("word");
            StringTokenizer tokenizer = new StringTokenizer(line);
            while (tokenizer.hasMoreTokens()) {
                String token = tokenizer.nextToken();
                if (token.equals(searchString)) {
                    word.set(token);
                    context.write(word, one);
                }
            }
        }
    }

    /** Reducer: sums the per-mapper counts for each matched word. */
    public static class Reduce extends
            Reducer<Text, IntWritable, Text, IntWritable> {

        @Override
        public void reduce(Text key, Iterable<IntWritable> values,
                Context context) throws IOException, InterruptedException {
            int sum = 0;
            for (IntWritable val : values) {
                sum += val.get();
            }
            context.write(key, new IntWritable(sum));
        }
    }

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        int res = ToolRunner.run(conf, new StringSearchDriver(), args);
        System.exit(res);
    }

    @Override
    public int run(String[] args) throws Exception {
        if (args.length != 3) {
            // Usage errors go to stderr; return a non-zero code instead of
            // calling System.exit() from inside run() (main() handles the exit).
            System.err
            .printf("Usage: Search String <input dir> <output dir> <search word> \n");
            return -1;
        }

        String source = args[0];
        String dest = args[1];
        String searchword = args[2];

        // BUG FIX: use the Configuration supplied by ToolRunner (getConf())
        // instead of creating a fresh one — otherwise generic options such as
        // -D key=value passed on the command line are silently dropped,
        // defeating the purpose of extending Configured / implementing Tool.
        Configuration conf = getConf();
        conf.set("word", searchword);

        // Job.getInstance(...) replaces the deprecated new Job(conf, name) constructor.
        Job job = Job.getInstance(conf, "Search String");
        job.setJarByClass(StringSearchDriver.class);

        // Remove a pre-existing output directory so reruns do not fail.
        FileSystem fs = FileSystem.get(conf);
        Path in = new Path(source);
        Path out = new Path(dest);
        if (fs.exists(out)) {
            fs.delete(out, true);
        }

        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        job.setMapperClass(Map.class);
        job.setReducerClass(Reduce.class);
        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);
        FileInputFormat.addInputPath(job, in);
        FileOutputFormat.setOutputPath(job, out);

        boolean success = job.waitForCompletion(true);
        return success ? 0 : 1;
    }
}
import java.io.IOException;
导入java.util.StringTokenizer;
导入org.apache.hadoop.conf.Configuration;
导入org.apache.hadoop.conf.Configured;
导入org.apache.hadoop.fs.FileSystem;
导入org.apache.hadoop.fs.Path;
导入org.apache.hadoop.io.IntWritable;
导入org.apache.hadoop.io.LongWritable;
导入org.apache.hadoop.io.Text;
导入org.apache.hadoop.mapreduce.Job;
导入org.apache.hadoop.mapreduce.Mapper;
导入org.apache.hadoop.mapreduce.Reducer;
导入org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
导入org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
导入org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
导入org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
导入org.apache.hadoop.util.Tool;
导入org.apache.hadoop.util.ToolRunner;
/**
*@作者埃米沙·斯雷韦尼
*@日期2014年9月23日
*/
公共类StringSearchDriver扩展配置的实现工具{
公共静态类映射扩展
制图员{
私有最终静态IntWritable one=新的IntWritable(1);
私有文本字=新文本();
公共void映射(可长写键、文本值、上下文)
抛出IOException、InterruptedException{
conf=context.getConfiguration();
字符串行=value.toString();
String searchString=conf.get(“word”);
StringTokenizer标记器=新的StringTokenizer(行);
while(tokenizer.hasMoreTokens()){
String token=tokenizer.nextToken();
if(令牌等于(搜索字符串)){
word.set(令牌);
上下文。写(单词,一);
}
}
}
}
公共静态类Reduce扩展
减速器{
public void reduce(文本键、Iterable值、,
上下文)抛出IOException、InterruptedException{
整数和=0;
for(可写入值:值){
sum+=val.get();
}
write(key,newintwriteable(sum));
}
}
公共静态void main(字符串[]args)引发异常{
Configuration conf=新配置();
int res=ToolRunner.run(conf,new StringSearchDriver(),args);
系统退出(res);
}
@凌驾
公共int运行(字符串[]args)引发异常{
//TODO自动生成的方法存根
如果(参数长度!=3){
系统输出
.printf(“用法:搜索字符串\n”);
系统退出(-1);
}
字符串源=args[0];
字符串dest=args[1];
字符串searchword=args[2];
Configuration conf=新配置();
conf.set(“word”,searchword);
Job Job=新作业(conf,“搜索字符串”);
job.setJarByClass(StringSearchDriver.class);
FileSystem fs=FileSystem.get(conf);
路径输入=新路径(源);
路径输出=新路径(目的地);
如果(fs.exists(out)){
fs.删除(out,true);
}
job.setMapOutputKeyClass(Text.class);
setMapOutputValueClass(IntWritable.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(IntWritable.class);
job.setMapperClass(Map.class);
job.setReducerClass(Reduce.class);
setInputFormatClass(TextInputFormat.class);
setOutputFormatClass(TextOutputFormat.class);
addInputPath(作业,在中);
setOutputPath(作业,输出);
布尔值suces=job.waitForCompletion(true);
返回(成功?0:1);
}
}

这是可行的。

ClassCastException:interface javax.xml