Mapreduce 映射减少过滤器记录_Mapreduce

Mapreduce 映射减少过滤器记录

mapreduce

Mapreduce 映射减少过滤器记录,mapreduce,Mapreduce,我有一组只需要处理男性记录的记录，在map-reduce程序中，我使用if条件只过滤男性记录。但下面的程序给出零记录作为输出输入文件：布兰登·巴克纳，阿维尔，女，525 2，吠陀霍普金斯，阿维尔，男性，633 3，Zia Underwood，扑热息痛，男性，980 4岁，奥斯汀·梅尔，扑热息痛，女性，338 玛拉·希金斯，阿维尔，女，153岁 6岁，Sybill Crosby，avil，男，193岁 7岁，泰勒·罗萨莱斯，扑热息痛，男性，778 8岁，伊凡·黑尔，阿维尔，女，454 9岁，A

我有一组记录，只需要处理其中的男性记录。在 MapReduce 程序中，我使用 if 条件只筛选出男性记录，但下面的程序输出了零条记录。

输入文件：

1,Brandon Buckner,avil,female,525
2,Veda Hopkins,avil,male,633
3,Zia Underwood,paracetamol,male,980
4,Austin Mayer,paracetamol,female,338
5,Mara Higgins,avil,female,153
6,Sybill Crosby,avil,male,193
7,Tyler Rosales,paracetamol,male,778
8,Ivan Hale,avil,female,454
9,Alika Gilmore,paracetamol,female,833
10,Len Burgess,metacin,male,325

Mapreduce程序：

package org.samples.mapreduce.training;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;


public class patientrxMR_filter {

    public static class MapDemohadoop extends
            Mapper<LongWritable, Text, Text, IntWritable> {

        // setup , map, run, cleanup

        public void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            String line = value.toString();
            String[] elements = line.split(",");



String gender =elements[3];


if ( gender == "male" ) {

    Text tx = new Text(elements[2]);
                int i = Integer.parseInt(elements[4]);
                IntWritable it = new IntWritable(i);
                context.write(tx, it);
}
        }
    }

    public static class Reduce extends
            Reducer<Text, IntWritable, Text, IntWritable> {

        // setup, reduce, run, cleanup
        // innput - para [150,100]
        public void reduce(Text key, Iterable<IntWritable> values,
                Context context) throws IOException, InterruptedException {
            int sum = 0;
            for (IntWritable val : values) {
                sum += val.get();
            }
            context.write(key, new IntWritable(sum));
        }
    }

    public static void main(String[] args) throws Exception {

        if (args.length != 2) {
            System.err.println("Insufficient args");
            System.exit(-1);
        }
        Configuration conf = new Configuration();

        //conf.set("fs.default.name","hdfs://localhost:50000");
        conf.set("mapred.job.tracker", "hdfs://localhost:50001");

//      conf.set("DrugName", args[3]);
        Job job = new Job(conf, "Drug Amount Spent");

        job.setJarByClass(patientrxMR_filter.class); // class conmtains mapper and
                                                // reducer class

        job.setMapOutputKeyClass(Text.class); // map output key class
        job.setMapOutputValueClass(IntWritable.class);// map output value class
        job.setOutputKeyClass(Text.class); // output key type in reducer
        job.setOutputValueClass(IntWritable.class);// output value type in
                                                    // reducer

        job.setMapperClass(MapDemohadoop.class);
        job.setReducerClass(Reduce.class);
        job.setNumReduceTasks(1);
        job.setInputFormatClass(TextInputFormat.class); // default -- inputkey
                                                        // type -- longwritable
                                                        // : valuetype is text
        job.setOutputFormatClass(TextOutputFormat.class);



        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        job.waitForCompletion(true);

    }

}

package org.samples.mapreduce.training；
导入java.io.IOException；
导入org.apache.hadoop.conf.Configuration；
导入org.apache.hadoop.fs.Path；
导入org.apache.hadoop.io.IntWritable；
导入org.apache.hadoop.io.LongWritable；
导入org.apache.hadoop.io.Text；
导入org.apache.hadoop.mapreduce.Job；
导入org.apache.hadoop.mapreduce.Mapper；
导入org.apache.hadoop.mapreduce.Reducer；
导入org.apache.hadoop.mapreduce.lib.input.FileInputFormat；
导入org.apache.hadoop.mapreduce.lib.input.TextInputFormat；
导入org.apache.hadoop.mapreduce.lib.output.FileOutputFormat；
导入org.apache.hadoop.mapreduce.lib.output.TextOutputFormat；
公共类patientrxMR\u过滤器{
公共静态类MapDemohadoop扩展
制图员{
//设置、映射、运行、清理
公共void映射（可长写键、文本值、上下文）
抛出IOException、InterruptedException{
字符串行=value.toString（）；
String[]elements=line.split（“，”）；
字符串性别=元素[3]；
如果（性别=“男性”）{
文本tx=新文本（元素[2]）；
inti=Integer.parseInt（元素[4]）；
IntWritable it=新的IntWritable（i）；
context.write（tx，it）；
}
}
}
公共静态类Reduce扩展
减速器{
//设置、减少、运行、清理
//innput-第[150100]段
public void reduce（文本键、Iterable值、，
上下文）抛出IOException、InterruptedException{
整数和=0；
for（可写入值：值）{
sum+=val.get（）；
}
write（key，newintwriteable（sum））；
}
}
公共静态void main（字符串[]args）引发异常{
如果（参数长度！=2）{
System.err.println（“参数不足”）；
系统退出（-1）；
}
Configuration conf=新配置（）；
//conf.set（“fs.default.name”hdfs://localhost:50000");
conf.set（“mapred.job.tracker”hdfs://localhost:50001");
//conf.set（“DrugName”，args[3]）；
Job Job=新工作（conf，“药物消耗量”）；
job.setJarByClass（patientrxMR_filter.class）；//类conmtains映射器和
//减速器类
job.setMapOutputKeyClass（Text.class）；//映射输出键类
job.setMapOutputValueClass（IntWritable.class）；//映射输出值类
job.setOutputKeyClass（Text.class）；//减速机中的输出键类型
job.setOutputValueClass（IntWritable.class）；//输入输出值
//减速器
setMapperClass（MapDemohadoop.class）；
job.setReducerClass（Reduce.class）；
作业。setNumReduceTasks（1）；
job.setInputFormatClass（TextInputFormat.class）；//默认值--inputkey
//类型--longwritable
//：valuetype是文本
setOutputFormatClass（TextOutputFormat.class）；
addInputPath（作业，新路径（args[0]）；
setOutputPath（作业，新路径（args[1]）；
job.waitForCompletion（true）；
}
}

Hadoop 使用分布式文件系统。在 `String line = value.toString();` 中，line 是块内某个偏移量（即键）处的文件内容。在本例中，line 加载的是整个测试文件（它显然可以放进一个块中），而不是像您所期望的那样逐行加载文件。

if ( gender == "male" )

if ( gender == "male" )

这一行并不能判断字符串内容是否相等：在 Java 中，`==` 比较的是对象引用；要比较字符串内容，请使用 `equals()` 方法。

您的 Mapper 类被执行了吗？我的意思是，您是否查看过标准输出（如果是在集群中运行）并检查过执行流程？可以尝试在 Mapper 中加入 System.out 输出，这会对您有所帮助……

i.e 
if ( gender.equals("male") )