Java Hadoop M/R二级排序不起作用,基于用户的姓氏

Java Hadoop M/R 二级排序不起作用(基于用户的姓氏)。标签:java、hadoop、mapreduce、bigdata、secondary-sort。

我想根据用户的lastname对输出进行排序,使用的键是firstName。下面是我正在使用的类,但我没有得到基于lastName的排序输出。我是hadoop新手,这篇文章是我在各种互联网资源的帮助下写的

主要类别:-

public class WordCount {

    /**
     * Mapper: parses "firstName lastName" pairs from each input line and
     * emits a composite {@link CustomKey}(firstName, lastName) paired with
     * the last name, so the framework can secondary-sort by last name.
     */
    public static class Map extends Mapper<LongWritable, Text, CustomKey, Text> {

        public static final Log log = LogFactory.getLog(Map.class);

        private final Text first = new Text();
        private final Text last = new Text();

        @Override
        public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            String line = value.toString();

            StringTokenizer tokenizer = new StringTokenizer(line, "\n");

            while (tokenizer.hasMoreTokens()) {

                String[] vals = tokenizer.nextToken().split(" ");

                // Guard against malformed records; the original threw
                // ArrayIndexOutOfBoundsException on lines without two fields.
                if (vals.length < 2) {
                    log.warn("Skipping malformed record: " + line);
                    continue;
                }

                first.set(vals[0]);
                last.set(vals[1]);

                context.write(new CustomKey(first.toString(), last.toString()), last);
            }
        }
    }

    /**
     * Reducer: receives the last names grouped by first name (the grouping
     * comparator ignores the last name) and writes one output line per value.
     * Values arrive in the order imposed by the sort comparator.
     */
    public static class Reduce extends Reducer<CustomKey, Text, Text, Text> {

        public static final Log log = LogFactory.getLog(Reduce.class);

        @Override
        public void reduce(CustomKey key, Iterable<Text> values, Context context)
                throws IOException, InterruptedException {
            for (Text val : values) {
                context.write(new Text(key.getFirstName()), val);
            }
        }
    }

    /**
     * Job driver: wires the mapper/reducer together with the secondary-sort
     * plumbing (composite key, partitioner, grouping and sort comparators).
     *
     * @param args args[0] = input path, args[1] = output path
     */
    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration());

        job.setJarByClass(WordCount.class);

        job.setMapperClass(Map.class);
        job.setReducerClass(Reduce.class);

        job.setMapOutputKeyClass(CustomKey.class);
        job.setMapOutputValueClass(Text.class);

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        // Secondary sort: partition and group by first name, order by last name.
        job.setPartitionerClass(CustomKeyPartinioner.class);
        job.setGroupingComparatorClass(CustomGroupComparator.class);
        job.setSortComparatorClass(CustomKeySorter.class);

        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);

        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        // Propagate job success/failure to the shell.
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }

}

Composite key class :- 

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.util.Objects;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableUtils;

/**
 * Composite map-output key holding a user's first and last name.
 *
 * Natural order ({@link #compareTo}) is first name, then last name; the
 * job's custom sort/group comparators override parts of this at runtime.
 */
public class CustomKey implements WritableComparable<CustomKey> {

    public static final Log log = LogFactory.getLog(CustomKey.class);

    private String firstName, lastName;

    /** No-arg constructor required by Hadoop for Writable deserialization. */
    public CustomKey() {
    }

    public CustomKey(String firstName, String lastName) {
        this.firstName = firstName;
        this.lastName = lastName;
    }

    /** Deserializes the key; field order must mirror {@link #write}. */
    @Override
    public void readFields(DataInput in) throws IOException {
        this.firstName = WritableUtils.readString(in);
        this.lastName = WritableUtils.readString(in);
    }

    /** Serializes the key; field order must mirror {@link #readFields}. */
    @Override
    public void write(DataOutput out) throws IOException {
        log.debug("write value is " + firstName);

        WritableUtils.writeString(out, firstName);
        WritableUtils.writeString(out, lastName);
    }

    public String getFirstName() {
        return firstName;
    }

    public void setFirstName(String firstName) {
        this.firstName = firstName;
    }

    public String getLastName() {
        return lastName;
    }

    public void setLastName(String lastName) {
        this.lastName = lastName;
    }

    @Override
    public int compareTo(CustomKey o) {
        int result = firstName.compareTo(o.getFirstName());
        if (result == 0) {
            result = lastName.compareTo(o.getLastName());
        }
        return result;
    }

    // equals/hashCode added so the key honors the Comparable/Object contract
    // and hashes correctly if the default HashPartitioner is ever used.
    @Override
    public boolean equals(Object obj) {
        if (this == obj) {
            return true;
        }
        if (!(obj instanceof CustomKey)) {
            return false;
        }
        CustomKey other = (CustomKey) obj;
        return Objects.equals(firstName, other.firstName)
                && Objects.equals(lastName, other.lastName);
    }

    @Override
    public int hashCode() {
        return Objects.hash(firstName, lastName);
    }

    /** Renders as "firstName,lastName" (used by TextOutputFormat). */
    @Override
    public String toString() {
        return (new StringBuilder()).append(firstName).append(',').append(lastName).toString();
    }

}


Partitioner Class :- 

import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Partitioner;
import org.apache.hadoop.mapreduce.lib.partition.HashPartitioner;

/**
 * Routes each record to a reducer based on the first name only, so all
 * records sharing a first name meet in the same reduce call.
 */
public class CustomKeyPartinioner extends Partitioner<CustomKey, Text> {

    /**
     * Returns a partition in [0, numPartitions) derived solely from the key.
     *
     * The previous implementation fell back to a RANDOM partition on
     * exception — non-deterministic partitioning can scatter one key group
     * across reducers and silently break the secondary sort. Partitioning
     * is now a pure function of the first name.
     */
    @Override
    public int getPartition(CustomKey key, Text value, int numPartitions) {
        // Mask the sign bit so the modulo result is always non-negative.
        return (key.getFirstName().hashCode() & Integer.MAX_VALUE) % numPartitions;
    }

}


GroupComparator Class :- 

import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableComparator;

/**
 * Grouping comparator: treats two composite keys as equal whenever their
 * first names match, so every last name sharing a first name is handed to
 * a single reduce() invocation.
 */
public class CustomGroupComparator extends WritableComparator {

    protected CustomGroupComparator() {
        // true -> have the framework instantiate CustomKey objects to compare.
        super(CustomKey.class, true);
    }

    @Override
    public int compare(WritableComparable w1, WritableComparable w2) {
        String left = ((CustomKey) w1).getFirstName();
        String right = ((CustomKey) w2).getFirstName();
        return left.compareTo(right);
    }
}


Custom Key Sorter Class :- 

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableComparator;

/**
 * Sort comparator: orders map output by first name ascending and, within
 * the same first name, by last name descending.
 */
public class CustomKeySorter extends WritableComparator {

    public static final Log log = LogFactory.getLog(CustomKeySorter.class);

    protected CustomKeySorter() {
        super(CustomKey.class, true);
    }

    @Override
    public int compare(WritableComparable w1, WritableComparable w2) {
        CustomKey key1 = (CustomKey) w1;
        CustomKey key2 = (CustomKey) w2;

        int value = key1.getFirstName().compareTo(key2.getFirstName());

        log.debug("value is " + value);

        if (value == 0) {
            // Descending last name. Swap the operands rather than negating:
            // -compareTo() is broken when compareTo returns Integer.MIN_VALUE.
            value = key2.getLastName().compareTo(key1.getLastName());
        }
        return value;
    }
}
公共类字数{
公共静态类映射扩展映射器{
公共静态最终日志日志=LogFactory.getLog(Map.class);
私有最终静态IntWritable one=新的IntWritable(1);
私有文本优先=新文本();
上次私有文本=新文本();
@凌驾
公共void映射(LongWritable键、文本值、上下文上下文)引发IOException、InterruptedException{
字符串行=value.toString();
StringTokenizer tokenizer=新的StringTokenizer(行“\n”);
log.info(“行issss”+tokenizer.hasMoreTokens());
while(tokenizer.hasMoreTokens()){
字符串[]vals=tokenizer.nextToken().split(“”);
第一组(VAL[0]);
最后一组(VAL[1]);
write(新的CustomKey(first.toString(),last.toString()),last);
}
}
}
公共静态类Reduce扩展Reducer{
公共静态最终日志日志=LogFactory.getLog(Reduce.class);
@凌驾
公共void reduce(CustomKey、Iterable值、上下文)
抛出IOException、InterruptedException{
整数和=0;
//文本值=新文本();
用于(文本值:值){
write(新文本(key.getFirstName()),val);
}
}
}
公共静态void main(字符串[]args)引发异常{
Job Job=Job.getInstance(新配置());
job.setJarByClass(WordCount.class);
job.setMapperClass(Map.class);
job.setReducerClass(Reduce.class);
//job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(Text.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(Text.class);
job.setMapOutputKeyClass(CustomKey.class);
job.setPartitionerClass(CustomKeyPartinioner.class);
job.setGroupingComparatorClass(CustomGroupComparator.class);
job.setSortComparatorClass(CustomKeySorter.class);
setInputFormatClass(TextInputFormat.class);
setOutputFormatClass(TextOutputFormat.class);
addInputPath(作业,新路径(args[0]);
setOutputPath(作业,新路径(args[1]);
job.waitForCompletion(true);
}
}
复合密钥类:-
导入java.io.DataInput;
导入java.io.DataOutput;
导入java.io.IOException;
导入org.apache.commons.logging.Log;
导入org.apache.commons.logging.LogFactory;
导入org.apache.hadoop.io.WritableComparable;
导入org.apache.hadoop.io.WritableUtils;
公共类CustomKey实现了可写性{
公共静态最终日志日志=LogFactory.getLog(CustomKey.class);
私有字符串firstName,lastName;
公钥{
//TODO自动生成的构造函数存根
}
@凌驾
公共void读取字段(DataInput arg0)引发IOException{
this.firstName=WritableUtils.readString(arg0);
this.lastName=writeableutils.readString(arg0);
}
公共字符串getFirstName(){
返回名字;
}
public CustomKey(String firstName、String lastName){
超级();
this.firstName=firstName;
this.lastName=lastName;
}
public void setFirstName(字符串firstName){
this.firstName=firstName;
}
公共字符串getLastName(){
返回姓氏;
}
public void setLastName(字符串lastName){
this.lastName=lastName;
}
@凌驾
公共无效写入(数据输出arg0)引发IOException{
log.debug(“写入值为”+firstName);
WritableUtils.writeString(arg0,firstName);
WritableUtils.writeString(arg0,lastName);
}
@凌驾
公共整数比较(自定义键o){
int result=firstName.compareTo(o.getFirstName());
log.debug(“值为”+结果);
如果(结果==0){
返回lastName.compareTo(o.getLastName());
}
返回结果;
}
@凌驾
公共字符串toString(){
return(new StringBuilder()).append(firstName).append(',').append(lastName).toString();
}
}
分区类别:-
导入org.apache.hadoop.io.Text;
导入org.apache.hadoop.mapreduce.Partitioner;
导入org.apache.hadoop.mapreduce.lib.partition.HashPartitioner;
公共类CustomKeyPartinioner扩展了分区器{
HashPartitioner HashPartitioner=新的HashPartitioner();
Text newKey=新文本();
@凌驾
公共整型getPartition(自定义键arg0、文本arg1、整型arg2){
试一试{
newKey.set(arg0.getFirstName());
返回hashPartitioner.getPartition(newKey,arg1,arg2);
}捕获(例外e){
e、 printStackTrace();
返回值(int)(Math.random()*arg2);
}
}
}
GroupComparator类:-
导入org.apache.hadoop.io.WritableComparable;
导入org.apache.hadoop.io.WritableComparator;
公共类CustomGroupComparator扩展了WritableComparator{
受保护的CustomGroupComparator(){
super(CustomKey.class,true);
}
@凌驾
公共整数比较(可写可比w1,可写可比w2){
自定义键1=(自定义键)w1;
CustomKey key2=(CustomKey)w2;
//(检查udid)
返回key1.getFirstName().compareTo(key2.getFirstName());
}
}
定制钥匙分拣机等级:-
导入org.apache.commons.logging.Log;
导入org.apache.commons.logging.LogFactory;
导入org.apache.hadoop.io.WritableComparable;
导入org.apache.hadoop.io.WritableComparator;
公共类CustomKeySorter扩展了WritableCom