Java Hadoop M/R二级排序不起作用,基于用户的姓氏

Java Hadoop M/R 二级排序不起作用(基于用户的姓氏)。标签:java、hadoop、mapreduce、bigdata、secondary-sort。

我想根据用户的lastname对输出进行排序,使用的键是firstName。下面是我正在使用的类,但我没有得到基于lastName的排序输出。我是hadoop新手,这篇文章是我在各种互联网资源的帮助下写的

主要类别:-

public class WordCount {

    /**
     * Mapper: parses "firstName lastName" pairs from each input line and
     * emits a composite {@link CustomKey}(firstName, lastName) paired with
     * the last name, so the framework can secondary-sort by last name.
     */
    public static class Map extends Mapper<LongWritable, Text, CustomKey, Text> {

        public static final Log log = LogFactory.getLog(Map.class);

        private final Text first = new Text();
        private final Text last = new Text();

        @Override
        public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            String line = value.toString();

            StringTokenizer tokenizer = new StringTokenizer(line, "\n");

            while (tokenizer.hasMoreTokens()) {

                String[] vals = tokenizer.nextToken().split(" ");

                // Guard against malformed records; the original threw
                // ArrayIndexOutOfBoundsException on lines without two fields.
                if (vals.length < 2) {
                    log.warn("Skipping malformed record: " + line);
                    continue;
                }

                first.set(vals[0]);
                last.set(vals[1]);

                context.write(new CustomKey(first.toString(), last.toString()), last);
            }
        }
    }

    /**
     * Reducer: receives the last names grouped by first name (the grouping
     * comparator ignores the last name) and writes one output line per value.
     * Values arrive in the order imposed by the sort comparator.
     */
    public static class Reduce extends Reducer<CustomKey, Text, Text, Text> {

        public static final Log log = LogFactory.getLog(Reduce.class);

        @Override
        public void reduce(CustomKey key, Iterable<Text> values, Context context)
                throws IOException, InterruptedException {
            for (Text val : values) {
                context.write(new Text(key.getFirstName()), val);
            }
        }
    }

    /**
     * Job driver: wires the mapper/reducer together with the secondary-sort
     * plumbing (composite key, partitioner, grouping and sort comparators).
     *
     * @param args args[0] = input path, args[1] = output path
     */
    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration());

        job.setJarByClass(WordCount.class);

        job.setMapperClass(Map.class);
        job.setReducerClass(Reduce.class);

        job.setMapOutputKeyClass(CustomKey.class);
        job.setMapOutputValueClass(Text.class);

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        // Secondary sort: partition and group by first name, order by last name.
        job.setPartitionerClass(CustomKeyPartinioner.class);
        job.setGroupingComparatorClass(CustomGroupComparator.class);
        job.setSortComparatorClass(CustomKeySorter.class);

        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);

        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        // Propagate job success/failure to the shell.
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }

}

Composite key class :- 

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.util.Objects;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableUtils;

/**
 * Composite map-output key holding a user's first and last name.
 *
 * Natural order ({@link #compareTo}) is first name, then last name; the
 * job's custom sort/group comparators override parts of this at runtime.
 */
public class CustomKey implements WritableComparable<CustomKey> {

    public static final Log log = LogFactory.getLog(CustomKey.class);

    private String firstName, lastName;

    /** No-arg constructor required by Hadoop for Writable deserialization. */
    public CustomKey() {
    }

    public CustomKey(String firstName, String lastName) {
        this.firstName = firstName;
        this.lastName = lastName;
    }

    /** Deserializes the key; field order must mirror {@link #write}. */
    @Override
    public void readFields(DataInput in) throws IOException {
        this.firstName = WritableUtils.readString(in);
        this.lastName = WritableUtils.readString(in);
    }

    /** Serializes the key; field order must mirror {@link #readFields}. */
    @Override
    public void write(DataOutput out) throws IOException {
        log.debug("write value is " + firstName);

        WritableUtils.writeString(out, firstName);
        WritableUtils.writeString(out, lastName);
    }

    public String getFirstName() {
        return firstName;
    }

    public void setFirstName(String firstName) {
        this.firstName = firstName;
    }

    public String getLastName() {
        return lastName;
    }

    public void setLastName(String lastName) {
        this.lastName = lastName;
    }

    @Override
    public int compareTo(CustomKey o) {
        int result = firstName.compareTo(o.getFirstName());
        if (result == 0) {
            result = lastName.compareTo(o.getLastName());
        }
        return result;
    }

    // equals/hashCode added so the key honors the Comparable/Object contract
    // and hashes correctly if the default HashPartitioner is ever used.
    @Override
    public boolean equals(Object obj) {
        if (this == obj) {
            return true;
        }
        if (!(obj instanceof CustomKey)) {
            return false;
        }
        CustomKey other = (CustomKey) obj;
        return Objects.equals(firstName, other.firstName)
                && Objects.equals(lastName, other.lastName);
    }

    @Override
    public int hashCode() {
        return Objects.hash(firstName, lastName);
    }

    /** Renders as "firstName,lastName" (used by TextOutputFormat). */
    @Override
    public String toString() {
        return (new StringBuilder()).append(firstName).append(',').append(lastName).toString();
    }

}


Partitioner Class :- 

import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Partitioner;
import org.apache.hadoop.mapreduce.lib.partition.HashPartitioner;

/**
 * Routes each record to a reducer based on the first name only, so all
 * records sharing a first name meet in the same reduce call.
 */
public class CustomKeyPartinioner extends Partitioner<CustomKey, Text> {

    /**
     * Returns a partition in [0, numPartitions) derived solely from the key.
     *
     * The previous implementation fell back to a RANDOM partition on
     * exception — non-deterministic partitioning can scatter one key group
     * across reducers and silently break the secondary sort. Partitioning
     * is now a pure function of the first name.
     */
    @Override
    public int getPartition(CustomKey key, Text value, int numPartitions) {
        // Mask the sign bit so the modulo result is always non-negative.
        return (key.getFirstName().hashCode() & Integer.MAX_VALUE) % numPartitions;
    }

}


GroupComparator Class :- 

import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableComparator;

/**
 * Grouping comparator: treats two composite keys as equal whenever their
 * first names match, so every last name sharing a first name is handed to
 * a single reduce() invocation.
 */
public class CustomGroupComparator extends WritableComparator {

    protected CustomGroupComparator() {
        // true -> have the framework instantiate CustomKey objects to compare.
        super(CustomKey.class, true);
    }

    @Override
    public int compare(WritableComparable w1, WritableComparable w2) {
        String left = ((CustomKey) w1).getFirstName();
        String right = ((CustomKey) w2).getFirstName();
        return left.compareTo(right);
    }
}


Custom Key Sorter Class :- 

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableComparator;

/**
 * Sort comparator: orders map output by first name ascending and, within
 * the same first name, by last name descending.
 */
public class CustomKeySorter extends WritableComparator {

    public static final Log log = LogFactory.getLog(CustomKeySorter.class);

    protected CustomKeySorter() {
        super(CustomKey.class, true);
    }

    @Override
    public int compare(WritableComparable w1, WritableComparable w2) {
        CustomKey key1 = (CustomKey) w1;
        CustomKey key2 = (CustomKey) w2;

        int value = key1.getFirstName().compareTo(key2.getFirstName());

        log.debug("value is " + value);

        if (value == 0) {
            // Descending last name. Swap the operands rather than negating:
            // -compareTo() is broken when compareTo returns Integer.MIN_VALUE.
            value = key2.getLastName().compareTo(key1.getLastName());
        }
        return value;
    }
}
公共类字数{
公共静态类映射扩展映射器{
公共静态最终日志日志=LogFactory.getLog(Map.class);
私有最终静态IntWritable one=新的IntWritable(1);
私有文本优先=新文本();
上次私有文本=新文本();
@凌驾
公共void映射(LongWritable键、文本值、上下文上下文)引发IOException、InterruptedException{
字符串行=value.toString();
StringTokenizer tokenizer=新的StringTokenizer(行“\n”);
log.info(“行issss”+tokenizer.hasMoreTokens());
while(tokenizer.hasMoreTokens()){
字符串[]vals=tokenizer.nextToken().split(“”);
第一组(VAL[0]);
最后一组(VAL[1]);
write(新的CustomKey(first.toString(),last.toString()),last);
}
}
}
公共静态类Reduce扩展Reducer{
公共静态最终日志日志=LogFactory.getLog(Reduce.class);
@凌驾
公共void reduce(CustomKey、Iterable值、上下文)
抛出IOException、InterruptedException{
整数和=0;
//文本值=新文本();
用于(文本值:值){
write(新文本(key.getFirstName()),val);
}
}
}
公共静态void main(字符串[]args)引发异常{
Job Job=Job.getInstance(新配置());
job.setJarByClass(WordCount.class);
job.setMapperClass(Map.class);
job.setReducerClass(Reduce.class);
//job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(Text.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(Text.class);
job.setMapOutputKeyClass(CustomKey.class);
job.setPartitionerClass(CustomKeyPartinioner.class);
job.setGroupingComparatorClass(CustomGroupComparator.class);
job.setSortComparatorClass(CustomKeySorter.class);
setInputFormatClass(TextInputFormat.class);
setOutputFormatClass(TextOutputFormat.class);
addInputPath(作业,新路径(args[0]);
setOutputPath(作业,新路径(args[1]);
job.waitForCompletion(true);
}
}
复合密钥类:-
导入java.io.DataInput;
导入java.io.DataOutput;
导入java.io.IOException;
导入org.apache.commons.logging.Log;
导入org.apache.commons.logging.LogFactory;
导入org.apache.hadoop.io.WritableComparable;
导入org.apache.hadoop.io.WritableUtils;
公共类CustomKey实现了可写性{
公共静态最终日志日志=LogFactory.getLog(CustomKey.class);
私有字符串firstName,lastName;
公钥{
//TODO自动生成的构造函数存根
}
@凌驾
公共void读取字段(DataInput arg0)引发IOException{
this.firstName=WritableUtils.readString(arg0);
this.lastName=writeableutils.readString(arg0);
}
公共字符串getFirstName(){
返回名字;
}
public CustomKey(String firstName、String lastName){
超级();
this.firstName=firstName;
this.lastName=lastName;
}
public void setFirstName(字符串firstName){
this.firstName=firstName;
}
公共字符串getLastName(){
返回姓氏;
}
public void setLastName(字符串lastName){
this.lastName=lastName;
}
@凌驾
公共无效写入(数据输出arg0)引发IOException{
log.debug(“写入值为”+firstName);
WritableUtils.writeString(arg0,firstName);
WritableUtils.writeString(arg0,lastName);
}
@凌驾
公共整数比较(自定义键o){
int result=firstName.compareTo(o.getFirstName());
log.debug(“值为”+结果);
如果(结果==0){
返回lastName.compareTo(o.getLastName());
}
返回结果;
}
@凌驾
公共字符串toString(){
return(new StringBuilder()).append(firstName).append(',').append(lastName).toString();
}
}
分区类别:-
导入org.apache.hadoop.io.Text;
导入org.apache.hadoop.mapreduce.Partitioner;
导入org.apache.hadoop.mapreduce.lib.partition.HashPartitioner;
公共类CustomKeyPartinioner扩展了分区器{
HashPartitioner HashPartitioner=新的HashPartitioner();
Text newKey=新文本();
@凌驾
公共整型getPartition(自定义键arg0、文本arg1、整型arg2){
试一试{
newKey.set(arg0.getFirstName());
返回hashPartitioner.getPartition(newKey,arg1,arg2);
}捕获(例外e){
e、 printStackTrace();
返回值(int)(Math.random()*arg2);
}
}
}
GroupComparator类:-
导入org.apache.hadoop.io.WritableComparable;
导入org.apache.hadoop.io.WritableComparator;
公共类CustomGroupComparator扩展了WritableComparator{
受保护的CustomGroupComparator(){
super(CustomKey.class,true);
}
@凌驾
公共整数比较(可写可比w1,可写可比w2){
自定义键1=(自定义键)w1;
CustomKey key2=(CustomKey)w2;
//(检查udid)
返回key1.getFirstName().compareTo(key2.getFirstName());
}
}
定制钥匙分拣机等级:-
导入org.apache.commons.logging.Log;
导入org.apache.commons.logging.LogFactory;
导入org.apache.hadoop.io.WritableComparable;
导入org.apache.hadoop.io.WritableComparator;
公共类CustomKeySorter扩展了WritableCom