Hadoop RawComparator的意义是什么?我们在什么情况下使用它
什么是比较器及其意义?Hadoop RawComparator的意义是什么?我们在什么情况下使用它,hadoop,mapreduce,Hadoop,Mapreduce,什么是比较器及其意义 每个mapreduce程序都必须使用RawComparator吗?RawComparator直接操作对象的字节表示形式 并非强制要求在每个MapReduce程序中使用它 MapReduce基本上是一个批处理系统,而不是适用于交互式分析的系统。您无法在几秒钟或更短的时间内运行查询并返回结果。查询通常需要几分钟或更长时间,因此最好是脱机使用,因为在这种情况下,没有人坐在处理循环中等待结果 如果你仍然想优化MapReduce作业所花费的时间,那么你必须使用RawComparator 使用R
每个mapreduce程序都必须使用RawComparator吗?RawComparator直接操作对象的字节表示形式 并非强制要求在每个MapReduce程序中使用它 MapReduce基本上是一个批处理系统,而不是适用于交互式分析的系统。您无法在几秒钟或更短的时间内运行查询并返回结果。查询通常需要几分钟或更长时间,因此最好是脱机使用,因为在这种情况下,没有人坐在处理循环中等待结果 如果你仍然想优化MapReduce作业所花费的时间,那么你必须使用RawComparator 使用RawComparator:
/**
 * Raw comparator for {@code IndexPair} keys: compares two serialized keys
 * directly on their byte representation, avoiding full deserialization
 * during the MapReduce shuffle/sort phase.
 *
 * Assumes the serialized layout is two big-endian 4-byte ints: the first
 * index at offset 0 and the second index at offset 4 of each key.
 */
public class IndexPairComparator extends WritableComparator {
    protected IndexPairComparator() {
        super(IndexPair.class);
    }

    /**
     * Compares two serialized {@code IndexPair} keys field by field.
     *
     * @param b1 buffer holding the first key
     * @param s1 start offset of the first key within {@code b1}
     * @param l1 length of the first key (unused; the layout is fixed at 8 bytes)
     * @param b2 buffer holding the second key
     * @param s2 start offset of the second key within {@code b2}
     * @param l2 length of the second key (unused)
     * @return negative, zero, or positive as the first key is less than,
     *         equal to, or greater than the second
     */
    @Override
    public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) {
        // Integer.compare replaces the hand-rolled nested ternary and
        // returns the same -1/0/1 values for int arguments.
        int comp = Integer.compare(readInt(b1, s1), readInt(b2, s2));
        if (comp != 0) {
            return comp;
        }
        // First components equal: fall through to the second int at offset +4.
        return Integer.compare(readInt(b1, s1 + 4), readInt(b2, s2 + 4));
    }
}
中间键值对已从映射器传递到Reducer。在这些值到达映射器的Reducer之前,将执行洗牌和排序步骤
排序得到了改进,因为RawComparator将按字节比较键。如果我们不使用RawComparator,则必须完全反序列化中介键才能执行比较
示例:
/**
 * Raw comparator for {@code IndexPair} keys. Orders serialized keys by
 * their byte representation — first 4-byte int, then the second — so the
 * shuffle/sort phase never has to deserialize the keys.
 */
public class IndexPairComparator extends WritableComparator {
    protected IndexPairComparator() {
        super(IndexPair.class);
    }

    @Override
    public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) {
        // Primary field: the first int of each key.
        int first1 = readInt(b1, s1);
        int first2 = readInt(b2, s2);
        if (first1 != first2) {
            return (first1 < first2) ? -1 : 1;
        }
        // Tie-breaker: the second int, stored 4 bytes after the first.
        int second1 = readInt(b1, s1 + 4);
        int second2 = readInt(b2, s2 + 4);
        if (second1 == second2) {
            return 0;
        }
        return (second1 < second2) ? -1 : 1;
    }
}
(以下原为上面Java代码的机器翻译残留,已还原为原始代码;原文在此处被截断:)
public class IndexPairComparator extends WritableComparator {
protected IndexPairComparator() {
super(IndexPair.class);
}
@Override
public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) {
int i1 = readInt(b1, s1);
int i2 = readInt(b2, s2);
int comp = (i1 < i2) ? -1 : (i1 == i2) ? 0 : 1;
在上面的示例中,我们没有直接实现RawComparator。相反,我们扩展了WritableComparator,它在内部实现了RawComparator
有关更多详细信息,请参阅本文。我知道我在回答一个老问题 下面是为可写可比较对象编写RawComparator的另一个示例
/**
 * Example {@code WritableComparable} with a registered RawComparator.
 * Serialized layout, in order: Text (vint length header + bytes),
 * long (fixed 8 bytes), Text (vint length header + bytes).
 */
public class CompositeWritable2 implements WritableComparable<CompositeWritable2> {

    private Text textData1;
    private LongWritable longData;
    private Text textData2;

    static {
        // Register the raw comparator so sorting compares serialized bytes
        // directly instead of deserializing each key.
        WritableComparator.define(CompositeWritable2.class, new Comparator());
    }

    /**
     * Empty constructor
     */
    public CompositeWritable2() {
        textData1 = new Text();
        longData = new LongWritable();
        textData2 = new Text();
    }

    /**
     * Raw comparator that walks the serialized byte layout field by field.
     *
     * @author CuriousCat
     */
    public static class Comparator extends WritableComparator {

        private static final Text.Comparator TEXT_COMPARATOR = new Text.Comparator();
        private static final LongWritable.Comparator LONG_COMPARATOR = new LongWritable.Comparator();

        public Comparator() {
            super(CompositeWritable2.class);
        }

        /*
         * (non-Javadoc)
         *
         * @see org.apache.hadoop.io.WritableComparator#compare(byte[], int, int, byte[], int, int)
         */
        @Override
        public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) {
            int cmp;
            try {
                // Total serialized size of the first Text field:
                // vint length header + payload bytes.
                int textData11Len = WritableUtils.decodeVIntSize(b1[s1]) + readVInt(b1, s1);
                int textData12Len = WritableUtils.decodeVIntSize(b2[s2]) + readVInt(b2, s2);
                // Compare the first text data as bytes.
                cmp = TEXT_COMPARATOR.compare(b1, s1, textData11Len, b2, s2, textData12Len);
                if (cmp != 0) {
                    return cmp;
                }
                // BUG FIX: offsets must be relative to the record start
                // (s1/s2). The original passed textData11Len/textData12Len as
                // absolute buffer positions, which is only correct when both
                // records happen to start at index 0.
                int off1 = s1 + textData11Len;
                int off2 = s2 + textData12Len;
                // The long field is a fixed 8 bytes.
                cmp = LONG_COMPARATOR.compare(b1, off1, 8, b2, off2, 8);
                if (cmp != 0) {
                    return cmp;
                }
                // Advance past the long to the second Text field.
                off1 += 8;
                off2 += 8;
                // Total serialized size of the second Text field.
                int textData21Len = WritableUtils.decodeVIntSize(b1[off1]) + readVInt(b1, off1);
                int textData22Len = WritableUtils.decodeVIntSize(b2[off2]) + readVInt(b2, off2);
                // Compare the second text data as bytes.
                return TEXT_COMPARATOR.compare(b1, off1, textData21Len, b2, off2, textData22Len);
            } catch (IOException ex) {
                throw new IllegalArgumentException("Failed in CompositeWritable's RawComparator!", ex);
            }
        }
    }

    /**
     * @return the textData1
     */
    public Text getTextData1() {
        return textData1;
    }

    /**
     * @return the longData
     */
    public LongWritable getLongData() {
        return longData;
    }

    /**
     * @return the textData2
     */
    public Text getTextData2() {
        return textData2;
    }

    /**
     * Setter method. Note: stores the given references directly
     * (no defensive copy), matching the original contract.
     */
    public void set(Text textData1, LongWritable longData, Text textData2) {
        this.textData1 = textData1;
        this.longData = longData;
        this.textData2 = textData2;
    }

    /**
     * Serializes the three fields in declaration order; the raw comparator
     * above depends on exactly this layout.
     *
     * @see org.apache.hadoop.io.Writable#write(java.io.DataOutput)
     */
    @Override
    public void write(DataOutput out) throws IOException {
        textData1.write(out);
        longData.write(out);
        textData2.write(out);
    }

    /**
     * Deserializes the three fields in the same order as {@link #write}.
     *
     * @see org.apache.hadoop.io.Writable#readFields(java.io.DataInput)
     */
    @Override
    public void readFields(DataInput in) throws IOException {
        textData1.readFields(in);
        longData.readFields(in);
        textData2.readFields(in);
    }

    /**
     * Object-level comparison; must stay consistent with the raw comparator.
     *
     * @see java.lang.Comparable#compareTo(java.lang.Object)
     */
    @Override
    public int compareTo(CompositeWritable2 o) {
        int cmp = textData1.compareTo(o.getTextData1());
        if (cmp != 0) {
            return cmp;
        }
        cmp = longData.compareTo(o.getLongData());
        if (cmp != 0) {
            return cmp;
        }
        return textData2.compareTo(o.getTextData2());
    }
}
(以下原为上面Java代码的机器翻译残留,已还原为原始代码;原文在此处被截断:)
public class CompositeWritable2 implements WritableComparable<CompositeWritable2> {
private Text textData1;
private LongWritable longData;
private Text textData2;
static {
WritableComparator.define(CompositeWritable2.class, new Comparator());
}
/**
* Empty constructor
*/
public CompositeWritable2() {
textData1 = new Text();
longData = new LongWritable();
textData2 = new Text();
}
/**
* Comparator
*
* @author CuriousCat
*/
public static class Comparator extends WritableComparator {
private static final Text.Comparator TEXT_COMPARATOR = new Text.Comparator();
private static final LongWritable.Comparator LONG_COMPARATOR = new LongWritable.Comparator();
public Comparator() {
super(CompositeWritable2.class);
}
/*
* (non-Javadoc)
*
* @see org.apache.hadoop.io.WritableComparator#compare(byte[], int, int, byte[], int, int)
*/
@Override
public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) {
int cmp;
try {
// Find the length of the first text property
int textData11Len = WritableUtils.decodeVIntSize(b1[s1]) + readVInt(b1, s1);
int textData12Len = WritableUtils.decodeVIntSize(b2[s2]) + readVInt(b2, s2);
// Compare the first text data as bytes
cmp = TEXT_COMPARATOR.compare(b1, s1, textData11Len, b2, s2, textData12Len);
if (cmp != 0) {
return cmp;
}
// Read and compare the next 8 bytes starting from the length of first text property.
// The reason for hard coding 8 is, because the second property is long.
cmp = LONG_COMPARATOR.compare(b1, textData11Len, 8, b2, textData12Len, 8);
if (cmp != 0) {
return cmp;
}
// Move the index to the end of the second long property
textData11Len += 8;
textData12Len += 8;
// Find the length of the second text property
int textData21Len = WritableUtils.decodeVIntSize(b1[textData11Len]) + readVInt(b1, textData11Len);
int textData22Len = WritableUtils.decodeVIntSize(b2[textData12Len]) + readVInt(b2, textData12Len);
// Compare the second text data as bytes
return TEXT_COMPARATOR.compare(b1, textData11Len, textData21Len, b2, textData12Len, textData22Len);
} catch (IOException ex) {
throw new IllegalArgumentException("Failed in CompositeWritable's RawComparator!", ex);
}
}
}
/**
* @return the textData1
*/
public Text getTextData1() {
return textData1;
}
/**
* @return the longData
*/
public LongWritable getLongData() {
return longData;
}
/**
* @return the textData2
*/
public Text getTextData2() {
return textData2;
}
/**
* Setter method
*/
public void set(Text textData1, LongWritable longData, Text textData2) {
this.textData1 = textData1;
this.longData = longData;
this.textData2 = textData2;
}
/*
* (non-Javadoc)
*
* @see org.apache.hadoop.io.Writable#write(java.io.DataOutput)
*/
@Override
public void write(DataOutput out) throws IOException {
textData1.write(out);
longData.write(out);
textData2.write(out);
}
/*
* (non-Javadoc)
*
* @see org.apache.hadoop.io.Writable#readFields(java.io.DataInput)
*/
@Override
public void readFields(DataInput in) throws IOException {
textData1.readFields(in);
longData.readFields(in);
textData2.readFields(in);
}
/*
* (non-Javadoc)
*
* @see java.lang.Comparable#compareTo(java.lang.Object)
*/
@Override
public int compareTo(CompositeWritable2 o) {
int cmp = textData1.compareTo(o.getTextData1());
if (cmp != 0) {
return cmp;
}
cmp = longData.compareTo(o.getLongData());
if (cmp != 0) {
return cmp;