Hadoop RawComparator的意义是什么?我们在什么情况下使用它
什么是比较器及其意义?Hadoop RawComparator的意义是什么?我们在什么情况下使用它,hadoop,mapreduce,Hadoop,Mapreduce,什么是比较器及其意义 每个mapreduce程序都必须使用RawComparator吗?RawComparator直接操作对象的字节表示形式 并非强制要求在每个MapReduce程序中使用它 MapReduce基本上是一个批处理系统,而不是适用于交互式分析的系统。您无法在几秒钟或更短的时间内运行查询并返回结果。查询通常需要几分钟或更长时间,因此最好是脱机使用,因为在这种情况下,没有人坐在处理循环中等待结果 如果你仍然想优化MapReduce作业所花费的时间,那么你必须使用RawComparator 使用R
每个mapreduce程序都必须使用RawComparator吗?RawComparator直接操作对象的字节表示形式 并非强制要求在每个MapReduce程序中使用它 MapReduce基本上是一个批处理系统,而不是适用于交互式分析的系统。您无法在几秒钟或更短的时间内运行查询并返回结果。查询通常需要几分钟或更长时间,因此最好是脱机使用,因为在这种情况下,没有人坐在处理循环中等待结果 如果你仍然想优化MapReduce作业所花费的时间,那么你必须使用RawComparator 使用RawComparator:
/**
 * Raw comparator for {@code IndexPair} keys: compares two serialized keys
 * directly on their byte representation, avoiding full deserialization
 * during the MapReduce shuffle/sort phase.
 *
 * Assumes the serialized layout is two big-endian 4-byte ints: the first
 * index at offset 0 and the second index at offset 4 of each key.
 */
public class IndexPairComparator extends WritableComparator {
    protected IndexPairComparator() {
        super(IndexPair.class);
    }

    /**
     * Compares two serialized {@code IndexPair} keys field by field.
     *
     * @param b1 buffer holding the first key
     * @param s1 start offset of the first key within {@code b1}
     * @param l1 length of the first key (unused; the layout is fixed at 8 bytes)
     * @param b2 buffer holding the second key
     * @param s2 start offset of the second key within {@code b2}
     * @param l2 length of the second key (unused)
     * @return negative, zero, or positive as the first key is less than,
     *         equal to, or greater than the second
     */
    @Override
    public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) {
        // Integer.compare replaces the hand-rolled nested ternary and
        // returns the same -1/0/1 values for int arguments.
        int comp = Integer.compare(readInt(b1, s1), readInt(b2, s2));
        if (comp != 0) {
            return comp;
        }
        // First components equal: fall through to the second int at offset +4.
        return Integer.compare(readInt(b1, s1 + 4), readInt(b2, s2 + 4));
    }
}
中间键值对已从映射器传递到Reducer。在这些值到达映射器的Reducer之前,将执行洗牌和排序步骤
排序得到了改进,因为RawComparator将按字节比较键。如果我们不使用RawComparator,则必须完全反序列化中介键才能执行比较
示例:
/**
 * Raw comparator for {@code IndexPair} keys. Orders serialized keys by
 * their byte representation — first 4-byte int, then the second — so the
 * shuffle/sort phase never has to deserialize the keys.
 */
public class IndexPairComparator extends WritableComparator {
    protected IndexPairComparator() {
        super(IndexPair.class);
    }

    @Override
    public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) {
        // Primary field: the first int of each key.
        int first1 = readInt(b1, s1);
        int first2 = readInt(b2, s2);
        if (first1 != first2) {
            return (first1 < first2) ? -1 : 1;
        }
        // Tie-breaker: the second int, stored 4 bytes after the first.
        int second1 = readInt(b1, s1 + 4);
        int second2 = readInt(b2, s2 + 4);
        if (second1 == second2) {
            return 0;
        }
        return (second1 < second2) ? -1 : 1;
    }
}
(以下原为上面Java代码的机器翻译残留,已还原为原始代码;原文在此处被截断:)
public class IndexPairComparator extends WritableComparator {
protected IndexPairComparator() {
super(IndexPair.class);
}
@Override
public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) {
int i1 = readInt(b1, s1);
int i2 = readInt(b2, s2);
int comp = (i1 < i2) ? -1 : (i1 == i2) ? 0 : 1;
在上面的示例中,我们没有直接实现RawComparator。相反,我们扩展了WritableComparator,它在内部实现了RawComparator
有关更多详细信息,请参阅本文。我知道我在回答一个老问题 下面是为可写可比较对象编写RawComparator的另一个示例
/**
 * Example {@code WritableComparable} with a registered RawComparator.
 * Serialized layout, in order: Text (vint length header + bytes),
 * long (fixed 8 bytes), Text (vint length header + bytes).
 */
public class CompositeWritable2 implements WritableComparable<CompositeWritable2> {

    private Text textData1;
    private LongWritable longData;
    private Text textData2;

    static {
        // Register the raw comparator so sorting compares serialized bytes
        // directly instead of deserializing each key.
        WritableComparator.define(CompositeWritable2.class, new Comparator());
    }

    /**
     * Empty constructor
     */
    public CompositeWritable2() {
        textData1 = new Text();
        longData = new LongWritable();
        textData2 = new Text();
    }

    /**
     * Raw comparator that walks the serialized byte layout field by field.
     *
     * @author CuriousCat
     */
    public static class Comparator extends WritableComparator {

        private static final Text.Comparator TEXT_COMPARATOR = new Text.Comparator();
        private static final LongWritable.Comparator LONG_COMPARATOR = new LongWritable.Comparator();

        public Comparator() {
            super(CompositeWritable2.class);
        }

        /*
         * (non-Javadoc)
         *
         * @see org.apache.hadoop.io.WritableComparator#compare(byte[], int, int, byte[], int, int)
         */
        @Override
        public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) {
            int cmp;
            try {
                // Total serialized size of the first Text field:
                // vint length header + payload bytes.
                int textData11Len = WritableUtils.decodeVIntSize(b1[s1]) + readVInt(b1, s1);
                int textData12Len = WritableUtils.decodeVIntSize(b2[s2]) + readVInt(b2, s2);
                // Compare the first text data as bytes.
                cmp = TEXT_COMPARATOR.compare(b1, s1, textData11Len, b2, s2, textData12Len);
                if (cmp != 0) {
                    return cmp;
                }
                // BUG FIX: offsets must be relative to the record start
                // (s1/s2). The original passed textData11Len/textData12Len as
                // absolute buffer positions, which is only correct when both
                // records happen to start at index 0.
                int off1 = s1 + textData11Len;
                int off2 = s2 + textData12Len;
                // The long field is a fixed 8 bytes.
                cmp = LONG_COMPARATOR.compare(b1, off1, 8, b2, off2, 8);
                if (cmp != 0) {
                    return cmp;
                }
                // Advance past the long to the second Text field.
                off1 += 8;
                off2 += 8;
                // Total serialized size of the second Text field.
                int textData21Len = WritableUtils.decodeVIntSize(b1[off1]) + readVInt(b1, off1);
                int textData22Len = WritableUtils.decodeVIntSize(b2[off2]) + readVInt(b2, off2);
                // Compare the second text data as bytes.
                return TEXT_COMPARATOR.compare(b1, off1, textData21Len, b2, off2, textData22Len);
            } catch (IOException ex) {
                throw new IllegalArgumentException("Failed in CompositeWritable's RawComparator!", ex);
            }
        }
    }

    /**
     * @return the textData1
     */
    public Text getTextData1() {
        return textData1;
    }

    /**
     * @return the longData
     */
    public LongWritable getLongData() {
        return longData;
    }

    /**
     * @return the textData2
     */
    public Text getTextData2() {
        return textData2;
    }

    /**
     * Setter method. Note: stores the given references directly
     * (no defensive copy), matching the original contract.
     */
    public void set(Text textData1, LongWritable longData, Text textData2) {
        this.textData1 = textData1;
        this.longData = longData;
        this.textData2 = textData2;
    }

    /**
     * Serializes the three fields in declaration order; the raw comparator
     * above depends on exactly this layout.
     *
     * @see org.apache.hadoop.io.Writable#write(java.io.DataOutput)
     */
    @Override
    public void write(DataOutput out) throws IOException {
        textData1.write(out);
        longData.write(out);
        textData2.write(out);
    }

    /**
     * Deserializes the three fields in the same order as {@link #write}.
     *
     * @see org.apache.hadoop.io.Writable#readFields(java.io.DataInput)
     */
    @Override
    public void readFields(DataInput in) throws IOException {
        textData1.readFields(in);
        longData.readFields(in);
        textData2.readFields(in);
    }

    /**
     * Object-level comparison; must stay consistent with the raw comparator.
     *
     * @see java.lang.Comparable#compareTo(java.lang.Object)
     */
    @Override
    public int compareTo(CompositeWritable2 o) {
        int cmp = textData1.compareTo(o.getTextData1());
        if (cmp != 0) {
            return cmp;
        }
        cmp = longData.compareTo(o.getLongData());
        if (cmp != 0) {
            return cmp;
        }
        return textData2.compareTo(o.getTextData2());
    }
}
(以下原为上面Java代码的机器翻译残留,已还原为原始代码;原文在此处被截断:)
public class CompositeWritable2 implements WritableComparable<CompositeWritable2> {
private Text textData1;
private LongWritable longData;
private Text textData2;
static {
WritableComparator.define(CompositeWritable2.class, new Comparator());
}
/**
* Empty constructor
*/
public CompositeWritable2() {
textData1 = new Text();
longData = new LongWritable();
textData2 = new Text();
}
/**
* Comparator
*
* @author CuriousCat
*/
public static class Comparator extends WritableComparator {
private static final Text.Comparator TEXT_COMPARATOR = new Text.Comparator();
private static final LongWritable.Comparator LONG_COMPARATOR = new LongWritable.Comparator();
public Comparator() {
super(CompositeWritable2.class);
}
/*
* (non-Javadoc)
*
* @see org.apache.hadoop.io.WritableComparator#compare(byte[], int, int, byte[], int, int)
*/
@Override
public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) {
int cmp;
try {
// Find the length of the first text property
int textData11Len = WritableUtils.decodeVIntSize(b1[s1]) + readVInt(b1, s1);
int textData12Len = WritableUtils.decodeVIntSize(b2[s2]) + readVInt(b2, s2);
// Compare the first text data as bytes
cmp = TEXT_COMPARATOR.compare(b1, s1, textData11Len, b2, s2, textData12Len);
if (cmp != 0) {
return cmp;
}
// Read and compare the next 8 bytes starting from the length of first text property.
// The reason for hard coding 8 is, because the second property is long.
cmp = LONG_COMPARATOR.compare(b1, textData11Len, 8, b2, textData12Len, 8);
if (cmp != 0) {
return cmp;
}
// Move the index to the end of the second long property
textData11Len += 8;
textData12Len += 8;
// Find the length of the second text property
int textData21Len = WritableUtils.decodeVIntSize(b1[textData11Len]) + readVInt(b1, textData11Len);
int textData22Len = WritableUtils.decodeVIntSize(b2[textData12Len]) + readVInt(b2, textData12Len);
// Compare the second text data as bytes
return TEXT_COMPARATOR.compare(b1, textData11Len, textData21Len, b2, textData12Len, textData22Len);
} catch (IOException ex) {
throw new IllegalArgumentException("Failed in CompositeWritable's RawComparator!", ex);
}
}
}
/**
* @return the textData1
*/
public Text getTextData1() {
return textData1;
}
/**
* @return the longData
*/
public LongWritable getLongData() {
return longData;
}
/**
* @return the textData2
*/
public Text getTextData2() {
return textData2;
}
/**
* Setter method
*/
public void set(Text textData1, LongWritable longData, Text textData2) {
this.textData1 = textData1;
this.longData = longData;
this.textData2 = textData2;
}
/*
* (non-Javadoc)
*
* @see org.apache.hadoop.io.Writable#write(java.io.DataOutput)
*/
@Override
public void write(DataOutput out) throws IOException {
textData1.write(out);
longData.write(out);
textData2.write(out);
}
/*
* (non-Javadoc)
*
* @see org.apache.hadoop.io.Writable#readFields(java.io.DataInput)
*/
@Override
public void readFields(DataInput in) throws IOException {
textData1.readFields(in);
longData.readFields(in);
textData2.readFields(in);
}
/*
* (non-Javadoc)
*
* @see java.lang.Comparable#compareTo(java.lang.Object)
*/
@Override
public int compareTo(CompositeWritable2 o) {
int cmp = textData1.compareTo(o.getTextData1());
if (cmp != 0) {
return cmp;
}
cmp = longData.compareTo(o.getLongData());
if (cmp != 0) {
return cmp;