Java Hadoop MapReduce 中用户自定义键(Key)存在的问题
我有一个以制表符分隔格式的纽约证券交易所(NYSE)数据集,我的数据如下所示:
exchange stock_symbol date stock_price_open stock_price_high stock_price_low stock_price_close stock_volume stock_price_adj_close
NYSE ASP 2001-12-31 12.55 12.8 12.42 12.8 11300 6.91
NYSE ASP 2001-12-28 12.5 12.55 12.42 12.55 4800 6.78
NYSE KEN 2001-12-27 12.59 12.59 12.5 12.57 5400 6.79
NYSE JPG 2001-12-26 12.45 12.6 12.45 12.55 5400 6.78
NYSE KEN 2001-12-24 12.61 12.61 12.61 12.61 1400 6.76
NYSE JPG 2001-12-21 12.4 12.78 12.4 12.6 18200 6.75
NYSE,ASP,DECEMBER 12.525,12.675,12.42,12.675
NYSE,KEN,DECEMBER 12.60,12.60,12.55,12.58
NYSE,JPG,DECEMBER 12.425,12.69,12.425,12.575
我需要找到某个月某个符号的开盘价、高价、低价和收盘价的平均值,即我希望我的输出值是这样的
exchange stock_symbol date stock_price_open stock_price_high stock_price_low stock_price_close stock_volume stock_price_adj_close
NYSE ASP 2001-12-31 12.55 12.8 12.42 12.8 11300 6.91
NYSE ASP 2001-12-28 12.5 12.55 12.42 12.55 4800 6.78
NYSE KEN 2001-12-27 12.59 12.59 12.5 12.57 5400 6.79
NYSE JPG 2001-12-26 12.45 12.6 12.45 12.55 5400 6.78
NYSE KEN 2001-12-24 12.61 12.61 12.61 12.61 1400 6.76
NYSE JPG 2001-12-21 12.4 12.78 12.4 12.6 18200 6.75
NYSE,ASP,DECEMBER 12.525,12.675,12.42,12.675
NYSE,KEN,DECEMBER 12.60,12.60,12.55,12.58
NYSE,JPG,DECEMBER 12.425,12.69,12.425,12.575
为了做到这一点,我使用了一个用户定义的键,它将交换、符号和日期作为键
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.util.Objects;

import org.apache.hadoop.io.WritableComparable;
/**
 * Composite MapReduce key: (exchange, symbol, month).
 *
 * <p>Used as the map-output key, so {@code equals}, {@code hashCode} and
 * {@code compareTo} must all agree on the same three fields — the shuffle
 * uses compareTo to sort/group and HashPartitioner uses hashCode to route.
 */
public class ComplexKey implements WritableComparable<ComplexKey> {
    public String exchange;
    public String symbol;
    public String month;

    /** Required no-arg constructor: Hadoop instantiates keys reflectively. */
    public ComplexKey() {
    }

    public ComplexKey(String exchange, String symbol, String month) {
        this.exchange = exchange;
        this.symbol = symbol;
        this.month = month;
    }

    /** Deserializes the three fields in the same order {@link #write} emits them. */
    @Override
    public void readFields(DataInput in) throws IOException {
        this.exchange = in.readUTF();
        this.symbol = in.readUTF();
        this.month = in.readUTF();
    }

    @Override
    public void write(DataOutput out) throws IOException {
        out.writeUTF(exchange);
        out.writeUTF(symbol);
        out.writeUTF(month);
    }

    /**
     * Orders keys field by field (no intermediate String allocation like the
     * old toString()-based comparison); consistent with {@link #equals}.
     */
    @Override
    public int compareTo(ComplexKey o) {
        int c = this.exchange.compareTo(o.exchange);
        if (c != 0) {
            return c;
        }
        c = this.symbol.compareTo(o.symbol);
        if (c != 0) {
            return c;
        }
        return this.month.compareTo(o.month);
    }

    public String getExchange() {
        return exchange;
    }

    public String getMonth() {
        return month;
    }

    public String getSymbol() {
        return symbol;
    }

    @Override
    public String toString() {
        return this.exchange + "," + this.symbol + "," + this.month;
    }

    /** Consistent with equals(); equal keys must hash identically for partitioning. */
    @Override
    public int hashCode() {
        return Objects.hash(exchange, symbol, month);
    }

    /**
     * BUG FIX: the original compared {@code exchange} with {@code ==}
     * (reference identity). Two deserialized keys with identical field values
     * were therefore unequal, so records were never aggregated together.
     * All three fields are now compared by value, null-safely.
     */
    @Override
    public boolean equals(Object obj) {
        if (obj == this) {
            return true;
        }
        if (obj == null || obj.getClass() != this.getClass()) {
            return false;
        }
        ComplexKey k = (ComplexKey) obj;
        return Objects.equals(exchange, k.exchange)
                && Objects.equals(symbol, k.symbol)
                && Objects.equals(month, k.month);
    }
}
我的映射器以longwritable作为输入键,文本作为输入值,并输出复合键和复合值
import java.io.IOException;
import java.util.HashMap;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
/**
 * Parses one tab-separated NYSE row and emits
 * (exchange, symbol, month-name) -> (open, high, low, close).
 *
 * Input columns: exchange, symbol, yyyy-MM-dd date, open, high, low, close, ...
 */
public class StockMapper extends Mapper<LongWritable, Text, ComplexKey, ComplexValue> {
    // Month number (1-12) -> display name; built once per task in setup().
    private final HashMap<Integer, String> month = new HashMap<Integer, String>();

    @Override
    protected void setup(Context context)
            throws IOException, InterruptedException {
        String[] names = {
            "January", "February", "March", "April", "May", "June",
            "July", "August", "September", "October", "November", "December"
        };
        for (int m = 1; m <= 12; m++) {
            month.put(m, names[m - 1]);
        }
    }

    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        String[] val = value.toString().split("\t");
        // ROBUSTNESS FIX: guard the column count first — the original indexed
        // val[3..6] unconditionally, so a short/malformed line crashed the task.
        // isNumber(val[3]) still filters the header row, whose price column is
        // the non-numeric literal "stock_price_open".
        if (val.length < 7 || !isNumber(val[3])) {
            return;
        }
        // "yyyy-MM-dd" -> month number -> month name.
        String monthName = month.get(Integer.parseInt(val[2].split("-")[1]));
        context.write(
                new ComplexKey(val[0], val[1], monthName),
                new ComplexValue(
                        Double.parseDouble(val[3]),
                        Double.parseDouble(val[4]),
                        Double.parseDouble(val[5]),
                        Double.parseDouble(val[6])));
    }

    /** Returns true when {@code n} parses as a double; used to skip the header row. */
    public boolean isNumber(String n) {
        try {
            Double.parseDouble(n);
            return true;
        } catch (NumberFormatException e) {
            return false;
        }
    }
}
import java.io.IOException;
导入java.util.HashMap;
导入org.apache.hadoop.io.LongWritable;
导入org.apache.hadoop.io.Text;
导入org.apache.hadoop.mapreduce.Mapper;
公共类StockMapper扩展了Mapper{
HashMapmonth=新建HashMap();
@凌驾
受保护的无效设置(上下文)
抛出IOException、InterruptedException{
月份。卖出价(1,“一月”);
月份(二月二日);
月份。卖出价(3,“三月”);
月份。卖出价(4月);
月。卖出期权(5月5日);
月份(6月6日);
月份(7月7日);
月份(8月8日);
月份(9月9日);
月份(10月10日);
月份(11月11日);
月份(12月12日);
}
@凌驾
受保护的void映射(可长写键、文本值、上下文)
抛出IOException、InterruptedException{
//TODO自动生成的方法存根
字符串[]val=value.toString().split(“\t”);
if(isNumber(val[3])==true){
context.write(新的ComplexKey(val[0],val[1],month.get(Integer.parseInt(val[2]),split(“-”[1])),新的ComplexValue(Double.parseDouble(val[3]),Double.parseDouble(val[4]),Double.parseDouble(val[5]),Double.parseDouble(val[6]);
}
}
公共布尔isNumber(字符串n){
试一试{
double i=double.parseDouble(n);
}
捕获(数字格式){
返回false;
}
返回true;
}
}
在reducer中,我使用reduce函数找到了平均值
import java.io.IOException;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
/**
 * Averages the open/close/high/low prices of every record sharing the same
 * (exchange, symbol, month) key and emits them as a CSV value, followed by
 * the number of records that went into the average.
 */
public class StockReducer extends Reducer<ComplexKey, ComplexValue, Text, Text> {
    @Override
    protected void reduce(ComplexKey key, Iterable<ComplexValue> values, Context context)
            throws IOException, InterruptedException {
        double openSum = 0;
        double closeSum = 0;
        double highSum = 0;
        double lowSum = 0;
        int n = 0;
        for (ComplexValue v : values) {
            n++;
            openSum += v.getOpenPrice();
            closeSum += v.getClosePrice();
            highSum += v.getHighPrice();
            lowSum += v.getLowPrice();
        }
        // Output order matches the expected sample: open, close, high, low, count.
        String averages = openSum / n + "," + closeSum / n + ","
                + highSum / n + "," + lowSum / n + "," + n;
        context.write(new Text(key.toString()), new Text(averages));
    }
}
import java.io.IOException;
导入org.apache.hadoop.io.Text;
导入org.apache.hadoop.mapreduce.Reducer;
公共类股票减持器{
@凌驾
受保护的void reduce(ComplexKey键、Iterable值、上下文)
抛出IOException、InterruptedException{
//TODO自动生成的方法存根
双高价格=0;
双低价=0;
双倍开盘价=0;
双倍收盘价=0;
整数计数=0;
for(ComplexValue i:值){
++计数;
highPrice+=i.getHighPrice();
lowPrice+=i.getLowPrice();
openPrice+=i.getOpenPrice();
closePrice+=i.getClosePrice();
}
write(新文本(key.toString()),新文本(openPrice/count+,“+closePrice/count+,“+highPrice/count+,“+lowPrice/count+,”+count));
}
}
我得到的不是期望的聚合输出,而是未聚合的输出。我认为问题在于键(Key),因为框架无法正确地归并相同的键。有人能解释代码中的错误吗?——(评论)在 ComplexKey 中,equals 方法没有正确实现(对 String 字段使用了 == 引用比较),请改为按值比较。——我试过重写 equals,但仍然不起作用;上面已补上我之前遗漏的那部分代码。