Java MapReduce作业挂起
我是Hadoop的MapReduce新手。我已经写了一个MapReduce任务,我正在本地机器上运行它。但这项工作在map 100%完成后仍然悬而未决 下面是代码,我不明白我遗漏了什么 我有一个自定义密钥类Java MapReduce作业挂起,java,hadoop,mapreduce,Java,Hadoop,Mapreduce,我是Hadoop的MapReduce新手。我已经写了一个MapReduce任务,我正在本地机器上运行它。但这项工作在map 100%完成后仍然悬而未决 下面是代码,我不明白我遗漏了什么 我有一个自定义密钥类 import org.apache.hadoop.io.Text; import org.apache.hadoop.io.WritableComparable; public class AirlineMonthKey implements WritableComparable<A
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.WritableComparable;
public class AirlineMonthKey implements WritableComparable<AirlineMonthKey>{
Text airlineName;
Text month;
public AirlineMonthKey(){
super();
}
public AirlineMonthKey(Text airlineName, Text month) {
super();
this.airlineName = airlineName;
this.month = month;
}
public Text getAirlineName() {
return airlineName;
}
public void setAirlineName(Text airlineName) {
this.airlineName = airlineName;
}
public Text getMonth() {
return month;
}
public void setMonth(Text month) {
this.month = month;
}
@Override
public void readFields(DataInput in) throws IOException {
// TODO Auto-generated method stub
this.airlineName.readFields(in);
this.month.readFields(in);
}
@Override
public void write(DataOutput out) throws IOException {
// TODO Auto-generated method stub
this.airlineName.write(out);
this.month.write(out);
}
@Override
public int compareTo(AirlineMonthKey airlineMonthKey) {
// TODO Auto-generated method stub
int diff = getAirlineName().compareTo(airlineMonthKey.getAirlineName());
if(diff != 0){
return diff;
}
int m1 = Integer.parseInt(getMonth().toString());
int m2 = Integer.parseInt(airlineMonthKey.getMonth().toString());
if(m1>m2){
return -1;
}
else
return 1;
}
}
import org.apache.hadoop.io.Text;
导入org.apache.hadoop.io.WritableComparable;
公共类AirlineMonthKey实现了可写可比性{
文本名称;
文字月;
蒙特基公共航空公司(){
超级();
}
公共航空公司蒙特基(文本航空公司名称,文本月份){
超级();
this.airlineName=airlineName;
本月=月;
}
公共文本getAirlineName(){
返回airlineName;
}
public void setAirlineName(文本airlineName){
this.airlineName=airlineName;
}
公共文本getMonth(){
返回月份;
}
公共无效设置月(文本月){
本月=月;
}
@凌驾
public void readFields(DataInput in)引发IOException{
//TODO自动生成的方法存根
this.airlineName.readFields(in);
本.month.readFields(in);
}
@凌驾
public void write(DataOutput out)引发IOException{
//TODO自动生成的方法存根
这个.airlineName.write(out);
这个月写出来;
}
@凌驾
公共国际比较(班机蒙特基班机蒙特基班机){
//TODO自动生成的方法存根
int diff=getAirlineName().compareTo(airlineMonthKey.getAirlineName());
如果(差异!=0){
返回差;
}
int m1=Integer.parseInt(getMonth().toString());
int m2=Integer.parseInt(airlineMonthKey.getMonth().toString());
如果(m1>m2){
返回-1;
}
其他的
返回1;
}
}
以及使用自定义键的mapper和reducer类,如下所示
package com.mapresuce.secondarysort;
import java.io.IOException;
import java.io.StringReader;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;
import com.opencsv.CSVReader;
public class FlightDelayByMonth {
public static class FlightDelayByMonthMapper extends
Mapper<Object, Text, AirlineMonthKey, Text> {
public void map(Object key, Text value, Context context)
throws IOException, InterruptedException {
String str = value.toString();
// Reading Line one by one from the input CSV.
CSVReader reader = new CSVReader(new StringReader(str));
String[] split = reader.readNext();
reader.close();
String airlineName = split[6];
String month = split[2];
String year = split[0];
String delayMinutes = split[37];
String cancelled = split[41];
if (!(airlineName.equals("") || month.equals("") || delayMinutes
.equals(""))) {
if (year.equals("2008") && cancelled.equals("0.00")) {
AirlineMonthKey airlineMonthKey = new AirlineMonthKey(
new Text(airlineName), new Text(month));
Text delay = new Text(delayMinutes);
context.write(airlineMonthKey, delay);
System.out.println("1");
}
}
}
}
public static class FlightDelayByMonthReducer extends
Reducer<AirlineMonthKey, Text, Text, Text> {
public void reduce(AirlineMonthKey key, Iterable<Text> values,
Context context) throws IOException, InterruptedException {
for(Text val : values){
context.write(new Text(key.getAirlineName().toString()+" "+key.getMonth().toString()), val);
}
}
}
public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
Configuration conf = new Configuration();
String[] otherArgs = new GenericOptionsParser(conf, args)
.getRemainingArgs();
if (otherArgs.length != 2) {
System.err.println("Usage:<in> <out>");
System.exit(2);
}
Job job = new Job(conf, "Average monthly flight dealy");
job.setJarByClass(FlightDelayByMonth.class);
job.setMapperClass(FlightDelayByMonthMapper.class);
job.setReducerClass(FlightDelayByMonthReducer.class);
job.setOutputKeyClass(AirlineMonthKey.class);
job.setOutputValueClass(Text.class);
FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
System.exit(job.waitForCompletion(true) ? 0 : 1);
}
}
package com.mapresuce.secondarysort;
导入java.io.IOException;
导入java.io.StringReader;
导入org.apache.hadoop.conf.Configuration;
导入org.apache.hadoop.fs.Path;
导入org.apache.hadoop.io.Text;
导入org.apache.hadoop.mapreduce.Job;
导入org.apache.hadoop.mapreduce.Mapper;
导入org.apache.hadoop.mapreduce.Reducer;
导入org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
导入org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
导入org.apache.hadoop.util.GenericOptionsParser;
导入com.opencsv.CSVReader;
公务舱航班按月延迟{
公共静态类FlightDelayByMonthMapper扩展
制图员{
公共无效映射(对象键、文本值、上下文)
抛出IOException、InterruptedException{
字符串str=value.toString();
//从输入CSV中逐行读取。
CSVReader reader=新CSVReader(新StringReader(str));
String[]split=reader.readNext();
reader.close();
字符串airlineName=split[6];
字符串月份=拆分[2];
字符串年份=拆分[0];
字符串delayMinutes=split[37];
字符串取消=拆分[41];
如果(!(airlineName.equals(“”)|月.equals(“”)|延迟分钟
.等于(“”){
如果(年份等于(“2008”)&取消等于(“0.00”)){
AirlineMonthKey AirlineMonthKey=新AirlineMonthKey(
新文本(airlineName),新文本(month);
文本延迟=新文本(延迟分钟);
context.write(airlineMonthKey,delay);
系统输出打印项次(“1”);
}
}
}
}
公共静态类FlightDelayByMonthReducer扩展
减速器{
公共空间减少(AirlineMonthKey键,Iterable值,
上下文)抛出IOException、InterruptedException{
用于(文本值:值){
write(新文本(key.getAirlineName().toString()+“”+key.getMonth().toString()),val);
}
}
}
公共静态void main(字符串[]args)引发IOException、ClassNotFoundException、InterruptedException{
Configuration conf=新配置();
String[]otherArgs=新的GenericOptionsParser(conf,args)
.getremainargs();
if(otherArgs.length!=2){
System.err.println(“用法:”);
系统出口(2);
}
Job Job=新Job(形态,“每月平均航班日”);
job.setJarByClass(FlightDelayByMonth.class);
setMapperClass(FlightDelayByMonthMapper.class);
job.setReducerClass(FlightDelayByMonthReducer.class);
job.setOutputKeyClass(AirlineMonthKey.class);
job.setOutputValueClass(Text.class);
addInputPath(作业,新路径(其他参数[0]);
setOutputPath(作业,新路径(其他参数[1]);
系统退出(作业等待完成(真)?0:1;
}
}
此外,我还创建了一个作业和主配置。不知道我错过了什么。我正在本地环境中运行所有这些操作。尝试在AirlineMonthKey类中编写toString、equals和hashcode的自定义实现 阅读下面的链接 键类型实现hashCode()非常重要
希望这能对您有所帮助。问题是我必须在AirlineMonthKey中使用默认构造函数(我使用了),并在自定义key类中初始化实例变量(我没有)