在Spring data hadoop上运行作业时出现问题
标签:spring, hadoop, spring-data, spring-data-hadoop
我已经使用Mahout创建了以下映射器(Mapper)和归约器(Reducer)。映射器类如下:
package mypackage.ItemSimilarity;
import java.io.IOException;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.mahout.math.VarLongWritable;
/**
 * Mapper that turns one line of text into (user ID, item ID) pairs.
 *
 * <p>Every run of decimal digits on the line is treated as a number. The first
 * number is used as the user ID; each following number is emitted as an item ID
 * paired with that user ID. Lines that contain no digits at all produce no
 * output instead of failing the task.
 */
public class ItemPrefMapper extends
    Mapper<LongWritable, Text, VarLongWritable, VarLongWritable> {

  /** Matches one run of decimal digits; compiled once and shared by all calls. */
  private static final Pattern NUMBERS = Pattern.compile("(\\d+)");

  /**
   * Emits one (user ID, item ID) pair per item number found on the line.
   *
   * @param key     input key supplied by the framework; not read by this method
   * @param value   the line of text to parse
   * @param context sink that receives the emitted pairs
   * @throws IOException          if writing to the context fails
   * @throws InterruptedException if writing to the context is interrupted
   */
  @Override
  public void map(LongWritable key, Text value, Context context)
      throws IOException, InterruptedException {
    Matcher m = NUMBERS.matcher(value.toString());

    // Matcher.group() throws IllegalStateException when no match has been
    // found, so a blank or non-numeric line must be skipped explicitly.
    if (!m.find()) {
      return;
    }
    VarLongWritable userID = new VarLongWritable(Long.parseLong(m.group()));

    // One writable is reused for every item on the line; it is overwritten
    // before each write.
    VarLongWritable itemID = new VarLongWritable();
    while (m.find()) {
      itemID.set(Long.parseLong(m.group()));
      context.write(userID, itemID);
    }
  }
}
package mypackage.ItemSimilarity;
导入java.io.IOException;
导入java.util.regex.Matcher;
导入java.util.regex.Pattern;
导入org.apache.hadoop.io.LongWritable;
导入org.apache.hadoop.io.Text;
导入org.apache.hadoop.mapreduce.Mapper;
导入org.apache.mahout.math.VarLongWritable;
公共类ItemPrefMapper扩展
制图员{
私有静态最终模式编号=Pattern.compile(“\\d+”);
@凌驾
公共void映射(可长写键、文本值、上下文)
抛出IOException、InterruptedException{
字符串行=value.toString();
匹配器m=数字。匹配器(线);
m、 查找();
VarLongWritable userID=新的VarLongWritable(Long.parseLong(m.group());
VarLongWritable itemID=新的VarLongWritable();
while(m.find()){
itemID.set(Long.parseLong(m.group());
write(userID,itemID);
}
}
}
Reducer类
package mypackage.ItemSimilarity;
import java.io.IOException;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.mahout.math.RandomAccessSparseVector;
import org.apache.mahout.math.VarLongWritable;
import org.apache.mahout.math.Vector;
import org.apache.mahout.math.VectorWritable;
/**
 * Reducer that collapses all item IDs received for one user into a single
 * sparse vector and writes that vector out under the user's ID.
 */
public class UserVectorReducer
    extends Reducer<VarLongWritable, VarLongWritable, VarLongWritable, VectorWritable> {

  /**
   * Builds the vector for one user.
   *
   * @param userID    key shared by all incoming values; reused as the output key
   * @param itemPrefs item IDs grouped under {@code userID}
   * @param context   sink that receives the (user ID, vector) pair
   * @throws IOException          if writing to the context fails
   * @throws InterruptedException if writing to the context is interrupted
   */
  @Override
  public void reduce(VarLongWritable userID,
      Iterable<VarLongWritable> itemPrefs, Context context)
      throws IOException, InterruptedException {
    final Vector preferences = new RandomAccessSparseVector(Integer.MAX_VALUE, 100);

    for (VarLongWritable item : itemPrefs) {
      // The long item ID is narrowed to int to serve as the vector index.
      final int index = (int) item.get();
      preferences.set(index, 1.0f);
    }

    final VectorWritable output = new VectorWritable(preferences);
    context.write(userID, output);
  }
}
package mypackage.ItemSimilarity;
导入java.io.IOException;
导入org.apache.hadoop.mapreduce.Reducer;
导入org.apache.mahout.math.RandomAccessSparseVector;
导入org.apache.mahout.math.VarLongWritable;
导入org.apache.mahout.math.Vector;
导入org.apache.mahout.math.VectorWritable;
公共类uservectoreducer
延伸
减速器{
@凌驾
public void reduce(VarLongWritable userID,
Iterable itempres,上下文)
抛出IOException、InterruptedException{
Vector userVector=新的随机访问sparSevector(Integer.MAX_值,100);
for(VarLongWritable itemPref:itemPrefs){
set((int)itemPref.get(),1.0f);
}
write(userID,newvectorWritable(userVector));
}
}
用于运行该作业的Spring配置如下:
<!-- Declares the MapReduce job "mahoutJob": reads /home/ubuntu/input/data.txt,
     writes to /home/ubuntu/output, and uses ItemPrefMapper / UserVectorReducer.
     jar-by-class names the class used to locate the job jar.
     NOTE(review): whether these paths resolve against HDFS or the local file
     system depends on the Hadoop configuration, which is not shown here. -->
<job id="mahoutJob" input-path="/home/ubuntu/input/data.txt" output-path="/home/ubuntu/output"
mapper="mypackage.ItemSimilarity.ItemPrefMapper"
reducer="mypackage.ItemSimilarity.UserVectorReducer"
jar-by-class="mypackage.ItemSimilarity.ItemPrefMapper"/>
<!-- Runner for "mahoutJob" (job-ref), configured to run at startup with
     "setupScript" as its pre-action.
     NOTE(review): "setupScript" is presumably a bean defined elsewhere in the
     application context; it is not visible in this snippet. -->
<job-runner id="myjob-runner" pre-action="setupScript" job-ref="mahoutJob"
run-at-startup="true"/>
当我运行这个时,我得到了以下错误。我已经扩展了Hadoop映射器类,但spring说它不是映射器类
java.lang.RuntimeException: class mypackage.ItemSimilarity.ItemPrefMapper not org.apache.hadoop.mapreduce.Mapper
    at org.apache.hadoop.conf.Configuration.setClass(Configuration.java:931)
    at org.apache.hadoop.mapreduce.Job.setMapperClass(Job.java:175)
    at org.springframework.data.hadoop.mapreduce.JobFactoryBean.afterPropertiesSet(JobFactoryBean.java:153)
    at org.springframework.beans.factory.support.AbstractAutowireCapableBeanFactory.invokeInitMethods(AbstractAutowireCapableBeanFactory.java:1571)
    at org.springframework.beans.factory.support.AbstractAutowireCapableBeanFactory.initializeBean(AbstractAutowireCapableBeanFactory.java:1509)
    at org.springframework.beans.factory.support.AbstractAutowireCapableBeanFactory.doCreateBean(AbstractAutowireCapableBeanFactory.java:521)
    at org.springframework.beans.factory.support.AbstractAutowireCapableBeanFactory.createBean(AbstractAutowireCapableBeanFactory.java:458)
回答:你确定jar-by-class属性设置正确吗?它应该指向包含main方法的类,也就是实例化ApplicationContext的那个类。另外,你确定包名没有写错吗?是 com.threepillar.labs.ItemSimilarity.ItemPrefMapper 还是 mypackage.ItemSimilarity.ItemPrefMapper?
评论:问题不在这里,因为指定的映射器类已经被成功加载;但框架仍然报告它不是Hadoop的Mapper,尽管我的映射器确实继承自Hadoop的Mapper。
评论:@Tarunnappal 也请检查一下你的包名。
评论:那只是我在Stack Overflow上发帖时的输入错误,我已经编辑了帖子。我也尝试过修改jar-by-class属性,但仍然遇到同样的问题。