Java MapReduce Hadoop 2.4.1 Reducer Not Running
For some reason, my reducer doesn't seem to be running. My driver is:
import java.io.File;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.*;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
public class PageRank {

    public static void main(String[] args) throws Exception {
        PageRank pageRanking = new PageRank();
        // In and Out dirs in HDFS
        pageRanking.runXmlParsing(args[0], args[1]);
        System.out.println("finished");
    }

    public void runXmlParsing(String inputPath, String outputPath) throws IOException {
        Configuration conf = new Configuration();
        conf.set(XmlInputFormat.START_TAG_KEY, "<page>");
        conf.set(XmlInputFormat.END_TAG_KEY, "</page>");

        Job job1 = Job.getInstance(conf);
        job1.setJarByClass(PageRank.class);
        job1.setOutputKeyClass(Text.class);
        job1.setOutputValueClass(Text.class);
        // Our class to parse links from content.
        job1.setMapperClass(WikiPageXMLMapper.class);
        job1.setReducerClass(WikiLinksReducer.class);
        job1.setInputFormatClass(XmlInputFormat.class);
        job1.setOutputFormatClass(TextOutputFormat.class);

        // Remove output if already exists
        FileSystem.getLocal(conf).delete(new Path(outputPath), true);

        FileInputFormat.setInputPaths(job1, new Path(inputPath));
        FileOutputFormat.setOutputPath(job1, new Path(outputPath));
        System.out.println("BEFORE RUN");
        try {
            job1.waitForCompletion(true);
        } catch (ClassNotFoundException e) {
            e.printStackTrace();
        } catch (InterruptedException e) {
            e.printStackTrace();
        }
    }

    public void deleteDir(File dir) {
        File[] files = dir.listFiles();
        for (File myFile : files) {
            if (myFile.isDirectory()) {
                deleteDir(myFile);
            }
            myFile.delete();
        }
    }
}
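One thing worth double-checking in the driver, assuming the input and output directories really are in HDFS as the comment says: FileSystem.getLocal(conf) always resolves the local filesystem, so the delete call above only ever removes a local path and never touches the HDFS output directory. A minimal sketch of an HDFS-side delete, reusing the same conf and outputPath:

// FileSystem.get(conf) resolves whatever fs.defaultFS points at (HDFS on a cluster),
// whereas FileSystem.getLocal(conf) always returns the local filesystem.
FileSystem fs = FileSystem.get(conf);
fs.delete(new Path(outputPath), true); // recursive delete of the output directory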
My mapper is:
import java.io.IOException;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.*;
public class WikiPageXMLMapper extends Mapper<LongWritable, Text, Text, Text> {

    @Override
    public void map(LongWritable key, Text value, Context output) throws IOException {
        String[] titleAndText = parseTitleAndText(value.toString());

        String pageString = titleAndText[0];
        Text page = new Text(pageString.replace(' ', '_'));

        String[] parts = titleAndText[1].split("\\[\\[");
        String pages = "!@#$ ";
        for (int i = 1; i < parts.length; i++) {
            int lastIndexBrackets = parts[i].lastIndexOf("]]");
            // This checks and skips the first part of the outer link
            if (lastIndexBrackets == -1)
                continue;

            String insideLinkPlusExtra = parts[i].substring(0, lastIndexBrackets);
            int multipleClosingBrackets = insideLinkPlusExtra.indexOf("]]");

            String otherPage = insideLinkPlusExtra;
            if (multipleClosingBrackets != -1) {
                otherPage = insideLinkPlusExtra.substring(0, multipleClosingBrackets);
            }

            otherPage = otherPage.split("\\|")[0];
            otherPage = checkForDuplicates(otherPage, pages);
            otherPage = (otherPage.indexOf(":") == -1) ? otherPage : "";
            otherPage = (otherPage.indexOf("#") == -1) ? otherPage : "";
            otherPage = checkForSubpageLinks(otherPage);
            otherPage = checkForRedLink(otherPage);

            if (otherPage == "")
                continue;

            Text oP = new Text(otherPage.replace(' ', '_'));
            pages += oP + " ";
            // Taking each outlink and making it its own key (ingraph)
            try {
                output.write(new Text(oP), new Text(page));
            } catch (InterruptedException e) {
                e.printStackTrace();
            }
        }
        // Designate this page as not a redlink
        try {
            output.write(new Text(page), new Text("!@#$"));
        } catch (InterruptedException e) {
            e.printStackTrace();
        }
        return;
    }
}
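A small aside on the mapper: the otherPage == "" check compares object references, and only happens to work here because the ternaries above assign the interned literal "". A value comparison such as isEmpty() is the safe form, as this minimal sketch illustrates:

String a = "";
String b = new String("");
System.out.println(a == "");     // true: both refer to the same interned literal
System.out.println(b == "");     // false: a distinct String object with equal contents
System.out.println(b.isEmpty()); // true: compares contents, always safe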
My reducer is:
import java.io.IOException;
import java.util.Iterator;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
public class WikiLinksReducer extends Reducer<Text, Text, Text, Text> {

    public void reduce(Text key, Iterator<Text> values, Context output)
            throws IOException, InterruptedException {
        System.out.println("REDUCER");
        String links = "";
        boolean isNotRedLink = false;

        System.out.println("Starting reduce");

        // Brett's concern (and Zach's): if n pages link to a redlink
        // we will iterate n times, which could be wasteful
        while (values.hasNext()) {
            String v = values.next().toString();
            // The !@#$ marker means this page actually exists (is not a redlink)
            if (v.equals("!@#$")) {
                isNotRedLink = true;
                continue;
            } else {
                links += v;
                continue;
            }
        }

        // If the key is not a redlink, send it to the output
        if (isNotRedLink) {
            try {
                output.write(key, new Text(links));
                output.write(key, new Text("TESTING!"));
            } catch (InterruptedException e) {
                e.printStackTrace();
            }
            System.out.println(links);
        } else {
            System.out.println(output);
            try {
                output.write(key, new Text("BLEG"));
            } catch (InterruptedException e) {
                e.printStackTrace();
            }
            System.out.println(key + " IS A RED LINK");
            return;
        }
    }
}
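The most likely reason the reducer never appears to run, assuming the new org.apache.hadoop.mapreduce API throughout: Reducer.reduce takes an Iterable<Text>, not an Iterator<Text>. The method above therefore has a different signature, never overrides the framework's reduce, and the inherited default (identity) reduce runs instead. A minimal sketch of the expected signature; annotating it with @Override makes the compiler catch exactly this kind of mismatch:

import java.io.IOException;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

public class WikiLinksReducer extends Reducer<Text, Text, Text, Text> {

    @Override // fails to compile if the signature doesn't match the framework's
    public void reduce(Text key, Iterable<Text> values, Context context)
            throws IOException, InterruptedException {
        StringBuilder links = new StringBuilder();
        for (Text v : values) { // Iterable supports for-each; an Iterator parameter never overrides
            links.append(v.toString()).append(" ");
        }
        context.write(key, new Text(links.toString()));
    }
}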