Java MapReduce Hadoop 2.4.1 Reducer Not Running
For some reason, my reducer doesn't seem to be running. My driver is:
import java.io.File;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.*;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
public class PageRank {

    public static void main(String[] args) throws Exception {
        PageRank pageRanking = new PageRank();
        // In and Out dirs in HDFS
        pageRanking.runXmlParsing(args[0], args[1]);
        System.out.println("finished");
    }

    public void runXmlParsing(String inputPath, String outputPath) throws IOException {
        Configuration conf = new Configuration();
        conf.set(XmlInputFormat.START_TAG_KEY, "<page>");
        conf.set(XmlInputFormat.END_TAG_KEY, "</page>");

        Job job1 = Job.getInstance(conf);
        job1.setJarByClass(PageRank.class);
        job1.setOutputKeyClass(Text.class);
        job1.setOutputValueClass(Text.class);
        // Our class to parse links from content.
        job1.setMapperClass(WikiPageXMLMapper.class);
        job1.setReducerClass(WikiLinksReducer.class);
        job1.setInputFormatClass(XmlInputFormat.class);
        job1.setOutputFormatClass(TextOutputFormat.class);

        // Remove output if already exists
        FileSystem.getLocal(conf).delete(new Path(outputPath), true);

        FileInputFormat.setInputPaths(job1, new Path(inputPath));
        FileOutputFormat.setOutputPath(job1, new Path(outputPath));
        System.out.println("BEFORE RUN");
        try {
            job1.waitForCompletion(true);
        } catch (ClassNotFoundException e) {
            e.printStackTrace();
        } catch (InterruptedException e) {
            e.printStackTrace();
        }
    }

    public void deleteDir(File dir) {
        File[] files = dir.listFiles();
        for (File myFile : files) {
            if (myFile.isDirectory()) {
                deleteDir(myFile);
            }
            myFile.delete();
        }
    }
}
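One thing worth double-checking in the driver, assuming the input and output directories really are in HDFS as the comment says: FileSystem.getLocal(conf) always resolves the local filesystem, so the delete call above only ever removes a local path and never touches the HDFS output directory. A minimal sketch of an HDFS-side delete, reusing the same conf and outputPath:

// FileSystem.get(conf) resolves whatever fs.defaultFS points at (HDFS on a cluster),
// whereas FileSystem.getLocal(conf) always returns the local filesystem.
FileSystem fs = FileSystem.get(conf);
fs.delete(new Path(outputPath), true); // recursive delete of the output directory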
My mapper is:
import java.io.IOException;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.*;
public class WikiPageXMLMapper extends Mapper<LongWritable, Text, Text, Text> {

    @Override
    public void map(LongWritable key, Text value, Context output) throws IOException {
        String[] titleAndText = parseTitleAndText(value.toString());

        String pageString = titleAndText[0];
        Text page = new Text(pageString.replace(' ', '_'));

        String[] parts = titleAndText[1].split("\\[\\[");
        String pages = "!@#$ ";
        for (int i = 1; i < parts.length; i++) {
            int lastIndexBrackets = parts[i].lastIndexOf("]]");
            // This checks and skips the first part of the outer link
            if (lastIndexBrackets == -1)
                continue;

            String insideLinkPlusExtra = parts[i].substring(0, lastIndexBrackets);
            int multipleClosingBrackets = insideLinkPlusExtra.indexOf("]]");

            String otherPage = insideLinkPlusExtra;
            if (multipleClosingBrackets != -1) {
                otherPage = insideLinkPlusExtra.substring(0, multipleClosingBrackets);
            }

            otherPage = otherPage.split("\\|")[0];
            otherPage = checkForDuplicates(otherPage, pages);
            otherPage = (otherPage.indexOf(":") == -1) ? otherPage : "";
            otherPage = (otherPage.indexOf("#") == -1) ? otherPage : "";
            otherPage = checkForSubpageLinks(otherPage);
            otherPage = checkForRedLink(otherPage);

            if (otherPage == "")
                continue;

            Text oP = new Text(otherPage.replace(' ', '_'));
            pages += oP + " ";
            // Taking each outlink and making it its own key (ingraph)
            try {
                output.write(new Text(oP), new Text(page));
            } catch (InterruptedException e) {
                e.printStackTrace();
            }
        }
        // Designate this page as not a redlink
        try {
            output.write(new Text(page), new Text("!@#$"));
        } catch (InterruptedException e) {
            e.printStackTrace();
        }
        return;
    }
}
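A small aside on the mapper: the otherPage == "" check compares object references, and only happens to work here because the ternaries above assign the interned literal "". A value comparison such as isEmpty() is the safe form, as this minimal sketch illustrates:

String a = "";
String b = new String("");
System.out.println(a == "");     // true: both refer to the same interned literal
System.out.println(b == "");     // false: a distinct String object with equal contents
System.out.println(b.isEmpty()); // true: compares contents, always safe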
My reducer is:
import java.io.IOException;
import java.util.Iterator;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
public class WikiLinksReducer extends Reducer<Text, Text, Text, Text> {

    public void reduce(Text key, Iterator<Text> values, Context output)
            throws IOException, InterruptedException {
        System.out.println("REDUCER");
        String links = "";
        boolean isNotRedLink = false;

        System.out.println("Starting reduce");

        // Brett's concern (and Zach's): if n pages link to a redlink
        // we will iterate n times, which could be wasteful
        while (values.hasNext()) {
            String v = values.next().toString();
            // The !@#$ marker means this page actually exists (is not a redlink)
            if (v.equals("!@#$")) {
                isNotRedLink = true;
                continue;
            } else {
                links += v;
                continue;
            }
        }

        // If the key is not a redlink, send it to the output
        if (isNotRedLink) {
            try {
                output.write(key, new Text(links));
                output.write(key, new Text("TESTING!"));
            } catch (InterruptedException e) {
                e.printStackTrace();
            }
            System.out.println(links);
        } else {
            System.out.println(output);
            try {
                output.write(key, new Text("BLEG"));
            } catch (InterruptedException e) {
                e.printStackTrace();
            }
            System.out.println(key + " IS A RED LINK");
            return;
        }
    }
}
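The most likely reason the reducer never appears to run, assuming the new org.apache.hadoop.mapreduce API throughout: Reducer.reduce takes an Iterable<Text>, not an Iterator<Text>. The method above therefore has a different signature, never overrides the framework's reduce, and the inherited default (identity) reduce runs instead. A minimal sketch of the expected signature; annotating it with @Override makes the compiler catch exactly this kind of mismatch:

import java.io.IOException;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

public class WikiLinksReducer extends Reducer<Text, Text, Text, Text> {

    @Override // fails to compile if the signature doesn't match the framework's
    public void reduce(Text key, Iterable<Text> values, Context context)
            throws IOException, InterruptedException {
        StringBuilder links = new StringBuilder();
        for (Text v : values) { // Iterable supports for-each; an Iterator parameter never overrides
            links.append(v.toString()).append(" ");
        }
        context.write(key, new Text(links.toString()));
    }
}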