
Java MapReduce Hadoop 2.4.1 Reducer not running


For some reason, my reducer does not seem to be running.

My driver is:

import java.io.File;
import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.*;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;


public class PageRank {

    public static void main(String[] args) throws Exception {
        PageRank pageRanking = new PageRank();

        //In and Out dirs in HDFS

        pageRanking.runXmlParsing(args[0], args[1]);
        System.out.println("finished");

    }

    public void runXmlParsing(String inputPath, String outputPath) throws IOException {
        Configuration conf = new Configuration();
        conf.set(XmlInputFormat.START_TAG_KEY, "<page>");
        conf.set(XmlInputFormat.END_TAG_KEY, "</page>");

        Job job1 = Job.getInstance(conf);
        job1.setJarByClass(PageRank.class);

        job1.setOutputKeyClass(Text.class);
        job1.setOutputValueClass(Text.class);

        // Our class to parse links from content.
        job1.setMapperClass(WikiPageXMLMapper.class);
        job1.setReducerClass(WikiLinksReducer.class);

        job1.setInputFormatClass(XmlInputFormat.class);
        job1.setOutputFormatClass(TextOutputFormat.class);

        // Remove output if already exists
        FileSystem.getLocal(conf).delete(new Path(outputPath), true);

        FileInputFormat.setInputPaths(job1, new Path(inputPath));
        FileOutputFormat.setOutputPath(job1, new Path(outputPath));  

        System.out.println("BEFORE RUN");

        try {
            job1.waitForCompletion(true);
        } catch (ClassNotFoundException e) {
            // TODO Auto-generated catch block
            e.printStackTrace();
        } catch (InterruptedException e) {
            // TODO Auto-generated catch block
            e.printStackTrace();
        }    
    }

    public void deleteDir(File dir) {
        File[] files = dir.listFiles();

        for (File myFile: files) {
            if (myFile.isDirectory()) {  
                deleteDir(myFile);
            } 
            myFile.delete();

        }
    }
}
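One thing worth double-checking in the driver: FileSystem.getLocal(conf) always returns the local file system, so the delete call above never removes an output directory that already exists in HDFS. If the input and output paths really live in HDFS (as the comment in main suggests), the cleanup has to go through the file system that owns the path. A minimal sketch of that variant; the class and method names here are illustrative, not part of the original code:

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class OutputCleanup {
    // Sketch: delete an existing output directory on whichever file system
    // owns the path (HDFS for hdfs:// or default-FS paths), not the local FS.
    public static void deleteIfExists(Configuration conf, String outputPath) throws IOException {
        Path out = new Path(outputPath);
        FileSystem fs = out.getFileSystem(conf); // instead of FileSystem.getLocal(conf)
        if (fs.exists(out)) {
            fs.delete(out, true); // true = recursive
        }
    }
}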
My mapper is:

import java.io.IOException;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.*;


public class WikiPageXMLMapper extends Mapper<LongWritable, Text, Text, Text> {

    @Override
    public void map(LongWritable key, Text value, Context output) throws IOException {

        String[] titleAndText = parseTitleAndText(value.toString());

        String pageString = titleAndText[0];
        Text page = new Text(pageString.replace(' ', '_'));

        String[] parts = titleAndText[1].split("\\[\\[");

        String pages = "!@#$ ";
        for (int i = 1; i < parts.length; i++) {
            int lastIndexBrackets = parts[i].lastIndexOf("]]");
            // This checks and skips the first part of the outer link
            if (lastIndexBrackets == -1)
                continue;

            String insideLinkPlusExtra = parts[i].substring(0, lastIndexBrackets);
            int multipleClosingBrackets = insideLinkPlusExtra.indexOf("]]");

            String otherPage = insideLinkPlusExtra;

            if (multipleClosingBrackets != -1) {
                otherPage = insideLinkPlusExtra.substring(0, multipleClosingBrackets);
            }

            otherPage = otherPage.split("\\|")[0];
            otherPage = checkForDuplicates(otherPage, pages);
            otherPage = (otherPage.indexOf(":") == -1) ? otherPage : "";
            otherPage = (otherPage.indexOf("#") == -1) ? otherPage : "";
            otherPage = checkForSubpageLinks(otherPage);
            otherPage = checkForRedLink(otherPage);

            if (otherPage == "")
                continue;

            Text oP = new Text(otherPage.replace(' ', '_'));
            pages += oP + " ";

            // taking each outlink and making it its own key (ingraph)
            try {
                output.write(new Text(oP), new Text(page));
            } catch (InterruptedException e) {
                e.printStackTrace();
            }                   
        }

        // Designate this page as not a redlink
        try {
            output.write(new Text(page), new Text("!@#$"));
        } catch (InterruptedException e) {
            // TODO Auto-generated catch block
            e.printStackTrace();
        }
        return; 
    }
 }
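A side note on the mapper: in the new org.apache.hadoop.mapreduce API, Mapper.map is declared as throwing both IOException and InterruptedException, so the per-call try/catch around context.write is not needed when the method declares the same exceptions. A skeleton of that shape, with the link-parsing body deliberately omitted (this is not the original logic):

import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

// Skeleton only: shows the new-API map signature and a direct context.write call.
public class WikiPageXMLMapperSkeleton extends Mapper<LongWritable, Text, Text, Text> {

    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        // InterruptedException from context.write simply propagates to the framework.
        context.write(new Text("targetPage"), new Text("sourcePage"));
    }
}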
My reducer is:

import java.io.IOException;
import java.util.Iterator;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;


public class WikiLinksReducer extends Reducer<Text, Text, Text, Text> {

    public void reduce(Text key, Iterator<Text> values, org.apache.hadoop.mapreduce.Reducer<Text, Text, Text, Text>.Context output) throws IOException, InterruptedException {

        System.out.println("REDUCER");
        String links = "";        
        boolean isNotRedLink = false;

        System.out.println("Starting reduce");

        // Brett concern (and zach's): if n pages link to a redlink
        // we will iterate n times and it could be wasteful
        while(values.hasNext()){
            String v = values.next().toString();

            // Check first outlink is not #, if so, it is a redlink
            if (v.equals("!@#$")) {
                isNotRedLink = true;
                continue;

            } else {
                links += v;
                continue;
            }
        }

        // If the key is not a redlink, send it to the output
        if (isNotRedLink) {

            try {
                output.write(key, new Text(links));
                output.write(key, new Text("TESTING!"));
            } catch (InterruptedException e) {
                // TODO Auto-generated catch block
                e.printStackTrace();
            }
            System.out.println(links);


        } else {

            System.out.println(output);
            try {
                output.write(key, new Text("BLEG"));
            } catch (InterruptedException e) {
                // TODO Auto-generated catch block
                e.printStackTrace();
            }

            System.out.println(key + " IS A RED LINK");
            return;
        }
     }
}
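The reduce method above takes an Iterator&lt;Text&gt;, but in the new org.apache.hadoop.mapreduce API Reducer.reduce is declared with an Iterable&lt;Text&gt;, so the method does not override the framework hook and the default identity reducer runs instead, which would explain why none of the println calls ever appear. A sketch of the expected signature, with @Override so a mismatch becomes a compile-time error; the red-link handling is only summarized here, not the original logic verbatim:

import java.io.IOException;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

public class WikiLinksReducerSketch extends Reducer<Text, Text, Text, Text> {

    // The new-API hook is reduce(KEYIN, Iterable<VALUEIN>, Context); with
    // @Override the compiler rejects a signature the framework would never call.
    @Override
    protected void reduce(Text key, Iterable<Text> values, Context context)
            throws IOException, InterruptedException {
        StringBuilder links = new StringBuilder();
        boolean isNotRedLink = false;

        for (Text value : values) {
            String v = value.toString();
            if (v.equals("!@#$")) {
                isNotRedLink = true;          // marker record: the page itself exists
            } else {
                links.append(v).append(' ');  // collect pages that link to this key
            }
        }

        if (isNotRedLink) {
            context.write(key, new Text(links.toString()));
        }
        // Red links (no marker value) are simply dropped in this sketch.
    }
}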