Java 在Hadoop 2.2.0中打开缓存文件_Java_Hadoop_Mapreduce_Yarn

Java 在Hadoop 2.2.0中打开缓存文件

java hadoop mapreduce

Java 在Hadoop 2.2.0中打开缓存文件,java,hadoop,mapreduce,yarn,Java,Hadoop,Mapreduce,Yarn,使用job.addCacheFile（）将缓存文件添加到作业中，并使用我的映射器使用context.getCacheFiles（）将其下拉后。如何打开缓存文件。我试过使用： BufferedReader reader=new BufferedReader（new FileReader（filename））（注释如下）其中filename是URI的toString（）。有人能帮我吗 import java.io.*; import java.net.*; import java.util.*;

使用

job.addCacheFile（）

将缓存文件添加到作业中，并使用我的映射器使用

context.getCacheFiles（）

将其下拉后。如何打开缓存文件。我试过使用：

BufferedReader reader=new BufferedReader（new FileReader（filename））（注释如下）
其中filename是URI
的toString（）。有人能帮我吗
import java.io.*;
import java.net.*;
import java.util.*;
import java.util.regex.*;

import org.apache.hadoop.conf.*;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.*;
import org.apache.hadoop.mapreduce.*;
import org.apache.hadoop.mapreduce.lib.chain.*;
import org.apache.hadoop.mapreduce.lib.input.*;
import org.apache.hadoop.mapreduce.lib.output.*;
import org.apache.hadoop.mapreduce.lib.reduce.*;

public class UFOLocation2
{
    public static class MapClass extends Mapper<LongWritable, Text, Text, LongWritable>
    {
        private final static LongWritable one = new LongWritable(1);
        private static Pattern locationPattern = Pattern.compile("[a-zA-Z]{2}[^a-zA-Z]*$");

    private Map<String, String> stateNames;

    @Override
    public void setup(Context context)
    {
        try
        {
            URI[] cacheFiles = context.getCacheFiles();
            setupStateMap(cacheFiles[0].toString());
        }
        catch (IOException ioe)
        {
            System.err.println("Error reading state file.");
            ioe.printStackTrace();
            System.exit(1);
        }
    }

    public void map(LongWritable key, Text value, Context context) 
                    throws IOException, InterruptedException
    {
        String line = value.toString();
        String[] fields = line.split("\t");
        String location = fields[2].trim();

        if (location.length() >= 2)
        {
            Matcher matcher = locationPattern.matcher(location);
            if (matcher.find())
            {
                int start = matcher.start();
                String state = location.substring(start, start + 2);
                context.write(new Text(lookupState(state.toUpperCase())), one);
            }
        }
    }

    private void setupStateMap(String filename) throws IOException
    {
        Map<String, String> states = new HashMap<String, String>();
        // the following line causes an IOException
        BufferedReader reader = new BufferedReader(new FileReader(filename));
        String line = reader.readLine();
        while (line != null)
        {
            String[] split = line.split("\t");
            states.put(split[0], split[1]);
            line = reader.readLine();
        }
        stateNames = states;
    }

    private String lookupState(String state)
    {
        String fullName = stateNames.get(state);
        return fullName == null ? "Other" : fullName;
    }
}

public static void main(String[] args) throws Exception
{
    Configuration config = new Configuration();
    Job job = Job.getInstance(config, "UFO Location 2");
    job.setJarByClass(UFOLocation2.class);

    job.addCacheFile(new URI("/user/kevin/data/states.txt"));

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);

    Configuration mapconf1 = new Configuration(false);
    ChainMapper.addMapper(job, UFORecordValidationMapper.class, LongWritable.class, 
                Text.class, LongWritable.class, Text.class, mapconf1);

    Configuration mapconf2 = new Configuration(false);
    ChainMapper.addMapper(job, MapClass.class, LongWritable.class, 
                Text.class, Text.class, LongWritable.class, mapconf2);

    job.setMapperClass(ChainMapper.class);
    job.setCombinerClass(LongSumReducer.class);
    job.setReducerClass(LongSumReducer.class);

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    System.exit(job.waitForCompletion(true) ? 0 : 1);               
}
}

import java.io.*；
导入java.net。*；
导入java.util.*；
导入java.util.regex.*；
导入org.apache.hadoop.conf.*；
导入org.apache.hadoop.fs.Path；
导入org.apache.hadoop.io.*；
导入org.apache.hadoop.mapreduce.*；
导入org.apache.hadoop.mapreduce.lib.chain.*；
导入org.apache.hadoop.mapreduce.lib.input.*；
导入org.apache.hadoop.mapreduce.lib.output.*；
导入org.apache.hadoop.mapreduce.lib.reduce.*；
公共类UFOLocation2
{
公共静态类映射器类扩展映射器
{
private final static LongWritable one=新的LongWritable（1）；
私有静态模式locationPattern=Pattern.compile（“[a-zA-Z]{2}[^a-zA-Z]*$”；
私有地图州名；
@凌驾
公共无效设置（上下文）
{
尝试
{
URI[]cacheFiles=context.getCacheFiles（）；
setupStateMap（缓存文件[0].toString（））；
}
捕获（ioe异常ioe）
{
System.err.println（“读取状态文件时出错”）；
ioe.printStackTrace（）；
系统出口（1）；
}
}
公共void映射（可长写键、文本值、上下文）
抛出IOException、InterruptedException
{
字符串行=value.toString（）；
String[]fields=line.split（“\t”）；
字符串位置=字段[2]。trim（）；
if（location.length（）>=2）
{
Matcher Matcher=locationPattern.Matcher（位置）；
if（matcher.find（））
{
int start=matcher.start（）；
字符串状态=位置.子字符串（开始，开始+2）；
write（新文本（lookupState（state.toUpperCase（）），一个）；
}
}
}
私有void setupStateMap（字符串文件名）引发IOException
{
映射状态=新的HashMap（）；
//以下行导致IOException
BufferedReader reader=新的BufferedReader（新文件读取器（文件名））；
字符串行=reader.readLine（）；
while（行！=null）
{
String[]split=line.split（“\t”）；
出售（分割[0]，分割[1]）；
line=reader.readLine（）；
}
州名=州；
}
私有字符串lookupState（字符串状态）
{
字符串fullName=stateNames.get（state）；
返回fullName==null？“其他”：fullName；
}
}
公共静态void main（字符串[]args）引发异常
{
配置配置=新配置（）；
Job Job=Job.getInstance（配置，“UFO位置2”）；
job.setJarByClass（UFOLocation2.class）；
addCacheFile（新URI（“/user/kevin/data/states.txt”）；
job.setOutputKeyClass（Text.class）；
job.setOutputValueClass（LongWritable.class）；
Configuration mapconf1=新配置（错误）；
ChainMapper.addMapper（作业、UForeRecordValidationMapper.class、LongWritable.class、，
Text.class、LongWritable.class、Text.class、mapconf1）；
Configuration mapconf2=新配置（错误）；
ChainMapper.addMapper（作业，MapClass.class，LongWritable.class，
Text.class、Text.class、LongWritable.class、mapconf2）；
setMapperClass（ChainMapper.class）；
job.setCombinerClass（LongSumReducer.class）；
job.setReducerClass（LongSumReducer.class）；
addInputPath（作业，新路径（args[0]）；
setOutputPath（作业，新路径（args[1]）；
系统退出（作业等待完成（真）？0:1；
}
}

这是代码问题还是配置问题？我在一个假定为psuedo的分布式集群上运行
您可以使用类似的东西
    Path path = new Path(uri[0].getPath().toString());
    if (fileSystem.exists(path)) {
        FSDataInputStream dataInputStream = fileSystem.open(path);

        byte[] data = new byte[1024];
        while (dataInputStream.read(data) > 0) {
            //do your stuff here
        }

        dataInputStream.close();
    }