Java:哪种 Mapper 代码的内存效率更高?

Java 哪种映射程序代码的内存效率更高?,java,mapreduce,out-of-memory,mapper,Java,Mapreduce,Out Of Memory,Mapper,我想构建一个具有 输入: 按键1\t3,217412 | 553 | 1518619199 | 151863947 | 151865938 | 1518615517 按键2\t925 | 10295 | 651825557344 | 7344925 | 102957344 |3,2 输出:min\t(2,3),它是value1的每个元素、value2的每个元素……和valueN之间的交点 所以,我设计了我的映射器 mapper1将包含键1、键2、键3的值之间的交点 mapper2将包含键4、键

我想构建一个 MapReduce 程序,其输入和输出如下:

输入

key1\t3,217412 | 553 | 1518619199 | 151863947 | 151865938 | 1518615517
key2\t925 | 10295 | 651825557344 | 7344925 | 102957344 |3,2

输出:min\t(2,3),即 value1 的每个元素、value2 的每个元素……直到 valueN 的每个元素之间的交集

所以,我设计了我的映射器

mapper1 将计算 key1、key2、key3 的值之间的交集

mapper2 将计算 key4、key5、key6 的值之间的交集

然后,我的 reducer 再从这些 mapper 获取结果,以求出最终的交集。所以,基本上我的 mapper 和 reducer 使用相同的代码。在我的代码中,我按顺序求交集,即首先求 value1 和 value2 的交集,然后用该结果与 value3 求交集,依此类推

我的 Mapper:

Mapper-Code1:

public static class MapAPP extends Mapper<Text, Text, Text, Text>{     
    public static int j=0,k=0;
    public static List<String> min_pre = new ArrayList<>();
    public static List<String> min_current = new ArrayList<>();
    public static Set<String> min_p1 = new HashSet<>();
    public static Set<String> min_c1 = new HashSet<>();
    public static List<String> min_result = new ArrayList<>(); 
    public static Boolean no_exist_min=false;

    public void map(Text key, Text value, Context con) throws IOException, InterruptedException
    {
        String[] v=value.toString().split("\t");
        // aggregate min
        if (no_exist_min==false){
            if (j==0){
                    min_pre= Arrays.asList(v[1].toString().trim().split("\\|"));
                    j=1;
                 }else{
                    min_current= Arrays.asList(v[1].toString().trim().split("\\|")); 
                    for (String p: min_pre){                   
                       min_p1 = new HashSet<String>(Arrays.asList(p.split(",")));
                       for (String c: min_current){
                           min_c1 = new HashSet<String>(Arrays.asList(c.split(",")));
                           min_c1.retainAll(min_p1);
                           if (!min_c1.isEmpty()){
                               Joiner m_comma = Joiner.on(",").skipNulls();
                               String buff = m_comma.join(min_c1);
                               if (!min_result.contains(buff))
                                    min_result.add(buff);
                           }                       
                       }                   
                    }
                    if (min_result.isEmpty()){
                        no_exist_min=true;          
                    } else {                    
                        min_pre=new ArrayList(min_result);
                        min_result.clear();                       
                    }
            }                   
        }            
    }

    protected void cleanup(Context con) throws IOException, InterruptedException {
        Joiner m_pipe = Joiner.on("|").skipNulls();
        if (no_exist_min==true){
            con.write(new Text("min"), new Text("no_exist"));
        }else {               
            String min_str = m_pipe.join(min_pre);
            con.write(new Text("min"), new Text(min_str)); 
        }            
    }
}
public static class MapAPP extends Mapper<Text, Text, Text, Text>{     
    public static int j=0,k=0;
    public static List<String> min_pre = new ArrayList<>();
    public static List<String> min_result = new ArrayList<>(); 
    public static Boolean no_exist_min=false;

    public void map(Text key, Text value, Context con) throws IOException, InterruptedException
    {
        String[] v=value.toString().split("\t");
        // aggregate min
        if (no_exist_min==false){
            if (j==0){
                    min_pre= Arrays.asList(v[1].toString().trim().split("\\|"));
                    j=1;
                 }else{
                    List<String> min_current= Arrays.asList(v[1].toString().trim().split("\\|")); 
                    for (String p: min_pre){                   
                       Set<String> min_p1 = new HashSet<String>(Arrays.asList(p.split(",")));
                       for (String c: min_current){
                           Set<String> min_c1 = new HashSet<String>(Arrays.asList(c.split(",")));
                           min_c1.retainAll(min_p1);
                           if (!min_c1.isEmpty()){
                               Joiner m_comma = Joiner.on(",").skipNulls();
                               String buff = m_comma.join(min_c1);
                               if (!min_result.contains(buff))
                                    min_result.add(buff);
                           }                       
                       }                   
                    }
                    if (min_result.isEmpty()){
                        no_exist_min=true;          
                    } else {                    
                        min_pre=new ArrayList(min_result);
                        min_result.clear();                       
                    }
            }                   
        }            
    }

    protected void cleanup(Context con) throws IOException, InterruptedException {
        Joiner m_pipe = Joiner.on("|").skipNulls();
        if (no_exist_min==true){
            con.write(new Text("min"), new Text("no_exist"));
        }else {               
            String min_str = m_pipe.join(min_pre);
            con.write(new Text("min"), new Text(min_str)); 
        }            
    }
}
公共静态类MapAPP扩展映射程序{
公共静态int j=0,k=0;
public static List min_pre=new ArrayList();
public static List min_current=new ArrayList();
公共静态集min_p1=新HashSet();
公共静态集min_c1=新HashSet();
public static List min_result=new ArrayList();
公共静态布尔值no_exist_min=false;
公共void映射(文本键、文本值、上下文con)引发IOException、InterruptedException
{
字符串[]v=value.toString().split(“\t”);
//总最小值
if(不存在\u min==false){
如果(j==0){
minu pre=Arrays.asList(v[1].toString().trim().split(“\\\\\”);
j=1;
}否则{
min_current=Arrays.asList(v[1].toString().trim().split(“\\\\\”);
对于(字符串p:min_pre){
min_p1=新哈希集(Arrays.asList(p.split(“,”));
用于(字符串c:最小电流){
min_c1=新的哈希集(Arrays.asList(c.split(“,”));
最小值c1.保留值(最小值p1);
如果(!min_c1.isEmpty()){
Joiner m_逗号=Joiner.on(“,”).skipNulls();
字符串buff=m_逗号.join(min_c1);
如果(!min_result.contains(buff))
最小结果添加(buff);
}                       
}                   
}
if(minu result.isEmpty()){
不存在\u min=真;
}否则{
min_pre=新阵列列表(min_结果);
最小结果清除();
}
}                   
}            
}
受保护的无效清理(上下文con)引发IOException、InterruptedException{
Joiner m|u pipe=Joiner.on(“|”)skipNulls();
if(不存在\u min==true){
con.write(新文本(“min”)、新文本(“不存在”);
}否则{
管柱最小长度=m_管道连接(最小长度前);
控写(新文本(“min”)、新文本(min_str));
}            
}
}
我的减速器(与Mapper几乎相同):

公共静态类ReduceAPP扩展Reducer
{
公共void reduce(文本键、Iterable值、上下文con)引发IOException、InterruptedException
{
List pre=新建ArrayList();
当前列表=新的ArrayList();
Set p1=新的HashSet();
Set c1=新的HashSet();
列表结果=新建ArrayList();
Joiner逗号=Joiner.on(“,”).skipNulls();
细木工管=细木工.on(“|”).skipNulls();
布尔值no_exist=false;
int i=0;
//聚合
用于(文本值:值){
if(value.toString().trim()=“不存在”){
不存在=真;
打破
}
如果(i==0){
pre=Arrays.asList(value.toString().trim().split(“\\\\”);
i=1;
}否则{
当前=Arrays.asList(value.toString().trim().split(“\\\\”);
对于(字符串p:pre){
p1=新哈希集(Arrays.asList(p.split(“,”));
for(字符串c:当前){
c1=新哈希集(Arrays.asList(c.split(“,”));
c1.保留(p1);
如果(!c1.isEmpty()){
字符串buff=comma.join(c1);
如果(!result.contains(buff))
结果。添加(buff);
}                       
}                   
}
if(result.isEmpty()){
不存在=真;
打破
}
pre=新阵列列表(结果);
result.clear();
}                   
}
如果(不存在==真){
con.write(键,新文本(“不存在”);
}
否则{
管柱预紧=管道连接(预紧);
con.write(键,新文本(preStr));
}            
}
公共静态集合并集(集合集合集合A,集合集合集合B){
设置tmp=新树集(setA);
tmp.addAll(setB);
返回tmp;
}
}    
我的程序在小的输入文件上运行得很好,但在大文件(约 450MB 的文本文件)上总是内存不足。因此,我怀疑我的 Java 代码内存效率不高。在我的 reducer 中,我使用的全是局部变量,这些变量将被销毁
public static class MapAPP extends Mapper<Text, Text, Text, Text>{     
    public static int j=0,k=0;
    public static List<String> min_pre = new ArrayList<>();
    public static List<String> min_result = new ArrayList<>(); 
    public static Boolean no_exist_min=false;

    public void map(Text key, Text value, Context con) throws IOException, InterruptedException
    {
        String[] v=value.toString().split("\t");
        // aggregate min
        if (no_exist_min==false){
            if (j==0){
                    min_pre= Arrays.asList(v[1].toString().trim().split("\\|"));
                    j=1;
                 }else{
                    List<String> min_current= Arrays.asList(v[1].toString().trim().split("\\|")); 
                    for (String p: min_pre){                   
                       Set<String> min_p1 = new HashSet<String>(Arrays.asList(p.split(",")));
                       for (String c: min_current){
                           Set<String> min_c1 = new HashSet<String>(Arrays.asList(c.split(",")));
                           min_c1.retainAll(min_p1);
                           if (!min_c1.isEmpty()){
                               Joiner m_comma = Joiner.on(",").skipNulls();
                               String buff = m_comma.join(min_c1);
                               if (!min_result.contains(buff))
                                    min_result.add(buff);
                           }                       
                       }                   
                    }
                    if (min_result.isEmpty()){
                        no_exist_min=true;          
                    } else {                    
                        min_pre=new ArrayList(min_result);
                        min_result.clear();                       
                    }
            }                   
        }            
    }

    protected void cleanup(Context con) throws IOException, InterruptedException {
        Joiner m_pipe = Joiner.on("|").skipNulls();
        if (no_exist_min==true){
            con.write(new Text("min"), new Text("no_exist"));
        }else {               
            String min_str = m_pipe.join(min_pre);
            con.write(new Text("min"), new Text(min_str)); 
        }            
    }
}