Java 哪种映射程序代码的内存效率更高?
标签:java、mapreduce、out-of-memory、mapper
我想构建一个 MapReduce 作业。输入: key1\t3,217412 | 553 | 1518619199 | 151863947 | 151865938 | 1518615517 key2\t925 | 10295 | 651825557344 | 7344925 | 102957344 |3,2 输出:min\t(2,3),它是 value1 的每个元素、value2 的每个元素……直到 valueN 的每个元素之间的交集。
所以,我这样设计我的映射器(mapper):mapper1 计算 key1、key2、key3 的值之间的交集;mapper2 计算 key4、key5、key6 的值之间的交集。然后,我的 reducer 再从这些 mapper 获取结果,以求出最终的交集。所以,基本上我的 mapper 和 reducer 使用相同的代码。在我的代码中,我按顺序求交集,即首先求 value1 和 value2 之间的交集,然后用该结果与 value3 求交集,依此类推。我的 Mapper(Mapper-Code1):
/**
 * Mapper that folds every record it receives into one running intersection and
 * writes a single {@code ("min", ...)} record from {@link #cleanup}.
 *
 * <p>Each record value is split on a tab and the second field is used. That
 * field is a {@code |}-separated list of groups, and every group is a
 * {@code ,}-separated list of elements. The first record seeds
 * {@link #min_pre}; every later record replaces {@code min_pre} with the
 * distinct, non-empty pairwise intersections between the groups of
 * {@code min_pre} and the groups of the new record. Once a record leaves no
 * intersection, {@link #no_exist_min} is set and later records are ignored.
 *
 * <p>All state lives in static fields (kept public for compatibility), so it is
 * shared by every instance in the JVM; {@link #setup} resets it.
 */
public static class MapAPP extends Mapper<Text, Text, Text, Text>{
    /** Joins the elements of one intersected group. Built once instead of once per match. */
    private static final Joiner COMMA_JOINER = Joiner.on(",").skipNulls();
    /** Joins the surviving groups for the record written by {@link #cleanup}. */
    private static final Joiner PIPE_JOINER = Joiner.on("|").skipNulls();

    /** {@code j} is 0 until the first record has been consumed, then 1. {@code k} is not used by this class. */
    public static int j=0,k=0;
    /** Groups that survived all records seen so far. */
    public static List<String> min_pre = new ArrayList<>();
    /** Groups of the record currently being processed. */
    public static List<String> min_current = new ArrayList<>();
    /** Elements of the {@code min_pre} group currently being compared. */
    public static Set<String> min_p1 = new HashSet<>();
    /** Elements of the current-record group being compared; reduced in place to the intersection. */
    public static Set<String> min_c1 = new HashSet<>();
    /** Accumulator for the intersections produced by the current record; empty between calls. */
    public static List<String> min_result = new ArrayList<>();
    /** Set once some record leaves no intersection at all. */
    public static Boolean no_exist_min=false;

    /**
     * Resets the static state so that a mapper instance never starts from the
     * leftovers of an earlier instance in the same JVM.
     * NOTE(review): relies on the framework invoking setup before the first map call
     * (see the Hadoop Mapper lifecycle documentation).
     */
    protected void setup(Context con) throws IOException, InterruptedException {
        j = 0;
        k = 0;
        min_pre = new ArrayList<>();
        min_current = new ArrayList<>();
        min_p1 = new HashSet<>();
        min_c1 = new HashSet<>();
        min_result = new ArrayList<>();
        no_exist_min = false;
    }

    /**
     * Intersects the groups of one record with the running result.
     *
     * @param key   record key; not used
     * @param value record text; the field after the first tab holds the groups
     * @param con   task context; not written to here (output happens in cleanup)
     * @throws ArrayIndexOutOfBoundsException if {@code value} contains no tab-separated second field
     */
    public void map(Text key, Text value, Context con) throws IOException, InterruptedException
    {
        if (no_exist_min) {
            // The running intersection is already empty; no later record can change that.
            return;
        }
        String[] v = value.toString().split("\t");
        List<String> groups = Arrays.asList(v[1].trim().split("\\|"));
        if (j == 0) {
            // First record: nothing to intersect with yet, it becomes the baseline.
            min_pre = groups;
            j = 1;
            return;
        }
        min_current = groups;
        // Constant-time duplicate check; min_result keeps the first-seen order.
        Set<String> seen = new HashSet<>(min_result);
        for (String p : min_pre) {
            min_p1 = new HashSet<>(Arrays.asList(p.split(",")));
            for (String c : min_current) {
                min_c1 = new HashSet<>(Arrays.asList(c.split(",")));
                min_c1.retainAll(min_p1);
                if (!min_c1.isEmpty()) {
                    String buff = COMMA_JOINER.join(min_c1);
                    if (seen.add(buff)) {
                        min_result.add(buff);
                    }
                }
            }
        }
        if (min_result.isEmpty()) {
            no_exist_min = true;
        } else {
            // Hand the accumulator over instead of copying it and clearing the original.
            min_pre = min_result;
            min_result = new ArrayList<>();
        }
    }

    /**
     * Writes the single output record: {@code ("min", "no_exist")} when the
     * intersection became empty, otherwise {@code "min"} with the surviving
     * groups joined by {@code |}.
     */
    protected void cleanup(Context con) throws IOException, InterruptedException {
        if (no_exist_min) {
            con.write(new Text("min"), new Text("no_exist"));
        } else {
            con.write(new Text("min"), new Text(PIPE_JOINER.join(min_pre)));
        }
    }
}
/**
 * Mapper that reduces all records it receives to one running intersection and
 * emits it as a single {@code ("min", ...)} record from {@link #cleanup}.
 *
 * <p>The second tab-separated field of each record value is a
 * {@code |}-separated list of groups; each group is a {@code ,}-separated list
 * of elements. The first record becomes the baseline in {@link #min_pre}. Each
 * following record replaces it with the distinct, non-empty intersections of
 * every baseline group with every group of that record. When a record yields
 * no intersection, {@link #no_exist_min} is set and the remaining records are
 * skipped.
 *
 * <p>The running state is static (and public, kept for compatibility), so it is
 * shared across instances in one JVM; {@link #setup} resets it.
 */
public static class MapAPP extends Mapper<Text, Text, Text, Text>{
    /** Joins the elements of one intersected group. Created once rather than per match. */
    private static final Joiner COMMA_JOINER = Joiner.on(",").skipNulls();
    /** Joins the surviving groups for the output record. */
    private static final Joiner PIPE_JOINER = Joiner.on("|").skipNulls();

    /** {@code j} flips from 0 to 1 after the first record. {@code k} is not used by this class. */
    public static int j=0,k=0;
    /** Groups that survived every record processed so far. */
    public static List<String> min_pre = new ArrayList<>();
    /** Accumulator for the intersections found for the current record; empty between calls. */
    public static List<String> min_result = new ArrayList<>();
    /** True once a record has produced an empty intersection. */
    public static Boolean no_exist_min=false;

    /**
     * Clears the static state so one mapper instance does not inherit the
     * result of a previous instance in the same JVM.
     * NOTE(review): relies on the framework invoking setup before the first map call
     * (see the Hadoop Mapper lifecycle documentation).
     */
    protected void setup(Context con) throws IOException, InterruptedException {
        j = 0;
        k = 0;
        min_pre = new ArrayList<>();
        min_result = new ArrayList<>();
        no_exist_min = false;
    }

    /**
     * Folds one record into the running intersection.
     *
     * @param key   record key; not used
     * @param value record text; its second tab-separated field holds the groups
     * @param con   task context; nothing is written here
     * @throws ArrayIndexOutOfBoundsException if {@code value} has no second tab-separated field
     */
    public void map(Text key, Text value, Context con) throws IOException, InterruptedException
    {
        if (no_exist_min) {
            // Already empty: further records cannot bring elements back.
            return;
        }
        String[] fields = value.toString().split("\t");
        List<String> currentGroups = Arrays.asList(fields[1].trim().split("\\|"));
        if (j == 0) {
            min_pre = currentGroups;
            j = 1;
            return;
        }
        // Hash-based duplicate detection; the list alone would need a linear scan per candidate.
        Set<String> seen = new HashSet<>(min_result);
        for (String previousGroup : min_pre) {
            Set<String> previousElements = new HashSet<>(Arrays.asList(previousGroup.split(",")));
            for (String currentGroup : currentGroups) {
                Set<String> common = new HashSet<>(Arrays.asList(currentGroup.split(",")));
                common.retainAll(previousElements);
                if (common.isEmpty()) {
                    continue;
                }
                String joined = COMMA_JOINER.join(common);
                if (seen.add(joined)) {
                    min_result.add(joined);
                }
            }
        }
        if (min_result.isEmpty()) {
            no_exist_min = true;
        } else {
            // Swap instead of copy-then-clear: the accumulator simply becomes the new baseline.
            min_pre = min_result;
            min_result = new ArrayList<>();
        }
    }

    /**
     * Emits the final record: {@code "no_exist"} when the intersection is
     * empty, otherwise the surviving groups joined with {@code |}, both under
     * the key {@code "min"}.
     */
    protected void cleanup(Context con) throws IOException, InterruptedException {
        if (no_exist_min) {
            con.write(new Text("min"), new Text("no_exist"));
        } else {
            con.write(new Text("min"), new Text(PIPE_JOINER.join(min_pre)));
        }
    }
}
public static class MapAPP extends Mapper<Text, Text, Text, Text>{
public static int j=0,k=0;
public static List<String> min_pre = new ArrayList<>();
public static List<String> min_current = new ArrayList<>();
public static Set<String> min_p1 = new HashSet<>();
public static Set<String> min_c1 = new HashSet<>();
public static List<String> min_result = new ArrayList<>();
public static Boolean no_exist_min=false;
public void map(Text key, Text value, Context con) throws IOException, InterruptedException
{
String[] v=value.toString().split("\t");
// aggregate min
if (no_exist_min==false){
if (j==0){
min_pre= Arrays.asList(v[1].toString().trim().split("\\|"));
j=1;
}else{
min_current= Arrays.asList(v[1].toString().trim().split("\\|"));
for (String p: min_pre){
min_p1 = new HashSet<String>(Arrays.asList(p.split(",")));
for (String c: min_current){
min_c1 = new HashSet<String>(Arrays.asList(c.split(",")));
min_c1.retainAll(min_p1);
if (!min_c1.isEmpty()){
Joiner m_comma = Joiner.on(",").skipNulls();
String buff = m_comma.join(min_c1);
if (!min_result.contains(buff))
min_result.add(buff);
}
}
}
if (min_result.isEmpty()){
no_exist_min=true;
} else {
min_pre=new ArrayList(min_result);
min_result.clear();
}
}
}
}
protected void cleanup(Context con) throws IOException, InterruptedException {
Joiner m_pipe = Joiner.on("|").skipNulls();
if (no_exist_min==true){
con.write(new Text("min"), new Text("no_exist"));
}else {
String min_str = m_pipe.join(min_pre);
con.write(new Text("min"), new Text(min_str));
}
}
}
我的 Reducer(与 Mapper 几乎相同):
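public static class ReduceAPP extends Reducer<Text, Text, Text, Text>
{
public void reduce(Text key, Iterable<Text> values, Context con) throws IOException, InterruptedException
{
List<String> pre = new ArrayList<>();
List<String> current = new ArrayList<>();
Set<String> p1 = new HashSet<>();
Set<String> c1 = new HashSet<>();
List<String> result = new ArrayList<>();
Joiner comma = Joiner.on(",").skipNulls();
Joiner pipe = Joiner.on("|").skipNulls();
Boolean no_exist = false;
int i = 0;
// aggregate
for (Text value : values){
if ("no_exist".equals(value.toString().trim())){
no_exist = true;
break;
}
if (i==0){
pre = Arrays.asList(value.toString().trim().split("\\|"));
i=1;
}else{
current = Arrays.asList(value.toString().trim().split("\\|"));
for (String p: pre){
p1 = new HashSet<String>(Arrays.asList(p.split(",")));
for (String c: current){
c1 = new HashSet<String>(Arrays.asList(c.split(",")));
c1.retainAll(p1);
if (!c1.isEmpty()){
String buff = comma.join(c1);
if (!result.contains(buff))
result.add(buff);
}
}
}
if (result.isEmpty()){
no_exist = true;
break;
}
pre = new ArrayList<String>(result);
result.clear();
}
}
if (no_exist==true){
con.write(key, new Text("no_exist"));
}
else {
String preStr = pipe.join(pre);
con.write(key, new Text(preStr));
}
}
public static Set<String> union(Set<String> setA, Set<String> setB) {
Set<String> tmp = new TreeSet<String>(setA);
tmp.addAll(setB);
return tmp;
}
}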
我在小的输入文件上运行得很好,但在大文件(约 450 MB 的文本文件)上总是内存不足。因此,我怀疑我的 Java 代码内存效率不高。在我的 reducer 中,我使用的全部是局部变量,这些变量会被销毁。
/**
 * Mapper that keeps a running intersection over all records it is given and
 * writes it out once, under the key {@code "min"}, from {@link #cleanup}.
 *
 * <p>A record value is split on a tab; its second field is a
 * {@code |}-separated list of groups, each group a {@code ,}-separated list of
 * elements. The first record seeds {@link #min_pre}. Every later record
 * replaces {@code min_pre} with the distinct, non-empty intersections between
 * each group of {@code min_pre} and each group of that record. If a record
 * produces none, {@link #no_exist_min} is set and later records are ignored.
 *
 * <p>The state is held in static fields (left public for compatibility), so all
 * instances in one JVM share it; {@link #setup} resets it.
 */
public static class MapAPP extends Mapper<Text, Text, Text, Text>{
    /** Joins the elements of one intersected group. Shared instead of rebuilt per match. */
    private static final Joiner COMMA_JOINER = Joiner.on(",").skipNulls();
    /** Joins the surviving groups for the output record. */
    private static final Joiner PIPE_JOINER = Joiner.on("|").skipNulls();

    /** {@code j} is 0 before the first record and 1 afterwards. {@code k} is not used by this class. */
    public static int j=0,k=0;
    /** Groups that survived all records so far. */
    public static List<String> min_pre = new ArrayList<>();
    /** Accumulator filled while a record is processed; empty between calls. */
    public static List<String> min_result = new ArrayList<>();
    /** True once the running intersection has become empty. */
    public static Boolean no_exist_min=false;

    /**
     * Resets the static state so a new mapper instance does not continue from
     * what an earlier instance in the same JVM left behind.
     * NOTE(review): relies on the framework invoking setup before the first map call
     * (see the Hadoop Mapper lifecycle documentation).
     */
    protected void setup(Context con) throws IOException, InterruptedException {
        j = 0;
        k = 0;
        min_pre = new ArrayList<>();
        min_result = new ArrayList<>();
        no_exist_min = false;
    }

    /**
     * Intersects one record with the running result.
     *
     * @param key   record key; not used
     * @param value record text; the groups are read from its second tab-separated field
     * @param con   task context; not written to here
     * @throws ArrayIndexOutOfBoundsException if {@code value} has no second tab-separated field
     */
    public void map(Text key, Text value, Context con) throws IOException, InterruptedException
    {
        if (no_exist_min) {
            // Nothing left to intersect with; skip the parsing work as well.
            return;
        }
        String[] fields = value.toString().split("\t");
        List<String> incoming = Arrays.asList(fields[1].trim().split("\\|"));
        if (j == 0) {
            min_pre = incoming;
            j = 1;
            return;
        }
        collectIntersections(min_pre, incoming, min_result);
        if (min_result.isEmpty()) {
            no_exist_min = true;
        } else {
            // The accumulator becomes the new baseline; no copy, no clear.
            min_pre = min_result;
            min_result = new ArrayList<>();
        }
    }

    /**
     * Appends to {@code out} every distinct, non-empty intersection between a
     * group of {@code previous} and a group of {@code current}, in first-seen order.
     */
    private static void collectIntersections(List<String> previous, List<String> current, List<String> out) {
        // Tracks what is already in out so duplicates are rejected without scanning the list.
        Set<String> seen = new HashSet<>(out);
        for (String left : previous) {
            Set<String> leftElements = new HashSet<>(Arrays.asList(left.split(",")));
            for (String right : current) {
                Set<String> shared = new HashSet<>(Arrays.asList(right.split(",")));
                shared.retainAll(leftElements);
                if (!shared.isEmpty()) {
                    String joined = COMMA_JOINER.join(shared);
                    if (seen.add(joined)) {
                        out.add(joined);
                    }
                }
            }
        }
    }

    /**
     * Writes {@code ("min", "no_exist")} when the intersection is empty,
     * otherwise {@code "min"} with the surviving groups joined by {@code |}.
     */
    protected void cleanup(Context con) throws IOException, InterruptedException {
        if (no_exist_min) {
            con.write(new Text("min"), new Text("no_exist"));
        } else {
            con.write(new Text("min"), new Text(PIPE_JOINER.join(min_pre)));
        }
    }
}