
How to calculate total salary with the Spark Java API


I am new to Spark and I am working with the Spark Java API. I have a file:

1201, John, 2500
1202, Alex, 2800
1203, amith, 3900
1204, javed, 2300
1205, Saminga, 23000
Now I need to calculate the total salary and store it in a file. Since I am very new to MR/Spark Java API, I am not able to work it out. Can anyone please help me with this?

Sample code:

import java.util.Arrays;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.FlatMapFunction;
import org.apache.spark.api.java.function.Function;

public class SalarySum {

    public static void main(String[] args)
    {
        if (args.length < 2)
        {
            System.out.println("Please provide input and output files for processing");
            System.exit(0);
        }
        else
        {
            String inputFile = args[0];
            String outputFile = args[1];
            SparkConf config = new SparkConf().setAppName("Total Salary Example");
            JavaSparkContext sparkContext = new JavaSparkContext(config);

            JavaRDD<String> inputReader = sparkContext.textFile(inputFile);

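            // This flatMap wraps each input line in a single-element list,
            // so it is effectively an identity map over the lines.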
            JavaRDD<String> map=inputReader.flatMap(new FlatMapFunction<String, String>() {
                @Override
                public Iterable<String> call(String t) throws Exception
                {
                    System.out.println("Flat Map Data: "+t);
                    return Arrays.asList(t);
                }
            });

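            // Groups the records by their salary value, so each distinct
            // salary becomes one key with the matching lines as its values.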
            JavaPairRDD<Integer, Iterable<String>> group=map.groupBy(new Function<String, Integer>() {

                @Override
                public Integer call(String s2) throws Exception
                {
                    String data=s2.split(",")[2].trim();
                    int value=Integer.parseInt(data);
                    System.out.println("Tuple: "+s2 +" : "+data);
                    return value;
                }
            });


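            // Sums the salaries within each group. Note that this produces
            // one sum per distinct salary, not a single grand total.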
            JavaPairRDD<Integer, Integer> totalSaleData = group.flatMapValues(new Function<Iterable<String>, Iterable<Integer>>() {

                @Override
                public Iterable<Integer> call(Iterable<String> v1)
                        throws Exception 
                {
                    int count=0;
                    for(String str:v1)
                    {
                        String data=str.split(",")[2].trim();
                        int value=Integer.parseInt(data);
                        System.out.println("Iterating Values : "+str);
                        System.out.println("Count: "+count);
                        count =count+value;
                    }
                    return Arrays.asList(count);
                }
            });

            totalSaleData.saveAsTextFile(outputFile);

        }
    }

}
You can do it with Spark 1.6 as follows:

import java.util.Arrays;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;

import scala.Tuple2;

public class SparkSalarySum {
    public static void main(String[] args) {
        SparkConf conf = new SparkConf().setAppName("SparkSalarySum").setMaster("local[2]");
        JavaSparkContext jsc = new JavaSparkContext(conf);
        JavaRDD<String> lines = jsc.textFile("c:\\temp\\test.txt");

        // Pull the salary out of each line, key every salary with the same
        // "Total" key, and let reduceByKey add them all up.
        JavaPairRDD<String, Integer> total = lines
                .flatMap(line -> Arrays.asList(Integer.parseInt(line.split(",")[2].trim())))
                .mapToPair(sal -> new Tuple2<String, Integer>("Total", sal))
                .reduceByKey((x, y) -> x + y);

        total.foreach(data -> {
            System.out.println(data._1() + "-" + data._2());
        });

        // Coalesce to a single partition so the sum ends up in one output file.
        total.coalesce(1).saveAsTextFile("c:\\temp\\testOut");
        jsc.stop();
    }
}
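As a side note, because the result here is just one number, the same total can be computed without any keying at all. The sketch below is illustrative (the class name SalaryTotalReduce and the local paths are made up for the example, and it assumes the same three-column input as above): it maps each line to its salary and reduces the RDD to a single value.

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;

public class SalaryTotalReduce {
    public static void main(String[] args) {
        SparkConf conf = new SparkConf().setAppName("SalaryTotalReduce").setMaster("local[2]");
        JavaSparkContext jsc = new JavaSparkContext(conf);

        // Input lines look like "1201, John, 2500": id, name, salary.
        JavaRDD<String> lines = jsc.textFile("c:\\temp\\test.txt");

        // Map each line straight to its salary column, then reduce the
        // whole RDD to a single grand total; no key/group step is needed.
        int total = lines
                .map(line -> Integer.parseInt(line.split(",")[2].trim()))
                .reduce((x, y) -> x + y);

        System.out.println("Total salary: " + total);
        jsc.stop();
    }
}

If the total has to land in a file as in the question, jsc.parallelize(Arrays.asList(total)).coalesce(1).saveAsTextFile(...) writes it out the same way as the answer above. Note also that on Spark 2.x the answer's flatMap would have to return an Iterator (for example Arrays.asList(...).iterator()) instead of an Iterable, while the map/reduce shape above runs unchanged.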

Is your input file txt or csv? Do you want to use RDDs instead of DataFrames? What is the expected output in the file?
My input file is a text file, I need to use RDDs, and the output must be the sum of all the salaries.
Hi, can't we write the same code with the JavaSparkContext API? I am using the Spark 1.6 version.
Updated the answer for Spark 1.6.
But when storing the result into a text file with this code, I don't see any way to save the result to a file.
Updated the answer. Please check.
You can read the Spark documentation, as it provides examples in Scala, Java, Python, and R. You should refer to the Java examples folder in the Spark installation.