Java service - Spark communication (Java, REST, Apache Spark)


I am new to Spark and looking for some guidance :)

I have a Java service that acts as the backend for my application. It receives ad-hoc queries from the UI, and I have found that some of these queries take a long time, so I decided to move those tasks to Spark. However, I am still trying to figure out how to communicate with Apache Spark from Java.


I have seen other SO questions, and it looks like Ooyala's Spark Job Server would solve my problem. I just want to know whether there are other ways to solve it.

Here is sample code for using Spark from Java. You can write your own logic once you are inside Spark; I have used the prebuilt KMeans clustering library here, but you can plug in your own.

import java.util.regex.Pattern;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.mllib.clustering.KMeans;
import org.apache.spark.mllib.clustering.KMeansModel;
import org.apache.spark.mllib.linalg.Vector;
import org.apache.spark.mllib.linalg.Vectors;

public final class Spark_KMeans {

    // Parses one line of space-separated numbers into an MLlib dense vector.
    private static class ParsePoint implements Function<String, Vector> {
        private static final Pattern SPACE = Pattern.compile(" ");

        @Override
        public Vector call(String line) {
            String[] tok = SPACE.split(line);
            double[] point = new double[tok.length];
            for (int i = 0; i < tok.length; ++i) {
                point[i] = Double.parseDouble(tok[i]);
            }
            return Vectors.dense(point);
        }
    }

    public static void main(String[] args1) {

        // Hard-coded arguments: <input_file> <k> <max_iterations> [<runs>]
        String[] args = { "/usr/spark_pack/spark_input", "3", "5" };

        if (args.length < 3) {
            System.err
                    .println("Usage: JavaKMeans <input_file> <k> <max_iterations> [<runs>]");
            System.exit(1);
        }
        String inputFile = args[0];
        int k = Integer.parseInt(args[1]);
        int iterations = Integer.parseInt(args[2]);
        int runs = 1;
        if (args.length >= 4) {
            runs = Integer.parseInt(args[3]);
        }

        String sparkHome = "/usr/spark_pack/spark-1.3.0-bin-hadoop2.4/";
        String sparkMasterUrl = "spark://master:7077";

        // Jars shipped to the workers: the Spark assembly and the application jar.
        String jarFile1 = "/usr/spark_pack/spark-1.3.0-bin-hadoop2.4/lib/spark-assembly-1.3.0-hadoop2.4.0.jar";
        String jarFile2 = "/usr/spark_pack/spark_jar/spark_mlib.jar";

        /*
         * JavaStreamingContext ssc = new JavaStreamingContext(sparkMasterUrl,
         * "Kshitij Stream Engine", new Duration(1000), sparkHome);
         */
        SparkConf conf = new SparkConf().setAppName("JavaKMeans")
                .setMaster(sparkMasterUrl).setSparkHome(sparkHome)
                .setJars(new String[] { jarFile1, jarFile2 });

        JavaSparkContext sc = new JavaSparkContext(conf);

        /*
         * SparkConf sparkConf = new SparkConf().setAppName("JavaKMeans");
         * JavaSparkContext sc = new JavaSparkContext(sparkConf);
         */

        JavaRDD<String> lines = sc.textFile(inputFile);

        // **** This portion is the algorithm part ****
        JavaRDD<Vector> points = lines.map(new ParsePoint());
        KMeansModel model = KMeans.train(points.rdd(), k, iterations, runs,
                KMeans.K_MEANS_PARALLEL());
        System.out.println("Cluster centers:");
        for (Vector center : model.clusterCenters()) {
            System.out.println(" " + center);
        }
        double cost = model.computeCost(points.rdd());
        System.out.println("Cost: " + cost);
        sc.stop();
    }
}
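The KMeans call above is just the prebuilt MLlib routine this example happens to use; the same SparkContext setup works for any ad-hoc computation. Below is a minimal sketch of replacing the clustering step with a simple filter-and-count query against the same Spark 1.3 Java API. The master URL, input path, and the "ERROR" keyword are only placeholders for whatever your UI would send to the backend.

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.Function;

public final class SparkAdHocQuery {
    public static void main(String[] args) {
        // Same style of configuration as the KMeans example; the master URL
        // and input path below are placeholders for your own deployment.
        SparkConf conf = new SparkConf()
                .setAppName("AdHocQuery")
                .setMaster("spark://master:7077");
        JavaSparkContext sc = new JavaSparkContext(conf);

        JavaRDD<String> lines = sc.textFile("/usr/spark_pack/spark_input");

        // The ad-hoc part: keep only the lines the query cares about and
        // count them; any chain of transformations could go here instead.
        long matches = lines.filter(new Function<String, Boolean>() {
            @Override
            public Boolean call(String line) {
                return line.contains("ERROR");
            }
        }).count();

        System.out.println("Matching lines: " + matches);
        sc.stop();
    }
}

If you embed this in a backend service, you would typically create the JavaSparkContext once at startup and reuse it across requests, since building a new context per query adds noticeable startup cost.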
