Java / MojoFailureException (updated)


Thanks to @holden, I made sure to add these lines:

// Additional imports
import org.apache.spark.api.java.*;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.sql.SQLContext;
// added these as starting lines in the class
SparkConf conf = new SparkConf().setAppName("Simple Estimator");
JavaSparkContext sc = new JavaSparkContext(conf);
SQLContext sqlContext = new org.apache.spark.sql.SQLContext(sc);
This got things moving a bit, but now I get the following error:

[ERROR] Failed to execute goal org.apache.maven.plugins:maven-compiler-plugin:3.1:compile (default-compile) on project simple-estimator: Compilation failure
[ERROR] /Users/philip/study/spark/estimator/src/main/java/SimpleEstimator.java:[21,36] no suitable method found for createDataFrame(java.util.List,java.lang.Class)
[ERROR] method org.apache.spark.sql.SQLContext.createDataFrame(org.apache.spark.rdd.RDD,scala.reflect.api.TypeTags.TypeTag) is not applicable
[ERROR] (cannot infer type-variable(s) A
[ERROR] (argument mismatch; java.util.List cannot be converted to org.apache.spark.rdd.RDD))
[ERROR] method org.apache.spark.sql.SQLContext.createDataFrame(scala.collection.Seq,scala.reflect.api.TypeTags.TypeTag) is not applicable
[ERROR] (cannot infer type-variable(s) A
[ERROR] (argument mismatch; java.util.List cannot be converted to scala.collection.Seq))
[ERROR] method org.apache.spark.sql.SQLContext.createDataFrame(org.apache.spark.rdd.RDD,org.apache.spark.sql.types.StructType) is not applicable
[ERROR] (argument mismatch; java.util.List cannot be converted to org.apache.spark.rdd.RDD)
[ERROR] method org.apache.spark.sql.SQLContext.createDataFrame(org.apache.spark.api.java.JavaRDD,org.apache.spark.sql.types.StructType) is not applicable
[ERROR] (argument mismatch; java.util.List cannot be converted to org.apache.spark.api.java.JavaRDD)
[ERROR] method org.apache.spark.sql.SQLContext.createDataFrame(org.apache.spark.rdd.RDD,java.lang.Class) is not applicable
[ERROR] (argument mismatch; java.util.List cannot be converted to org.apache.spark.rdd.RDD)
[ERROR] method org.apache.spark.sql.SQLContext.createDataFrame(org.apache.spark.api.java.JavaRDD,java.lang.Class) is not applicable
[ERROR] (argument mismatch; java.util.List cannot be converted to org.apache.spark.api.java.JavaRDD)
The code the error points at comes directly from the example:

DataFrame training = sqlContext.createDataFrame(Arrays.asList(
      new LabeledPoint(1.0, Vectors.dense(0.0, 1.1, 0.1)),
      new LabeledPoint(0.0, Vectors.dense(2.0, 1.0, -1.0)),
      new LabeledPoint(0.0, Vectors.dense(2.0, 1.3, 1.0)),
      new LabeledPoint(1.0, Vectors.dense(0.0, 1.2, -0.5))
    ), LabeledPoint.class);

The examples often skip creating the sqlContext and sc (or SparkContext), since they are the same for each example. Take a look at how to create the sqlContext, and how to create the sc (or SparkContext).

You likely need something like the following.

Some additional imports:

//Additional imports
import org.apache.spark.api.java.*;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.function.Function;
And at the start of your main method, add:

// In your method:
SparkConf conf = new SparkConf().setAppName("Simple Application");
JavaSparkContext sc = new JavaSparkContext(conf);
SQLContext sqlContext = new org.apache.spark.sql.SQLContext(sc);
Based on your update, the second issue you are hitting is creating the DataFrame (again left out of the Java example). The method you are trying to use hasn't been implemented yet (in fact I have a pending pull request to implement something similar, although that version requires rows and a schema; I've added a JIRA to track adding this for local JavaBeans).

Thankfully this extra step isn't much code. Instead of:

   DataFrame test = sqlContext.createDataFrame(Arrays.asList(
      new LabeledPoint(1.0, Vectors.dense(-1.0, 1.5, 1.3)),
      new LabeledPoint(0.0, Vectors.dense(3.0, 2.0, -0.1)),
      new LabeledPoint(1.0, Vectors.dense(0.0, 2.2, -1.5))
    ), LabeledPoint.class);
we instead do:

   DataFrame test = sqlContext.createDataFrame(sc.parallelize(
      Arrays.asList(
        new LabeledPoint(1.0, Vectors.dense(-1.0, 1.5, 1.3)),
        new LabeledPoint(0.0, Vectors.dense(3.0, 2.0, -0.1)),
        new LabeledPoint(1.0, Vectors.dense(0.0, 2.2, -1.5))
    )), LabeledPoint.class);

In addition to the undefined sql and spark contexts @holden mentioned above, the Java example you referenced is also missing a key step: converting the list of LabeledPoints into an RDD (see the link for more information).

To fix this, you can use the sc.parallelize method of JavaSparkContext to convert the list into a JavaRDD object, which is what the createDataFrame method expects as an argument. See the code snippet below:

DataFrame training = sqlContext.createDataFrame(sc.parallelize(
        Arrays.asList(
            new LabeledPoint(1.0, Vectors.dense(0.0, 1.1, 0.1)),
            new LabeledPoint(0.0, Vectors.dense(2.0, 1.0, -1.0)),
            new LabeledPoint(0.0, Vectors.dense(2.0, 1.3, 1.0)),
            new LabeledPoint(1.0, Vectors.dense(0.0, 1.2, -0.5))
        )
    ), LabeledPoint.class);
In addition, if you want to run the program standalone through Maven, you need to specify the master URL in the code. You can simply use local[2] as the URL to run Spark locally with 2 threads:

SparkConf conf = new SparkConf()
    .setMaster("local[2]")
    .setAppName("Simple Estimator");
Normally this is supplied to your program from the environment when you use the spark-submit script; in that case you can skip it.

Finally, you may want to mute Spark's verbose log messages so that it is easier to follow the output of the algorithm. This step is optional, so you can skip it.
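As an alternative to redirecting System.err (the approach used in the full listing below), here is a minimal sketch that quiets the loggers directly, assuming the log4j 1.x classes bundled with Spark 1.x are on the classpath:

import org.apache.log4j.Level;
import org.apache.log4j.Logger;

// Raise the threshold on Spark's chattiest logger hierarchies;
// "org" covers org.apache.spark, and "akka" covers the actor system Spark 1.x uses.
Logger.getLogger("org").setLevel(Level.WARN);
Logger.getLogger("akka").setLevel(Level.WARN);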

See below for the full code with all of the mentioned modifications.

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.ml.classification.LogisticRegression;
import org.apache.spark.ml.classification.LogisticRegressionModel;
import org.apache.spark.ml.param.ParamMap;
import org.apache.spark.mllib.linalg.Vectors;
import org.apache.spark.mllib.regression.LabeledPoint;
import org.apache.spark.sql.DataFrame;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SQLContext;

//ADDITIONAL IMPORTS FOR MUTING SYS.ERR
import java.io.IOException;
import java.io.OutputStream;
import java.io.PrintStream;

import java.util.Arrays;

public class SimpleEstimator {
    public static void main(String[] args) {
        //MUTE LOG MESSAGES  FOR READABILITY (OPTIONAL)
        System.setErr(new PrintStream(new OutputStream() {
            @Override
            public void write(int arg0) throws IOException {
                // keep empty
            }
        }));

        // added these as starting lines in class
        SparkConf conf = new SparkConf()
                .setMaster("local[2]")
                .setAppName("Simple Estimator");

        JavaSparkContext sc = new JavaSparkContext(conf);
        SQLContext sqlContext = new SQLContext(sc);

        DataFrame training = sqlContext.createDataFrame(sc.parallelize(
                Arrays.asList(
                        new LabeledPoint(1.0, Vectors.dense(0.0, 1.1, 0.1)),
                        new LabeledPoint(0.0, Vectors.dense(2.0, 1.0, -1.0)),
                        new LabeledPoint(0.0, Vectors.dense(2.0, 1.3, 1.0)),
                        new LabeledPoint(1.0, Vectors.dense(0.0, 1.2, -0.5))
                )
        ), LabeledPoint.class);

        LogisticRegression lr = new LogisticRegression();
        System.out.println("LogisticRegression parameters:\n" + lr.explainParams() + "\n");

        lr.setMaxIter(10)
                .setRegParam(0.01);

        LogisticRegressionModel model1 = lr.fit(training);

        System.out.println("Model 1 was fit using parameters: " + model1.parent().extractParamMap());

        ParamMap paramMap = new ParamMap()
                .put(lr.maxIter().w(20)) // Specify 1 Param.
                .put(lr.maxIter(), 30) // This overwrites the original maxIter.
                .put(lr.regParam().w(0.1), lr.threshold().w(0.55)); // Specify multiple Params.

        ParamMap paramMap2 = new ParamMap()
                .put(lr.probabilityCol().w("myProbability")); // Change output column name
        ParamMap paramMapCombined = paramMap.$plus$plus(paramMap2);

        LogisticRegressionModel model2 = lr.fit(training, paramMapCombined);
        System.out.println("Model 2 was fit using parameters: " + model2.parent().extractParamMap());

        DataFrame test = sqlContext.createDataFrame(sc.parallelize(
                Arrays.asList(
                        new LabeledPoint(1.0, Vectors.dense(-1.0, 1.5, 1.3)),
                        new LabeledPoint(0.0, Vectors.dense(3.0, 2.0, -0.1)),
                        new LabeledPoint(1.0, Vectors.dense(0.0, 2.2, -1.5))
                )
        ), LabeledPoint.class);

        DataFrame results = model2.transform(test);
        for (Row r: results.select("features", "label", "myProbability", "prediction").collect()) {
            System.out.println("(" + r.get(0) + ", " + r.get(1) + ") -> prob=" + r.get(2)
                    + ", prediction=" + r.get(3));
        }
    }
}

This doesn't really have anything to do with Maven. The sqlContext variable is not defined, and I don't see it declared anywhere in the linked example either…

Thanks, this helped move things forward. I've hit my vote cap for today, though; I'll be sure to upvote tomorrow.

Thank you. Just to clarify: when you say "you need to specify the master URL", is that only if I intend to run the program with mvn exec:java -Dexec.mainClass="SimpleEstimator"? Normally I wouldn't specify it in the code itself, and would instead run something like bin/spark-submit --class "SimpleEstimator" --master local[4] ~/study/spark/estimator/target/simple-estimator-1.0.jar

Correct. If it is specified somewhere else, there is no need to specify it in the code; in your case that is spark-submit. I'll update the answer to clarify.
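If you want the same jar to work both ways, one possible pattern (not from the answer above; it only assumes SparkConf's standard contains method) is to fall back to a local master only when none was supplied:

SparkConf conf = new SparkConf().setAppName("Simple Estimator");
// spark-submit passes the master via the spark.master property,
// so only fall back to local mode when it is absent (e.g. when run with mvn exec:java).
if (!conf.contains("spark.master")) {
    conf.setMaster("local[2]");
}
JavaSparkContext sc = new JavaSparkContext(conf);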