Scala - can 'this' be null for a live object?


I am seeing something that goes against my understanding. My understanding is that 'this' cannot be null for a live object, yet I am running into exactly that in the case shown below.

Context - I am using the XGBoost4J-Spark package in this example. You can look at its source code; more specifically, I am referring to the XGBoostEstimator class. I have defined this class as shown below, with only a print statement added:

package ml.dmlc.xgboost4j.scala.spark

import ml.dmlc.xgboost4j.scala.{EvalTrait, ObjectiveTrait}
import org.apache.spark.ml.{Predictor, Estimator}
import org.apache.spark.ml.param.ParamMap
import org.apache.spark.ml.util.Identifiable
import org.apache.spark.mllib.linalg.{VectorUDT, Vector}
import org.apache.spark.mllib.regression.LabeledPoint
import org.apache.spark.sql.functions._
import org.apache.spark.sql.types.{NumericType, DoubleType, StructType}
import org.apache.spark.sql.{DataFrame, TypedColumn, Dataset, Row}

/**
 * the estimator wrapping XGBoost to produce a training model
 *
 * @param inputCol the name of input column
 * @param labelCol the name of label column
 * @param xgboostParams the parameters configuring XGBoost
 * @param round the number of iterations to train
 * @param nWorkers the total number of workers of xgboost
 * @param obj the customized objective function, default to be null and using the default in model
 * @param eval the customized eval function, default to be null and using the default in model
 * @param useExternalMemory whether to use external memory when training
 * @param missing the value taken as missing
 */
class XGBoostEstimator(
    inputCol: String, labelCol: String,
    xgboostParams: Map[String, Any], round: Int, nWorkers: Int,
    obj: Option[ObjectiveTrait] = None,
    eval: Option[EvalTrait] = None, useExternalMemory: Boolean = false, missing: Float = Float.NaN)
  extends Estimator[XGBoostModel] {

  println(s"This is ${this}")
  override val uid: String = Identifiable.randomUID("XGBoostEstimator")


  /**
   * produce a XGBoostModel by fitting the given dataset
   */
  def fit(trainingSet: Dataset[_]): XGBoostModel = {
    val instances = trainingSet.select(
      col(inputCol), col(labelCol).cast(DoubleType)).rdd.map {
      case Row(feature: Vector, label: Double) =>
        LabeledPoint(label, feature)
    }
    transformSchema(trainingSet.schema, logging = true)
    val trainedModel = XGBoost.trainWithRDD(instances, xgboostParams, round, nWorkers, obj.get,
      eval.get, useExternalMemory, missing).setParent(this)
    copyValues(trainedModel)
  }

  override def copy(extra: ParamMap): Estimator[XGBoostModel] = {
    defaultCopy(extra)
  }

  override def transformSchema(schema: StructType): StructType = {
    // check input type, for now we only support vectorUDT as the input feature type
    val inputType = schema(inputCol).dataType
    require(inputType.equals(new VectorUDT), s"the type of input column $inputCol has to be VectorUDT")
    // check label Type,
    val labelType = schema(labelCol).dataType
    require(labelType.isInstanceOf[NumericType], s"the type of label column $labelCol has to" +
      s" be NumericType")
    schema
  }
}
When I initialize the same code through the Spark shell (or via a test), I get output like this:

scala> import ml.dmlc.xgboost4j.scala.spark.XGBoostEstimator
import ml.dmlc.xgboost4j.scala.spark.XGBoostEstimator

scala> val xgb = new XGBoostEstimator("features", "label", Map.empty,10, 2)
This is null
xgb: ml.dmlc.xgboost4j.scala.spark.XGBoostEstimator = XGBoostEstimator_6cd31d495c8f

scala> xgb.uid
res1: String = XGBoostEstimator_6cd31d495c8f
Any clarification on why and when this behaviour can occur would be helpful.

Are you sure it isn't this.toString() returning the string "null"? What do you get if you instead print println("null? " + (this eq null))? – sjrd

@sjrd I had overlooked that toString is already overridden in the base class. You are right, it is toString that causes the problem.

Your toString() implementation comes from Identifiable, and it simply returns the uid. Since you only assign uid on the line after the println, it is not yet initialized at the moment you print.

Identifiable:

trait Identifiable {

  /**
   * An immutable unique ID for the object and its derivatives.
   */
  val uid: String

  override def toString: String = uid
}