Warning: file_get_contents(/data/phpspider/zhask/data//catemap/6/mongodb/11.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181
Mongodb 在Db上使用Spark Mongo Hadoop读写时,错误状态应为打开_Mongodb_Scala_Hadoop_Apache Spark - Fatal编程技术网

Mongodb 在Db上使用Spark Mongo Hadoop读写时,错误状态应为打开

Mongodb 在Db上使用Spark Mongo Hadoop读写时,错误状态应为打开,mongodb,scala,hadoop,apache-spark,Mongodb,Scala,Hadoop,Apache Spark,情景: 我有一个从卡夫卡收集的RDD,在映射和还原之后,我想将它写入数据库db\u aggregatecollectiondate\u xx 映射时,我需要从该数据库中获取以获得以前的结果 就像我写A一样,我需要B的结果(以前写过)来计算,然后把A写到db 我认为我面临的问题是,当我在向数据库中写入新记录时,读取db_聚合时,db游标可能会通过一个操作“写入”或“读取”关闭 我使用的是Spark 1.4.1 mongo hadoop.1.4.1 mongo 2.6 职能: def getPrev

情景: 我有一个从卡夫卡收集的RDD,在映射和还原之后,我想将它写入数据库
db\u aggregate
collection
date\u xx

映射时,我需要从该数据库中获取以获得以前的结果

就像我写A一样,我需要B的结果(以前写过)来计算,然后把A写到db

我认为我面临的问题是：当我在向数据库写入新记录的同时读取 db_aggregate 时，db 游标可能会被另一个"写入"或"读取"操作关闭

我使用的是 Spark 1.4.1、mongo-hadoop 1.4.1、MongoDB 2.6

职能:

def getPreviousAggregate(campaignId: String, publisher: String, width: Int, height: Int,
                           date: Int, month: Int, year: Int): BasicBSONObject = {
    // Returns the latest aggregate for the given day, falling back to the
    // previous calendar day when the requested day has no record.
    // Returns null when neither day has a record (kept for backward
    // compatibility with existing callers of this method).
    findLatestAggregate(campaignId, publisher, width, height, date, month, year)
      .orElse {
        println("Not found previous date ....")
        // Roll the requested date back one day; Calendar handles
        // month/year boundaries (e.g. the 1st of a month) for us.
        val previousDate = Calendar.getInstance()
        previousDate.set(year, month, date)
        previousDate.add(Calendar.DATE, -1)
        findLatestAggregate(campaignId, publisher, width, height,
          previousDate.get(Calendar.DATE),
          previousDate.get(Calendar.MONTH),
          previousDate.get(Calendar.YEAR))
      }
      .orNull
  }

  def findLatestAggregate(campaignId: String, publisher: String, width: Int, height: Int,
                          date: Int, month: Int, year: Int): Option[BasicBSONObject] = {
    // Reads the per-day aggregate collection (db name "<DB_AGGREGATE>_MM_YYYY",
    // collection "<COLL_AGGREGATE>_DD") through mongo-hadoop and returns the
    // most recent record matching the campaign/publisher/size, or None.
    val config = new Configuration()
    val outDb = DB_AGGREGATE + "_%02d_%s".format(month, year)
    val collName: String = COLL_AGGREGATE + "_%02d".format(date)
    val mongoInputUri = "mongodb://%s:%s/%s.%s".format(DB_STATISTIC_HOST, DB_STATISTIC_PORT, outDb, collName)
    config.set("mongo.input.uri", mongoInputUri)
    try {
      val aggregate = sc.newAPIHadoopRDD(config,
        classOf[MongoInputFormat],
        classOf[Object],
        classOf[BSONObject])
      // Filter first (cheap, shrinks the data), then sort DESCENDING by
      // timestamp so take(1) yields the LATEST record — the original sorted
      // ascending, which returned the earliest record instead.
      // NOTE(review): timestamps are compared as strings; lexicographic order
      // only matches numeric order while all values have the same digit
      // count — confirm against how "timestamp" is written.
      val res = aggregate.filter(r =>
        Integer.parseInt(r._2.get("width").toString) == width
          && Integer.parseInt(r._2.get("height").toString) == height
          && r._2.get("publisher").toString == publisher
          && r._2.get("campaignId").toString == campaignId
      ).sortBy(k => k._2.get("timestamp").toString, ascending = false)
        .map(x => x._2).take(1)

      if (res.nonEmpty) {
        println("\nfound previous record")
        val collect: BSONObject = res(0)
        // Copy only the fields downstream aggregation needs into a fresh
        // BSON document, decoupled from the mongo-hadoop record.
        val bson = new BasicBSONObject()
        bson.put("totalBudgetSpent", collect.get("totalBudgetSpent"))
        bson.put("totalAuctions", collect.get("totalAuctions"))
        bson.put("totalWin", collect.get("totalWin"))
        return Some(bson)
      }
    }
    catch {
      // Best-effort read: a failed Mongo command degrades to "no record
      // found" rather than aborting the whole Spark job.
      case ex: MongoCommandException =>
        println(ex.getMessage)
    }
    None
  }
我有一个错误

15/08/27 10:35:44 ERROR Executor: Exception in task 0.0 in stage 19.0 (TID 23)
java.lang.IllegalStateException: state should be: open
    at com.mongodb.assertions.Assertions.isTrue(Assertions.java:70)
    at com.mongodb.connection.BaseCluster.selectServer(BaseCluster.java:79)
    at com.mongodb.binding.ClusterBinding$ClusterBindingConnectionSource.<init>(ClusterBinding.java:75)
    at com.mongodb.binding.ClusterBinding$ClusterBindingConnectionSource.<init>(ClusterBinding.java:71)
    at com.mongodb.binding.ClusterBinding.getWriteConnectionSource(ClusterBinding.java:68)
    at com.mongodb.operation.OperationHelper.withConnection(OperationHelper.java:175)
    at com.mongodb.operation.MixedBulkWriteOperation.execute(MixedBulkWriteOperation.java:141)
    at com.mongodb.operation.MixedBulkWriteOperation.execute(MixedBulkWriteOperation.java:72)
    at com.mongodb.Mongo.execute(Mongo.java:745)
    at com.mongodb.Mongo$2.execute(Mongo.java:728)
    at com.mongodb.DBCollection.executeBulkWriteOperation(DBCollection.java:1968)
    at com.mongodb.DBCollection.executeBulkWriteOperation(DBCollection.java:1962)
    at com.mongodb.BulkWriteOperation.execute(BulkWriteOperation.java:98)
    at com.mongodb.hadoop.output.MongoOutputCommitter.commitTask(MongoOutputCommitter.java:133)
    at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsNewAPIHadoopDataset$1$$anonfun$12.apply(PairRDDFunctions.scala:1045)
    at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsNewAPIHadoopDataset$1$$anonfun$12.apply(PairRDDFunctions.scala:1014)
    at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:63)
    at org.apache.spark.scheduler.Task.run(Task.scala:70)
    at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:213)
    at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
    at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
    at java.lang.Thread.run(Thread.java:745)
15/08/27 10:35:44 WARN TaskSetManager: Lost task 0.0 in stage 19.0 (TID 23, localhost): java.lang.IllegalStateException: state should be: open
    at com.mongodb.assertions.Assertions.isTrue(Assertions.java:70)
    at com.mongodb.connection.BaseCluster.selectServer(BaseCluster.java:79)
    at com.mongodb.binding.ClusterBinding$ClusterBindingConnectionSource.<init>(ClusterBinding.java:75)
    at com.mongodb.binding.ClusterBinding$ClusterBindingConnectionSource.<init>(ClusterBinding.java:71)
    at com.mongodb.binding.ClusterBinding.getWriteConnectionSource(ClusterBinding.java:68)
    at com.mongodb.operation.OperationHelper.withConnection(OperationHelper.java:175)
    at com.mongodb.operation.MixedBulkWriteOperation.execute(MixedBulkWriteOperation.java:141)
    at com.mongodb.operation.MixedBulkWriteOperation.execute(MixedBulkWriteOperation.java:72)
    at com.mongodb.Mongo.execute(Mongo.java:745)
    at com.mongodb.Mongo$2.execute(Mongo.java:728)
    at com.mongodb.DBCollection.executeBulkWriteOperation(DBCollection.java:1968)
    at com.mongodb.DBCollection.executeBulkWriteOperation(DBCollection.java:1962)
    at com.mongodb.BulkWriteOperation.execute(BulkWriteOperation.java:98)
    at com.mongodb.hadoop.output.MongoOutputCommitter.commitTask(MongoOutputCommitter.java:133)
    at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsNewAPIHadoopDataset$1$$anonfun$12.apply(PairRDDFunctions.scala:1045)
    at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsNewAPIHadoopDataset$1$$anonfun$12.apply(PairRDDFunctions.scala:1014)
    at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:63)
    at org.apache.spark.scheduler.Task.run(Task.scala:70)
    at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:213)
    at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
    at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
    at java.lang.Thread.run(Thread.java:745)
15/08/27 10:35:44错误执行者:第19.0阶段任务0.0中出现异常(TID 23)
java.lang.IllegalStateException:状态应为:打开
位于com.mongodb.assertions.assertions.isTrue(assertions.java:70)
位于com.mongodb.connection.BaseCluster.selectServer(BaseCluster.java:79)
位于com.mongodb.binding.ClusterBinding$ClusterBindingConnectionSource。(ClusterBinding.java:75)
位于com.mongodb.binding.ClusterBinding$ClusterBindingConnectionSource。(ClusterBinding.java:71)
位于com.mongodb.binding.ClusterBinding.getWriteConnectionSource(ClusterBinding.java:68)
位于com.mongodb.operation.OperationHelper.withConnection(OperationHelper.java:175)
在com.mongodb.operation.MixedBulkWriteOperation.execute(MixedBulkWriteOperation.java:141)上
在com.mongodb.operation.MixedBulkWriteOperation.execute(MixedBulkWriteOperation.java:72)
位于com.mongodb.Mongo.execute(Mongo.java:745)
位于com.mongodb.Mongo$2.execute(Mongo.java:728)
位于com.mongodb.DBCollection.executeBulkWriteOperation(DBCollection.java:1968)
位于com.mongodb.DBCollection.executeBulkWriteOperation(DBCollection.java:1962)
在com.mongodb.BulkWriteOperation.execute(BulkWriteOperation.java:98)
位于com.mongodb.hadoop.output.mongoutputcommitter.commitTask(mongoutputcommitter.java:133)
位于org.apache.spark.rdd.pairddfunctions$$anonfun$saveAsNewAPIHadoopDataset$1$$anonfun$12.apply(pairddfunctions.scala:1045)
位于org.apache.spark.rdd.pairddfunctions$$anonfun$saveAsNewAPIHadoopDataset$1$$anonfun$12.apply(pairddfunctions.scala:1014)
位于org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:63)
位于org.apache.spark.scheduler.Task.run(Task.scala:70)
位于org.apache.spark.executor.executor$TaskRunner.run(executor.scala:213)
位于java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
位于java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
运行(Thread.java:745)
15/08/27 10:35:44警告TaskSetManager:在阶段19.0中丢失任务0.0(TID 23,localhost):java.lang.IllegalStateException:状态应为:打开
位于com.mongodb.assertions.assertions.isTrue(assertions.java:70)
位于com.mongodb.connection.BaseCluster.selectServer(BaseCluster.java:79)
位于com.mongodb.binding.ClusterBinding$ClusterBindingConnectionSource。(ClusterBinding.java:75)
位于com.mongodb.binding.ClusterBinding$ClusterBindingConnectionSource。(ClusterBinding.java:71)
位于com.mongodb.binding.ClusterBinding.getWriteConnectionSource(ClusterBinding.java:68)
位于com.mongodb.operation.OperationHelper.withConnection(OperationHelper.java:175)
在com.mongodb.operation.MixedBulkWriteOperation.execute(MixedBulkWriteOperation.java:141)上
在com.mongodb.operation.MixedBulkWriteOperation.execute(MixedBulkWriteOperation.java:72)
位于com.mongodb.Mongo.execute(Mongo.java:745)
位于com.mongodb.Mongo$2.execute(Mongo.java:728)
位于com.mongodb.DBCollection.executeBulkWriteOperation(DBCollection.java:1968)
位于com.mongodb.DBCollection.executeBulkWriteOperation(DBCollection.java:1962)
在com.mongodb.BulkWriteOperation.execute(BulkWriteOperation.java:98)
位于com.mongodb.hadoop.output.mongoutputcommitter.commitTask(mongoutputcommitter.java:133)
位于org.apache.spark.rdd.pairddfunctions$$anonfun$saveAsNewAPIHadoopDataset$1$$anonfun$12.apply(pairddfunctions.scala:1045)
位于org.apache.spark.rdd.pairddfunctions$$anonfun$saveAsNewAPIHadoopDataset$1$$anonfun$12.apply(pairddfunctions.scala:1014)
位于org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:63)
位于org.apache.spark.scheduler.Task.run(Task.scala:70)
位于org.apache.spark.executor.executor$TaskRunner.run(executor.scala:213)
位于java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
位于java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
运行(Thread.java:745)
15/08/27 10:35:44 ERROR Executor: Exception in task 0.0 in stage 19.0 (TID 23)
java.lang.IllegalStateException: state should be: open
    at com.mongodb.assertions.Assertions.isTrue(Assertions.java:70)
    at com.mongodb.connection.BaseCluster.selectServer(BaseCluster.java:79)
    at com.mongodb.binding.ClusterBinding$ClusterBindingConnectionSource.<init>(ClusterBinding.java:75)
    at com.mongodb.binding.ClusterBinding$ClusterBindingConnectionSource.<init>(ClusterBinding.java:71)
    at com.mongodb.binding.ClusterBinding.getWriteConnectionSource(ClusterBinding.java:68)
    at com.mongodb.operation.OperationHelper.withConnection(OperationHelper.java:175)
    at com.mongodb.operation.MixedBulkWriteOperation.execute(MixedBulkWriteOperation.java:141)
    at com.mongodb.operation.MixedBulkWriteOperation.execute(MixedBulkWriteOperation.java:72)
    at com.mongodb.Mongo.execute(Mongo.java:745)
    at com.mongodb.Mongo$2.execute(Mongo.java:728)
    at com.mongodb.DBCollection.executeBulkWriteOperation(DBCollection.java:1968)
    at com.mongodb.DBCollection.executeBulkWriteOperation(DBCollection.java:1962)
    at com.mongodb.BulkWriteOperation.execute(BulkWriteOperation.java:98)
    at com.mongodb.hadoop.output.MongoOutputCommitter.commitTask(MongoOutputCommitter.java:133)
    at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsNewAPIHadoopDataset$1$$anonfun$12.apply(PairRDDFunctions.scala:1045)
    at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsNewAPIHadoopDataset$1$$anonfun$12.apply(PairRDDFunctions.scala:1014)
    at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:63)
    at org.apache.spark.scheduler.Task.run(Task.scala:70)
    at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:213)
    at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
    at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
    at java.lang.Thread.run(Thread.java:745)
15/08/27 10:35:44 WARN TaskSetManager: Lost task 0.0 in stage 19.0 (TID 23, localhost): java.lang.IllegalStateException: state should be: open
    at com.mongodb.assertions.Assertions.isTrue(Assertions.java:70)
    at com.mongodb.connection.BaseCluster.selectServer(BaseCluster.java:79)
    at com.mongodb.binding.ClusterBinding$ClusterBindingConnectionSource.<init>(ClusterBinding.java:75)
    at com.mongodb.binding.ClusterBinding$ClusterBindingConnectionSource.<init>(ClusterBinding.java:71)
    at com.mongodb.binding.ClusterBinding.getWriteConnectionSource(ClusterBinding.java:68)
    at com.mongodb.operation.OperationHelper.withConnection(OperationHelper.java:175)
    at com.mongodb.operation.MixedBulkWriteOperation.execute(MixedBulkWriteOperation.java:141)
    at com.mongodb.operation.MixedBulkWriteOperation.execute(MixedBulkWriteOperation.java:72)
    at com.mongodb.Mongo.execute(Mongo.java:745)
    at com.mongodb.Mongo$2.execute(Mongo.java:728)
    at com.mongodb.DBCollection.executeBulkWriteOperation(DBCollection.java:1968)
    at com.mongodb.DBCollection.executeBulkWriteOperation(DBCollection.java:1962)
    at com.mongodb.BulkWriteOperation.execute(BulkWriteOperation.java:98)
    at com.mongodb.hadoop.output.MongoOutputCommitter.commitTask(MongoOutputCommitter.java:133)
    at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsNewAPIHadoopDataset$1$$anonfun$12.apply(PairRDDFunctions.scala:1045)
    at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsNewAPIHadoopDataset$1$$anonfun$12.apply(PairRDDFunctions.scala:1014)
    at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:63)
    at org.apache.spark.scheduler.Task.run(Task.scala:70)
    at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:213)
    at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
    at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
    at java.lang.Thread.run(Thread.java:745)