Warning: file_get_contents(/data/phpspider/zhask/data//catemap/2/image-processing/2.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181
Apache spark Spark DynamoDB连接问题_Apache Spark_Amazon Dynamodb - Fatal编程技术网

Apache spark Spark DynamoDB连接问题

Apache spark Spark DynamoDB连接问题,apache-spark,amazon-dynamodb,Apache Spark,Amazon Dynamodb,要求:使用Scala从本地机器通过Spark从DynamoDB(不是本地而是AWS)读取数据 理解:当我们使用emr集群时,可以使用emr-hadoop-dynamodb.jar读取数据 问题: 我们可以使用emr-DynamoDB-hadoop.jar从DynamoDB(在云端而非本地)读取数据吗 不使用EMR集群。我直接想在本地机器上使用scala代码从spark访问dynamodb build.sbt 从ddb.scala读取数据 错误 java.lang.RuntimeException

要求:使用Scala从本地机器通过Spark从DynamoDB(不是本地而是AWS)读取数据

理解:当我们使用emr集群时,可以使用emr-dynamodb-hadoop.jar读取数据

问题

  • 我们可以使用emr-dynamodb-hadoop.jar从DynamoDB(在云端而非本地)读取数据吗
  • 不使用EMR集群。我直接想在本地机器上使用scala代码从spark访问dynamodb
  • build.sbt

    从ddb.scala读取数据

    错误

    java.lang.RuntimeException: Could not lookup table Music in DynamoDB.
      at org.apache.hadoop.dynamodb.DynamoDBClient.describeTable(DynamoDBClient.java:116)
      at org.apache.hadoop.dynamodb.read.ReadIopsCalculator.getThroughput(ReadIopsCalculator.java:67)
      at org.apache.hadoop.dynamodb.read.ReadIopsCalculator.calculateTargetIops(ReadIopsCalculator.java:57)
      at org.apache.hadoop.dynamodb.read.AbstractDynamoDBRecordReader.initReadManager(AbstractDynamoDBRecordReader.java:153)
      at org.apache.hadoop.dynamodb.read.AbstractDynamoDBRecordReader.<init>(AbstractDynamoDBRecordReader.java:84)
      at org.apache.hadoop.dynamodb.read.DefaultDynamoDBRecordReader.<init>(DefaultDynamoDBRecordReader.java:24)
      at org.apache.hadoop.dynamodb.read.DynamoDBInputFormat.getRecordReader(DynamoDBInputFormat.java:32)
      at org.apache.spark.rdd.HadoopRDD$$anon$1.liftedTree1$1(HadoopRDD.scala:267)
      at org.apache.spark.rdd.HadoopRDD$$anon$1.<init>(HadoopRDD.scala:266)
      at org.apache.spark.rdd.HadoopRDD.compute(HadoopRDD.scala:224)
      at org.apache.spark.rdd.HadoopRDD.compute(HadoopRDD.scala:95)
      at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:324)
      at org.apache.spark.rdd.RDD.iterator(RDD.scala:288)
      at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
      at org.apache.spark.scheduler.Task.run(Task.scala:121)
      at org.apache.spark.executor.Executor$TaskRunner$$anonfun$10.apply(Executor.scala:403)
      at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1360)
      at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:409)
      at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
      at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
      at java.lang.Thread.run(Thread.java:748)
    Caused by: java.lang.RuntimeException: java.lang.IllegalStateException: Socket not created by this factory
      at org.apache.hadoop.dynamodb.DynamoDBFibonacciRetryer.handleException(DynamoDBFibonacciRetryer.java:120)
      at org.apache.hadoop.dynamodb.DynamoDBFibonacciRetryer.runWithRetry(DynamoDBFibonacciRetryer.java:83)
      at org.apache.hadoop.dynamodb.DynamoDBClient.describeTable(DynamoDBClient.java:105)
      ... 20 more

    已审查的链接


    当更新以下依赖项版本时,此问题已得到解决

    "software.amazon.awssdk" % "dynamodb" % "2.15.31",
    "com.amazon.emr" % "emr-dynamodb-hadoop" % "4.14.0"
    
    import org.apache.hadoop.dynamodb.DynamoDBItemWritable
    import org.apache.hadoop.dynamodb.read.DynamoDBInputFormat
    import org.apache.hadoop.io.Text
    import org.apache.hadoop.mapred.JobConf
    import org.apache.spark.api.java.JavaSparkContext
    import org.apache.spark.{SparkConf, SparkContext}
    
    object readDataFromDDB {
      /**
       * Entry point: connects a local-mode Spark context to the AWS-hosted
       * DynamoDB table "Music" via the emr-dynamodb-hadoop input format and
       * prints the number of records read.
       */
      def main(args: Array[String]): Unit = {
        var sc: SparkContext = null
        try {
          val conf = new SparkConf().setAppName("DynamoDBApplication").setMaster("local")
          sc = new SparkContext(conf)
          val jobConf = getDynamoDbJobConf(sc, "Music", "TableNameForWrite")
          val tableData = sc.hadoopRDD(jobConf, classOf[DynamoDBInputFormat], classOf[Text], classOf[DynamoDBItemWritable])
          println(tableData.count())
        } catch {
          case e: Exception =>
            // BUG FIX: println(e.getStackTrace) printed only the array's
            // toString ("[Ljava.lang.StackTraceElement;@..."), hiding the
            // actual failure. printStackTrace() emits the full trace.
            e.printStackTrace()
        } finally {
          // BUG FIX: if SparkContext construction threw, sc is still null and
          // the unconditional sc.stop() raised a NullPointerException that
          // masked the original error.
          if (sc != null) sc.stop()
        }
      }

      /**
       * Builds a Hadoop JobConf configured for the DynamoDB connector.
       *
       * @param sc                the active SparkContext supplying the base Hadoop configuration
       * @param tableNameForRead  DynamoDB table to read from (dynamodb.input.tableName)
       * @param tableNameForWrite DynamoDB table to write to (dynamodb.output.tableName)
       * @return a JobConf carrying the DynamoDB endpoint, region, credentials
       *         and input/output format settings
       */
      // BUG FIX: the parameter was declared as JavaSparkContext while main
      // passes a SparkContext — that does not compile (there is no implicit
      // conversion between the two). SparkContext exposes hadoopConfiguration
      // directly, so it is the correct type here.
      private def getDynamoDbJobConf(sc: SparkContext, tableNameForRead: String, tableNameForWrite: String): JobConf = {
        val jobConf = new JobConf(sc.hadoopConfiguration)
        jobConf.set("dynamodb.servicename", "dynamodb")
        jobConf.set("dynamodb.input.tableName", tableNameForRead)
        jobConf.set("dynamodb.output.tableName", tableNameForWrite)
        // NOTE(review): hard-coded credentials are placeholders; prefer the
        // default AWS credential provider chain (env vars / profile) in real use.
        jobConf.set("dynamodb.awsAccessKeyId", "*****************")
        jobConf.set("dynamodb.awsSecretAccessKey", "*********************")
        jobConf.set("dynamodb.endpoint", "dynamodb.us-east-1.amazonaws.com")
        jobConf.set("dynamodb.regionid", "us-east-1")
        jobConf.set("mapred.output.format.class", "org.apache.hadoop.dynamodb.write.DynamoDBOutputFormat")
        jobConf.set("mapred.input.format.class", "org.apache.hadoop.dynamodb.read.DynamoDBInputFormat")
        jobConf
      }
    }
    
    "software.amazon.awssdk" % "dynamodb" % "2.15.31",
    "com.amazon.emr" % "emr-dynamodb-hadoop" % "4.14.0"