Warning: file_get_contents(/data/phpspider/zhask/data//catemap/9/java/359.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181

Warning: file_get_contents(/data/phpspider/zhask/data//catemap/3/apache-spark/6.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181
Java 无法在spark Executor中创建DynamoDB客户端_Java_Apache Spark_Amazon Dynamodb_Spark Streaming - Fatal编程技术网

Java 无法在spark Executor中创建DynamoDB客户端

Java 无法在spark Executor中创建DynamoDB客户端,java,apache-spark,amazon-dynamodb,spark-streaming,Java,Apache Spark,Amazon Dynamodb,Spark Streaming,我需要将流数据加载到DynamoDB表中。我尝试了下面的代码 object UnResolvedLoad { def main(args: Array[String]){ val spark = SparkSession.builder().appName("unresolvedload").enableHiveSupport().getOrCreate() val tokensDf = spark.sql("select * from unresolved_logi

我需要将流数据加载到DynamoDB表中。我尝试了下面的代码

  /** Loads rows from the Hive table unresolved_logic.unresolved_dynamo_load into
    * the DynamoDB table "UnResolvedTokens". The DynamoDB client is not
    * serializable, so it must be created inside foreachPartition so that it is
    * constructed on the executor, never shipped from the driver.
    */
  object UnResolvedLoad {

    def main(args: Array[String]): Unit = {
      val spark = SparkSession.builder().appName("unresolvedload").enableHiveSupport().getOrCreate()
      val tokensDf = spark.sql("select * from unresolved_logic.unresolved_dynamo_load")
      tokensDf.foreachPartition { x => loadFunc(x) }
    }

    /** Writes every row of one partition to DynamoDB, one PutItem per row.
      *
      * Column layout assumed from the original code: (0) payload_id,
      * (1) payload_confirmation_code, (2) receiverId, (3) token — all strings.
      *
      * @param iter the rows of a single Spark partition
      */
    def loadFunc(iter: Iterator[org.apache.spark.sql.Row]): Unit = {
      // One client per partition: cheap relative to partition size, and avoids
      // serialization of the (non-serializable) AWS client.
      val client: AmazonDynamoDB = AmazonDynamoDBClientBuilder.standard().build()
      val dynamoDB: DynamoDB = new DynamoDB(client)
      val table: Table = dynamoDB.getTable("UnResolvedTokens")

      while (iter.hasNext) {
        val cur = iter.next()
        // BUG FIX: attribute name was "receiverId " (trailing space), which
        // would silently write to a differently-named DynamoDB attribute.
        // Also use Row.getString(i) instead of get(i).asInstanceOf[String].
        val item: Item = new Item()
          .withString("receiverId", cur.getString(2))
          .withString("payload_id", cur.getString(0))
          .withString("payload_confirmation_code", cur.getString(1))
          .withString("token", cur.getString(3))

        table.putItem(item)
      }
    }
  }

当我执行spark-submit时,它无法实例化类。下面是错误消息。它说它不能实例化这个类。谢谢你的帮助。 有没有办法将Spark数据集保存到Amazon DynamoDB中

, executor 5): java.lang.NoClassDefFoundError: Could not initialize class com.amazonaws.services.dynamodbv2.AmazonDynamoDBClientBuilder
        at com.dish.payloads.UnResolvedLoad$.loadFunc(UnResolvedLoad.scala:22)
        at com.dish.payloads.UnResolvedLoad$$anonfun$main$1.apply(UnResolvedLoad.scala:16)
        at com.dish.payloads.UnResolvedLoad$$anonfun$main$1.apply(UnResolvedLoad.scala:16)
        at org.apache.spark.rdd.RDD$$anonfun$foreachPartition$1$$anonfun$apply$29.apply(RDD.scala:926)
        at org.apache.spark.rdd.RDD$$anonfun$foreachPartition$1$$anonfun$apply$29.apply(RDD.scala:926)
        at org.apache.spark.SparkContext$$anonfun$runJob$5.apply(SparkContext.scala:1951)
        at org.apache.spark.SparkContext$$anonfun$runJob$5.apply(SparkContext.scala:1951)
        at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:87)
        at org.apache.spark.scheduler.Task.run(Task.scala:99)
        at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:322)
        at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
        at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
        at java.lang.Thread.run(Thread.java:748)

17/07/19 17:35:15 INFO TaskSetManager: Lost task 26.0 in stage 0.0 (TID 26) on ip-10-176-225-151.us-west-2.compute.internal, executor 5: java.lang.NoClassDefFoundError (Could not initialize class com.amazonaws.services.dynamodbv2.AmazonDynamoDBClientBuilder) [duplicate 1]
17/07/19 17:35:15 WARN TaskSetManager: Lost task 6.0 in stage 0.0 (TID 6, ip-10-176-225-151.us-west-2.compute.internal, executor 5): java.lang.IllegalAccessError: tried to access class com.amazonaws.services.dynamodbv2.AmazonDynamoDBClientConfigurationFactory from class com.amazonaws.services.dynamodbv2.AmazonDynamoDBClientBuilder
        at com.amazonaws.services.dynamodbv2.AmazonDynamoDBClientBuilder.<clinit>(AmazonDynamoDBClientBuilder.java:30)
        at com.dish.payloads.UnResolvedLoad$.loadFunc(UnResolvedLoad.scala:22)
        at com.dish.payloads.UnResolvedLoad$$anonfun$main$1.apply(UnResolvedLoad.scala:16)
        at com.dish.payloads.UnResolvedLoad$$anonfun$main$1.apply(UnResolvedLoad.scala:16)
        at org.apache.spark.rdd.RDD$$anonfun$foreachPartition$1$$anonfun$apply$29.apply(RDD.scala:926)
        at org.apache.spark.rdd.RDD$$anonfun$foreachPartition$1$$anonfun$apply$29.apply(RDD.scala:926)
        at org.apache.spark.SparkContext$$anonfun$runJob$5.apply(SparkContext.scala:1951)
        at org.apache.spark.SparkContext$$anonfun$runJob$5.apply(SparkContext.scala:1951)
        at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:87)
        at org.apache.spark.scheduler.Task.run(Task.scala:99)
        at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:322)
        at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
        at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
        at java.lang.Thread.run(Thread.java:748)
,执行器5):java.lang.NoClassDefFoundError:无法初始化类com.amazonaws.services.dynamodbv2.AmazonDynamoDBClientBuilder
在com.dish.payloads.unsolvedload$.loadFunc上(unsolvedload.scala:22)
在com.dish.payloads.unsolvedload$$anonfun$main$1.apply上(unsolvedload.scala:16)
在com.dish.payloads.unsolvedload$$anonfun$main$1.apply上(unsolvedload.scala:16)
位于org.apache.spark.rdd.rdd$$anonfun$foreachPartition$1$$anonfun$apply$29.apply(rdd.scala:926)
位于org.apache.spark.rdd.rdd$$anonfun$foreachPartition$1$$anonfun$apply$29.apply(rdd.scala:926)
位于org.apache.spark.SparkContext$$anonfun$runJob$5.apply(SparkContext.scala:1951)
位于org.apache.spark.SparkContext$$anonfun$runJob$5.apply(SparkContext.scala:1951)
位于org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:87)
位于org.apache.spark.scheduler.Task.run(Task.scala:99)
位于org.apache.spark.executor.executor$TaskRunner.run(executor.scala:322)
位于java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
位于java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
运行(Thread.java:748)
17/07/19 17:35:15 信息 TaskSetManager:在ip-10-176-225-151.us-west-2.compute.internal上的0.0阶段(TID 26)中丢失任务26.0,执行器5:java.lang.NoClassDefFoundError(无法初始化类com.amazonaws.services.dynamodbv2.AmazonDynamoDBClientBuilder)[重复 1]
17/07/19 17:35:15 警告 TaskSetManager:在0.0阶段丢失任务6.0(TID 6,ip-10-176-225-151.us-west-2.compute.internal,executor 5):java.lang.IllegalAccessError:尝试从com.amazonaws.services.dynamodbv2.AmazonDynamoDBClientBuilder类访问com.amazonaws.services.dynamodbv2.AmazonDynamoDBClientConfigurationFactory类
位于com.amazonaws.services.dynamodbv2.AmazonDynamoDBClientBuilder。(AmazonDynamoDBClientBuilder.java:30)
在com.dish.payloads.unsolvedload$.loadFunc上(unsolvedload.scala:22)
在com.dish.payloads.unsolvedload$$anonfun$main$1.apply上(unsolvedload.scala:16)
在com.dish.payloads.unsolvedload$$anonfun$main$1.apply上(unsolvedload.scala:16)
位于org.apache.spark.rdd.rdd$$anonfun$foreachPartition$1$$anonfun$apply$29.apply(rdd.scala:926)
位于org.apache.spark.rdd.rdd$$anonfun$foreachPartition$1$$anonfun$apply$29.apply(rdd.scala:926)
位于org.apache.spark.SparkContext$$anonfun$runJob$5.apply(SparkContext.scala:1951)
位于org.apache.spark.SparkContext$$anonfun$runJob$5.apply(SparkContext.scala:1951)
位于org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:87)
位于org.apache.spark.scheduler.Task.run(Task.scala:99)
位于org.apache.spark.executor.executor$TaskRunner.run(executor.scala:322)
位于java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
位于java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
运行(Thread.java:748)

我终于能够通过使用较低版本的DynamoDB API来解决这个问题。EMR 5.7仅支持1.10.75.1。下面是适合我的代码

/** Loads rows from the Hive table unresolved_logic.unresolved_dynamo_load into
  * the DynamoDB table "UnResolvedTokens" using the low-level 1.10.x client API
  * (the SDK version supported by EMR 5.7 per the surrounding discussion).
  */
object UnResolvedLoad {

  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().appName("unresolvedload").enableHiveSupport().getOrCreate()
    val tokensDf = spark.sql("select * from unresolved_logic.unresolved_dynamo_load")
    tokensDf.foreachPartition { x => loadFunc(x) }
  }

  /** Writes each row of one partition to DynamoDB via a low-level PutItem call.
    * A fresh client is built per partition because the AWS client cannot be
    * serialized across the driver/executor boundary.
    */
  def loadFunc(iter: Iterator[org.apache.spark.sql.Row]) = {
    val client: AmazonDynamoDBClient = new AmazonDynamoDBClient()
    client.setRegion(Region.getRegion(Regions.US_WEST_2))

    iter.foreach { row =>
      // Column layout: (0) payload_id, (1) payload_confirmation_code,
      // (2) receiverId, (3) token — assumed strings, per the original code.
      val attributes = Map(
        "receiverId" -> new AttributeValue(row.get(2).asInstanceOf[String]),
        "payload_id" -> new AttributeValue(row.get(0).asInstanceOf[String]),
        "payload_confirmation_code" -> new AttributeValue(row.get(1).asInstanceOf[String]),
        "token" -> new AttributeValue(row.get(3).asInstanceOf[String])
      ).asJava

      client.putItem(new PutItemRequest("UnResolvedTokens", attributes))
    }
  }
}

节省了我很多时间!谢谢你,伙计。你是怎么想的?真的很难想到这个方向我从AWS支持论坛得到了答案