Apache spark Spark DynamoDB连接问题
要求:使用Scala从本地机器通过Spark从DynamoDB(不是本地而是AWS)读取数据 理解:当我们使用emr集群时,可以使用emr-hadoop-dynamodb.jar读取数据 问题:Apache spark Spark DynamoDB连接问题,apache-spark,amazon-dynamodb,Apache Spark,Amazon Dynamodb,要求:使用Scala从本地机器通过Spark从DynamoDB(不是本地而是AWS)读取数据 理解:当我们使用emr集群时,可以使用emr-hadoop-dynamodb.jar读取数据 问题: 我们可以使用emr-DynamoDB-hadoop.jar从DynamoDB(在云端而非本地)读取数据吗 不使用EMR集群。我直接想在本地机器上使用scala代码从spark访问dynamodb build.sbt 从ddb.scala读取数据 错误 java.lang.RuntimeException
当更新以下依赖项版本时,此问题已得到解决
"software.amazon.awssdk" % "dynamodb" % "2.15.31",
"com.amazon.emr" % "emr-dynamodb-hadoop" % "4.14.0"
import org.apache.hadoop.dynamodb.DynamoDBItemWritable
import org.apache.hadoop.dynamodb.read.DynamoDBInputFormat
import org.apache.hadoop.io.Text
import org.apache.hadoop.mapred.JobConf
import org.apache.spark.api.java.JavaSparkContext
import org.apache.spark.{SparkConf, SparkContext}
object readDataFromDDB {
def main(args: Array[String]): Unit = {
var sc: SparkContext = null
try {
val conf = new SparkConf().setAppName("DynamoDBApplication").setMaster("local")
sc = new SparkContext(conf)
val jobConf = getDynamoDbJobConf(sc, "Music", "TableNameForWrite")
val tableData = sc.hadoopRDD(jobConf, classOf[DynamoDBInputFormat], classOf[Text], classOf[DynamoDBItemWritable])
println(tableData.count())
} catch {
case e: Exception => {
println(e.getStackTrace)
}
} finally {
sc.stop()
}
}
private def getDynamoDbJobConf(sc: JavaSparkContext, tableNameForRead: String, tableNameForWrite: String) = {
val jobConf = new JobConf(sc.hadoopConfiguration)
jobConf.set("dynamodb.servicename", "dynamodb")
jobConf.set("dynamodb.input.tableName", tableNameForRead)
jobConf.set("dynamodb.output.tableName", tableNameForWrite)
jobConf.set("dynamodb.awsAccessKeyId", "*****************")
jobConf.set("dynamodb.awsSecretAccessKey", "*********************")
jobConf.set("dynamodb.endpoint", "dynamodb.us-east-1.amazonaws.com")
jobConf.set("dynamodb.regionid", "us-east-1")
jobConf.set("mapred.output.format.class", "org.apache.hadoop.dynamodb.write.DynamoDBOutputFormat")
jobConf.set("mapred.input.format.class", "org.apache.hadoop.dynamodb.read.DynamoDBInputFormat")
jobConf
}
}
"software.amazon.awssdk" % "dynamodb" % "2.15.31",
"com.amazon.emr" % "emr-dynamodb-hadoop" % "4.14.0"