SparkSQL错误:org.apache.hadoop.hive.ql.metadata.HiveException:无法实例化org.apache.hadoop.hive.ql.metadata.SessionHiveMetaStoreClient
我已在 Windows 机器(单机版)中安装了 Spark,并尝试使用 Spark 应用程序连接虚拟机中可用的 HDP 2.6 Hive metastore。HDP 2.6 虚拟机的网络适配器使用了 NAT。当我试图从 Spark 应用程序(Windows 机器上的本地模式)连接 Hive metastore(HDP 2.6 VM)时,我收到以下错误消息:
17/08/12 17:00:16 INFO metastore: Waiting 1 seconds before next connection attempt.
17/08/12 17:00:17 INFO metastore: Trying to connect to metastore with URI thrift://172.0.0.1:9083
17/08/12 17:00:38 WARN metastore: Failed to connect to the MetaStore Server...
17/08/12 17:00:38 INFO metastore: Waiting 1 seconds before next connection attempt.
17/08/12 17:00:39 WARN Hive: Failed to access metastore. This class should not accessed in runtime.
org.apache.hadoop.hive.ql.metadata.HiveException: java.lang.RuntimeException: Unable to instantiate org.apache.hadoop.hive.ql.metadata.SessionHiveMetaStoreClient
at org.apache.hadoop.hive.ql.metadata.Hive.getAllDatabases(Hive.java:1236)
at org.apache.hadoop.hive.ql.metadata.Hive.reloadFunctions(Hive.java:174)
at org.apache.hadoop.hive.ql.metadata.Hive.<clinit>(Hive.java:166)
at org.apache.hadoop.hive.ql.session.SessionState.start(SessionState.java:503)
at org.apache.spark.sql.hive.client.HiveClientImpl.<init>(HiveClientImpl.scala:191)
at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
at sun.reflect.NativeConstructorAccessorImpl.newInstance(Unknown Source)
at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(Unknown Source)
at java.lang.reflect.Constructor.newInstance(Unknown Source)
at org.apache.spark.sql.hive.client.IsolatedClientLoader.createClient(IsolatedClientLoader.scala:264)
at org.apache.spark.sql.hive.HiveUtils$.newClientForMetadata(HiveUtils.scala:362)
at org.apache.spark.sql.hive.HiveUtils$.newClientForMetadata(HiveUtils.scala:266)
at org.apache.spark.sql.hive.HiveExternalCatalog.client$lzycompute(HiveExternalCatalog.scala:66)
at org.apache.spark.sql.hive.HiveExternalCatalog.client(HiveExternalCatalog.scala:65)
at org.apache.spark.sql.hive.HiveExternalCatalog$$anonfun$databaseExists$1.apply$mcZ$sp(HiveExternalCatalog.scala:194)
at org.apache.spark.sql.hive.HiveExternalCatalog$$anonfun$databaseExists$1.apply(HiveExternalCatalog.scala:194)
at org.apache.spark.sql.hive.HiveExternalCatalog$$anonfun$databaseExists$1.apply(HiveExternalCatalog.scala:194)
at org.apache.spark.sql.hive.HiveExternalCatalog.withClient(HiveExternalCatalog.scala:97)
at org.apache.spark.sql.hive.HiveExternalCatalog.databaseExists(HiveExternalCatalog.scala:193)
at org.apache.spark.sql.internal.SharedState.externalCatalog$lzycompute(SharedState.scala:105)
at org.apache.spark.sql.internal.SharedState.externalCatalog(SharedState.scala:93)
at org.apache.spark.sql.hive.HiveSessionStateBuilder.externalCatalog(HiveSessionStateBuilder.scala:39)
at org.apache.spark.sql.hive.HiveSessionStateBuilder.catalog$lzycompute(HiveSessionStateBuilder.scala:54)
at org.apache.spark.sql.hive.HiveSessionStateBuilder.catalog(HiveSessionStateBuilder.scala:52)
at org.apache.spark.sql.hive.HiveSessionStateBuilder.catalog(HiveSessionStateBuilder.scala:35)
at org.apache.spark.sql.internal.BaseSessionStateBuilder.build(BaseSessionStateBuilder.scala:289)
at org.apache.spark.sql.SparkSession$.org$apache$spark$sql$SparkSession$$instantiateSessionState(SparkSession.scala:1050)
at org.apache.spark.sql.SparkSession$$anonfun$sessionState$2.apply(SparkSession.scala:130)
at org.apache.spark.sql.SparkSession$$anonfun$sessionState$2.apply(SparkSession.scala:130)
at scala.Option.getOrElse(Option.scala:121)
at org.apache.spark.sql.SparkSession.sessionState$lzycompute(SparkSession.scala:129)
at org.apache.spark.sql.SparkSession.sessionState(SparkSession.scala:126)
at org.apache.spark.sql.SparkSession.sql(SparkSession.scala:623)
at org.apache.spark.sql.SQLContext.sql(SQLContext.scala:691)
at com.psl.spark.RemoteHiveConnSpark1_6$.main(RemoteHiveConnSpark1_6.scala:29)
at com.psl.spark.RemoteHiveConnSpark1_6.main(RemoteHiveConnSpark1_6.scala)
Caused by: java.lang.RuntimeException: Unable to instantiate org.apache.hadoop.hive.ql.metadata.SessionHiveMetaStoreClient
at org.apache.hadoop.hive.metastore.MetaStoreUtils.newInstance(MetaStoreUtils.java:1523)
at org.apache.hadoop.hive.metastore.RetryingMetaStoreClient.<init>(RetryingMetaStoreClient.java:86)
at org.apache.hadoop.hive.metastore.RetryingMetaStoreClient.getProxy(RetryingMetaStoreClient.java:132)
有人能帮我解决这个问题吗?(回答)要连接到 Hive metastore,需要以下配置:spark.yarn.dist.files 应包含 /apps/spark/hive-site.xml、/apps/spark/datanucleus-rdbms-4.1.7.jar、/apps/spark/datanucleus-core-4.1.6.jar、/apps/spark/datanucleus-api-jdo-4.2.1.jar,并设置 spark.sql.hive.metastore.version。请确认这些配置是否存在。
import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
/**
 * Connects from a local-mode Spark driver (Windows host) to a remote Hive
 * metastore (HDP 2.6 VM) and runs a simple query against `default.sample_07`.
 *
 * @author prasanta_sahoo
 */
object RemoteHiveConnSpark1_6 {
  def main(arg: Array[String]): Unit = {
    // Build the Spark configuration. The metastore URI is set directly on the
    // SparkConf: relying on System.setProperty is fragile because Spark's
    // isolated Hive client loader reads the Hadoop/Hive configuration it is
    // handed at construction time and may never consult JVM system properties
    // set from application code.
    val conf = new SparkConf()
      .setAppName("RemoteHiveConnSpark1_6")
      .setMaster("local") // single-JVM local mode on the Windows machine
      // NOTE(review): spark.storage.memoryFraction is deprecated since the
      // unified memory manager (Spark 1.6+); kept only to preserve behavior.
      .set("spark.storage.memoryFraction", "1")
      // NOTE(review): 172.0.0.1 is NOT the loopback address (that is
      // 127.0.0.1). With a NAT adapter the HDP VM is typically reached via a
      // port forwarded on 127.0.0.1 or via the VM's actual NAT IP — confirm
      // the metastore host/port; a wrong host is exactly what produces the
      // "Failed to connect to the MetaStore Server" retries in the log above.
      .set("hive.metastore.uris", "thrift://172.0.0.1:9083")

    // Create the Spark context and a Hive-aware SQL context on top of it.
    val sc = new SparkContext(conf)
    val hiveContext = new org.apache.spark.sql.hive.HiveContext(sc)

    // Query the sample table through the remote metastore and print each row.
    hiveContext.sql(
      "FROM default.sample_07 SELECT code, description, total_emp, salary"
    ).collect().foreach(println)
  }
}