cdmikechen opened a new issue #774: Matching question of the version in Spark 
and Hive2 
URL: https://github.com/apache/incubator-hudi/issues/774
 
 
   When I used Spark 2.3.3 and Hive 2.3.3 to test Hoodie, I found that if the
Spark session is created with `enableHiveSupport()`, Spark matches the Hive
version in `org.apache.spark.sql.hive.client.IsolatedClientLoader`:
   ```
     def hiveVersion(version: String): HiveVersion = version match {
       case "12" | "0.12" | "0.12.0" => hive.v12
       case "13" | "0.13" | "0.13.0" | "0.13.1" => hive.v13
       case "14" | "0.14" | "0.14.0" => hive.v14
       case "1.0" | "1.0.0" => hive.v1_0
       case "1.1" | "1.1.0" => hive.v1_1
       case "1.2" | "1.2.0" | "1.2.1" | "1.2.2" => hive.v1_2
       case "2.0" | "2.0.0" | "2.0.1" => hive.v2_0
       case "2.1" | "2.1.0" | "2.1.1" => hive.v2_1
     }
   ```
   If I use a higher Hive version and run a SQL statement such as
`sparksession.sql("use database")`, it reports this error:
   ```
   scala.MatchError: 2.3.3 (of class java.lang.String)
        at 
org.apache.spark.sql.hive.client.IsolatedClientLoader$.hiveVersion(IsolatedClientLoader.scala:89)
 ~[spark-hive_2.11-2.3.3.jar:2.3.3]
        at 
org.apache.spark.sql.hive.HiveUtils$.newClientForMetadata(HiveUtils.scala:300) 
~[spark-hive_2.11-2.3.3.jar:2.3.3]
        at 
org.apache.spark.sql.hive.HiveUtils$.newClientForMetadata(HiveUtils.scala:287) 
~[spark-hive_2.11-2.3.3.jar:2.3.3]
        at 
org.apache.spark.sql.hive.HiveExternalCatalog.client$lzycompute(HiveExternalCatalog.scala:66)
 ~[spark-hive_2.11-2.3.3.jar:2.3.3]
        at 
org.apache.spark.sql.hive.HiveExternalCatalog.client(HiveExternalCatalog.scala:65)
 ~[spark-hive_2.11-2.3.3.jar:2.3.3]
        at 
org.apache.spark.sql.hive.HiveExternalCatalog$$anonfun$databaseExists$1.apply$mcZ$sp(HiveExternalCatalog.scala:195)
 ~[spark-hive_2.11-2.3.3.jar:2.3.3]
        at 
org.apache.spark.sql.hive.HiveExternalCatalog$$anonfun$databaseExists$1.apply(HiveExternalCatalog.scala:195)
 ~[spark-hive_2.11-2.3.3.jar:2.3.3]
        at 
org.apache.spark.sql.hive.HiveExternalCatalog$$anonfun$databaseExists$1.apply(HiveExternalCatalog.scala:195)
 ~[spark-hive_2.11-2.3.3.jar:2.3.3]
        at 
org.apache.spark.sql.hive.HiveExternalCatalog.withClient(HiveExternalCatalog.scala:97)
 ~[spark-hive_2.11-2.3.3.jar:2.3.3]
        at 
org.apache.spark.sql.hive.HiveExternalCatalog.databaseExists(HiveExternalCatalog.scala:194)
 ~[spark-hive_2.11-2.3.3.jar:2.3.3]
        at 
org.apache.spark.sql.internal.SharedState.externalCatalog$lzycompute(SharedState.scala:114)
 ~[spark-sql_2.11-2.3.3.jar:2.3.3]
        at 
org.apache.spark.sql.internal.SharedState.externalCatalog(SharedState.scala:102)
 ~[spark-sql_2.11-2.3.3.jar:2.3.3]
        at 
org.apache.spark.sql.hive.HiveSessionStateBuilder.externalCatalog(HiveSessionStateBuilder.scala:39)
 ~[spark-hive_2.11-2.3.3.jar:2.3.3]
        at 
org.apache.spark.sql.hive.HiveSessionStateBuilder.catalog$lzycompute(HiveSessionStateBuilder.scala:54)
 ~[spark-hive_2.11-2.3.3.jar:2.3.3]
        at 
org.apache.spark.sql.hive.HiveSessionStateBuilder.catalog(HiveSessionStateBuilder.scala:52)
 ~[spark-hive_2.11-2.3.3.jar:2.3.3]
        at 
org.apache.spark.sql.hive.HiveSessionStateBuilder$$anon$1.<init>(HiveSessionStateBuilder.scala:69)
 ~[spark-hive_2.11-2.3.3.jar:2.3.3]
        at 
org.apache.spark.sql.hive.HiveSessionStateBuilder.analyzer(HiveSessionStateBuilder.scala:69)
 ~[spark-hive_2.11-2.3.3.jar:2.3.3]
        at 
org.apache.spark.sql.internal.BaseSessionStateBuilder$$anonfun$build$2.apply(BaseSessionStateBuilder.scala:293)
 ~[spark-sql_2.11-2.3.3.jar:2.3.3]
        at 
org.apache.spark.sql.internal.BaseSessionStateBuilder$$anonfun$build$2.apply(BaseSessionStateBuilder.scala:293)
 ~[spark-sql_2.11-2.3.3.jar:2.3.3]
        at 
org.apache.spark.sql.internal.SessionState.analyzer$lzycompute(SessionState.scala:79)
 ~[spark-sql_2.11-2.3.3.jar:2.3.3]
        at 
org.apache.spark.sql.internal.SessionState.analyzer(SessionState.scala:79) 
~[spark-sql_2.11-2.3.3.jar:2.3.3]
        at 
org.apache.spark.sql.execution.QueryExecution.analyzed$lzycompute(QueryExecution.scala:57)
 ~[spark-sql_2.11-2.3.3.jar:2.3.3]
        at 
org.apache.spark.sql.execution.QueryExecution.analyzed(QueryExecution.scala:55) 
~[spark-sql_2.11-2.3.3.jar:2.3.3]
        at 
org.apache.spark.sql.execution.QueryExecution.assertAnalyzed(QueryExecution.scala:47)
 ~[spark-sql_2.11-2.3.3.jar:2.3.3]
        at org.apache.spark.sql.Dataset$.ofRows(Dataset.scala:74) 
~[spark-sql_2.11-2.3.3.jar:2.3.3]
        at org.apache.spark.sql.SparkSession.sql(SparkSession.scala:642) 
~[spark-sql_2.11-2.3.3.jar:2.3.3]
   ...
   ```
   The QuickStart says Hoodie works with `spark-2.[1-3].x` and `hive-2.3.3`.
However, the version match shown above has no case for Hive 2.3.x, so I think
we need to find an officially recommended Hive 2 version that Spark can match,
such as `hive-2.1.1`.
   

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
[email protected]


With regards,
Apache Git Services

Reply via email to