This is an automated email from the ASF dual-hosted git repository.
chengpan pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kyuubi.git
The following commit(s) were added to refs/heads/master by this push:
new 1e4d91e43 [KYUUBI #5280] Hive engine compatible with Hive 2.3
1e4d91e43 is described below
commit 1e4d91e43ea826c186fe2747e34a5e6cf408dd2f
Author: Cheng Pan <[email protected]>
AuthorDate: Fri Nov 24 14:52:54 2023 +0800
[KYUUBI #5280] Hive engine compatible with Hive 2.3
### _Why are the changes needed?_
Try to make the Hive engine compatible with Hive 2.3.
It is not easy to align all dependencies with Hive 2.3, so this PR only
adds a new integration test to make sure that the `kyuubi hive engine` compiled
against Hive 3.1.3 works with the Hive 2.3.9 runtime, instead of introducing
a new profile to make it build against Hive 2.3.9.
There are potential class conflict issues, because the Apache Hive 2.3.9
and 3.1.3 official binary artifacts ship Scala 2.11 jars but Kyuubi uses Scala
2.12.
Also tested with `apache-hive-2.1.1-cdh6.3.2-bin.tar.gz` locally, which can
successfully bootstrap and run queries.
### _How was this patch tested?_
- [x] Add some test cases that check the changes thoroughly including
negative and positive cases if possible
- [ ] Add screenshots for manual tests if appropriate
- [x] [Run
test](https://kyuubi.readthedocs.io/en/master/contributing/code/testing.html#running-tests)
locally before making a pull request
### _Was this patch authored or co-authored using generative AI tooling?_
No
Closes #5280 from pan3793/hive-2.3.
Closes #5280
8ca02a351 [Cheng Pan] corss verify hive engine on hive 2.3
Authored-by: Cheng Pan <[email protected]>
Signed-off-by: Cheng Pan <[email protected]>
---
.github/workflows/master.yml | 16 +++++++--
externals/kyuubi-hive-sql-engine/pom.xml | 30 +++++++---------
.../engine/hive/session/HiveSessionManager.scala | 42 +++++++++++++++++-----
.../scala/org/apache/kyuubi/util/JdbcUtils.scala | 6 ++++
.../scala/org/apache/kyuubi/HiveEngineTests.scala | 22 ++++++------
5 files changed, 76 insertions(+), 40 deletions(-)
diff --git a/.github/workflows/master.yml b/.github/workflows/master.yml
index 74c53ab08..fe92f6b6b 100644
--- a/.github/workflows/master.yml
+++ b/.github/workflows/master.yml
@@ -242,7 +242,12 @@ jobs:
matrix:
java:
- 8
+ hive-archive: [ "" ]
comment: [ "normal" ]
+ include:
+ - java: 8
+ hive-archive:
'-Dhive.archive.mirror=https://archive.apache.org/dist/hive/hive-2.3.9
-Dhive.archive.name=apache-hive-2.3.9-bin.tar.gz'
+ comment: 'verify-on-hive-2.3-binary'
steps:
- uses: actions/checkout@v3
- name: Tune Runner VM
@@ -261,8 +266,15 @@ jobs:
- name: Build and test Hive with maven w/o linters
run: |
TEST_MODULES="externals/kyuubi-hive-sql-engine,integration-tests/kyuubi-hive-it"
- ./build/mvn ${MVN_OPT} -pl ${TEST_MODULES} -am clean install
-DskipTests
- ./build/mvn ${MVN_OPT} -pl ${TEST_MODULES} test
+ ./build/mvn ${MVN_OPT} ${{ matrix.hive-archive }} -pl
${TEST_MODULES} -am clean install -DskipTests
+ # Hive 2.3.9 ships Derby 10.10.2.0, which may fail to bootstrap on
latest JDK 8
+ # https://github.com/apache/hive/pull/4895
+ if [[ "${{ matrix.hive-archive }}" == *apache-hive-2.3.9-bin.tar.gz*
]]; then
+
HIVE_239_LIB="$PWD/externals/kyuubi-download/target/apache-hive-2.3.9-bin/lib"
+ rm $HIVE_239_LIB/derby-*
+ wget
https://repo1.maven.org/maven2/org/apache/derby/derby/10.14.2.0/derby-10.14.2.0.jar
-P $HIVE_239_LIB
+ fi
+ ./build/mvn ${MVN_OPT} ${{ matrix.hive-archive }} -pl
${TEST_MODULES} test
- name: Upload test logs
if: failure()
uses: actions/upload-artifact@v3
diff --git a/externals/kyuubi-hive-sql-engine/pom.xml
b/externals/kyuubi-hive-sql-engine/pom.xml
index caed7e27c..89f2395f0 100644
--- a/externals/kyuubi-hive-sql-engine/pom.xml
+++ b/externals/kyuubi-hive-sql-engine/pom.xml
@@ -50,18 +50,6 @@
<version>${project.version}</version>
</dependency>
- <dependency>
- <groupId>org.apache.hive</groupId>
- <artifactId>hive-service-rpc</artifactId>
- <scope>provided</scope>
- </dependency>
-
- <dependency>
- <groupId>org.apache.thrift</groupId>
- <artifactId>libfb303</artifactId>
- <scope>provided</scope>
- </dependency>
-
<dependency>
<groupId>com.google.code.findbugs</groupId>
<artifactId>jsr305</artifactId>
@@ -73,12 +61,6 @@
<artifactId>commons-collections</artifactId>
</dependency>
- <dependency>
- <groupId>org.apache.thrift</groupId>
- <artifactId>libthrift</artifactId>
- <scope>provided</scope>
- </dependency>
-
<dependency>
<groupId>com.google.guava</groupId>
<artifactId>failureaccess</artifactId>
@@ -206,6 +188,18 @@
</excludes>
</filter>
</filters>
+ <relocations>
+ <relocation>
+ <pattern>com.fasterxml.jackson</pattern>
+
<shadedPattern>${kyuubi.shade.packageName}.com.fasterxml.jackson</shadedPattern>
+ <includes>
+ <include>com.fasterxml.jackson.**</include>
+ </includes>
+ </relocation>
+ </relocations>
+ <transformers>
+ <transformer
implementation="org.apache.maven.plugins.shade.resource.ServicesResourceTransformer"></transformer>
+ </transformers>
</configuration>
<executions>
<execution>
diff --git
a/externals/kyuubi-hive-sql-engine/src/main/scala/org/apache/kyuubi/engine/hive/session/HiveSessionManager.scala
b/externals/kyuubi-hive-sql-engine/src/main/scala/org/apache/kyuubi/engine/hive/session/HiveSessionManager.scala
index d09912770..da6879d7e 100644
---
a/externals/kyuubi-hive-sql-engine/src/main/scala/org/apache/kyuubi/engine/hive/session/HiveSessionManager.scala
+++
b/externals/kyuubi-hive-sql-engine/src/main/scala/org/apache/kyuubi/engine/hive/session/HiveSessionManager.scala
@@ -18,6 +18,7 @@
package org.apache.kyuubi.engine.hive.session
import java.io.File
+import java.util.{List => JList}
import java.util.concurrent.Future
import scala.collection.JavaConverters._
@@ -34,6 +35,7 @@ import org.apache.kyuubi.engine.hive.HiveSQLEngine
import org.apache.kyuubi.engine.hive.operation.HiveOperationManager
import org.apache.kyuubi.operation.OperationManager
import org.apache.kyuubi.session.{Session, SessionHandle, SessionManager}
+import org.apache.kyuubi.util.reflect.DynConstructors
class HiveSessionManager(engine: HiveSQLEngine) extends
SessionManager("HiveSessionManager") {
override protected def isServer: Boolean = false
@@ -78,15 +80,37 @@ class HiveSessionManager(engine: HiveSQLEngine) extends
SessionManager("HiveSess
val sessionHandle =
conf.get(KYUUBI_SESSION_HANDLE_KEY).map(SessionHandle.fromUUID).getOrElse(SessionHandle())
val hive = {
- val sessionWithUGI = new ImportedHiveSessionImpl(
- new ImportedSessionHandle(sessionHandle.toTSessionHandle, protocol),
- protocol,
- user,
- password,
- HiveSQLEngine.hiveConf,
- ipAddress,
- null,
- Seq(ipAddress).asJava)
+
+ val sessionWithUGI = DynConstructors.builder()
+ .impl( // for Hive 3.1
+ classOf[ImportedHiveSessionImpl],
+ classOf[ImportedSessionHandle],
+ classOf[TProtocolVersion],
+ classOf[String],
+ classOf[String],
+ classOf[HiveConf],
+ classOf[String],
+ classOf[String],
+ classOf[JList[String]])
+ .impl( // for Hive 2.3
+ classOf[ImportedHiveSessionImpl],
+ classOf[ImportedSessionHandle],
+ classOf[TProtocolVersion],
+ classOf[String],
+ classOf[String],
+ classOf[HiveConf],
+ classOf[String],
+ classOf[String])
+ .build[ImportedHiveSessionImpl]()
+ .newInstance(
+ new ImportedSessionHandle(sessionHandle.toTSessionHandle,
protocol),
+ protocol,
+ user,
+ password,
+ HiveSQLEngine.hiveConf,
+ ipAddress,
+ null,
+ Seq(ipAddress).asJava)
val proxy = HiveSessionProxy.getProxy(sessionWithUGI,
sessionWithUGI.getSessionUgi)
sessionWithUGI.setProxySession(proxy)
proxy
diff --git
a/kyuubi-common/src/main/scala/org/apache/kyuubi/util/JdbcUtils.scala
b/kyuubi-common/src/main/scala/org/apache/kyuubi/util/JdbcUtils.scala
index 996589cb7..4951004b6 100644
--- a/kyuubi-common/src/main/scala/org/apache/kyuubi/util/JdbcUtils.scala
+++ b/kyuubi-common/src/main/scala/org/apache/kyuubi/util/JdbcUtils.scala
@@ -98,6 +98,12 @@ object JdbcUtils extends Logging {
}
}
+ def mapResultSet[R](rs: ResultSet)(rowMapper: ResultSet => R): Seq[R] = {
+ val builder = Seq.newBuilder[R]
+ while (rs.next()) builder += rowMapper(rs)
+ builder.result
+ }
+
def redactPassword(password: Option[String]): String = {
password match {
case Some(s) if StringUtils.isNotBlank(s) => s"${"*" *
s.length}(length:${s.length})"
diff --git
a/kyuubi-common/src/test/scala/org/apache/kyuubi/HiveEngineTests.scala
b/kyuubi-common/src/test/scala/org/apache/kyuubi/HiveEngineTests.scala
index 028f755f6..61cb48b4c 100644
--- a/kyuubi-common/src/test/scala/org/apache/kyuubi/HiveEngineTests.scala
+++ b/kyuubi-common/src/test/scala/org/apache/kyuubi/HiveEngineTests.scala
@@ -23,6 +23,7 @@ import org.apache.commons.lang3.{JavaVersion, SystemUtils}
import org.apache.kyuubi.operation.HiveJDBCTestHelper
import org.apache.kyuubi.operation.meta.ResultSetSchemaConstant._
+import org.apache.kyuubi.util.JdbcUtils
/**
* hive tests disabled for JAVA 11
@@ -229,14 +230,11 @@ trait HiveEngineTests extends HiveJDBCTestHelper {
assume(SystemUtils.isJavaVersionAtMost(JavaVersion.JAVA_1_8))
withJdbcStatement() { statement =>
val resultSet = statement.getConnection.getMetaData.getTableTypes
- val expected = Set("TABLE", "VIEW", "MATERIALIZED_VIEW")
- var tableTypes = Set[String]()
- while (resultSet.next()) {
- assert(expected.contains(resultSet.getString(TABLE_TYPE)))
- tableTypes += resultSet.getString(TABLE_TYPE)
- }
- assert(!resultSet.next())
- assert(expected.size === tableTypes.size)
+ // Hive3 removes support for INDEX_TABLE
+ val hive2Expected = Set("TABLE", "VIEW", "MATERIALIZED_VIEW",
"INDEX_TABLE")
+ val hive3Expected = Set("TABLE", "VIEW", "MATERIALIZED_VIEW")
+ val tableTypes = JdbcUtils.mapResultSet(resultSet) { rs =>
rs.getString(TABLE_TYPE) }.toSet
+ assert(tableTypes === hive2Expected || tableTypes === hive3Expected)
}
}
@@ -387,10 +385,12 @@ trait HiveEngineTests extends HiveJDBCTestHelper {
assert(typeInfo.getInt(DATA_TYPE) === java.sql.Types.TIMESTAMP)
typeInfo.next()
- assert(typeInfo.getString(TYPE_NAME) === "TIMESTAMP WITH LOCAL TIME
ZONE")
- assert(typeInfo.getInt(DATA_TYPE) === java.sql.Types.OTHER)
+ // Hive3 supports TIMESTAMP WITH LOCAL TIME ZONE
+ if (typeInfo.getString(TYPE_NAME) == "TIMESTAMP WITH LOCAL TIME ZONE") {
+ assert(typeInfo.getInt(DATA_TYPE) === java.sql.Types.OTHER)
+ typeInfo.next()
+ }
- typeInfo.next()
assert(typeInfo.getString(TYPE_NAME) === "INTERVAL_YEAR_MONTH")
assert(typeInfo.getInt(DATA_TYPE) === java.sql.Types.OTHER)