This is an automated email from the ASF dual-hosted git repository.
yhu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git
The following commit(s) were added to refs/heads/master by this push:
new d421c98075c Upgrade HCatalogIO to Hive 4.0.1 (#36901)
d421c98075c is described below
commit d421c98075ccaee3e63492356ac44a4d71402801
Author: Yi Hu <[email protected]>
AuthorDate: Tue Nov 25 16:08:39 2025 -0500
Upgrade HCatalogIO to Hive 4.0.1 (#36901)
---
.../beam_PreCommit_Java_HCatalog_IO_Direct.yml | 15 ---------------
.github/workflows/beam_PreCommit_Java_IOs_Direct.yml | 15 ---------------
sdks/java/extensions/sql/hcatalog/build.gradle | 4 ++--
sdks/java/io/hcatalog/build.gradle | 19 ++++++-------------
.../org/apache/beam/sdk/io/hcatalog/HCatalogIO.java | 2 +-
.../io/hcatalog/test/EmbeddedMetastoreService.java | 17 +++++++++--------
6 files changed, 18 insertions(+), 54 deletions(-)
diff --git a/.github/workflows/beam_PreCommit_Java_HCatalog_IO_Direct.yml b/.github/workflows/beam_PreCommit_Java_HCatalog_IO_Direct.yml
index 5c3cf29419c..eb0dcbcc720 100644
--- a/.github/workflows/beam_PreCommit_Java_HCatalog_IO_Direct.yml
+++ b/.github/workflows/beam_PreCommit_Java_HCatalog_IO_Direct.yml
@@ -87,10 +87,6 @@ jobs:
github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }})
- name: Setup environment
uses: ./.github/actions/setup-environment-action
- with:
- java-version: |
- 8
- 11
- name: run HCatalog IO build script
uses: ./.github/actions/gradle-command-self-hosted-action
with:
@@ -98,17 +94,6 @@ jobs:
arguments: |
-PdisableSpotlessCheck=true \
-PdisableCheckStyle=true \
- # TODO(https://github.com/apache/beam/issues/32189) remove when embedded hive supports Java11
- - name: Test HCatalog IO on Java8
- uses: ./.github/actions/gradle-command-self-hosted-action
- with:
- gradle-command: :sdks:java:io:hcatalog:test
- arguments: |
- -PdisableSpotlessCheck=true \
- -PdisableCheckStyle=true \
- -Dfile.encoding=UTF-8 \
- -PtestJavaVersion=8 \
- -Pjava8Home=$JAVA_HOME_8_X64 \
- name: Archive JUnit Test Results
uses: actions/upload-artifact@v4
if: ${{ !success() }}
diff --git a/.github/workflows/beam_PreCommit_Java_IOs_Direct.yml b/.github/workflows/beam_PreCommit_Java_IOs_Direct.yml
index 9d4a347b336..844227a99ba 100644
--- a/.github/workflows/beam_PreCommit_Java_IOs_Direct.yml
+++ b/.github/workflows/beam_PreCommit_Java_IOs_Direct.yml
@@ -88,10 +88,6 @@ jobs:
github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }})
- name: Setup environment
uses: ./.github/actions/setup-environment-action
- with:
- java-version: |
- 8
- 11
- name: run Java IOs PreCommit script
uses: ./.github/actions/gradle-command-self-hosted-action
with:
@@ -100,17 +96,6 @@ jobs:
-PdisableSpotlessCheck=true \
-PdisableCheckStyle=true \
-Dfile.encoding=UTF-8 \
- # TODO(https://github.com/apache/beam/issues/32189) remove when embedded hive supports Java11
- - name: run Java8 IOs PreCommit script
- uses: ./.github/actions/gradle-command-self-hosted-action
- with:
- gradle-command: :sdks:java:io:hcatalog:build
- arguments: |
- -PdisableSpotlessCheck=true \
- -PdisableCheckStyle=true \
- -Dfile.encoding=UTF-8 \
- -PtestJavaVersion=8 \
- -Pjava8Home=$JAVA_HOME_8_X64 \
- name: Archive JUnit Test Results
uses: actions/upload-artifact@v4
if: ${{ !success() }}
diff --git a/sdks/java/extensions/sql/hcatalog/build.gradle b/sdks/java/extensions/sql/hcatalog/build.gradle
index 0a267a6f424..3fe36b7bb81 100644
--- a/sdks/java/extensions/sql/hcatalog/build.gradle
+++ b/sdks/java/extensions/sql/hcatalog/build.gradle
@@ -25,7 +25,7 @@ applyJavaNature(
],
)
-def hive_version = "3.1.3"
+def hive_version = "4.0.1"
def netty_version = "4.1.110.Final"
/*
@@ -42,7 +42,7 @@ dependencies {
implementation project(":sdks:java:io:hcatalog")
implementation project(":sdks:java:core")
implementation library.java.vendored_guava_32_1_2_jre
-
+ testImplementation library.java.junit
testImplementation project(":sdks:java:io:hcatalog").sourceSets.test.output
// Needed for HCatalogTableProvider tests,
// they use HCat* types
diff --git a/sdks/java/io/hcatalog/build.gradle b/sdks/java/io/hcatalog/build.gradle
index d07904f3465..d3bdd8f1076 100644
--- a/sdks/java/io/hcatalog/build.gradle
+++ b/sdks/java/io/hcatalog/build.gradle
@@ -29,8 +29,8 @@ applyJavaNature(
description = "Apache Beam :: SDKs :: Java :: IO :: HCatalog"
ext.summary = "IO to read and write for HCatalog source."
+// hive 4.x is compatible with Hadoop 3.x; Hive 3.x has been EOL as of Oct 2024
def hadoopVersions = [
- "2102": "2.10.2",
"324": "3.2.4",
"336": "3.3.6",
// "341": "3.4.1", // tests already exercised on the default version
@@ -38,7 +38,7 @@ def hadoopVersions = [
hadoopVersions.each {kv -> configurations.create("hadoopVersion$kv.key")}
-def hive_version = "3.1.3"
+def hive_version = "4.0.1"
dependencies {
implementation library.java.vendored_guava_32_1_2_jre
@@ -64,6 +64,10 @@ dependencies {
testImplementation library.java.hamcrest
testImplementation "org.apache.hive.hcatalog:hive-hcatalog-core:$hive_version:tests"
testImplementation "org.apache.hive:hive-exec:$hive_version"
+ // datanucleus dependency version should be in alignment with managed dependencies of hive-standalone-metastore
+ testRuntimeOnly 'org.datanucleus:datanucleus-api-jdo:5.2.8'
+ testRuntimeOnly 'org.datanucleus:datanucleus-rdbms:5.2.10'
+ testRuntimeOnly 'org.datanucleus:javax.jdo:3.2.0-release'
testImplementation "org.apache.hive:hive-common:$hive_version"
testImplementation "org.apache.hive:hive-cli:$hive_version"
testImplementation "org.apache.hive.hcatalog:hive-hcatalog-core:$hive_version"
@@ -105,14 +109,3 @@ hadoopVersions.each { kv ->
include '**/*Test.class'
}
}
-
-project.tasks.withType(Test).configureEach {
- if (JavaVersion.VERSION_1_8.compareTo(JavaVersion.current()) < 0 && project.findProperty('testJavaVersion') != '8') {
- useJUnit {
- filter {
- excludeTestsMatching "org.apache.beam.sdk.io.hcatalog.HCatalogIOTest"
- excludeTestsMatching "org.apache.beam.sdk.io.hcatalog.HCatalogBeamSchemaTest"
- }
- }
- }
-}
diff --git a/sdks/java/io/hcatalog/src/main/java/org/apache/beam/sdk/io/hcatalog/HCatalogIO.java b/sdks/java/io/hcatalog/src/main/java/org/apache/beam/sdk/io/hcatalog/HCatalogIO.java
index 98b13134e3b..ba2674653f6 100644
--- a/sdks/java/io/hcatalog/src/main/java/org/apache/beam/sdk/io/hcatalog/HCatalogIO.java
+++ b/sdks/java/io/hcatalog/src/main/java/org/apache/beam/sdk/io/hcatalog/HCatalogIO.java
@@ -258,7 +258,7 @@ public class HCatalogIO {
}
Read withSplitId(int splitId) {
- checkArgument(splitId >= 0, "Invalid split id-%s", splitId);
+ checkArgument(splitId >= 0, "Invalid split id-" + splitId);
return toBuilder().setSplitId(splitId).build();
}
diff --git a/sdks/java/io/hcatalog/src/test/java/org/apache/beam/sdk/io/hcatalog/test/EmbeddedMetastoreService.java b/sdks/java/io/hcatalog/src/test/java/org/apache/beam/sdk/io/hcatalog/test/EmbeddedMetastoreService.java
index f68f969f29b..1f0774a92c9 100644
--- a/sdks/java/io/hcatalog/src/test/java/org/apache/beam/sdk/io/hcatalog/test/EmbeddedMetastoreService.java
+++ b/sdks/java/io/hcatalog/src/test/java/org/apache/beam/sdk/io/hcatalog/test/EmbeddedMetastoreService.java
@@ -28,7 +28,7 @@ import org.apache.commons.io.FileUtils;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.DriverFactory;
import org.apache.hadoop.hive.ql.IDriver;
-import org.apache.hadoop.hive.ql.processors.CommandProcessorResponse;
+import org.apache.hadoop.hive.ql.processors.CommandProcessorException;
import org.apache.hadoop.hive.ql.session.SessionState;
/**
@@ -58,11 +58,11 @@ public final class EmbeddedMetastoreService implements AutoCloseable {
String testWarehouseDirPath = makePathASafeFileName(testDataDirPath + "/warehouse");
hiveConf = new HiveConf(getClass());
- hiveConf.setVar(HiveConf.ConfVars.PREEXECHOOKS, "");
- hiveConf.setVar(HiveConf.ConfVars.POSTEXECHOOKS, "");
+ hiveConf.setVar(HiveConf.ConfVars.PRE_EXEC_HOOKS, "");
+ hiveConf.setVar(HiveConf.ConfVars.POST_EXEC_HOOKS, "");
hiveConf.setBoolVar(HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY, false);
- hiveConf.setVar(HiveConf.ConfVars.METASTOREWAREHOUSE, testWarehouseDirPath);
- hiveConf.setBoolVar(HiveConf.ConfVars.HIVEOPTIMIZEMETADATAQUERIES, true);
+ hiveConf.setVar(HiveConf.ConfVars.METASTORE_WAREHOUSE, testWarehouseDirPath);
+ hiveConf.setBoolVar(HiveConf.ConfVars.HIVE_OPTIMIZE_METADATA_QUERIES, true);
hiveConf.setVar(
HiveConf.ConfVars.HIVE_AUTHORIZATION_MANAGER,
"org.apache.hadoop.hive.ql.security.authorization.DefaultHiveAuthorizationProvider");
@@ -75,9 +75,10 @@ public final class EmbeddedMetastoreService implements AutoCloseable {
/** Executes the passed query on the embedded metastore service. */
public void executeQuery(String query) {
- CommandProcessorResponse response = driver.run(query);
- if (response.failed()) {
- throw new RuntimeException(response.getException());
+ try {
+ driver.run(query);
+ } catch (CommandProcessorException e) {
+ throw new RuntimeException(e);
}
}