yihua commented on code in PR #9136:
URL: https://github.com/apache/hudi/pull/9136#discussion_r1267072983


##########
.github/workflows/bot.yml:
##########
@@ -112,6 +112,91 @@ jobs:
         run:
          mvn test -Pfunctional-tests -D"$SCALA_PROFILE" -D"$SPARK_PROFILE" -pl "$SPARK_COMMON_MODULES,$SPARK_MODULES" $MVN_ARGS
 
+  test-spark-java17:
+    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        include:
+          - scalaProfile: "scala-2.12"
+            sparkProfile: "spark3.3"
+            sparkModules: "hudi-spark-datasource/hudi-spark3.3.x"
+          - scalaProfile: "scala-2.12"
+            sparkProfile: "spark3.4"
+            sparkModules: "hudi-spark-datasource/hudi-spark3.4.x"
+
+    steps:
+      - uses: actions/checkout@v3
+      - name: Set up JDK 8
+        uses: actions/setup-java@v3
+        with:
+          java-version: '8'
+          distribution: 'adopt'
+          architecture: x64
+      - name: Build Project
+        env:
+          SCALA_PROFILE: ${{ matrix.scalaProfile }}
+          SPARK_PROFILE: ${{ matrix.sparkProfile }}
+        run:
+          mvn clean install -D"$SCALA_PROFILE" -D"$SPARK_PROFILE" -DskipTests=true $MVN_ARGS
+      - name: Set up JDK 17
+        uses: actions/setup-java@v3
+        with:
+          java-version: '17'
+          distribution: 'adopt'
+          architecture: x64
+      - name: Quickstart Test
+        env:
+          SCALA_PROFILE: ${{ matrix.scalaProfile }}
+          SPARK_PROFILE: ${{ matrix.sparkProfile }}
+        run:
+          mvn test -Punit-tests -Pjava17 -D"$SCALA_PROFILE" -D"$SPARK_PROFILE" -pl hudi-examples/hudi-examples-spark $MVN_ARGS
+      - name: UT - Common & Spark
+        env:
+          SCALA_PROFILE: ${{ matrix.scalaProfile }}
+          SPARK_PROFILE: ${{ matrix.sparkProfile }}
+          SPARK_MODULES: ${{ matrix.sparkModules }}
+        if: ${{ !endsWith(env.SPARK_PROFILE, '3.2') }} # skip test spark 3.2 as it's covered by Azure CI
+        run:
+          mvn test -Punit-tests -Pjava17 -D"$SCALA_PROFILE" -D"$SPARK_PROFILE" -pl "hudi-common,$SPARK_COMMON_MODULES,$SPARK_MODULES" $MVN_ARGS
+      - name: FT - Spark
+        env:
+          SCALA_PROFILE: ${{ matrix.scalaProfile }}
+          SPARK_PROFILE: ${{ matrix.sparkProfile }}
+          SPARK_MODULES: ${{ matrix.sparkModules }}
+        if: ${{ !endsWith(env.SPARK_PROFILE, '3.2') }} # skip test spark 3.2 as it's covered by Azure CI
+        run:
+          mvn test -Pfunctional-tests -Pjava17 -D"$SCALA_PROFILE" -D"$SPARK_PROFILE" -pl "$SPARK_COMMON_MODULES,$SPARK_MODULES" $MVN_ARGS
+
+  docker-test-java17:

Review Comment:
   Could this be run with `validate-bundles` since it already validates bundles 
on Java 17?  Any reason to have a separate job here?



##########
.github/workflows/bot.yml:
##########
@@ -112,6 +112,91 @@ jobs:
         run:
          mvn test -Pfunctional-tests -D"$SCALA_PROFILE" -D"$SPARK_PROFILE" -pl "$SPARK_COMMON_MODULES,$SPARK_MODULES" $MVN_ARGS
 
+  test-spark-java17:
+    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        include:
+          - scalaProfile: "scala-2.12"
+            sparkProfile: "spark3.3"
+            sparkModules: "hudi-spark-datasource/hudi-spark3.3.x"
+          - scalaProfile: "scala-2.12"
+            sparkProfile: "spark3.4"
+            sparkModules: "hudi-spark-datasource/hudi-spark3.4.x"
+
+    steps:
+      - uses: actions/checkout@v3
+      - name: Set up JDK 8
+        uses: actions/setup-java@v3
+        with:
+          java-version: '8'
+          distribution: 'adopt'
+          architecture: x64
+      - name: Build Project
+        env:
+          SCALA_PROFILE: ${{ matrix.scalaProfile }}
+          SPARK_PROFILE: ${{ matrix.sparkProfile }}
+        run:
+          mvn clean install -D"$SCALA_PROFILE" -D"$SPARK_PROFILE" -DskipTests=true $MVN_ARGS
+      - name: Set up JDK 17
+        uses: actions/setup-java@v3
+        with:
+          java-version: '17'
+          distribution: 'adopt'
+          architecture: x64
+      - name: Quickstart Test
+        env:
+          SCALA_PROFILE: ${{ matrix.scalaProfile }}
+          SPARK_PROFILE: ${{ matrix.sparkProfile }}
+        run:
+          mvn test -Punit-tests -Pjava17 -D"$SCALA_PROFILE" -D"$SPARK_PROFILE" -pl hudi-examples/hudi-examples-spark $MVN_ARGS
+      - name: UT - Common & Spark
+        env:
+          SCALA_PROFILE: ${{ matrix.scalaProfile }}
+          SPARK_PROFILE: ${{ matrix.sparkProfile }}
+          SPARK_MODULES: ${{ matrix.sparkModules }}
+        if: ${{ !endsWith(env.SPARK_PROFILE, '3.2') }} # skip test spark 3.2 as it's covered by Azure CI
+        run:
+          mvn test -Punit-tests -Pjava17 -D"$SCALA_PROFILE" -D"$SPARK_PROFILE" -pl "hudi-common,$SPARK_COMMON_MODULES,$SPARK_MODULES" $MVN_ARGS
+      - name: FT - Spark
+        env:
+          SCALA_PROFILE: ${{ matrix.scalaProfile }}
+          SPARK_PROFILE: ${{ matrix.sparkProfile }}
+          SPARK_MODULES: ${{ matrix.sparkModules }}
+        if: ${{ !endsWith(env.SPARK_PROFILE, '3.2') }} # skip test spark 3.2 as it's covered by Azure CI
+        run:
+          mvn test -Pfunctional-tests -Pjava17 -D"$SCALA_PROFILE" -D"$SPARK_PROFILE" -pl "$SPARK_COMMON_MODULES,$SPARK_MODULES" $MVN_ARGS
+
+  docker-test-java17:
+    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        include:
+          - flinkProfile: 'flink1.17'
+            sparkProfile: 'spark3.4'
+            sparkRuntime: 'spark3.4.0'

Review Comment:
   Should this use Spark 3.4.1?



##########
hudi-common/src/test/java/org/apache/hudi/common/fs/TestHoodieWrapperFileSystem.java:
##########
@@ -40,11 +44,29 @@ class TestHoodieWrapperFileSystem {
   private static MiniDFSCluster dfsCluster;
 
   @BeforeAll
-  public static void prepareFs() throws IOException {
-    hdfsTestService = new HdfsTestService(HoodieTestUtils.getDefaultHadoopConf());
-    dfsCluster = hdfsTestService.start(true);
-    fs = dfsCluster.getFileSystem();
-    basePath = fs.getWorkingDirectory().toString();
+  public static void setUp() throws IOException {
+    if (getJavaVersion() == 11 || getJavaVersion() == 17) {
+      // For Java 17, this unit test has to run in Docker due to issues with MiniDFSCluster
+      // Need to set -Drun.docker.java17.test=true in mvn command to run this test
+      Assume.assumeTrue(Boolean.valueOf(System.getProperty("run.docker.java17.test", "false")));

Review Comment:
   rename `run.docker.java17.test` to `use.external.hdfs`?
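   Roughly (a sketch only; `use.external.hdfs` is just a suggested name, the guard itself is unchanged):
   ```java
   if (getJavaVersion() == 11 || getJavaVersion() == 17) {
     // MiniDFSCluster can't start on these JDKs, so only proceed when an external HDFS is provided
     Assume.assumeTrue(Boolean.valueOf(System.getProperty("use.external.hdfs", "false")));
   }
   ```
   That name describes what the flag actually controls (whether the test points at an external HDFS) rather than how we happen to run it (Docker on Java 17).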



##########
hudi-common/src/test/java/org/apache/hudi/common/functional/TestHoodieLogFormatAppendFailure.java:
##########
@@ -63,6 +66,9 @@ public class TestHoodieLogFormatAppendFailure {
 
   @BeforeAll
   public static void setUpClass() throws IOException {
+    // This test is not supported yet on Java 17 because MiniDFSCluster can't initialize under Java 17
+    Assume.assumeFalse(getJavaVersion() == 11 || getJavaVersion() == 17);

Review Comment:
   Use `org.junit.jupiter.api.Assumptions.assumeFalse()` and the same class for the rest of the assumption statements?
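   e.g., a sketch of the Jupiter equivalent for this guard:
   ```java
   import static org.junit.jupiter.api.Assumptions.assumeFalse;

   @BeforeAll
   public static void setUpClass() throws IOException {
     // Skip on Java 11/17, where MiniDFSCluster can't initialize
     assumeFalse(getJavaVersion() == 11 || getJavaVersion() == 17);
     // ... rest of the setup unchanged
   }
   ```
   That keeps the assumption handling on a single JUnit 5 API instead of mixing in JUnit 4's `org.junit.Assume`.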



##########
hudi-common/src/test/java/org/apache/hudi/common/functional/TestHoodieLogFormat.java:
##########
@@ -131,15 +134,27 @@ public class TestHoodieLogFormat extends HoodieCommonTestHarness {
   private String spillableBasePath;
 
   @BeforeAll
-  public static void setUpClass() throws IOException, InterruptedException {
-    // Append is not supported in LocalFileSystem. HDFS needs to be setup.
-    hdfsTestService = new HdfsTestService();
-    fs = hdfsTestService.start(true).getFileSystem();
+  public static void setUpClass() throws IOException {
+    if (getJavaVersion() == 11 || getJavaVersion() == 17) {
+      // For Java 17, this unit test has to run in Docker
+      // Need to set -Drun.docker.java17.test=true in mvn command to run this test
+      Assume.assumeTrue(Boolean.valueOf(System.getProperty("run.docker.java17.test", "false")));

Review Comment:
   Same here on the naming.  Could you also wrap the logic of using external HDFS into a util method?
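   Rough sketch of the util I have in mind (the method name, its location, and the `use.external.hdfs` / `external.hdfs.uri` properties are placeholders, not existing APIs):
   ```java
   import static org.junit.jupiter.api.Assumptions.assumeTrue;

   import java.io.IOException;
   import java.net.URI;

   import org.apache.hadoop.conf.Configuration;
   import org.apache.hadoop.fs.FileSystem;

   // Hypothetical helper; name, location, and both system properties are placeholders
   public static FileSystem getTestFileSystem() throws IOException {
     if (getJavaVersion() == 11 || getJavaVersion() == 17) {
       // MiniDFSCluster can't start on these JDKs; only proceed when an external HDFS is supplied
       assumeTrue(Boolean.parseBoolean(System.getProperty("use.external.hdfs", "false")));
       return FileSystem.get(
           URI.create(System.getProperty("external.hdfs.uri", "hdfs://localhost:9000")),
           new Configuration());
     }
     // Otherwise spin up the usual in-process mini cluster (teardown handling omitted in this sketch)
     return new HdfsTestService().start(true).getFileSystem();
   }
   ```
   Then both test classes could call the same helper instead of duplicating the version check and the assumption.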



##########
packaging/bundle-validation/Dockerfile:
##########
@@ -31,3 +34,21 @@ RUN if [[ $SPARK_HOME =~ 'spark-3.2' ]] || [[ $SPARK_HOME =~ 'spark-3.3' ]]; \
    then printf "\nspark.sql.catalog.spark_catalog org.apache.spark.sql.hudi.catalog.HoodieCatalog\n" >> $SPARK_HOME/conf/spark-defaults.conf; fi
 RUN printf "\ntaskmanager.numberOfTaskSlots: 2\n" >> $FLINK_HOME/conf/flink-conf.yaml
 RUN printf "\nlocalhost\n" >> $FLINK_HOME/conf/workers
+
+# install maven
+RUN wget https://archive.apache.org/dist/maven/maven-3/3.6.3/binaries/apache-maven-3.6.3-bin.tar.gz
+RUN mkdir -p /usr/share/maven
+RUN tar xzvf apache-maven-3.6.3-bin.tar.gz -C /usr/share/maven
+ENV MAVEN_HOME=/usr/share/maven/apache-maven-3.6.3
+ENV PATH=$MAVEN_HOME/bin:$PATH
+RUN mvn --version
+
+# Hadoop env
+ENV HDFS_NAMENODE_USER root
+ENV HDFS_DATANODE_USER root
+ENV HDFS_SECONDARYNAMENODE_USER root
+ENV YARN_RESOURCEMANAGER_USER root
+ENV YARN_NODEMANAGER_USER root
+
+# for RocksDb
+RUN apk add --no-cache libstdc++

Review Comment:
   Could you make this run conditionally for Java 17 / HDFS tests only?  Same for the `core-site.xml` and `hdfs-site.xml`?



##########
hudi-common/src/test/java/org/apache/hudi/avro/TestHoodieAvroUtils.java:
##########
@@ -450,10 +450,8 @@ public void testGenerateProjectionSchema() {
     assertTrue(fieldNames1.contains("_row_key"));
     assertTrue(fieldNames1.contains("timestamp"));
 
-    assertEquals("Field fake_field not found in log schema. Query cannot 
proceed! Derived Schema Fields: "
-            + "[non_pii_col, _hoodie_commit_time, _row_key, 
_hoodie_partition_path, _hoodie_record_key, pii_col,"
-            + " _hoodie_commit_seqno, _hoodie_file_name, timestamp]",
-        assertThrows(HoodieException.class, () ->
-            HoodieAvroUtils.generateProjectionSchema(originalSchema, 
Arrays.asList("_row_key", "timestamp", "fake_field"))).getMessage());
+    assertTrue(assertThrows(HoodieException.class, () ->
+            HoodieAvroUtils.generateProjectionSchema(originalSchema, 
Arrays.asList("_row_key", "timestamp", "fake_field")))
+        .getMessage().contains("Field fake_field not found in log schema. 
Query cannot proceed!"));

Review Comment:
   Why does this fail now?



##########
hudi-common/pom.xml:
##########
@@ -248,6 +248,13 @@
       </exclusions>
     </dependency>
 
+    <dependency>
+      <groupId>org.apache.spark</groupId>
+      <artifactId>spark-streaming-kafka-0-10_${scala.binary.version}</artifactId>
+      <scope>test</scope>
+      <version>${spark.version}</version>
+    </dependency>

Review Comment:
   Why is this Spark test dependency included here in `hudi-common`, which is engine-agnostic?



##########
packaging/bundle-validation/ci_run.sh:
##########
@@ -110,95 +112,116 @@ fi
 TMP_JARS_DIR=/tmp/jars/$(date +%s)
 mkdir -p $TMP_JARS_DIR
 
-if [[ "$HUDI_VERSION" == *"SNAPSHOT" ]]; then
-  cp ${GITHUB_WORKSPACE}/packaging/hudi-flink-bundle/target/hudi-*-$HUDI_VERSION.jar $TMP_JARS_DIR/
-  cp ${GITHUB_WORKSPACE}/packaging/hudi-hadoop-mr-bundle/target/hudi-*-$HUDI_VERSION.jar $TMP_JARS_DIR/
-  cp ${GITHUB_WORKSPACE}/packaging/hudi-kafka-connect-bundle/target/hudi-*-$HUDI_VERSION.jar $TMP_JARS_DIR/
-  cp ${GITHUB_WORKSPACE}/packaging/hudi-spark-bundle/target/hudi-*-$HUDI_VERSION.jar $TMP_JARS_DIR/
-  cp ${GITHUB_WORKSPACE}/packaging/hudi-utilities-bundle/target/hudi-*-$HUDI_VERSION.jar $TMP_JARS_DIR/
-  cp ${GITHUB_WORKSPACE}/packaging/hudi-utilities-slim-bundle/target/hudi-*-$HUDI_VERSION.jar $TMP_JARS_DIR/
-  cp ${GITHUB_WORKSPACE}/packaging/hudi-metaserver-server-bundle/target/hudi-*-$HUDI_VERSION.jar $TMP_JARS_DIR/
-  echo 'Validating jars below:'
-else
-  echo 'Adding environment variables for bundles in the release candidate'
-
-  HUDI_HADOOP_MR_BUNDLE_NAME=hudi-hadoop-mr-bundle
-  HUDI_KAFKA_CONNECT_BUNDLE_NAME=hudi-kafka-connect-bundle
-  HUDI_METASERVER_SERVER_BUNDLE_NAME=hudi-metaserver-server-bundle
-
-  if [[ ${SPARK_PROFILE} == 'spark' ]]; then
-    HUDI_SPARK_BUNDLE_NAME=hudi-spark-bundle_2.11
-    HUDI_UTILITIES_BUNDLE_NAME=hudi-utilities-bundle_2.11
-    HUDI_UTILITIES_SLIM_BUNDLE_NAME=hudi-utilities-slim-bundle_2.11
-  elif [[ ${SPARK_PROFILE} == 'spark2.4' ]]; then
-    HUDI_SPARK_BUNDLE_NAME=hudi-spark2.4-bundle_2.11
-    HUDI_UTILITIES_BUNDLE_NAME=hudi-utilities-bundle_2.11
-    HUDI_UTILITIES_SLIM_BUNDLE_NAME=hudi-utilities-slim-bundle_2.11
-  elif [[ ${SPARK_PROFILE} == 'spark3.1' ]]; then
-    HUDI_SPARK_BUNDLE_NAME=hudi-spark3.1-bundle_2.12
-    HUDI_UTILITIES_BUNDLE_NAME=hudi-utilities-bundle_2.12
-    HUDI_UTILITIES_SLIM_BUNDLE_NAME=hudi-utilities-slim-bundle_2.12
-  elif [[ ${SPARK_PROFILE} == 'spark3.2' ]]; then
-    HUDI_SPARK_BUNDLE_NAME=hudi-spark3.2-bundle_2.12
-    HUDI_UTILITIES_BUNDLE_NAME=hudi-utilities-bundle_2.12
-    HUDI_UTILITIES_SLIM_BUNDLE_NAME=hudi-utilities-slim-bundle_2.12
-  elif [[ ${SPARK_PROFILE} == 'spark3.3' ]]; then
-    HUDI_SPARK_BUNDLE_NAME=hudi-spark3.3-bundle_2.12
-    HUDI_UTILITIES_BUNDLE_NAME=hudi-utilities-bundle_2.12
-    HUDI_UTILITIES_SLIM_BUNDLE_NAME=hudi-utilities-slim-bundle_2.12
-  elif [[ ${SPARK_PROFILE} == 'spark3' ]]; then
-    HUDI_SPARK_BUNDLE_NAME=hudi-spark3-bundle_2.12
-    HUDI_UTILITIES_BUNDLE_NAME=hudi-utilities-bundle_2.12
-    HUDI_UTILITIES_SLIM_BUNDLE_NAME=hudi-utilities-slim-bundle_2.12
-  fi
+if [[ -z "$MODE" ]] || [[ "$MODE" != "java17" ]]; then
+  if [[ "$HUDI_VERSION" == *"SNAPSHOT" ]]; then
+    cp ${GITHUB_WORKSPACE}/packaging/hudi-flink-bundle/target/hudi-*-$HUDI_VERSION.jar $TMP_JARS_DIR/
+    cp ${GITHUB_WORKSPACE}/packaging/hudi-hadoop-mr-bundle/target/hudi-*-$HUDI_VERSION.jar $TMP_JARS_DIR/
+    cp ${GITHUB_WORKSPACE}/packaging/hudi-kafka-connect-bundle/target/hudi-*-$HUDI_VERSION.jar $TMP_JARS_DIR/
+    cp ${GITHUB_WORKSPACE}/packaging/hudi-spark-bundle/target/hudi-*-$HUDI_VERSION.jar $TMP_JARS_DIR/
+    cp ${GITHUB_WORKSPACE}/packaging/hudi-utilities-bundle/target/hudi-*-$HUDI_VERSION.jar $TMP_JARS_DIR/
+    cp ${GITHUB_WORKSPACE}/packaging/hudi-utilities-slim-bundle/target/hudi-*-$HUDI_VERSION.jar $TMP_JARS_DIR/
+    cp ${GITHUB_WORKSPACE}/packaging/hudi-metaserver-server-bundle/target/hudi-*-$HUDI_VERSION.jar $TMP_JARS_DIR/
+    echo 'Validating jars below:'
+  else
+    echo 'Adding environment variables for bundles in the release candidate'
+
+    HUDI_HADOOP_MR_BUNDLE_NAME=hudi-hadoop-mr-bundle
+    HUDI_KAFKA_CONNECT_BUNDLE_NAME=hudi-kafka-connect-bundle
+    HUDI_METASERVER_SERVER_BUNDLE_NAME=hudi-metaserver-server-bundle
+
+    if [[ ${SPARK_PROFILE} == 'spark' ]]; then
+      HUDI_SPARK_BUNDLE_NAME=hudi-spark-bundle_2.11
+      HUDI_UTILITIES_BUNDLE_NAME=hudi-utilities-bundle_2.11
+      HUDI_UTILITIES_SLIM_BUNDLE_NAME=hudi-utilities-slim-bundle_2.11
+    elif [[ ${SPARK_PROFILE} == 'spark2.4' ]]; then
+      HUDI_SPARK_BUNDLE_NAME=hudi-spark2.4-bundle_2.11
+      HUDI_UTILITIES_BUNDLE_NAME=hudi-utilities-bundle_2.11
+      HUDI_UTILITIES_SLIM_BUNDLE_NAME=hudi-utilities-slim-bundle_2.11
+    elif [[ ${SPARK_PROFILE} == 'spark3.1' ]]; then
+      HUDI_SPARK_BUNDLE_NAME=hudi-spark3.1-bundle_2.12
+      HUDI_UTILITIES_BUNDLE_NAME=hudi-utilities-bundle_2.12
+      HUDI_UTILITIES_SLIM_BUNDLE_NAME=hudi-utilities-slim-bundle_2.12
+    elif [[ ${SPARK_PROFILE} == 'spark3.2' ]]; then
+      HUDI_SPARK_BUNDLE_NAME=hudi-spark3.2-bundle_2.12
+      HUDI_UTILITIES_BUNDLE_NAME=hudi-utilities-bundle_2.12
+      HUDI_UTILITIES_SLIM_BUNDLE_NAME=hudi-utilities-slim-bundle_2.12
+    elif [[ ${SPARK_PROFILE} == 'spark3.3' ]]; then
+      HUDI_SPARK_BUNDLE_NAME=hudi-spark3.3-bundle_2.12
+      HUDI_UTILITIES_BUNDLE_NAME=hudi-utilities-bundle_2.12
+      HUDI_UTILITIES_SLIM_BUNDLE_NAME=hudi-utilities-slim-bundle_2.12
+    elif [[ ${SPARK_PROFILE} == 'spark3' ]]; then
+      HUDI_SPARK_BUNDLE_NAME=hudi-spark3-bundle_2.12
+      HUDI_UTILITIES_BUNDLE_NAME=hudi-utilities-bundle_2.12
+      HUDI_UTILITIES_SLIM_BUNDLE_NAME=hudi-utilities-slim-bundle_2.12
+    fi
+
+    if [[ ${FLINK_PROFILE} == 'flink1.13' ]]; then
+      HUDI_FLINK_BUNDLE_NAME=hudi-flink1.13-bundle
+    elif [[ ${FLINK_PROFILE} == 'flink1.14' ]]; then
+      HUDI_FLINK_BUNDLE_NAME=hudi-flink1.14-bundle
+    elif [[ ${FLINK_PROFILE} == 'flink1.15' ]]; then
+      HUDI_FLINK_BUNDLE_NAME=hudi-flink1.15-bundle
+    elif [[ ${FLINK_PROFILE} == 'flink1.16' ]]; then
+      HUDI_FLINK_BUNDLE_NAME=hudi-flink1.16-bundle
+    elif [[ ${FLINK_PROFILE} == 'flink1.17' ]]; then
+      HUDI_FLINK_BUNDLE_NAME=hudi-flink1.17-bundle
+    fi
 
-  if [[ ${FLINK_PROFILE} == 'flink1.13' ]]; then
-    HUDI_FLINK_BUNDLE_NAME=hudi-flink1.13-bundle
-  elif [[ ${FLINK_PROFILE} == 'flink1.14' ]]; then
-    HUDI_FLINK_BUNDLE_NAME=hudi-flink1.14-bundle
-  elif [[ ${FLINK_PROFILE} == 'flink1.15' ]]; then
-    HUDI_FLINK_BUNDLE_NAME=hudi-flink1.15-bundle
-  elif [[ ${FLINK_PROFILE} == 'flink1.16' ]]; then
-    HUDI_FLINK_BUNDLE_NAME=hudi-flink1.16-bundle
-  elif [[ ${FLINK_PROFILE} == 'flink1.17' ]]; then
-    HUDI_FLINK_BUNDLE_NAME=hudi-flink1.17-bundle
+    echo "Downloading bundle jars from staging repo 
orgapachehudi-$STAGING_REPO_NUM ..."
+    
REPO_BASE_URL=https://repository.apache.org/content/repositories/orgapachehudi-$STAGING_REPO_NUM/org/apache/hudi
+    wget -q 
$REPO_BASE_URL/$HUDI_FLINK_BUNDLE_NAME/$HUDI_VERSION/$HUDI_FLINK_BUNDLE_NAME-$HUDI_VERSION.jar
 -P $TMP_JARS_DIR/
+    wget -q 
$REPO_BASE_URL/$HUDI_HADOOP_MR_BUNDLE_NAME/$HUDI_VERSION/$HUDI_HADOOP_MR_BUNDLE_NAME-$HUDI_VERSION.jar
 -P $TMP_JARS_DIR/
+    wget -q 
$REPO_BASE_URL/$HUDI_KAFKA_CONNECT_BUNDLE_NAME/$HUDI_VERSION/$HUDI_KAFKA_CONNECT_BUNDLE_NAME-$HUDI_VERSION.jar
 -P $TMP_JARS_DIR/
+    wget -q 
$REPO_BASE_URL/$HUDI_SPARK_BUNDLE_NAME/$HUDI_VERSION/$HUDI_SPARK_BUNDLE_NAME-$HUDI_VERSION.jar
 -P $TMP_JARS_DIR/
+    wget -q 
$REPO_BASE_URL/$HUDI_UTILITIES_BUNDLE_NAME/$HUDI_VERSION/$HUDI_UTILITIES_BUNDLE_NAME-$HUDI_VERSION.jar
 -P $TMP_JARS_DIR/
+    wget -q 
$REPO_BASE_URL/$HUDI_UTILITIES_SLIM_BUNDLE_NAME/$HUDI_VERSION/$HUDI_UTILITIES_SLIM_BUNDLE_NAME-$HUDI_VERSION.jar
 -P $TMP_JARS_DIR/
+    wget -q 
$REPO_BASE_URL/$HUDI_METASERVER_SERVER_BUNDLE_NAME/$HUDI_VERSION/$HUDI_METASERVER_SERVER_BUNDLE_NAME-$HUDI_VERSION.jar
 -P $TMP_JARS_DIR/
+    echo "Downloaded these jars from $REPO_BASE_URL for validation:"
   fi
 
-  echo "Downloading bundle jars from staging repo 
orgapachehudi-$STAGING_REPO_NUM ..."
-  
REPO_BASE_URL=https://repository.apache.org/content/repositories/orgapachehudi-$STAGING_REPO_NUM/org/apache/hudi
-  wget -q 
$REPO_BASE_URL/$HUDI_FLINK_BUNDLE_NAME/$HUDI_VERSION/$HUDI_FLINK_BUNDLE_NAME-$HUDI_VERSION.jar
 -P $TMP_JARS_DIR/
-  wget -q 
$REPO_BASE_URL/$HUDI_HADOOP_MR_BUNDLE_NAME/$HUDI_VERSION/$HUDI_HADOOP_MR_BUNDLE_NAME-$HUDI_VERSION.jar
 -P $TMP_JARS_DIR/
-  wget -q 
$REPO_BASE_URL/$HUDI_KAFKA_CONNECT_BUNDLE_NAME/$HUDI_VERSION/$HUDI_KAFKA_CONNECT_BUNDLE_NAME-$HUDI_VERSION.jar
 -P $TMP_JARS_DIR/
-  wget -q 
$REPO_BASE_URL/$HUDI_SPARK_BUNDLE_NAME/$HUDI_VERSION/$HUDI_SPARK_BUNDLE_NAME-$HUDI_VERSION.jar
 -P $TMP_JARS_DIR/
-  wget -q 
$REPO_BASE_URL/$HUDI_UTILITIES_BUNDLE_NAME/$HUDI_VERSION/$HUDI_UTILITIES_BUNDLE_NAME-$HUDI_VERSION.jar
 -P $TMP_JARS_DIR/
-  wget -q 
$REPO_BASE_URL/$HUDI_UTILITIES_SLIM_BUNDLE_NAME/$HUDI_VERSION/$HUDI_UTILITIES_SLIM_BUNDLE_NAME-$HUDI_VERSION.jar
 -P $TMP_JARS_DIR/
-  wget -q 
$REPO_BASE_URL/$HUDI_METASERVER_SERVER_BUNDLE_NAME/$HUDI_VERSION/$HUDI_METASERVER_SERVER_BUNDLE_NAME-$HUDI_VERSION.jar
 -P $TMP_JARS_DIR/
-  echo "Downloaded these jars from $REPO_BASE_URL for validation:"
-fi
+  ls -l $TMP_JARS_DIR
+
+  # Copy test dataset
+  TMP_DATA_DIR=/tmp/data/$(date +%s)
+  mkdir -p $TMP_DATA_DIR/stocks/data
+  cp ${GITHUB_WORKSPACE}/docker/demo/data/*.json $TMP_DATA_DIR/stocks/data/
+  cp ${GITHUB_WORKSPACE}/docker/demo/config/schema.avsc $TMP_DATA_DIR/stocks/
+
+  # build docker image
+  cd ${GITHUB_WORKSPACE}/packaging/bundle-validation || exit 1
+  docker build \
+  --build-arg HADOOP_VERSION=$HADOOP_VERSION \
+  --build-arg HIVE_VERSION=$HIVE_VERSION \
+  --build-arg DERBY_VERSION=$DERBY_VERSION \
+  --build-arg FLINK_VERSION=$FLINK_VERSION \
+  --build-arg SPARK_VERSION=$SPARK_VERSION \
+  --build-arg SPARK_HADOOP_VERSION=$SPARK_HADOOP_VERSION \
+  --build-arg CONFLUENT_VERSION=$CONFLUENT_VERSION \
+  --build-arg KAFKA_CONNECT_HDFS_VERSION=$KAFKA_CONNECT_HDFS_VERSION \
+  --build-arg IMAGE_TAG=$IMAGE_TAG \
+  -t hudi-ci-bundle-validation:$IMAGE_TAG \
+  .
 
-ls -l $TMP_JARS_DIR
-
-# Copy test dataset
-TMP_DATA_DIR=/tmp/data/$(date +%s)
-mkdir -p $TMP_DATA_DIR/stocks/data
-cp ${GITHUB_WORKSPACE}/docker/demo/data/*.json $TMP_DATA_DIR/stocks/data/
-cp ${GITHUB_WORKSPACE}/docker/demo/config/schema.avsc $TMP_DATA_DIR/stocks/
-
-# build docker image
-cd ${GITHUB_WORKSPACE}/packaging/bundle-validation || exit 1
-docker build \
---build-arg HADOOP_VERSION=$HADOOP_VERSION \
---build-arg HIVE_VERSION=$HIVE_VERSION \
---build-arg DERBY_VERSION=$DERBY_VERSION \
---build-arg FLINK_VERSION=$FLINK_VERSION \
---build-arg SPARK_VERSION=$SPARK_VERSION \
---build-arg SPARK_HADOOP_VERSION=$SPARK_HADOOP_VERSION \
---build-arg CONFLUENT_VERSION=$CONFLUENT_VERSION \
---build-arg KAFKA_CONNECT_HDFS_VERSION=$KAFKA_CONNECT_HDFS_VERSION \
---build-arg IMAGE_TAG=$IMAGE_TAG \
--t hudi-ci-bundle-validation:$IMAGE_TAG \
-.
-
-# run validation script in docker
-docker run -v $TMP_JARS_DIR:/opt/bundle-validation/jars -v $TMP_DATA_DIR:/opt/bundle-validation/data \
-  -i hudi-ci-bundle-validation:$IMAGE_TAG bash validate.sh $JAVA_RUNTIME_VERSION
+  # run validation script in docker
+  docker run -v $TMP_JARS_DIR:/opt/bundle-validation/jars -v $TMP_DATA_DIR:/opt/bundle-validation/data \
+    -i hudi-ci-bundle-validation:$IMAGE_TAG bash validate.sh $JAVA_RUNTIME_VERSION
+else
+  echo "Running Docker test for Java 17, skipping jar copying"
+  # build docker image
+  cd ${GITHUB_WORKSPACE}/packaging/bundle-validation || exit 1
+  docker build \
+  --build-arg HADOOP_VERSION=$HADOOP_VERSION \
+  --build-arg HIVE_VERSION=$HIVE_VERSION \
+  --build-arg DERBY_VERSION=$DERBY_VERSION \
+  --build-arg FLINK_VERSION=$FLINK_VERSION \
+  --build-arg SPARK_VERSION=$SPARK_VERSION \
+  --build-arg SPARK_HADOOP_VERSION=$SPARK_HADOOP_VERSION \
+  --build-arg CONFLUENT_VERSION=$CONFLUENT_VERSION \
+  --build-arg KAFKA_CONNECT_HDFS_VERSION=$KAFKA_CONNECT_HDFS_VERSION \
+  --build-arg IMAGE_TAG=$IMAGE_TAG \
+  -t hudi-ci-bundle-validation:$IMAGE_TAG \
+  .
+
+  docker run -v ${GITHUB_WORKSPACE}:/opt/bundle-validation/docker-test \
+    -i hudi-ci-bundle-validation:$IMAGE_TAG bash docker_test_java17.sh $JAVA_RUNTIME_VERSION $SPARK_PROFILE $SCALA_PROFILE

Review Comment:
   Either we consolidate the Java 17 tests into bundle validation (conditionally) or we create a new script for the Java 17 tests only.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]
