This is an automated email from the ASF dual-hosted git repository.
yufei pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/polaris.git
The following commit(s) were added to refs/heads/main by this push:
new 893722cfb feat: initial hudi reg test (#3641)
893722cfb is described below
commit 893722cfbb2a1867442771da952fcfbd7314a279
Author: Rahil C <[email protected]>
AuthorDate: Tue Feb 17 19:51:40 2026 -0500
feat: initial hudi reg test (#3641)
---
plugins/spark/v3.5/regtests/README.md | 45 +++++++
plugins/spark/v3.5/regtests/run.sh | 149 ++++++++++++++-------
plugins/spark/v3.5/regtests/setup.sh | 54 +++++++-
.../{spark_sql.ref => suites/spark_sql_delta.ref} | 0
.../{spark_sql.sh => suites/spark_sql_delta.sh} | 0
.../spark/v3.5/regtests/suites/spark_sql_hudi.ref | 45 +++++++
.../{spark_sql.sh => suites/spark_sql_hudi.sh} | 62 +++++----
7 files changed, 277 insertions(+), 78 deletions(-)
diff --git a/plugins/spark/v3.5/regtests/README.md
b/plugins/spark/v3.5/regtests/README.md
index 06a0ccd13..de3355204 100755
--- a/plugins/spark/v3.5/regtests/README.md
+++ b/plugins/spark/v3.5/regtests/README.md
@@ -84,3 +84,48 @@ Note: the regression tests expect Polaris to run with
certain options, e.g. with
storage, default realm `POLARIS` and root credentials `root:secret`; if you
run the above command,
this will be the case. If you run Polaris in a different way, make sure that
Polaris is configured
appropriately.
+
+## Running Specific Test Suites
+
+By default, `run.sh` auto-discovers and executes all test suites in the
`suites/` directory.
+To run a specific suite, use the `REGTEST_SUITE` environment variable with
just the test name:
+
+```bash
+# Run only Delta tests
+env POLARIS_HOST=localhost REGTEST_SUITE=spark_sql_delta
./plugins/spark/v3.5/regtests/run.sh
+
+# Run only Hudi tests
+env POLARIS_HOST=localhost REGTEST_SUITE=spark_sql_hudi
./plugins/spark/v3.5/regtests/run.sh
+```
+
+## Adding a New Test Suite
+
+Test suites are auto-discovered from the `suites/` directory. To add a new
test:
+
+1. Create `suites/<descriptive_name>_<table_format>.sh` (must be executable)
+2. Create `suites/<descriptive_name>_<table_format>.ref` (expected output)
+3. The table format is automatically parsed from the last segment before `.sh`
+4. Supported table formats: `delta`, `hudi`
+
+## Table Format Support
+
+The regression tests support multiple table formats through the
`--tableFormat` parameter in `setup.sh`:
+
+- **Delta** (default): Uses `DeltaCatalog` for `spark_catalog`. Tests both
Iceberg and Delta tables.
+- **Hudi**: Uses `HoodieCatalog` for `spark_catalog`. Tests both Iceberg and
Hudi tables.
+
+Each test suite is isolated with its own Spark configuration and catalog
setup. The `spark_catalog`
+can only be configured to one catalog implementation at a time, which is why
separate test suites
+are needed for Delta and Hudi formats.
+
+### Manual Setup
+
+You can manually run `setup.sh` with a specific table format:
+
+```bash
+# Setup for Delta tables (default)
+./plugins/spark/v3.5/regtests/setup.sh --sparkVersion 3.5.6 --scalaVersion
2.12 --polarisVersion 0.1.0 --tableFormat delta
+
+# Setup for Hudi tables
+./plugins/spark/v3.5/regtests/setup.sh --sparkVersion 3.5.6 --scalaVersion
2.12 --polarisVersion 0.1.0 --tableFormat hudi
+```
diff --git a/plugins/spark/v3.5/regtests/run.sh
b/plugins/spark/v3.5/regtests/run.sh
index cc84c0411..d925f7e75 100755
--- a/plugins/spark/v3.5/regtests/run.sh
+++ b/plugins/spark/v3.5/regtests/run.sh
@@ -70,6 +70,52 @@ SPARK_VERSION="3.5.6"
SPARK_SHELL_OPTIONS=("PACKAGE" "JAR")
+# Auto-discover test suites from the suites/ directory
+# Test files must follow naming convention: <name>_<table_format>.sh
+SUITES_DIR="${SCRIPT_DIR}/suites"
+
+if [[ ! -d "$SUITES_DIR" ]]; then
+ logred "Error: Test suites directory not found: ${SUITES_DIR}"
+ exit 1
+fi
+
+# Parses a test suite filename (e.g. "spark_sql_delta.sh") to extract:
+# TABLE_FORMAT - the table format suffix after the last '_' (e.g. "delta")
+# TEST_SHORTNAME - the base name without the .sh extension (e.g.
"spark_sql_delta")
+# TEST_FILE - the full path to the suite file under SUITES_DIR
+parse_test_suite() {
+ local filename="$1"
+ local base="${filename%.sh}"
+ TABLE_FORMAT="${base##*_}"
+ TEST_SHORTNAME="${base}"
+ TEST_FILE="${SUITES_DIR}/${filename}"
+}
+
+declare -a TEST_SUITES=()
+for test_file in "${SUITES_DIR}"/*.sh; do
+ [[ -f "$test_file" ]] || continue
+ TEST_SUITES+=("$(basename "$test_file")")
+done
+
+if [[ ${#TEST_SUITES[@]} -eq 0 ]]; then
+ logred "Error: No test suites found in ${SUITES_DIR}"
+ exit 1
+fi
+
+# Allow running specific test via environment variable
+echo "REGTEST_SUITE=${REGTEST_SUITE:-}"
+if [[ -n "${REGTEST_SUITE:-}" ]]; then
+ REGTEST_SUITE="${REGTEST_SUITE%.sh}"
+ SUITE_FILE="${REGTEST_SUITE}.sh"
+ if [[ ! -f "${SUITES_DIR}/${SUITE_FILE}" ]]; then
+ logred "Error: Test suite not found: ${SUITES_DIR}/${SUITE_FILE}"
+ exit 1
+ fi
+ echo "Overriding TEST_SUITES to run only: ${REGTEST_SUITE}"
+ TEST_SUITES=("${SUITE_FILE}")
+fi
+echo "Will run test suites: ${TEST_SUITES[*]}"
+
for SCALA_VERSION in "${SCALA_VERSIONS[@]}"; do
echo "RUN REGRESSION TEST FOR SPARK_MAJOR_VERSION=${SPARK_MAJOR_VERSION},
SPARK_VERSION=${SPARK_VERSION}, SCALA_VERSION=${SCALA_VERSION}"
# find the project jar
@@ -89,55 +135,64 @@ for SCALA_VERSION in "${SCALA_VERSIONS[@]}"; do
fi
for SPARK_SHELL_OPTION in "${SPARK_SHELL_OPTIONS[@]}"; do
- # clean up the default configuration if exists
- if [ -f "${SPARK_HOME}" ]; then
- SPARK_CONF="${SPARK_HOME}/conf/spark-defaults.conf"
- if [ -f ${SPARK_CONF} ]; then
- rm ${SPARK_CONF}
- fi
- fi
-
- if [ "${SPARK_SHELL_OPTION}" == "PACKAGE" ]; then
- # run the setup without jar configuration
- source ${SCRIPT_DIR}/setup.sh --sparkVersion ${SPARK_VERSION}
--scalaVersion ${SCALA_VERSION} --polarisVersion ${POLARIS_VERSION}
- else
- source ${SCRIPT_DIR}/setup.sh --sparkVersion ${SPARK_VERSION}
--scalaVersion ${SCALA_VERSION} --polarisVersion ${POLARIS_VERSION} --jar
${JAR_PATH}
- fi
-
- # run the spark_sql test
- loginfo "Starting test spark_sql.sh"
-
- TEST_FILE="spark_sql.sh"
- TEST_SHORTNAME="spark_sql"
-
TEST_TMPDIR="/tmp/polaris-spark-regtests/${TEST_SHORTNAME}_${SPARK_MAJOR_VERSION}_${SCALA_VERSION}"
- TEST_STDERR="${TEST_TMPDIR}/${TEST_SHORTNAME}.stderr"
- TEST_STDOUT="${TEST_TMPDIR}/${TEST_SHORTNAME}.stdout"
-
- mkdir -p ${TEST_TMPDIR}
- if (( ${VERBOSE} )); then
- ${SCRIPT_DIR}/${TEST_FILE} 2>${TEST_STDERR} | grep -v 'loading settings'
| tee ${TEST_STDOUT}
- else
- ${SCRIPT_DIR}/${TEST_FILE} 2>${TEST_STDERR} | grep -v 'loading settings'
> ${TEST_STDOUT}
- fi
- loginfo "Test run concluded for ${TEST_SUITE}:${TEST_SHORTNAME}"
-
- TEST_REF="$(realpath ${SCRIPT_DIR})/${TEST_SHORTNAME}.ref"
- if cmp --silent ${TEST_STDOUT} ${TEST_REF}; then
- loggreen "Test SUCCEEDED: ${TEST_SUITE}:${TEST_SHORTNAME}"
- else
- logred "Test FAILED: ${TEST_SUITE}:${TEST_SHORTNAME}"
- echo '#!/bin/bash' > ${TEST_TMPDIR}/${TEST_SHORTNAME}.fixdiffs.sh
- echo "meld ${TEST_STDOUT} ${TEST_REF}" >>
${TEST_TMPDIR}/${TEST_SHORTNAME}.fixdiffs.sh
- chmod 750 ${TEST_TMPDIR}/${TEST_SHORTNAME}.fixdiffs.sh
- logred "To compare and fix diffs (if 'meld' installed):
${TEST_TMPDIR}/${TEST_SHORTNAME}.fixdiffs.sh"
- logred "Or manually diff: diff ${TEST_STDOUT} ${TEST_REF}"
- logred "See stderr from test run for additional diagnostics:
${TEST_STDERR}"
- diff ${TEST_STDOUT} ${TEST_REF}
- NUM_FAILURES=$(( NUM_FAILURES + 1 ))
- fi
+ # Loop through each test suite
+ for TEST_SUITE_FILE in "${TEST_SUITES[@]}"; do
+ parse_test_suite "$TEST_SUITE_FILE"
+
+ loginfo "Setting up for test suite: ${TEST_SHORTNAME} with table format:
${TABLE_FORMAT}"
+
+ # clean up the default configuration if exists
+ if [ -d "${SPARK_HOME}" ]; then
+ SPARK_CONF="${SPARK_HOME}/conf/spark-defaults.conf"
+ if [ -f "${SPARK_CONF}" ]; then
+ echo "Clean spark conf file"
+ rm ${SPARK_CONF}
+ fi
+ fi
+
+ echo "finish SPARK_HOME check"
+
+ # Run setup with appropriate table format
+ if [ "${SPARK_SHELL_OPTION}" == "PACKAGE" ]; then
+ # run the setup without jar configuration
+ source ${SCRIPT_DIR}/setup.sh --sparkVersion ${SPARK_VERSION}
--scalaVersion ${SCALA_VERSION} --polarisVersion ${POLARIS_VERSION}
--tableFormat ${TABLE_FORMAT}
+ else
+ source ${SCRIPT_DIR}/setup.sh --sparkVersion ${SPARK_VERSION}
--scalaVersion ${SCALA_VERSION} --polarisVersion ${POLARIS_VERSION} --jar
${JAR_PATH} --tableFormat ${TABLE_FORMAT}
+ fi
+
+ # run the test
+ loginfo "Starting test ${TEST_SHORTNAME}"
+
+
TEST_TMPDIR="/tmp/polaris-spark-regtests/${TEST_SHORTNAME}_${SPARK_MAJOR_VERSION}_${SCALA_VERSION}_${SPARK_SHELL_OPTION}_${TABLE_FORMAT}"
+ TEST_STDERR="${TEST_TMPDIR}/${TEST_SHORTNAME}.stderr"
+ TEST_STDOUT="${TEST_TMPDIR}/${TEST_SHORTNAME}.stdout"
+
+ mkdir -p ${TEST_TMPDIR}
+ if (( ${VERBOSE} )); then
+ ${TEST_FILE} 2>${TEST_STDERR} | grep -v 'loading settings' | tee
${TEST_STDOUT}
+ else
+ ${TEST_FILE} 2>${TEST_STDERR} | grep -v 'loading settings' >
${TEST_STDOUT}
+ fi
+ loginfo "Test run concluded for ${TEST_SHORTNAME}"
+
+ # Compare output with reference
+ TEST_REF="${SUITES_DIR}/${TEST_SHORTNAME}.ref"
+ if cmp --silent ${TEST_STDOUT} ${TEST_REF}; then
+ loggreen "Test SUCCEEDED: ${TEST_SHORTNAME}"
+ else
+ logred "Test FAILED: ${TEST_SHORTNAME}"
+ echo '#!/bin/bash' > ${TEST_TMPDIR}/${TEST_SHORTNAME}.fixdiffs.sh
+ echo "meld ${TEST_STDOUT} ${TEST_REF}" >>
${TEST_TMPDIR}/${TEST_SHORTNAME}.fixdiffs.sh
+ chmod 750 ${TEST_TMPDIR}/${TEST_SHORTNAME}.fixdiffs.sh
+ logred "To compare and fix diffs (if 'meld' installed):
${TEST_TMPDIR}/${TEST_SHORTNAME}.fixdiffs.sh"
+ logred "Or manually diff: diff ${TEST_STDOUT} ${TEST_REF}"
+ logred "See stderr from test run for additional diagnostics:
${TEST_STDERR}"
+ diff ${TEST_STDOUT} ${TEST_REF}
+ NUM_FAILURES=$(( NUM_FAILURES + 1 ))
+ fi
+ done
done
- # clean up
if [ "${SPARK_EXISTS}" = "FALSE" ]; then
rm -rf ${SPARK_HOME}
export SPARK_HOME=""
diff --git a/plugins/spark/v3.5/regtests/setup.sh
b/plugins/spark/v3.5/regtests/setup.sh
index 1a23d3b5a..50b8ff2dd 100755
--- a/plugins/spark/v3.5/regtests/setup.sh
+++ b/plugins/spark/v3.5/regtests/setup.sh
@@ -25,12 +25,15 @@
# Warning - it will set the SPARK_HOME environment variable with the spark
setup
#
# The script can be called independently like following
-# ./setup.sh --sparkVersion ${SPARK_VERSION} --scalaVersion ${SCALA_VERSION}
--jar ${JAR_PATH}
+# ./setup.sh --sparkVersion ${SPARK_VERSION} --scalaVersion ${SCALA_VERSION}
--jar ${JAR_PATH} --tableFormat ${TABLE_FORMAT}
# Required Parameters:
# --sparkVersion : the spark version to setup
# --scalaVersion : the scala version of spark to setup
# --jar : path to the local Polaris Spark client jar
#
+# Optional Parameters:
+# --tableFormat : table format to configure (delta|hudi). Default: delta
+#
set -x
@@ -40,6 +43,7 @@ SPARK_VERSION=3.5.6
SCALA_VERSION=2.12
POLARIS_CLIENT_JAR=""
POLARIS_VERSION=""
+TABLE_FORMAT="delta"
while [[ $# -gt 0 ]]; do
case "$1" in
--sparkVersion)
@@ -62,13 +66,24 @@ while [[ $# -gt 0 ]]; do
shift # past argument
shift # past value
;;
+ --tableFormat)
+ TABLE_FORMAT="$2"
+ shift # past argument
+ shift # past value
+ ;;
--) shift;
break
;;
esac
done
-echo "SET UP FOR SPARK_VERSION=${SPARK_VERSION} SCALA_VERSION=${SCALA_VERSION}
POLARIS_VERSION=${POLARIS_VERSION} POLARIS_CLIENT_JAR=${POLARIS_CLIENT_JAR}"
+echo "SET UP FOR SPARK_VERSION=${SPARK_VERSION} SCALA_VERSION=${SCALA_VERSION}
POLARIS_VERSION=${POLARIS_VERSION} POLARIS_CLIENT_JAR=${POLARIS_CLIENT_JAR}
TABLE_FORMAT=${TABLE_FORMAT}"
+
+# Validate table format
+if [[ "$TABLE_FORMAT" != "delta" && "$TABLE_FORMAT" != "hudi" ]]; then
+ echo "Error: Invalid table format '${TABLE_FORMAT}'. Must be 'delta' or
'hudi'."
+ exit 1
+fi
if [ "$SCALA_VERSION" == "2.12" ]; then
SPARK_DISTRIBUTION=spark-${SPARK_VERSION}-bin-hadoop3
@@ -141,14 +156,32 @@ else
if [[ -z "$POLARIS_CLIENT_JAR" ]]; then
cat << EOF >> ${SPARK_CONF}
# POLARIS Spark client test conf
+EOF
+ if [[ "$TABLE_FORMAT" == "hudi" ]]; then
+ cat << EOF >> ${SPARK_CONF}
+spark.jars.packages
org.apache.polaris:polaris-spark-3.5_$SCALA_VERSION:$POLARIS_VERSION,org.apache.hudi:hudi-spark3.5-bundle_${SCALA_VERSION}:1.1.1
+# Note: the Hudi bundle is added to spark.jars.packages here so that it is
+# resolved at session startup, before Kryo initialization
+EOF
+ else
+ cat << EOF >> ${SPARK_CONF}
spark.jars.packages
org.apache.polaris:polaris-spark-3.5_$SCALA_VERSION:$POLARIS_VERSION,io.delta:delta-spark_${SCALA_VERSION}:3.2.1
EOF
+ fi
else
cat << EOF >> ${SPARK_CONF}
# POLARIS Spark client test conf
spark.jars $POLARIS_CLIENT_JAR
+EOF
+ if [[ "$TABLE_FORMAT" == "hudi" ]]; then
+ cat << EOF >> ${SPARK_CONF}
+spark.jars.packages org.apache.hudi:hudi-spark3.5-bundle_${SCALA_VERSION}:1.1.1
+EOF
+ else
+ cat << EOF >> ${SPARK_CONF}
spark.jars.packages io.delta:delta-spark_${SCALA_VERSION}:3.2.1
EOF
+ fi
fi
cat << EOF >> ${SPARK_CONF}
@@ -157,9 +190,26 @@ spark.sql.variable.substitute true
spark.driver.extraJavaOptions -Dderby.system.home=${DERBY_HOME}
+EOF
+
+if [[ "$TABLE_FORMAT" == "hudi" ]]; then
+ cat << EOF >> ${SPARK_CONF}
+spark.sql.extensions=org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions,org.apache.spark.sql.hudi.HoodieSparkSessionExtension
+# this configuration is needed for hudi table
+spark.sql.catalog.spark_catalog=org.apache.spark.sql.hudi.catalog.HoodieCatalog
+spark.serializer=org.apache.spark.serializer.KryoSerializer
+spark.kryo.registrator=org.apache.spark.HoodieSparkKryoRegistrar
+hoodie.metadata.enable=false
+EOF
+else
+ cat << EOF >> ${SPARK_CONF}
spark.sql.extensions=org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions,io.delta.sql.DeltaSparkSessionExtension
# this configuration is needed for delta table
spark.sql.catalog.spark_catalog=org.apache.spark.sql.delta.catalog.DeltaCatalog
+EOF
+fi
+
+cat << EOF >> ${SPARK_CONF}
spark.sql.catalog.polaris=org.apache.polaris.spark.SparkCatalog
spark.sql.catalog.polaris.uri=http://${POLARIS_HOST:-localhost}:8181/api/catalog
# this configuration is currently only used for iceberg tables, generic tables
currently
diff --git a/plugins/spark/v3.5/regtests/spark_sql.ref
b/plugins/spark/v3.5/regtests/suites/spark_sql_delta.ref
similarity index 100%
rename from plugins/spark/v3.5/regtests/spark_sql.ref
rename to plugins/spark/v3.5/regtests/suites/spark_sql_delta.ref
diff --git a/plugins/spark/v3.5/regtests/spark_sql.sh
b/plugins/spark/v3.5/regtests/suites/spark_sql_delta.sh
similarity index 100%
copy from plugins/spark/v3.5/regtests/spark_sql.sh
copy to plugins/spark/v3.5/regtests/suites/spark_sql_delta.sh
diff --git a/plugins/spark/v3.5/regtests/suites/spark_sql_hudi.ref
b/plugins/spark/v3.5/regtests/suites/spark_sql_hudi.ref
new file mode 100644
index 000000000..a40b4ddac
--- /dev/null
+++ b/plugins/spark/v3.5/regtests/suites/spark_sql_hudi.ref
@@ -0,0 +1,45 @@
+{"defaults":{"default-base-location":"file:///tmp/spark_hudi_catalog"},"overrides":{"prefix":"spark_hudi_catalog"},"endpoints":["GET
/v1/{prefix}/namespaces","GET /v1/{prefix}/namespaces/{namespace}","HEAD
/v1/{prefix}/namespaces/{namespace}","POST /v1/{prefix}/namespaces","POST
/v1/{prefix}/namespaces/{namespace}/properties","DELETE
/v1/{prefix}/namespaces/{namespace}","GET
/v1/{prefix}/namespaces/{namespace}/tables","GET
/v1/{prefix}/namespaces/{namespace}/tables/{table}","HEAD /v1/{pr [...]
+Catalog created
+spark-sql (default)> use polaris;
+spark-sql ()> create namespace hudi_db1;
+spark-sql ()> create namespace hudi_db2;
+spark-sql ()> show namespaces;
+hudi_db1
+hudi_db2
+spark-sql ()>
+ > create namespace hudi_db1.schema1;
+spark-sql ()> show namespaces in hudi_db1;
+hudi_db1.schema1
+spark-sql ()>
+ > create table hudi_db1.schema1.hudi_tb1 (id int, name string)
using hudi location 'file:///tmp/spark_hudi_catalog/hudi_tb1';
+spark-sql ()> show tables in hudi_db1;
+spark-sql ()> show tables in hudi_db1.schema1;
+spark-sql ()>
+ > use hudi_db1.schema1;
+spark-sql (hudi_db1.schema1)> insert into hudi_tb1 values (1, 'alice'), (2,
'bob');
+spark-sql (hudi_db1.schema1)> select * from hudi_tb1 order by id;
+spark-sql (hudi_db1.schema1)>
+ > create table hudi_tb2 (name string, age int,
country string) using hudi partitioned by (country) location
'file:///tmp/spark_hudi_catalog/hudi_tb2';
+spark-sql (hudi_db1.schema1)> insert into hudi_tb2 values ('anna', 10, 'US'),
('james', 32, 'US'), ('yan', 16, 'CHINA');
+spark-sql (hudi_db1.schema1)> select name, country from hudi_tb2 order by age;
+spark-sql (hudi_db1.schema1)>
+ > show tables;
+spark-sql (hudi_db1.schema1)>
+ > use hudi_db1;
+spark-sql (hudi_db1)> create table iceberg_tb (col1 int);
+spark-sql (hudi_db1)> insert into iceberg_tb values (100), (200);
+spark-sql (hudi_db1)> select * from iceberg_tb order by col1;
+100
+200
+spark-sql (hudi_db1)>
+ > show tables;
+iceberg_tb
+spark-sql (hudi_db1)> show tables in hudi_db1.schema1;
+spark-sql (hudi_db1)>
+ > drop table hudi_db1.schema1.hudi_tb1;
+spark-sql (hudi_db1)> drop table hudi_db1.schema1.hudi_tb2;
+spark-sql (hudi_db1)> drop namespace hudi_db1.schema1;
+spark-sql (hudi_db1)> drop table iceberg_tb;
+spark-sql (hudi_db1)> drop namespace hudi_db1;
+spark-sql (hudi_db1)> drop namespace hudi_db2;
+spark-sql (hudi_db1)>
diff --git a/plugins/spark/v3.5/regtests/spark_sql.sh
b/plugins/spark/v3.5/regtests/suites/spark_sql_hudi.sh
similarity index 60%
rename from plugins/spark/v3.5/regtests/spark_sql.sh
rename to plugins/spark/v3.5/regtests/suites/spark_sql_hudi.sh
index fe036664c..0a230a4d0 100755
--- a/plugins/spark/v3.5/regtests/spark_sql.sh
+++ b/plugins/spark/v3.5/regtests/suites/spark_sql_hudi.sh
@@ -21,10 +21,13 @@
SPARK_BEARER_TOKEN="${REGTEST_ROOT_BEARER_TOKEN}"
-CATALOG_NAME="spark_sql_catalog"
+# Determine Scala version (default to 2.12 if not set)
+SCALA_VERSION="${SCALA_VERSION:-2.12}"
+
+CATALOG_NAME="spark_hudi_catalog"
curl -i -X POST -H "Authorization: Bearer ${SPARK_BEARER_TOKEN}" -H 'Accept:
application/json' -H 'Content-Type: application/json' \
http://${POLARIS_HOST:-localhost}:8181/api/management/v1/catalogs \
- -d '{"name": "spark_sql_catalog", "id": 100, "type": "INTERNAL", "readOnly":
false, "properties": {"default-base-location": "file:///tmp/spark_catalog"},
"storageConfigInfo": {"storageType": "FILE", "allowedLocations":
["file:///tmp"]}}' > /dev/stderr
+ -d '{"name": "spark_hudi_catalog", "id": 200, "type": "INTERNAL",
"readOnly": false, "properties": {"default-base-location":
"file:///tmp/spark_hudi_catalog"}, "storageConfigInfo": {"storageType": "FILE",
"allowedLocations": ["file:///tmp"]}}' > /dev/stderr
# Add TABLE_WRITE_DATA to the catalog's catalog_admin role since by default it
can only manage access and metadata
curl -i -X PUT -H "Authorization: Bearer ${SPARK_BEARER_TOKEN}" -H 'Accept:
application/json' -H 'Content-Type: application/json' \
@@ -37,45 +40,46 @@ echo
echo "Catalog created"
cat << EOF | ${SPARK_HOME}/bin/spark-sql -S --conf
spark.sql.catalog.polaris.token="${SPARK_BEARER_TOKEN}" --conf
spark.sql.catalog.polaris.warehouse=${CATALOG_NAME}
use polaris;
-create namespace db1;
-create namespace db2;
+create namespace hudi_db1;
+create namespace hudi_db2;
show namespaces;
-create namespace db1.schema1;
-show namespaces in db1;
+create namespace hudi_db1.schema1;
+show namespaces in hudi_db1;
-create table db1.schema1.iceberg_tb (col1 int);
-show tables in db1;
-show tables in db1.schema1;
+create table hudi_db1.schema1.hudi_tb1 (id int, name string) using hudi
location 'file:///tmp/spark_hudi_catalog/hudi_tb1';
+show tables in hudi_db1;
+show tables in hudi_db1.schema1;
-use db1.schema1;
-insert into iceberg_tb values (123), (234), (111);
-select * from iceberg_tb order by col1;
+use hudi_db1.schema1;
+insert into hudi_tb1 values (1, 'alice'), (2, 'bob');
+select * from hudi_tb1 order by id;
-create table delta_tb1(col1 string) using delta location
'file:///tmp/spark_catalog/delta_tb1';
-insert into delta_tb1 values ('ab'), ('bb'), ('dd');
-select * from delta_tb1 order by col1;
+create table hudi_tb2 (name string, age int, country string) using hudi
partitioned by (country) location 'file:///tmp/spark_hudi_catalog/hudi_tb2';
+insert into hudi_tb2 values ('anna', 10, 'US'), ('james', 32, 'US'), ('yan',
16, 'CHINA');
+select name, country from hudi_tb2 order by age;
show tables;
-use db1;
-create table delta_tb2(col1 int) using delta location
'file:///tmp/spark_catalog/delta_tb2';
-insert into delta_tb2 values (1), (2), (3) order by col1;
-select * from delta_tb2;
+use hudi_db1;
+create table iceberg_tb (col1 int);
+insert into iceberg_tb values (100), (200);
+select * from iceberg_tb order by col1;
show tables;
-show tables in db1.schema1;
-
-drop table db1.schema1.iceberg_tb;
-drop table db1.schema1.delta_tb1;
-drop namespace db1.schema1;
-drop table delta_tb2;
-drop namespace db1;
-drop namespace db2;
+show tables in hudi_db1.schema1;
+
+drop table hudi_db1.schema1.hudi_tb1;
+drop table hudi_db1.schema1.hudi_tb2;
+drop namespace hudi_db1.schema1;
+drop table iceberg_tb;
+drop namespace hudi_db1;
+drop namespace hudi_db2;
EOF
-# clean up the spark_catalog dir
-rm -rf /tmp/spark_catalog/
+# clean up the spark_hudi_catalog dir
+rm -rf /tmp/spark_hudi_catalog/
curl -i -X DELETE -H "Authorization: Bearer ${SPARK_BEARER_TOKEN}" -H 'Accept:
application/json' -H 'Content-Type: application/json' \
http://${POLARIS_HOST:-localhost}:8181/api/management/v1/catalogs/${CATALOG_NAME}
> /dev/stderr
+