This is an automated email from the ASF dual-hosted git repository.
yufei pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/polaris.git
The following commit(s) were added to refs/heads/main by this push:
new 893722cfb feat: initial hudi reg test (#3641)
893722cfb is described below
commit 893722cfbb2a1867442771da952fcfbd7314a279
Author: Rahil C <[email protected]>
AuthorDate: Tue Feb 17 19:51:40 2026 -0500
feat: initial hudi reg test (#3641)
---
plugins/spark/v3.5/regtests/README.md | 45 +++++++
plugins/spark/v3.5/regtests/run.sh | 149 ++++++++++++++-------
plugins/spark/v3.5/regtests/setup.sh | 54 +++++++-
.../{spark_sql.ref => suites/spark_sql_delta.ref} | 0
.../{spark_sql.sh => suites/spark_sql_delta.sh} | 0
.../spark/v3.5/regtests/suites/spark_sql_hudi.ref | 45 +++++++
.../{spark_sql.sh => suites/spark_sql_hudi.sh} | 62 +++++----
7 files changed, 277 insertions(+), 78 deletions(-)
diff --git a/plugins/spark/v3.5/regtests/README.md
b/plugins/spark/v3.5/regtests/README.md
index 06a0ccd13..de3355204 100755
--- a/plugins/spark/v3.5/regtests/README.md
+++ b/plugins/spark/v3.5/regtests/README.md
@@ -84,3 +84,48 @@ Note: the regression tests expect Polaris to run with
certain options, e.g. with
storage, default realm `POLARIS` and root credentials `root:secret`; if you
run the above command,
this will be the case. If you run Polaris in a different way, make sure that
Polaris is configured
appropriately.
+
+## Running Specific Test Suites
+
+By default, `run.sh` auto-discovers and executes all test suites in the
`suites/` directory.
+To run a specific suite, use the `REGTEST_SUITE` environment variable with
just the test name:
+
+```bash
+# Run only Delta tests
+env POLARIS_HOST=localhost REGTEST_SUITE=spark_sql_delta
./plugins/spark/v3.5/regtests/run.sh
+
+# Run only Hudi tests
+env POLARIS_HOST=localhost REGTEST_SUITE=spark_sql_hudi
./plugins/spark/v3.5/regtests/run.sh
+```
+
+## Adding a New Test Suite
+
+Test suites are auto-discovered from the `suites/` directory. To add a new
test:
+
+1. Create `suites/<descriptive_name>_<table_format>.sh` (must be executable)
+2. Create `suites/<descriptive_name>_<table_format>.ref` (expected output)
+3. The table format is automatically parsed from the last segment before `.sh`
+4. Supported table formats: `delta`, `hudi`
+
+## Table Format Support
+
+The regression tests support multiple table formats through the
`--tableFormat` parameter in `setup.sh`:
+
+- **Delta** (default): Uses `DeltaCatalog` for `spark_catalog`. Tests both
Iceberg and Delta tables.
+- **Hudi**: Uses `HoodieCatalog` for `spark_catalog`. Tests both Iceberg and
Hudi tables.
+
+Each test suite is isolated with its own Spark configuration and catalog
setup. The `spark_catalog`
+can only be configured to one catalog implementation at a time, which is why
separate test suites
+are needed for Delta and Hudi formats.
+
+### Manual Setup
+
+You can manually run `setup.sh` with a specific table format:
+
+```bash
+# Setup for Delta tables (default)
+./plugins/spark/v3.5/regtests/setup.sh --sparkVersion 3.5.6 --scalaVersion
2.12 --polarisVersion 0.1.0 --tableFormat delta
+
+# Setup for Hudi tables
+./plugins/spark/v3.5/regtests/setup.sh --sparkVersion 3.5.6 --scalaVersion
2.12 --polarisVersion 0.1.0 --tableFormat hudi
+```
diff --git a/plugins/spark/v3.5/regtests/run.sh
b/plugins/spark/v3.5/regtests/run.sh
index cc84c0411..d925f7e75 100755
--- a/plugins/spark/v3.5/regtests/run.sh
+++ b/plugins/spark/v3.5/regtests/run.sh
@@ -70,6 +70,52 @@ SPARK_VERSION="3.5.6"
SPARK_SHELL_OPTIONS=("PACKAGE" "JAR")
+# Auto-discover test suites from the suites/ directory
+# Test files must follow naming convention: <name>_<table_format>.sh
+SUITES_DIR="${SCRIPT_DIR}/suites"
+
+if [[ ! -d "$SUITES_DIR" ]]; then
+ logred "Error: Test suites directory not found: ${SUITES_DIR}"
+ exit 1
+fi
+
+# Parses a test suite filename (e.g. "spark_sql_delta.sh") to extract:
+# TABLE_FORMAT - the table format suffix after the last '_' (e.g. "delta")
+# TEST_SHORTNAME - the base name without the .sh extension (e.g.
"spark_sql_delta")
+# TEST_FILE - the full path to the suite file under SUITES_DIR
+parse_test_suite() {
+ local filename="$1"
+ local base="${filename%.sh}"
+ TABLE_FORMAT="${base##*_}"
+ TEST_SHORTNAME="${base}"
+ TEST_FILE="${SUITES_DIR}/${filename}"
+}
+
+declare -a TEST_SUITES=()
+for test_file in "${SUITES_DIR}"/*.sh; do
+ [[ -f "$test_file" ]] || continue
+ TEST_SUITES+=("$(basename "$test_file")")
+done
+
+if [[ ${#TEST_SUITES[@]} -eq 0 ]]; then
+ logred "Error: No test suites found in ${SUITES_DIR}"
+ exit 1
+fi
+
+# Allow running specific test via environment variable
+echo "REGTEST_SUITE=${REGTEST_SUITE:-}"
+if [[ -n "${REGTEST_SUITE:-}" ]]; then
+ REGTEST_SUITE="${REGTEST_SUITE%.sh}"
+ SUITE_FILE="${REGTEST_SUITE}.sh"
+ if [[ ! -f "${SUITES_DIR}/${SUITE_FILE}" ]]; then
+ logred "Error: Test suite not found: ${SUITES_DIR}/${SUITE_FILE}"
+ exit 1
+ fi
+ echo "Overriding TEST_SUITES to run only: ${REGTEST_SUITE}"
+ TEST_SUITES=("${SUITE_FILE}")
+fi
+echo "Will run test suites: ${TEST_SUITES[*]}"
+
for SCALA_VERSION in "${SCALA_VERSIONS[@]}"; do
echo "RUN REGRESSION TEST FOR SPARK_MAJOR_VERSION=${SPARK_MAJOR_VERSION},
SPARK_VERSION=${SPARK_VERSION}, SCALA_VERSION=${SCALA_VERSION}"
# find the project jar
@@ -89,55 +135,64 @@ for SCALA_VERSION in "${SCALA_VERSIONS[@]}"; do
fi
for SPARK_SHELL_OPTION in "${SPARK_SHELL_OPTIONS[@]}"; do
- # clean up the default configuration if exists
- if [ -f "${SPARK_HOME}" ]; then
- SPARK_CONF="${SPARK_HOME}/conf/spark-defaults.conf"
- if [ -f ${SPARK_CONF} ]; then
- rm ${SPARK_CONF}
- fi
- fi
-
- if [ "${SPARK_SHELL_OPTION}" == "PACKAGE" ]; then
- # run the setup without jar configuration
- source ${SCRIPT_DIR}/setup.sh --sparkVersion ${SPARK_VERSION}
--scalaVersion ${SCALA_VERSION} --polarisVersion ${POLARIS_VERSION}
- else
- source ${SCRIPT_DIR}/setup.sh --sparkVersion ${SPARK_VERSION}
--scalaVersion ${SCALA_VERSION} --polarisVersion ${POLARIS_VERSION} --jar
${JAR_PATH}
- fi
-
- # run the spark_sql test
- loginfo "Starting test spark_sql.sh"
-
- TEST_FILE="spark_sql.sh"
- TEST_SHORTNAME="spark_sql"
-
TEST_TMPDIR="/tmp/polaris-spark-regtests/${TEST_SHORTNAME}_${SPARK_MAJOR_VERSION}_${SCALA_VERSION}"
- TEST_STDERR="${TEST_TMPDIR}/${TEST_SHORTNAME}.stderr"
- TEST_STDOUT="${TEST_TMPDIR}/${TEST_SHORTNAME}.stdout"
-
- mkdir -p ${TEST_TMPDIR}
- if (( ${VERBOSE} )); then
- ${SCRIPT_DIR}/${TEST_FILE} 2>${TEST_STDERR} | grep -v 'loading settings'
| tee ${TEST_STDOUT}
- else
- ${SCRIPT_DIR}/${TEST_FILE} 2>${TEST_STDERR} | grep -v 'loading settings'
> ${TEST_STDOUT}
- fi
- loginfo "Test run concluded for ${TEST_SUITE}:${TEST_SHORTNAME}"
-
- TEST_REF="$(realpath ${SCRIPT_DIR})/${TEST_SHORTNAME}.ref"
- if cmp --silent ${TEST_STDOUT} ${TEST_REF}; then
- loggreen "Test SUCCEEDED: ${TEST_SUITE}:${TEST_SHORTNAME}"
- else
- logred "Test FAILED: ${TEST_SUITE}:${TEST_SHORTNAME}"
- echo '#!/bin/bash' > ${TEST_TMPDIR}/${TEST_SHORTNAME}.fixdiffs.sh
- echo "meld ${TEST_STDOUT} ${TEST_REF}" >>
${TEST_TMPDIR}/${TEST_SHORTNAME}.fixdiffs.sh
- chmod 750 ${TEST_TMPDIR}/${TEST_SHORTNAME}.fixdiffs.sh
- logred "To compare and fix diffs (if 'meld' installed):
${TEST_TMPDIR}/${TEST_SHORTNAME}.fixdiffs.sh"
- logred "Or manually diff: diff ${TEST_STDOUT} ${TEST_REF}"
- logred "See stderr from test run for additional diagnostics:
${TEST_STDERR}"
- diff ${TEST_STDOUT} ${TEST_REF}
- NUM_FAILURES=$(( NUM_FAILURES + 1 ))
- fi
+ # Loop through each test suite
+ for TEST_SUITE_FILE in "${TEST_SUITES[@]}"; do
+ parse_test_suite "$TEST_SUITE_FILE"
+
+ loginfo "Setting up for test suite: ${TEST_SHORTNAME} with table format:
${TABLE_FORMAT}"
+
+ # clean up the default configuration if exists
+ if [ -d "${SPARK_HOME}" ]; then
+ SPARK_CONF="${SPARK_HOME}/conf/spark-defaults.conf"
+ if [ -f "${SPARK_CONF}" ]; then
+ echo "Clean spark conf file"
+ rm ${SPARK_CONF}
+ fi
+ fi
+
+ echo "finish SPARK_HOME check"
+
+ # Run setup with appropriate table format
+ if [ "${SPARK_SHELL_OPTION}" == "PACKAGE" ]; then
+ # run the setup without jar configuration
+ source ${SCRIPT_DIR}/setup.sh --sparkVersion ${SPARK_VERSION}
--scalaVersion ${SCALA_VERSION} --polarisVersion ${POLARIS_VERSION}
--tableFormat ${TABLE_FORMAT}
+ else
+ source ${SCRIPT_DIR}/setup.sh --sparkVersion ${SPARK_VERSION}
--scalaVersion ${SCALA_VERSION} --polarisVersion ${POLARIS_VERSION} --jar
${JAR_PATH} --tableFormat ${TABLE_FORMAT}
+ fi
+
+ # run the test
+ loginfo "Starting test ${TEST_SHORTNAME}"
+
+
TEST_TMPDIR="/tmp/polaris-spark-regtests/${TEST_SHORTNAME}_${SPARK_MAJOR_VERSION}_${SCALA_VERSION}_${SPARK_SHELL_OPTION}_${TABLE_FORMAT}"
+ TEST_STDERR="${TEST_TMPDIR}/${TEST_SHORTNAME}.stderr"
+ TEST_STDOUT="${TEST_TMPDIR}/${TEST_SHORTNAME}.stdout"
+
+ mkdir -p ${TEST_TMPDIR}
+ if (( ${VERBOSE} )); then
+ ${TEST_FILE} 2>${TEST_STDERR} | grep -v 'loading settings' | tee
${TEST_STDOUT}
+ else
+ ${TEST_FILE} 2>${TEST_STDERR} | grep -v 'loading settings' >
${TEST_STDOUT}
+ fi
+ loginfo "Test run concluded for ${TEST_SHORTNAME}"
+
+ # Compare output with reference
+ TEST_REF="${SUITES_DIR}/${TEST_SHORTNAME}.ref"
+ if cmp --silent ${TEST_STDOUT} ${TEST_REF}; then
+ loggreen "Test SUCCEEDED: ${TEST_SHORTNAME}"
+ else
+ logred "Test FAILED: ${TEST_SHORTNAME}"
+ echo '#!/bin/bash' > ${TEST_TMPDIR}/${TEST_SHORTNAME}.fixdiffs.sh
+ echo "meld ${TEST_STDOUT} ${TEST_REF}" >>
${TEST_TMPDIR}/${TEST_SHORTNAME}.fixdiffs.sh
+ chmod 750 ${TEST_TMPDIR}/${TEST_SHORTNAME}.fixdiffs.sh
+ logred "To compare and fix diffs (if 'meld' installed):
${TEST_TMPDIR}/${TEST_SHORTNAME}.fixdiffs.sh"
+ logred "Or manually diff: diff ${TEST_STDOUT} ${TEST_REF}"
+ logred "See stderr from test run for additional diagnostics:
${TEST_STDERR}"
+ diff ${TEST_STDOUT} ${TEST_REF}
+ NUM_FAILURES=$(( NUM_FAILURES + 1 ))
+ fi
+ done
done
- # clean up
if [ "${SPARK_EXISTS}" = "FALSE" ]; then
rm -rf ${SPARK_HOME}
export SPARK_HOME=""
diff --git a/plugins/spark/v3.5/regtests/setup.sh
b/plugins/spark/v3.5/regtests/setup.sh
index 1a23d3b5a..50b8ff2dd 100755
--- a/plugins/spark/v3.5/regtests/setup.sh
+++ b/plugins/spark/v3.5/regtests/setup.sh
@@ -25,12 +25,15 @@
# Warning - it will set the SPARK_HOME environment variable with the spark
setup
#
# The script can be called independently like following
-# ./setup.sh --sparkVersion ${SPARK_VERSION} --scalaVersion ${SCALA_VERSION}
--jar ${JAR_PATH}
+# ./setup.sh --sparkVersion ${SPARK_VERSION} --scalaVersion ${SCALA_VERSION}
--jar ${JAR_PATH} --tableFormat ${TABLE_FORMAT}
# Required Parameters:
# --sparkVersion : the spark version to setup
# --scalaVersion : the scala version of spark to setup
# --jar : path to the local Polaris Spark client jar
#
+# Optional Parameters:
+# --tableFormat : table format to configure (delta|hudi). Default: delta
+#
set -x
@@ -40,6 +43,7 @@ SPARK_VERSION=3.5.6
SCALA_VERSION=2.12
POLARIS_CLIENT_JAR=""
POLARIS_VERSION=""
+TABLE_FORMAT="delta"
while [[ $# -gt 0 ]]; do
case "$1" in
--sparkVersion)
@@ -62,13 +66,24 @@ while [[ $# -gt 0 ]]; do
shift # past argument
shift # past value
;;
+ --tableFormat)
+ TABLE_FORMAT="$2"
+ shift # past argument
+ shift # past value
+ ;;
--) shift;
break
;;
esac
done
-echo "SET UP FOR SPARK_VERSION=${SPARK_VERSION} SCALA_VERSION=${SCALA_VERSION}
POLARIS_VERSION=${POLARIS_VERSION} POLARIS_CLIENT_JAR=${POLARIS_CLIENT_JAR}"
+echo "SET UP FOR SPARK_VERSION=${SPARK_VERSION} SCALA_VERSION=${SCALA_VERSION}
POLARIS_VERSION=${POLARIS_VERSION} POLARIS_CLIENT_JAR=${POLARIS_CLIENT_JAR}
TABLE_FORMAT=${TABLE_FORMAT}"
+
+# Validate table format
+if [[ "$TABLE_FORMAT" != "delta" && "$TABLE_FORMAT" != "hudi" ]]; then
+ echo "Error: Invalid table format '${TABLE_FORMAT}'. Must be 'delta' or
'hudi'."
+ exit 1
+fi
if [ "$SCALA_VERSION" == "2.12" ]; then
SPARK_DISTRIBUTION=spark-${SPARK_VERSION}-bin-hadoop3
@@ -141,14 +156,32 @@ else
if [[ -z "$POLARIS_CLIENT_JAR" ]]; then
cat << EOF >> ${SPARK_CONF}
# POLARIS Spark client test conf
+EOF
+ if [[ "$TABLE_FORMAT" == "hudi" ]]; then
+ cat << EOF >> ${SPARK_CONF}
+spark.jars.packages
org.apache.polaris:polaris-spark-3.5_$SCALA_VERSION:$POLARIS_VERSION,org.apache.hudi:hudi-spark3.5-bundle_${SCALA_VERSION}:1.1.1
+# Note: the Hudi bundle is added to spark.jars.packages here so that it is
+# resolved at session startup, before Kryo initialization
+EOF
+ else
+ cat << EOF >> ${SPARK_CONF}
spark.jars.packages
org.apache.polaris:polaris-spark-3.5_$SCALA_VERSION:$POLARIS_VERSION,io.delta:delta-spark_${SCALA_VERSION}:3.2.1
EOF
+ fi
else
cat << EOF >> ${SPARK_CONF}
# POLARIS Spark client test conf
spark.jars $POLARIS_CLIENT_JAR
+EOF
+ if [[ "$TABLE_FORMAT" == "hudi" ]]; then
+ cat << EOF >> ${SPARK_CONF}
+spark.jars.packages org.apache.hudi:hudi-spark3.5-bundle_${SCALA_VERSION}:1.1.1
+EOF
+ else
+ cat << EOF >> ${SPARK_CONF}
spark.jars.packages io.delta:delta-spark_${SCALA_VERSION}:3.2.1
EOF
+ fi
fi
cat << EOF >> ${SPARK_CONF}
@@ -157,9 +190,26 @@ spark.sql.variable.substitute true
spark.driver.extraJavaOptions -Dderby.system.home=${DERBY_HOME}
+EOF
+
+if [[ "$TABLE_FORMAT" == "hudi" ]]; then
+ cat << EOF >> ${SPARK_CONF}
+spark.sql.extensions=org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions,org.apache.spark.sql.hudi.HoodieSparkSessionExtension
+# this configuration is needed for hudi table
+spark.sql.catalog.spark_catalog=org.apache.spark.sql.hudi.catalog.HoodieCatalog
+spark.serializer=org.apache.spark.serializer.KryoSerializer
+spark.kryo.registrator=org.apache.spark.HoodieSparkKryoRegistrar
+hoodie.metadata.enable=false
+EOF
+else
+ cat << EOF >> ${SPARK_CONF}
spark.sql.extensions=org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions,io.delta.sql.DeltaSparkSessionExtension
# this configuration is needed for delta table
spark.sql.catalog.spark_catalog=org.apache.spark.sql.delta.catalog.DeltaCatalog
+EOF
+fi
+
+cat << EOF >> ${SPARK_CONF}
spark.sql.catalog.polaris=org.apache.polaris.spark.SparkCatalog
spark.sql.catalog.polaris.uri=http://${POLARIS_HOST:-localhost}:8181/api/catalog
# this configuration is currently only used for iceberg tables, generic tables
currently
diff --git a/plugins/spark/v3.5/regtests/spark_sql.ref
b/plugins/spark/v3.5/regtests/suites/spark_sql_delta.ref
similarity index 100%
rename from plugins/spark/v3.5/regtests/spark_sql.ref
rename to plugins/spark/v3.5/regtests/suites/spark_sql_delta.ref
diff --git a/plugins/spark/v3.5/regtests/spark_sql.sh
b/plugins/spark/v3.5/regtests/suites/spark_sql_delta.sh
similarity index 100%
copy from plugins/spark/v3.5/regtests/spark_sql.sh
copy to plugins/spark/v3.5/regtests/suites/spark_sql_delta.sh
diff --git a/plugins/spark/v3.5/regtests/suites/spark_sql_hudi.ref
b/plugins/spark/v3.5/regtests/suites/spark_sql_hudi.ref
new file mode 100644
index 000000000..a40b4ddac
--- /dev/null
+++ b/plugins/spark/v3.5/regtests/suites/spark_sql_hudi.ref
@@ -0,0 +1,45 @@
+{"defaults":{"default-base-location":"file:///tmp/spark_hudi_catalog"},"overrides":{"prefix":"spark_hudi_catalog"},"endpoints":["GET
/v1/{prefix}/namespaces","GET /v1/{prefix}/namespaces/{namespace}","HEAD
/v1/{prefix}/namespaces/{namespace}","POST /v1/{prefix}/namespaces","POST
/v1/{prefix}/namespaces/{namespace}/properties","DELETE
/v1/{prefix}/namespaces/{namespace}","GET
/v1/{prefix}/namespaces/{namespace}/tables","GET
/v1/{prefix}/namespaces/{namespace}/tables/{table}","HEAD /v1/{pr [...]
+Catalog created
+spark-sql (default)> use polaris;
+spark-sql ()> create namespace hudi_db1;
+spark-sql ()> create namespace hudi_db2;
+spark-sql ()> show namespaces;
+hudi_db1
+hudi_db2
+spark-sql ()>
+ > create namespace hudi_db1.schema1;
+spark-sql ()> show namespaces in hudi_db1;
+hudi_db1.schema1
+spark-sql ()>
+ > create table hudi_db1.schema1.hudi_tb1 (id int, name string)
using hudi location 'file:///tmp/spark_hudi_catalog/hudi_tb1';
+spark-sql ()> show tables in hudi_db1;
+spark-sql ()> show tables in hudi_db1.schema1;
+spark-sql ()>
+ > use hudi_db1.schema1;
+spark-sql (hudi_db1.schema1)> insert into hudi_tb1 values (1, 'alice'), (2,
'bob');
+spark-sql (hudi_db1.schema1)> select * from hudi_tb1 order by id;
+spark-sql (hudi_db1.schema1)>
+ > create table hudi_tb2 (name string, age int,
country string) using hudi partitioned by (country) location
'file:///tmp/spark_hudi_catalog/hudi_tb2';
+spark-sql (hudi_db1.schema1)> insert into hudi_tb2 values ('anna', 10, 'US'),
('james', 32, 'US'), ('yan', 16, 'CHINA');
+spark-sql (hudi_db1.schema1)> select name, country from hudi_tb2 order by age;
+spark-sql (hudi_db1.schema1)>
+ > show tables;
+spark-sql (hudi_db1.schema1)>
+ > use hudi_db1;
+spark-sql (hudi_db1)> create table iceberg_tb (col1 int);
+spark-sql (hudi_db1)> insert into iceberg_tb values (100), (200);
+spark-sql (hudi_db1)> select * from iceberg_tb order by col1;
+100
+200
+spark-sql (hudi_db1)>
+ > show tables;
+iceberg_tb
+spark-sql (hudi_db1)> show tables in hudi_db1.schema1;
+spark-sql (hudi_db1)>
+ > drop table hudi_db1.schema1.hudi_tb1;
+spark-sql (hudi_db1)> drop table hudi_db1.schema1.hudi_tb2;
+spark-sql (hudi_db1)> drop namespace hudi_db1.schema1;
+spark-sql (hudi_db1)> drop table iceberg_tb;
+spark-sql (hudi_db1)> drop namespace hudi_db1;
+spark-sql (hudi_db1)> drop namespace hudi_db2;
+spark-sql (hudi_db1)>
diff --git a/plugins/spark/v3.5/regtests/spark_sql.sh
b/plugins/spark/v3.5/regtests/suites/spark_sql_hudi.sh
similarity index 60%
rename from plugins/spark/v3.5/regtests/spark_sql.sh
rename to plugins/spark/v3.5/regtests/suites/spark_sql_hudi.sh
index fe036664c..0a230a4d0 100755
--- a/plugins/spark/v3.5/regtests/spark_sql.sh
+++ b/plugins/spark/v3.5/regtests/suites/spark_sql_hudi.sh
@@ -21,10 +21,13 @@
SPARK_BEARER_TOKEN="${REGTEST_ROOT_BEARER_TOKEN}"
-CATALOG_NAME="spark_sql_catalog"
+# Determine Scala version (default to 2.12 if not set)
+SCALA_VERSION="${SCALA_VERSION:-2.12}"
+
+CATALOG_NAME="spark_hudi_catalog"
curl -i -X POST -H "Authorization: Bearer ${SPARK_BEARER_TOKEN}" -H 'Accept:
application/json' -H 'Content-Type: application/json' \
http://${POLARIS_HOST:-localhost}:8181/api/management/v1/catalogs \
- -d '{"name": "spark_sql_catalog", "id": 100, "type": "INTERNAL", "readOnly":
false, "properties": {"default-base-location": "file:///tmp/spark_catalog"},
"storageConfigInfo": {"storageType": "FILE", "allowedLocations":
["file:///tmp"]}}' > /dev/stderr
+ -d '{"name": "spark_hudi_catalog", "id": 200, "type": "INTERNAL",
"readOnly": false, "properties": {"default-base-location":
"file:///tmp/spark_hudi_catalog"}, "storageConfigInfo": {"storageType": "FILE",
"allowedLocations": ["file:///tmp"]}}' > /dev/stderr
# Add TABLE_WRITE_DATA to the catalog's catalog_admin role since by default it
can only manage access and metadata
curl -i -X PUT -H "Authorization: Bearer ${SPARK_BEARER_TOKEN}" -H 'Accept:
application/json' -H 'Content-Type: application/json' \
@@ -37,45 +40,46 @@ echo
echo "Catalog created"
cat << EOF | ${SPARK_HOME}/bin/spark-sql -S --conf
spark.sql.catalog.polaris.token="${SPARK_BEARER_TOKEN}" --conf
spark.sql.catalog.polaris.warehouse=${CATALOG_NAME}
use polaris;
-create namespace db1;
-create namespace db2;
+create namespace hudi_db1;
+create namespace hudi_db2;
show namespaces;
-create namespace db1.schema1;
-show namespaces in db1;
+create namespace hudi_db1.schema1;
+show namespaces in hudi_db1;
-create table db1.schema1.iceberg_tb (col1 int);
-show tables in db1;
-show tables in db1.schema1;
+create table hudi_db1.schema1.hudi_tb1 (id int, name string) using hudi
location 'file:///tmp/spark_hudi_catalog/hudi_tb1';
+show tables in hudi_db1;
+show tables in hudi_db1.schema1;
-use db1.schema1;
-insert into iceberg_tb values (123), (234), (111);
-select * from iceberg_tb order by col1;
+use hudi_db1.schema1;
+insert into hudi_tb1 values (1, 'alice'), (2, 'bob');
+select * from hudi_tb1 order by id;
-create table delta_tb1(col1 string) using delta location
'file:///tmp/spark_catalog/delta_tb1';
-insert into delta_tb1 values ('ab'), ('bb'), ('dd');
-select * from delta_tb1 order by col1;
+create table hudi_tb2 (name string, age int, country string) using hudi
partitioned by (country) location 'file:///tmp/spark_hudi_catalog/hudi_tb2';
+insert into hudi_tb2 values ('anna', 10, 'US'), ('james', 32, 'US'), ('yan',
16, 'CHINA');
+select name, country from hudi_tb2 order by age;
show tables;
-use db1;
-create table delta_tb2(col1 int) using delta location
'file:///tmp/spark_catalog/delta_tb2';
-insert into delta_tb2 values (1), (2), (3) order by col1;
-select * from delta_tb2;
+use hudi_db1;
+create table iceberg_tb (col1 int);
+insert into iceberg_tb values (100), (200);
+select * from iceberg_tb order by col1;
show tables;
-show tables in db1.schema1;
-
-drop table db1.schema1.iceberg_tb;
-drop table db1.schema1.delta_tb1;
-drop namespace db1.schema1;
-drop table delta_tb2;
-drop namespace db1;
-drop namespace db2;
+show tables in hudi_db1.schema1;
+
+drop table hudi_db1.schema1.hudi_tb1;
+drop table hudi_db1.schema1.hudi_tb2;
+drop namespace hudi_db1.schema1;
+drop table iceberg_tb;
+drop namespace hudi_db1;
+drop namespace hudi_db2;
EOF
-# clean up the spark_catalog dir
-rm -rf /tmp/spark_catalog/
+# clean up the spark_hudi_catalog dir
+rm -rf /tmp/spark_hudi_catalog/
curl -i -X DELETE -H "Authorization: Bearer ${SPARK_BEARER_TOKEN}" -H 'Accept:
application/json' -H 'Content-Type: application/json' \
http://${POLARIS_HOST:-localhost}:8181/api/management/v1/catalogs/${CATALOG_NAME}
> /dev/stderr
+