This is an automated email from the ASF dual-hosted git repository.

yufei pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/polaris.git


The following commit(s) were added to refs/heads/main by this push:
     new 6e45ef7e0 Spark: Add regtests for Spark client to test built jars (#1402)
6e45ef7e0 is described below

commit 6e45ef7e0fd30820bd1c2f196e86d20f9ac75ba5
Author: gh-yzou <167037035+gh-y...@users.noreply.github.com>
AuthorDate: Wed Apr 23 22:12:00 2025 -0700

    Spark: Add regtests for Spark client to test built jars (#1402)
---
 .github/workflows/spark_client_regtests.yml    |  62 +++++++++
 build.gradle.kts                               |   2 +
 plugins/spark/v3.5/regtests/Dockerfile         |  48 +++++++
 plugins/spark/v3.5/regtests/README.md          |  86 +++++++++++++
 plugins/spark/v3.5/regtests/docker-compose.yml |  46 +++++++
 plugins/spark/v3.5/regtests/run.sh             | 132 +++++++++++++++++++
 plugins/spark/v3.5/regtests/setup.sh           | 169 +++++++++++++++++++++++++
 plugins/spark/v3.5/regtests/spark_sql.ref      |  57 +++++++++
 plugins/spark/v3.5/regtests/spark_sql.sh       |  81 ++++++++++++
 plugins/spark/v3.5/spark/build.gradle.kts      |  43 +++++--
 10 files changed, 718 insertions(+), 8 deletions(-)

diff --git a/.github/workflows/spark_client_regtests.yml b/.github/workflows/spark_client_regtests.yml
new file mode 100644
index 000000000..44e0fdca1
--- /dev/null
+++ b/.github/workflows/spark_client_regtests.yml
@@ -0,0 +1,62 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+name: Spark Client Regression Tests
+on:
+  push:
+    branches: [ "main" ]
+  pull_request:
+    branches: [ "main" ]
+
+jobs:
+  regtest:
+
+    runs-on: ubuntu-latest
+    permissions:
+      contents: read
+
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Set up JDK 21
+        uses: actions/setup-java@v4
+        with:
+          java-version: '21'
+          distribution: 'temurin'
+
+      - name: Fix permissions
        run: mkdir -p regtests/output && chmod 777 regtests/output && chmod 777 regtests/t_*/ref/*
+
+      - name: Project build without testing
+        run: ./gradlew assemble
+
+      - name: Image build
+        run: |
+          ./gradlew \
+              :polaris-quarkus-server:assemble \
+              :polaris-quarkus-server:quarkusAppPartsBuild --rerun \
+              -Dquarkus.container-image.build=true
+
+      # NOTE: the regression test runs with Spark 3.5.5 and Scala 2.12 on Java 17. We also have
+      # integration tests that run via the existing gradle.yml, which only runs on Java 21. Since
+      # Spark 3.5's Java compatibility is 8, 11, and 17, we should run the Spark client with those
+      # compatible Java versions.
+      # TODO: add separate spark client CI and run with Java 8, 11 and 17.
+      - name: Regression Test
+        run: |
+          docker compose -f plugins/spark/v3.5/regtests/docker-compose.yml up --build --exit-code-from regtest
diff --git a/build.gradle.kts b/build.gradle.kts
index e39abe385..feb6e368c 100644
--- a/build.gradle.kts
+++ b/build.gradle.kts
@@ -125,6 +125,8 @@ tasks.named<RatTask>("rat").configure {
 
   excludes.add("**/kotlin-compiler*")
   excludes.add("**/build-logic/.kotlin/**")
+
+  excludes.add("plugins/**/*.ref")
 }
 
 // Pass environment variables:
diff --git a/plugins/spark/v3.5/regtests/Dockerfile b/plugins/spark/v3.5/regtests/Dockerfile
new file mode 100755
index 000000000..1620c12ae
--- /dev/null
+++ b/plugins/spark/v3.5/regtests/Dockerfile
@@ -0,0 +1,48 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+FROM docker.io/apache/spark:3.5.5-java17
+ARG POLARIS_HOST=polaris
+ENV POLARIS_HOST=$POLARIS_HOST
+ENV SPARK_HOME=/opt/spark
+ENV CURRENT_SCALA_VERSION='2.12'
+ENV LANGUAGE='en_US:en'
+
+USER root
+RUN apt-get update && apt-get install -y diffutils wget curl
+RUN mkdir -p /home/spark &&  \
+    chown -R spark /home/spark && \
+    mkdir -p /tmp/polaris-regtests && \
+    chown -R spark /tmp/polaris-regtests
+RUN mkdir /opt/spark/conf && chmod -R 777 /opt/spark/conf
+
+USER spark
+
+WORKDIR /home/spark/polaris
+
+COPY --chown=spark ./v3.5 /home/spark/polaris/v3.5
+
+# /home/spark/regtests might not be writable in all situations, see https://github.com/apache/polaris/pull/205
+USER root
+RUN chmod -R go+rwx /home/spark/polaris
+RUN chmod -R 777 ./v3.5/regtests
+USER spark
+
+ENTRYPOINT ["./v3.5/regtests/run.sh"]
diff --git a/plugins/spark/v3.5/regtests/README.md b/plugins/spark/v3.5/regtests/README.md
new file mode 100755
index 000000000..75dd57a5a
--- /dev/null
+++ b/plugins/spark/v3.5/regtests/README.md
@@ -0,0 +1,86 @@
+<!--
+
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements.  See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership.  The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License.  You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied.  See the License for the
+ specific language governing permissions and limitations
+ under the License.
+
+-->
+
+# End-to-end regression tests
+
+The regtests provide basic end-to-end tests for spark_sql using the Spark client jars.
+
+Regression tests are either run in Docker, using docker-compose to orchestrate the tests, or
+locally.
+
+**NOTE** regtests are light-weight tests to ensure the built jars can be used to start
+Spark and run basic SQL commands. Please use the integration tests for detailed testing.
+
+## Prerequisites
+
+It is recommended to clean the `regtests/output` directory before running tests. This can be done by
+running:
+
+```shell
+rm -rf ./plugins/spark/v3.5/regtests/output && mkdir -p ./plugins/spark/v3.5/regtests/output && chmod -R 777 ./plugins/spark/v3.5/regtests/output
+```
+
+## Run Tests With Docker Compose
+
+Tests can be run with docker-compose using the provided `./plugins/spark/v3.5/regtests/docker-compose.yml` file, as
+follows:
+
+```shell
+./gradlew build
+./gradlew \
+  :polaris-quarkus-server:assemble \
+  :polaris-quarkus-server:quarkusAppPartsBuild --rerun \
+  -Dquarkus.container-image.build=true
+docker compose -f ./plugins/spark/v3.5/regtests/docker-compose.yml up --build --exit-code-from regtest
+```
+
+In this setup, a Polaris container will be started in a docker-compose group, using the image
+previously built by the Gradle build. Then another container, including a Spark SQL shell, will run
+the tests. The exit code will be the same as the exit code of the Spark container.
+**NOTE** Docker compose only supports testing with Scala 2.12, because no Scala 2.13 image is
+available for Spark 3.5. Scala 2.13 will be supported for Spark 4.0.
+
+This is the flow used in CI and should be done locally before pushing to GitHub to ensure that no
+environmental factors contribute to the outcome of the tests.
+
+**Important**: if you are also using minikube, for example to test the Helm chart, you may need to
+_unset_ the Docker environment that was pointing to the Minikube Docker daemon, otherwise the image
+will be built by the Minikube Docker daemon and will not be available to the local Docker daemon.
+This can be done by running, _before_ building the image and running the tests:
+
+```shell
+eval $(minikube -p minikube docker-env --unset)
+```
+
+## Run Tests Locally
+
+Regression tests can be run locally as well, using the test harness. For local testing, both
+Scala 2.12 and Scala 2.13 are supported.
+
+To run regression tests locally, run the following:
+- `./gradlew build` -- build the Polaris project and Spark Client jars.
+- `./gradlew run` -- start a Polaris server on localhost:8181.
+- `env POLARIS_HOST=localhost ./plugins/spark/v3.5/regtests/run.sh` -- run regtests.
+
+Note: the regression tests expect Polaris to run with certain options, e.g. with support for `FILE`
+storage, default realm `POLARIS` and root credentials `root:secret`; if you run the above command,
+this will be the case. If you run Polaris in a different way, make sure that Polaris is configured
+appropriately.
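+
+As a quick sanity check that a locally running Polaris accepts the expected root credentials, you
+can request a bearer token the same way `run.sh` does (a minimal sketch of the same `curl` call;
+adjust the host if Polaris runs elsewhere):
+
+```shell
+curl -X POST -H "Polaris-Realm: POLARIS" \
+  "http://localhost:8181/api/catalog/v1/oauth/tokens" \
+  -d "grant_type=client_credentials" \
+  -d "client_id=root" \
+  -d "client_secret=secret" \
+  -d "scope=PRINCIPAL_ROLE:ALL"
+```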
diff --git a/plugins/spark/v3.5/regtests/docker-compose.yml b/plugins/spark/v3.5/regtests/docker-compose.yml
new file mode 100755
index 000000000..e1ea1a898
--- /dev/null
+++ b/plugins/spark/v3.5/regtests/docker-compose.yml
@@ -0,0 +1,46 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+services:
+  polaris:
+    image: apache/polaris:latest
+    ports:
+      - "8181"
+      - "8182"
+    environment:
+      AWS_REGION: us-west-2
+      POLARIS_BOOTSTRAP_CREDENTIALS: POLARIS,root,secret
+      quarkus.log.file.enable: "false"
+      quarkus.otel.sdk.disabled: "true"
+    healthcheck:
+      test: ["CMD", "curl", "http://localhost:8182/q/health";]
+      interval: 10s
+      timeout: 10s
+      retries: 5
+  regtest:
+    build:
+      context: ../..
+      dockerfile: v3.5/regtests/Dockerfile
+      args:
+        POLARIS_HOST: polaris
+    depends_on:
+      polaris:
+        condition: service_healthy
+    volumes:
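+      # expose the container's /tmp/polaris-regtests output under ./output on the host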
+      - ./output:/tmp/polaris-regtests/
diff --git a/plugins/spark/v3.5/regtests/run.sh b/plugins/spark/v3.5/regtests/run.sh
new file mode 100755
index 000000000..d850a4465
--- /dev/null
+++ b/plugins/spark/v3.5/regtests/run.sh
@@ -0,0 +1,132 @@
+#!/bin/bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+# Run without args to run all tests.
+SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
+SPARK_ROOT_DIR=$(dirname ${SCRIPT_DIR})
+export SPARK_LOCAL_HOSTNAME=localhost # avoid VPN messing up driver local IP address binding
+
+FMT_RED='\033[0;31m'
+FMT_GREEN='\033[0;32m'
+FMT_NC='\033[0m'
+
+function loginfo() {
+  echo "$(date): ${@}"
+}
+function loggreen() {
+  echo -e "${FMT_GREEN}$(date): ${@}${FMT_NC}"
+}
+function logred() {
+  echo -e "${FMT_RED}$(date): ${@}${FMT_NC}"
+}
+
+# Allow bearer token to be provided if desired
+if [[ -z "$REGTEST_ROOT_BEARER_TOKEN" ]]; then
+  if ! output=$(curl -X POST -H "Polaris-Realm: POLARIS" "http://${POLARIS_HOST:-localhost}:8181/api/catalog/v1/oauth/tokens" \
+    -d "grant_type=client_credentials" \
+    -d "client_id=root" \
+    -d "client_secret=secret" \
+    -d "scope=PRINCIPAL_ROLE:ALL"); then
+    logred "Error: Failed to retrieve bearer token"
+    exit 1
+  fi
+
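+  # the token endpoint responds with JSON like {"access_token":"<token>","token_type":...};
+  # splitting on double quotes makes the token value (or the error code) the 4th field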
+  token=$(echo "$output" | awk -F\" '{print $4}')
+
+  if [ "$token" == "unauthorized_client" ]; then
+    logred "Error: Failed to retrieve bearer token"
+    exit 1
+  fi
+
+  export REGTEST_ROOT_BEARER_TOKEN=$token
+fi
+
+echo "Root bearer token: ${REGTEST_ROOT_BEARER_TOKEN}"
+
+NUM_FAILURES=0
+
+SCALA_VERSIONS=("2.12" "2.13")
+if [[ -n "$CURRENT_SCALA_VERSION" ]]; then
+  SCALA_VERSIONS=("${CURRENT_SCALA_VERSION}")
+fi
+SPARK_MAJOR_VERSION="3.5"
+SPARK_VERSION="3.5.5"
+
+for SCALA_VERSION in "${SCALA_VERSIONS[@]}"; do
+  echo "RUN REGRESSION TEST FOR SPARK_MAJOR_VERSION=${SPARK_MAJOR_VERSION}, 
SPARK_VERSION=${SPARK_VERSION}, SCALA_VERSION=${SCALA_VERSION}"
+  # find the project jar
+  SPARK_DIR=${SPARK_ROOT_DIR}/spark
+  JAR_PATH=$(find ${SPARK_DIR} -name "polaris-iceberg-*.*-spark-runtime-${SPARK_MAJOR_VERSION}_${SCALA_VERSION}-*.jar" -print -quit)
+  echo "Found jar ${JAR_PATH}"
+
+  SPARK_EXISTS="TRUE"
+  if [ -z "${SPARK_HOME}" ]; then
+    SPARK_EXISTS="FALSE"
+  fi
+
+  source ${SCRIPT_DIR}/setup.sh --sparkVersion ${SPARK_VERSION} --scalaVersion ${SCALA_VERSION} --jar ${JAR_PATH}
+
+  # run the spark_sql test
+  loginfo "Starting test spark_sql.sh"
+
+  TEST_FILE="spark_sql.sh"
+  TEST_SHORTNAME="spark_sql"
+  TEST_TMPDIR="/tmp/polaris-spark-regtests/${TEST_SHORTNAME}_${SPARK_MAJOR_VERSION}_${SCALA_VERSION}"
+  TEST_STDERR="${TEST_TMPDIR}/${TEST_SHORTNAME}.stderr"
+  TEST_STDOUT="${TEST_TMPDIR}/${TEST_SHORTNAME}.stdout"
+
+  mkdir -p ${TEST_TMPDIR}
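+  # with VERBOSE set, mirror test output to the console while still capturing it for the .ref comparison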
+  if (( ${VERBOSE} )); then
+    ${SCRIPT_DIR}/${TEST_FILE} 2>${TEST_STDERR} | grep -v 'loading settings' | tee ${TEST_STDOUT}
+  else
+    ${SCRIPT_DIR}/${TEST_FILE} 2>${TEST_STDERR} | grep -v 'loading settings' > ${TEST_STDOUT}
+  fi
+  loginfo "Test run concluded for ${TEST_SUITE}:${TEST_SHORTNAME}"
+
+  TEST_REF="$(realpath ${SCRIPT_DIR})/${TEST_SHORTNAME}.ref"
+  if cmp --silent ${TEST_STDOUT} ${TEST_REF}; then
+    loggreen "Test SUCCEEDED: ${TEST_SUITE}:${TEST_SHORTNAME}"
+  else
+    logred "Test FAILED: ${TEST_SUITE}:${TEST_SHORTNAME}"
+    echo '#!/bin/bash' > ${TEST_TMPDIR}/${TEST_SHORTNAME}.fixdiffs.sh
+    echo "meld ${TEST_STDOUT} ${TEST_REF}" >> 
${TEST_TMPDIR}/${TEST_SHORTNAME}.fixdiffs.sh
+    chmod 750 ${TEST_TMPDIR}/${TEST_SHORTNAME}.fixdiffs.sh
+    logred "To compare and fix diffs (if 'meld' installed): 
${TEST_TMPDIR}/${TEST_SHORTNAME}.fixdiffs.sh"
+    logred "Or manually diff: diff ${TEST_STDOUT} ${TEST_REF}"
+    logred "See stderr from test run for additional diagnostics: 
${TEST_STDERR}"
+    diff ${TEST_STDOUT} ${TEST_REF}
+    NUM_FAILURES=$(( NUM_FAILURES + 1 ))
+  fi
+
+  # clean up
+  if [ "${SPARK_EXISTS}" = "FALSE" ]; then
+    rm -rf ${SPARK_HOME}
+    export SPARK_HOME=""
+  fi
+done
+
+# clean the output dir
+rm -rf ${SCRIPT_DIR}/output
+
+loginfo "Tests completed with ${NUM_FAILURES} failures"
+if (( ${NUM_FAILURES} > 0 )); then
+  exit 1
+else
+  exit 0
+fi
diff --git a/plugins/spark/v3.5/regtests/setup.sh b/plugins/spark/v3.5/regtests/setup.sh
new file mode 100755
index 000000000..072b08f6d
--- /dev/null
+++ b/plugins/spark/v3.5/regtests/setup.sh
@@ -0,0 +1,169 @@
+#!/bin/bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+###################################
+# Idempotent setup for spark regression tests. Run manually or let run.sh auto-run.
+#
+# Warning - first time setup may download large amounts of files
+# Warning - may clobber conf/spark-defaults.conf
+# Warning - it will set the SPARK_HOME environment variable with the spark setup
+#
+# The script can be called independently, as follows:
+#   ./setup.sh --sparkVersion ${SPARK_VERSION} --scalaVersion ${SCALA_VERSION} --jar ${JAR_PATH}
+# Required Parameters:
+#   --sparkVersion   : the spark version to setup
+#   --scalaVersion   : the scala version of spark to setup
+#   --jar            : path to the local Polaris Spark client jar
+#
+
+set -x
+
+SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
+
+SPARK_VERSION=3.5.5
+SCALA_VERSION=2.12
+POLARIS_CLIENT_JAR=""
+while [[ $# -gt 0 ]]; do
+  case "$1" in
+    --sparkVersion)
+      SPARK_VERSION="$2"
+      shift # past argument
+      shift # past value
+      ;;
+    --scalaVersion)
+      SCALA_VERSION="$2"
+      shift # past argument
+      shift # past value
+      ;;
+    --jar)
+      POLARIS_CLIENT_JAR="$2"
+      shift # past argument
+      shift # past value
+      ;;
+    --) shift;
+      break
+      ;;
+  esac
+done
+
+echo "SET UP FOR SPARK_VERSION=${SPARK_VERSION} SCALA_VERSION=${SCALA_VERSION} 
POLARIS_CLIENT_JAR=${POLARIS_CLIENT_JAR}"
+
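+# Spark's default binary distribution is built with Scala 2.12; only the Scala 2.13 build
+# carries an explicit scala suffix in its tarball name.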
+if [ "$SCALA_VERSION" == "2.12" ]; then
+  SPARK_DISTRIBUTION=spark-${SPARK_VERSION}-bin-hadoop3
+else
+  SPARK_DISTRIBUTION=spark-${SPARK_VERSION}-bin-hadoop3-scala${SCALA_VERSION}
+fi
+
+echo "Getting spark distribution ${SPARK_DISTRIBUTION}"
+
+if [ -z "${SPARK_HOME}" ]; then
+  SPARK_HOME=$(realpath ~/${SPARK_DISTRIBUTION})
+fi
+SPARK_CONF="${SPARK_HOME}/conf/spark-defaults.conf"
+DERBY_HOME="/tmp/derby"
+
+# Ensure binaries are downloaded locally
+echo 'Verifying Spark binaries...'
+if ! [ -f ${SPARK_HOME}/bin/spark-sql ]; then
+  echo 'Setting up Spark...'
+  if [ -z "${SPARK_VERSION}" ] || [ -z "${SPARK_DISTRIBUTION}" ]; then
+    echo 'SPARK_VERSION or SPARK_DISTRIBUTION not set. Please set SPARK_VERSION and SPARK_DISTRIBUTION to the desired version.'
+    exit 1
+  fi
+  if ! [ -f ~/${SPARK_DISTRIBUTION}.tgz ]; then
+    echo 'Downloading spark distro...'
+    wget -O ~/${SPARK_DISTRIBUTION}.tgz https://archive.apache.org/dist/spark/spark-${SPARK_VERSION}/${SPARK_DISTRIBUTION}.tgz
+    if ! [ -f ~/${SPARK_DISTRIBUTION}.tgz ]; then
+      if [[ "${OSTYPE}" == "darwin"* ]]; then
+        echo "Detected OS: mac. Running 'brew install wget' to try again."
+        brew install wget
+        wget -O ~/${SPARK_DISTRIBUTION}.tgz https://archive.apache.org/dist/spark/spark-${SPARK_VERSION}/${SPARK_DISTRIBUTION}.tgz
+      fi
+    fi
+  else
+    echo 'Found existing Spark tarball'
+  fi
+  # check if the download was successful
+  if ! [ -f ~/${SPARK_DISTRIBUTION}.tgz ]; then
+    echo 'Failed to download Spark distribution. Please check the logs.'
+    exit 1
+  fi
+  tar xzvf ~/${SPARK_DISTRIBUTION}.tgz -C ~/${TEST_ROOT_DIR}
+  if [ $? -ne 0 ]; then
+    echo 'Failed to extract Spark distribution. Please check the logs.'
+    exit 1
+  else
+    echo 'Extracted Spark distribution.'
+    rm ~/${SPARK_DISTRIBUTION}.tgz
+  fi
+  SPARK_HOME=$(realpath ~/${SPARK_DISTRIBUTION})
+  SPARK_CONF="${SPARK_HOME}/conf/spark-defaults.conf"
+else
+  echo 'Verified Spark distro already installed.'
+fi
+
+echo "SPARK_HOME=${SPARK_HOME}"
+echo "SPARK_CONF=${SPARK_CONF}"
+
+# Ensure Spark boilerplate conf is set
+echo 'Verifying Spark conf...'
+if grep 'POLARIS_TESTCONF_V5' ${SPARK_CONF} 2>/dev/null; then
+  echo 'Verified spark conf'
+else
+  echo 'Setting spark conf...'
+  # Instead of clobbering existing spark conf, just comment it all out in case it was customized carefully.
+  sed -i 's/^/# /' ${SPARK_CONF}
+cat << EOF >> ${SPARK_CONF}
+
+# POLARIS_TESTCONF_V5 - Polaris Spark client test conf (marker checked above for idempotency)
+spark.jars $POLARIS_CLIENT_JAR
+spark.jars.packages io.delta:delta-spark_${SCALA_VERSION}:3.2.1
+spark.sql.variable.substitute true
+
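+# keep the embedded Derby metastore in a known location so each run can start from a clean state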
+spark.driver.extraJavaOptions -Dderby.system.home=${DERBY_HOME}
+
+spark.sql.extensions=org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions,io.delta.sql.DeltaSparkSessionExtension
+# this configuration is needed for delta table
+spark.sql.catalog.spark_catalog=org.apache.spark.sql.delta.catalog.DeltaCatalog
+spark.sql.catalog.polaris=org.apache.polaris.spark.SparkCatalog
+spark.sql.catalog.polaris.uri=http://${POLARIS_HOST:-localhost}:8181/api/catalog
+# this configuration is currently only used for iceberg tables; generic tables
+# don't support credential vending yet
+spark.sql.catalog.polaris.header.X-Iceberg-Access-Delegation=vended-credentials
+spark.sql.catalog.polaris.client.region=us-west-2
+# configuration required to ensure DataSourceV2 load works correctly for
+# different table formats
+spark.sql.sources.useV1SourceList=''
+EOF
+  echo 'Success!'
+fi
+
+# cleanup derby home if existed
+if [ -d "${DERBY_HOME}" ]; then
+  echo "Directory ${DERBY_HOME} exists. Deleting it..."
+  rm -rf "${DERBY_HOME}"
+fi
+
+echo "Launch spark-sql at ${SPARK_HOME}/bin/spark-sql"
+# bootstrap dependencies so that future queries don't need to wait for the downloads.
+# this is mostly useful for building the Docker image with all needed dependencies
+${SPARK_HOME}/bin/spark-sql -e "SELECT 1"
+
+# ensure SPARK_HOME is setup for later tests
+export SPARK_HOME=$SPARK_HOME
diff --git a/plugins/spark/v3.5/regtests/spark_sql.ref b/plugins/spark/v3.5/regtests/spark_sql.ref
new file mode 100755
index 000000000..5825d0931
--- /dev/null
+++ b/plugins/spark/v3.5/regtests/spark_sql.ref
@@ -0,0 +1,57 @@
+{"defaults":{"default-base-location":"file:///tmp/spark_catalog"},"overrides":{"prefix":"spark_sql_catalog"},"endpoints":["GET
 /v1/{prefix}/namespaces","GET /v1/{prefix}/namespaces/{namespace}","HEAD 
/v1/{prefix}/namespaces/{namespace}","POST /v1/{prefix}/namespaces","POST 
/v1/{prefix}/namespaces/{namespace}/properties","DELETE 
/v1/{prefix}/namespaces/{namespace}","GET 
/v1/{prefix}/namespaces/{namespace}/tables","GET 
/v1/{prefix}/namespaces/{namespace}/tables/{table}","HEAD /v1/{prefix}/ [...]
+Catalog created
+spark-sql (default)> use polaris;
+spark-sql ()> create namespace db1;
+spark-sql ()> create namespace db2;
+spark-sql ()> show namespaces;
+db1
+db2
+spark-sql ()> 
+            > create namespace db1.schema1;
+spark-sql ()> show namespaces in db1;
+db1.schema1
+spark-sql ()> 
+            > create table db1.schema1.iceberg_tb (col1 int);
+spark-sql ()> show tables in db1;
+spark-sql ()> show tables in db1.schema1;
+iceberg_tb
+spark-sql ()> 
+            > use db1.schema1;
+spark-sql (db1.schema1)> insert into iceberg_tb values (123), (234), (111);
+spark-sql (db1.schema1)> select * from iceberg_tb order by col1;
+111
+123
+234
+spark-sql (db1.schema1)> 
+                       > create table delta_tb1(col1 string) using delta location 'file:///tmp/spark_catalog/delta_tb1';
+spark-sql (db1.schema1)> insert into delta_tb1 values ('ab'), ('bb'), ('dd');
+spark-sql (db1.schema1)> select * from delta_tb1 order by col1;
+ab
+bb
+dd
+spark-sql (db1.schema1)> 
+                       > show tables;
+iceberg_tb
+delta_tb1
+spark-sql (db1.schema1)> 
+                       > use db1;
+spark-sql (db1)> create table delta_tb2(col1 int) using delta location 'file:///tmp/spark_catalog/delta_tb2';
+spark-sql (db1)> insert into delta_tb2 values (1), (2), (3) order by col1;
+spark-sql (db1)> select * from delta_tb2;
+1
+2
+3
+spark-sql (db1)> 
+               > show tables;
+delta_tb2
+spark-sql (db1)> show tables in db1.schema1;
+iceberg_tb
+delta_tb1
+spark-sql (db1)> 
+               > drop table db1.schema1.iceberg_tb;
+spark-sql (db1)> drop table db1.schema1.delta_tb1;
+spark-sql (db1)> drop namespace db1.schema1;
+spark-sql (db1)> drop table delta_tb2;
+spark-sql (db1)> drop namespace db1;
+spark-sql (db1)> drop namespace db2;
+spark-sql (db1)> 
diff --git a/plugins/spark/v3.5/regtests/spark_sql.sh b/plugins/spark/v3.5/regtests/spark_sql.sh
new file mode 100755
index 000000000..fe036664c
--- /dev/null
+++ b/plugins/spark/v3.5/regtests/spark_sql.sh
@@ -0,0 +1,81 @@
+#!/bin/bash
+
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+SPARK_BEARER_TOKEN="${REGTEST_ROOT_BEARER_TOKEN}"
+
+CATALOG_NAME="spark_sql_catalog"
+curl -i -X POST -H "Authorization: Bearer ${SPARK_BEARER_TOKEN}" -H 'Accept: application/json' -H 'Content-Type: application/json' \
+  http://${POLARIS_HOST:-localhost}:8181/api/management/v1/catalogs \
+  -d '{"name": "spark_sql_catalog", "id": 100, "type": "INTERNAL", "readOnly": false, "properties": {"default-base-location": "file:///tmp/spark_catalog"}, "storageConfigInfo": {"storageType": "FILE", "allowedLocations": ["file:///tmp"]}}' > /dev/stderr
+
+# Add TABLE_WRITE_DATA to the catalog's catalog_admin role since by default it can only manage access and metadata
+curl -i -X PUT -H "Authorization: Bearer ${SPARK_BEARER_TOKEN}" -H 'Accept: application/json' -H 'Content-Type: application/json' \
+  http://${POLARIS_HOST:-localhost}:8181/api/management/v1/catalogs/${CATALOG_NAME}/catalog-roles/catalog_admin/grants \
+  -d '{"type": "catalog", "privilege": "TABLE_WRITE_DATA"}' > /dev/stderr
+
+curl -H "Authorization: Bearer ${SPARK_BEARER_TOKEN}" -H 'Accept: 
application/json' -H 'Content-Type: application/json' \
+  
"http://${POLARIS_HOST:-localhost}:8181/api/catalog/v1/config?warehouse=${CATALOG_NAME}";
+echo
+echo "Catalog created"
+cat << EOF | ${SPARK_HOME}/bin/spark-sql -S --conf spark.sql.catalog.polaris.token="${SPARK_BEARER_TOKEN}" --conf spark.sql.catalog.polaris.warehouse=${CATALOG_NAME}
+use polaris;
+create namespace db1;
+create namespace db2;
+show namespaces;
+
+create namespace db1.schema1;
+show namespaces in db1;
+
+create table db1.schema1.iceberg_tb (col1 int);
+show tables in db1;
+show tables in db1.schema1;
+
+use db1.schema1;
+insert into iceberg_tb values (123), (234), (111);
+select * from iceberg_tb order by col1;
+
+create table delta_tb1(col1 string) using delta location 'file:///tmp/spark_catalog/delta_tb1';
+insert into delta_tb1 values ('ab'), ('bb'), ('dd');
+select * from delta_tb1 order by col1;
+
+show tables;
+
+use db1;
+create table delta_tb2(col1 int) using delta location 'file:///tmp/spark_catalog/delta_tb2';
+insert into delta_tb2 values (1), (2), (3) order by col1;
+select * from delta_tb2;
+
+show tables;
+show tables in db1.schema1;
+
+drop table db1.schema1.iceberg_tb;
+drop table db1.schema1.delta_tb1;
+drop namespace db1.schema1;
+drop table delta_tb2;
+drop namespace db1;
+drop namespace db2;
+EOF
+
+# clean up the spark_catalog dir
+rm -rf /tmp/spark_catalog/
+
+curl -i -X DELETE -H "Authorization: Bearer ${SPARK_BEARER_TOKEN}" -H 'Accept: application/json' -H 'Content-Type: application/json' \
+  http://${POLARIS_HOST:-localhost}:8181/api/management/v1/catalogs/${CATALOG_NAME} > /dev/stderr
diff --git a/plugins/spark/v3.5/spark/build.gradle.kts b/plugins/spark/v3.5/spark/build.gradle.kts
index ddf27ce1f..5ce7e73c0 100644
--- a/plugins/spark/v3.5/spark/build.gradle.kts
+++ b/plugins/spark/v3.5/spark/build.gradle.kts
@@ -19,10 +19,7 @@
 
 import com.github.jengelman.gradle.plugins.shadow.tasks.ShadowJar
 
-plugins {
-  id("polaris-client")
-  alias(libs.plugins.jandex)
-}
+plugins { id("polaris-client") }
 
 // get version information
 val sparkMajorVersion = "3.5"
@@ -38,13 +35,45 @@ val scalaLibraryVersion =
   }
 
 dependencies {
+  // TODO: extract a polaris-rest module as a thin layer for
+  //  clients to depend on.
   implementation(project(":polaris-api-iceberg-service")) {
     // exclude the iceberg dependencies, use the ones pulled
     // by iceberg-core
     exclude("org.apache.iceberg", "*")
+    // exclude all cloud and quarkus specific dependencies to avoid
+    // running into problems with signature files.
+    exclude("com.azure", "*")
+    exclude("software.amazon.awssdk", "*")
+    exclude("com.google.cloud", "*")
+    exclude("io.airlift", "*")
+    exclude("io.smallrye", "*")
+    exclude("io.smallrye.common", "*")
+    exclude("io.swagger", "*")
+    exclude("org.apache.commons", "*")
+  }
+  implementation(project(":polaris-api-catalog-service")) {
+    exclude("org.apache.iceberg", "*")
+    exclude("com.azure", "*")
+    exclude("software.amazon.awssdk", "*")
+    exclude("com.google.cloud", "*")
+    exclude("io.airlift", "*")
+    exclude("io.smallrye", "*")
+    exclude("io.smallrye.common", "*")
+    exclude("io.swagger", "*")
+    exclude("org.apache.commons", "*")
+  }
+  implementation(project(":polaris-core")) {
+    exclude("org.apache.iceberg", "*")
+    exclude("com.azure", "*")
+    exclude("software.amazon.awssdk", "*")
+    exclude("com.google.cloud", "*")
+    exclude("io.airlift", "*")
+    exclude("io.smallrye", "*")
+    exclude("io.smallrye.common", "*")
+    exclude("io.swagger", "*")
+    exclude("org.apache.commons", "*")
   }
-  implementation(project(":polaris-api-catalog-service"))
-  implementation(project(":polaris-core")) { exclude("org.apache.iceberg", 
"*") }
 
   implementation("org.apache.iceberg:iceberg-core:${icebergVersion}")
 
@@ -127,8 +156,6 @@ tasks.register<ShadowJar>("createPolarisSparkJar") {
     "polaris-iceberg-${icebergVersion}-spark-runtime-${sparkMajorVersion}_${scalaVersion}"
   isZip64 = true
 
-  mergeServiceFiles()
-
   // pack both the source code and dependencies
 
   from(sourceSets.main.get().output)
