This is an automated email from the ASF dual-hosted git repository.
roryqi pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/gravitino-playground.git
The following commit(s) were added to refs/heads/main by this push:
     new 87da209  [MINOR] Update version to 1.1.0 (#151)
87da209 is described below
commit 87da209244e2745b9928cd5403de4c85d88647fc
Author: roryqi <[email protected]>
AuthorDate: Tue Dec 23 12:05:54 2025 +0800
[MINOR] Update version to 1.1.0 (#151)
### What changes were proposed in this pull request?
Update version to 1.1.0
### Why are the changes needed?
Update the version to 1.1.0
Update Iceberg version to 1.10.0. Because we need the Iceberg basic
authentication
Add two demo cases:
1. Metadata action system demo
2. IRC Access control demo
### Does this PR introduce _any_ user-facing change?
No need.
### How was this patch tested?
By hand.
---
.env | 8 +-
README.md | 428 +++++++++++++++++++++
docker-enable-auth-override.yaml | 23 ++
init/gravitino/gravitino.conf | 7 +-
init/gravitino/init.sh | 23 ++
.../gravitino-access-control-example.ipynb | 90 +++--
init/jupyter/gravitino-fileset-example.ipynb | 4 +-
init/jupyter/gravitino-spark-trino-example.ipynb | 4 +-
init/jupyter/gravitino_llamaIndex_demo.ipynb | 3 +-
init/spark/spark-dependency.sh | 2 +-
playground.sh | 46 ++-
11 files changed, 572 insertions(+), 66 deletions(-)
diff --git a/.env b/.env
index c98cf30..ad37e53 100644
--- a/.env
+++ b/.env
@@ -1,8 +1,8 @@
# Image tags for the docker-compose file
HIVE_IMAGE_TAG=hive-0.1.15
RANGER_IMAGE_TAG=ranger-0.1.0
-GRAVITINO_IMAGE_TAG=1.0.0
-TRINO_IMAGE_TAG=trino-435-gravitino-1.0.0
+GRAVITINO_IMAGE_TAG=1.1.0
+TRINO_IMAGE_TAG=trino-435-gravitino-1.1.0
POSTGRESQL_IMAGE_TAG=13
MYSQL_IMAGE_TAG=8.0
SPARK_IMAGE_TAG=3.4.1-scala2.12-java11-ubuntu
@@ -11,6 +11,6 @@ PROMETHEUS_IMAGE_TAG=v2.55.0
GRAFANA_IMAGE_TAG=11.3.0-ubuntu
# Gravitino jars for containers
-SPARK_CONNECTOR_JAR=gravitino-spark-connector-runtime-3.4_2.12-1.0.0.jar
+SPARK_CONNECTOR_JAR=gravitino-spark-connector-runtime-3.4_2.12-1.1.0.jar
-GRAVITINO_VERSION=1.0.0
+GRAVITINO_VERSION=1.1.0
diff --git a/README.md b/README.md
index 8f19cf2..5627db0 100644
--- a/README.md
+++ b/README.md
@@ -334,6 +334,434 @@ You can run the command
The demo is located in the `jupyter` folder, you can open the
`gravitino-access-control-example.ipynb`
demo via Jupyter Notebook by [http://localhost:18888](http://localhost:18888).
+### Using Gravitino Iceberg REST Server with Access Control
+
+Gravitino 1.1 introduced built-in access control for the Iceberg REST server,
enabling fine-grained
+authorization for Iceberg tables without requiring external authorization
services like Ranger.
+This feature allows you to manage user permissions through Gravitino's unified
API with native
+access control enforcement at the REST API level.
+
+**Security note (authentication)**: The Iceberg REST catalog examples shown
here use HTTP Basic Authentication only as a transport to pass the username
through the `Authorization` header. Gravitino currently **does not verify the
Basic Auth password** and instead fully trusts the username provided in the
header for access control decisions. As a result, this mechanism **does not
provide real authentication**: any client that can reach the REST endpoint
could impersonate any user by choos [...]
+
+This behavior is intended **for local/demo use only** (such as when running
the playground) and **must not be relied upon in production** or any
environment exposed to untrusted clients. For secure deployments, you must
front the Iceberg REST server with a real authentication mechanism (for
example, an authenticating reverse proxy, API gateway, or other identity
provider) and configure Gravitino to validate the authenticated identity,
rather than trusting arbitrary usernames from the `Au [...]
+#### Demo Steps
+
+**Step 1: Start the Playground with Auth Enabled**
+
+```shell
+./playground.sh start --enable-auth
+```
+
+**Note**: The `--enable-auth` flag enables Gravitino's access control by
removing the PassThroughAuthorizer, which allows proper privilege enforcement
for the Iceberg REST catalog.
+
+**Step 2: Create Users**
+
+Create users through Gravitino's REST API:
+
+```shell
+# Add manager user
+curl -X POST -H "Accept: application/vnd.gravitino.v1+json" \
+ -H "Content-Type: application/json" \
+ -d '{"name":"manager"}' \
+ http://localhost:8090/api/metalakes/metalake_demo/users
+
+# Add data_analyst user
+curl -X POST -H "Accept: application/vnd.gravitino.v1+json" \
+ -H "Content-Type: application/json" \
+ -d '{"name":"data_analyst"}' \
+ http://localhost:8090/api/metalakes/metalake_demo/users
+
+# Set manager as owner of the metalake
+curl -X PUT -H "Accept: application/vnd.gravitino.v1+json" \
+ -H "Content-Type: application/json" \
+ -d '{"name":"manager"}' \
+ http://localhost:8090/api/metalakes/metalake_demo/owners
+```
+
+**Step 3: Create Database and Table with Manager**
+
+Login to Spark container:
+
+```shell
+docker exec -it playground-spark bash
+```
+
+Start spark-sql as manager:
+
+```shell
+cd /opt/spark && /bin/bash bin/spark-sql --conf
spark.sql.catalog.catalog_rest.rest.auth.type=basic --conf
spark.sql.catalog.catalog_rest.rest.auth.basic.username=manager --conf
spark.sql.catalog.catalog_rest.rest.auth.basic.password=123
+```
+
+Create database and table:
+
+```sql
+USE catalog_rest;
+CREATE DATABASE IF NOT EXISTS demo_db;
+USE demo_db;
+
+CREATE TABLE IF NOT EXISTS employees (
+ employee_id INT,
+ name STRING,
+ department STRING,
+ salary DECIMAL(10,2)
+) USING iceberg;
+
+INSERT INTO employees VALUES
+ (1, 'Alice Johnson', 'Engineering', 95000.00),
+ (2, 'Bob Smith', 'Sales', 75000.00);
+
+SELECT * FROM employees;
+```
+
+**Step 4: Test Access Control Before Granting Privileges**
+
+
+Exit spark-sql and start a new session as data_analyst (without any privileges
yet):
+
+```shell
+export HADOOP_USER_NAME=data_analyst
+cd /opt/spark
+/bin/bash bin/spark-sql --conf
spark.sql.catalog.catalog_rest.rest.auth.type=basic --conf
spark.sql.catalog.catalog_rest.rest.auth.basic.username=data_analyst --conf
spark.sql.catalog.catalog_rest.rest.auth.basic.password=123
+```
+
+Try to query the table (this should FAIL):
+
+```sql
+USE catalog_rest.demo_db;
+
+-- This should FAIL - schema doesn't exist, because we don't have USE_SCHEMA
privilege
+```
+
+**Step 5: Create Role with Privileges and Assign to User**
+
+Exit spark-sql and create a role with the necessary privileges:
+
+```shell
+# Create role with all required privileges
+curl -X POST -H "Accept: application/vnd.gravitino.v1+json" \
+ -H "Content-Type: application/json" \
+ -d '{
+ "name": "analyst_role",
+ "securableObjects": [
+ {
+ "fullName": "catalog_iceberg",
+ "type": "CATALOG",
+ "privileges": [
+ {"name": "USE_CATALOG", "condition": "ALLOW"}
+ ]
+ },
+ {
+ "fullName": "catalog_iceberg.demo_db",
+ "type": "SCHEMA",
+ "privileges": [
+ {"name": "USE_SCHEMA", "condition": "ALLOW"}
+ ]
+ },
+ {
+ "fullName": "catalog_iceberg.demo_db.employees",
+ "type": "TABLE",
+ "privileges": [
+ {"name": "SELECT_TABLE", "condition": "ALLOW"}
+ ]
+ }
+ ]
+ }' \
+ http://localhost:8090/api/metalakes/metalake_demo/roles
+
+# Assign role to user
+curl -X PUT -H "Accept: application/vnd.gravitino.v1+json" \
+ -H "Content-Type: application/json" -d '{
+ "roleNames": ["analyst_role"]
+}'
http://localhost:8090/api/metalakes/metalake_demo/permissions/users/data_analyst/grant
+```
+
+Start spark-sql as data_analyst again and test:
+
+```shell
+cd /opt/spark && /bin/bash bin/spark-sql \
+ --conf spark.sql.catalog.catalog_rest.rest.auth.type=basic \
+ --conf spark.sql.catalog.catalog_rest.rest.auth.basic.username=data_analyst \
+ --conf spark.sql.catalog.catalog_rest.rest.auth.basic.password=123
+```
+
+Try to query the table again (this should SUCCEED now):
+
+```sql
+USE catalog_rest.demo_db;
+
+-- This should succeed - now has SELECT_TABLE privilege
+SELECT * FROM employees;
+```
+
+This demonstrates how Gravitino's access control works:
+- Before granting privileges: Access denied
+- After granting privileges: Access allowed
+
+For more details, refer to the [Gravitino
documentation](https://gravitino.apache.org/docs/latest/security/access-control).
+
+### Using Gravitino Policies, Statistics, and Jobs to Drop Unused Tables
+
+Gravitino 1.0+ provides a powerful combination of policies, statistics, and
jobs that enables automated data governance tasks. This demo shows how to
identify and drop tables that haven't been accessed for a long time, helping
you manage data lifecycle and reduce storage costs.
+
+**Workflow Overview:**
+1. **Statistics** - Track table usage with custom statistics (e.g.,
`custom-lastAccessTime`)
+2. **Policies** - Define rules for identifying unused tables (e.g., not
accessed for 90 days)
+3. **Jobs** - Execute automated actions to drop unused tables
+
+#### Demo Steps
+
+**Step 1: Start the Playground**
+
+```shell
+./playground.sh start
+```
+
+**Step 2: Update Statistics for an Existing Table**
+
+The playground already has tables in the Hive catalog. We'll use one of the
existing tables and update its statistics to simulate an old, unused table:
+
+```shell
+# First, verify the existing table
+docker exec -it playground-trino trino --execute "SELECT * FROM
catalog_hive.sales.customers LIMIT 5"
+
+# Calculate a date 100 days ago (more than the 90-day threshold)
+OLD_DATE=$(date -u -d '100 days ago' +%Y-%m-%dT%H:%M:%SZ 2>/dev/null || date
-u -v-100d +%Y-%m-%dT%H:%M:%SZ)
+
+# Update last access time for the table to make it appear unused
+curl -X PUT -H "Accept: application/vnd.gravitino.v1+json" \
+ -H "Content-Type: application/json" \
+ -d "{
+ \"updates\": {
+ \"custom-lastAccessTime\": \"$OLD_DATE\",
+ \"custom-rowCount\": \"10\"
+ }
+ }" \
+
http://localhost:8090/api/metalakes/metalake_demo/objects/table/catalog_hive.sales.customers/statistics
+
+# Check statistics to verify they were set
+curl -X GET -H "Accept: application/vnd.gravitino.v1+json" \
+
http://localhost:8090/api/metalakes/metalake_demo/objects/table/catalog_hive.sales.customers/statistics
+```
+
+You should see output like:
+```json
+{
+ "statistics": {
+ "custom-lastAccessTime": {
+ "value": "2024-09-08T10:30:00Z"
+ },
+ "custom-rowCount": {
+ "value": "10"
+ }
+ }
+}
+```
+
+**Step 3: Create a Policy for Unused Tables**
+
+Create a custom policy to identify tables not accessed for more than 90 days:
+
+```shell
+curl -X POST -H "Accept: application/vnd.gravitino.v1+json" \
+ -H "Content-Type: application/json" \
+ -d '{
+ "name": "unused_table_policy",
+ "comment": "Policy to identify tables not accessed for 90+ days",
+ "policyType": "custom",
+ "enabled": true,
+ "content": {
+ "customRules": {
+ "maxIdleDays": 90,
+ "action": "drop"
+ },
+ "supportedObjectTypes": ["TABLE"],
+ "properties": {
+ "checkStatistic": "custom-lastAccessTime",
+ "threshold": "90d"
+ }
+ }
+ }' \
+ http://localhost:8090/api/metalakes/metalake_demo/policies
+```
+
+**Step 4: Associate Policy with Tables**
+
+Associate the policy with the existing customers table:
+
+```shell
+# Associate policy with the customers table
+curl -X POST -H "Accept: application/vnd.gravitino.v1+json" \
+ -H "Content-Type: application/json" \
+ -d '{
+ "policiesToAdd": ["unused_table_policy"]
+ }' \
+
http://localhost:8090/api/metalakes/metalake_demo/objects/table/catalog_hive.sales.customers/policies
+
+# Verify the policy was associated
+curl -X GET -H "Accept: application/vnd.gravitino.v1+json" \
+
http://localhost:8090/api/metalakes/metalake_demo/objects/table/catalog_hive.sales.customers/policies
+```
+
+Alternatively, you can associate the policy with the entire schema to monitor
all tables:
+
+```shell
+# Associate with the entire schema (will apply to all tables in sales)
+curl -X POST -H "Accept: application/vnd.gravitino.v1+json" \
+ -H "Content-Type: application/json" \
+ -d '{
+ "policiesToAdd": ["unused_table_policy"]
+ }' \
+
http://localhost:8090/api/metalakes/metalake_demo/objects/schema/catalog_hive.sales/policies
+```
+
+**Step 5: Register a Job Template to Drop Unused Tables**
+
+Create a shell script job template to drop tables:
+
+```shell
+# First, create the drop script on the host
+cat > /tmp/drop_unused_tables.sh << 'EOF'
+#!/bin/bash
+# Script to drop unused tables based on policy evaluation
+CATALOG=$1
+SCHEMA=$2
+TABLE=$3
+
+echo "Checking if table ${CATALOG}.${SCHEMA}.${TABLE} should be dropped..."
+
+# Get table statistics (use localhost since script runs on host or in
container with port mapping)
+STATS=$(curl -s -X GET -H "Accept: application/vnd.gravitino.v1+json" \
+
"http://localhost:8090/api/metalakes/metalake_demo/objects/table/${CATALOG}.${SCHEMA}.${TABLE}/statistics")
+
+echo "Statistics response: $STATS"
+
+# Parse the statistics array to find custom-lastAccessTime
+LAST_ACCESS=$(echo $STATS | jq -r '.statistics[] |
select(.name=="custom-lastAccessTime") | .value')
+echo "Last access time: $LAST_ACCESS"
+
+# Calculate days since last access
+if [ -n "$LAST_ACCESS" ] && [ "$LAST_ACCESS" != "null" ]; then
+ CURRENT_DATE=$(date +%s)
+ LAST_ACCESS_DATE=$(date -d "$LAST_ACCESS" +%s 2>/dev/null || date -j -f
"%Y-%m-%dT%H:%M:%SZ" "$LAST_ACCESS" +%s)
+ DAYS_IDLE=$(( ($CURRENT_DATE - $LAST_ACCESS_DATE) / 86400 ))
+
+ echo "Days since last access: $DAYS_IDLE"
+
+ if [ $DAYS_IDLE -gt 90 ]; then
+ echo "Table has been idle for more than 90 days. Dropping table..."
+ # Drop table via Gravitino API
+ DROP_RESPONSE=$(curl -s -X DELETE -H "Accept:
application/vnd.gravitino.v1+json" \
+
"http://localhost:8090/api/metalakes/metalake_demo/catalogs/${CATALOG}/schemas/${SCHEMA}/tables/${TABLE}")
+ echo "Drop response: $DROP_RESPONSE"
+ echo "Table ${CATALOG}.${SCHEMA}.${TABLE} dropped successfully"
+ else
+ echo "Table is still active. No action needed."
+ fi
+else
+ echo "No last access time found. Skipping..."
+fi
+EOF
+
+chmod +x /tmp/drop_unused_tables.sh
+
+# Copy the script into the Gravitino container
+docker cp /tmp/drop_unused_tables.sh
playground-gravitino:/tmp/drop_unused_tables.sh
+
+# Make it executable in the container
+docker exec playground-gravitino chmod +x /tmp/drop_unused_tables.sh
+
+# Register the job template
+curl -X POST -H "Accept: application/vnd.gravitino.v1+json" \
+ -H "Content-Type: application/json" \
+ -d '{
+ "jobTemplate": {
+ "name": "drop_unused_table_job",
+ "jobType": "shell",
+ "comment": "Job to drop unused tables based on policy",
+ "executable": "file:///tmp/drop_unused_tables.sh",
+ "arguments": ["{{catalog}}", "{{schema}}", "{{table}}"],
+ "environments": {},
+ "customFields": {},
+ "scripts": []
+ }
+ }' \
+ http://localhost:8090/api/metalakes/metalake_demo/jobs/templates
+```
+
+**Step 6: Run the Job to Drop Unused Tables**
+
+Execute the job for the customers table:
+
+```shell
+# Run job for the customers table (should drop it since it's > 90 days old)
+curl -X POST -H "Accept: application/vnd.gravitino.v1+json" \
+ -H "Content-Type: application/json" \
+ -d '{
+ "jobTemplateName": "drop_unused_table_job",
+ "jobConf": {
+ "catalog": "catalog_hive",
+ "schema": "sales",
+ "table": "customers"
+ }
+ }' \
+ http://localhost:8090/api/metalakes/metalake_demo/jobs/runs
+```
+
+The response will contain a `jobRunId` that you can use to check the job
status.
+
+**Step 7: Verify the Job Result**
+
+Check the job execution status and result:
+
+```shell
+# Get the job run details (replace {jobRunId} with the actual ID from Step 6
response)
+curl -X GET -H "Accept: application/vnd.gravitino.v1+json" \
+ http://localhost:8090/api/metalakes/metalake_demo/jobs/runs/{jobRunId}
+
+# Example: If jobRunId is "job-123"
+curl -X GET -H "Accept: application/vnd.gravitino.v1+json" \
+ http://localhost:8090/api/metalakes/metalake_demo/jobs/runs/job-123
+```
+
+The response will show:
+- **status**: Job status (`QUEUED`, `RUNNING`, `SUCCEEDED`, `FAILED`,
`CANCELLING`, `CANCELLED`)
+- **startTime**: When the job started
+- **endTime**: When the job completed
+- **output**: Job execution output/logs
+
+You can also verify the table was actually dropped:
+
+```shell
+# Check if the table still exists (should show it's gone)
+docker exec -it playground-trino trino --execute "SHOW TABLES FROM
catalog_hive.sales"
+
+# Or try to query the dropped table (should fail with "Table not found")
+docker exec -it playground-trino trino --execute "SELECT * FROM
catalog_hive.sales.customers LIMIT 1"
+```
+
+If the table was successfully dropped, you'll see an error like:
+```
+Query failed: line 1:15: Table 'hive.sales.customers' does not exist
+```
+
+**Key Concepts:**
+
+- **Statistics**: Track custom metrics like `custom-lastAccessTime` to monitor
table usage
+- **Policies**: Define governance rules to identify tables that meet certain
criteria (e.g., idle for 90+ days)
+- **Jobs**: Execute automated actions (drop tables) based on policy evaluation
+- **Metadata-driven actions**: Use Gravitino's metadata (statistics, policies)
to drive data governance decisions
+
+This approach enables:
+- ✅ Automated data lifecycle management
+- ✅ Cost reduction by removing unused data
+- ✅ Compliance with data retention policies
+- ✅ Centralized governance across multiple catalogs
+
+For more details, refer to:
+- [Manage Statistics in
Gravitino](https://gravitino.apache.org/docs/latest/manage-statistics-in-gravitino)
+- [Manage Policies in
Gravitino](https://gravitino.apache.org/docs/latest/manage-policies-in-gravitino)
+- [Manage Jobs in
Gravitino](https://gravitino.apache.org/docs/latest/manage-jobs-in-gravitino)
+
## NOTICE
If you want to clean cache files, you can delete the directory `data` of this
repo.
diff --git a/docker-enable-auth-override.yaml b/docker-enable-auth-override.yaml
new file mode 100644
index 0000000..2f0f279
--- /dev/null
+++ b/docker-enable-auth-override.yaml
@@ -0,0 +1,23 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+services:
+ gravitino:
+ environment:
+ - GRAVITINO_AUTH_ENABLE=true
+
diff --git a/init/gravitino/gravitino.conf b/init/gravitino/gravitino.conf
index 0cc515e..bd9368c 100755
--- a/init/gravitino/gravitino.conf
+++ b/init/gravitino/gravitino.conf
@@ -82,4 +82,9 @@ gravitino.auxService.iceberg-rest.uri =
jdbc:mysql://mysql:3306/db
gravitino.auxService.iceberg-rest.warehouse =
hdfs://hive:9000/user/iceberg/warehouse/
gravitino.auxService.iceberg-rest.jdbc.user = mysql
gravitino.auxService.iceberg-rest.jdbc.password = mysql
-gravitino.auxService.iceberg-rest.jdbc-driver = com.mysql.cj.jdbc.Driver
\ No newline at end of file
+gravitino.auxService.iceberg-rest.jdbc-driver = com.mysql.cj.jdbc.Driver
+gravitino.iceberg-rest.catalog-config-provider = dynamic-config-provider
+gravitino.iceberg-rest.gravitino-uri = http://gravitino:8090
+gravitino.iceberg-rest.gravitino-metalake = metalake_demo
+gravitino.iceberg-rest.default-catalog-name = catalog_iceberg
+gravitino.iceberg-rest.gravitino-simple.user-name = anonymous
diff --git a/init/gravitino/init.sh b/init/gravitino/init.sh
index ed6c80c..9676205 100644
--- a/init/gravitino/init.sh
+++ b/init/gravitino/init.sh
@@ -27,6 +27,29 @@ cp
/root/gravitino/catalogs/jdbc-mysql/libs/mysql-connector-java-8.0.27.jar /roo
cp /tmp/gravitino/gravitino.conf /root/gravitino/conf
+# If auth is enabled, remove the PassThroughAuthorizer configuration
+if [ "${GRAVITINO_AUTH_ENABLE}" == "true" ]; then
+ echo "Auth is enabled, removing PassThroughAuthorizer configuration..."
+ sed -i '/gravitino.authorization.impl =
org.apache.gravitino.server.authorization.PassThroughAuthorizer/d'
/root/gravitino/conf/gravitino.conf
+fi
+
+# Ensure jq is installed; install quietly if missing
+if ! command -v jq >/dev/null 2>&1; then
+ apt-get update -qq && apt-get install -y -qq jq || { echo "Failed to install
jq" >&2; exit 1; }
+fi
+
+# Ensure host command is installed; install quietly if missing
+if ! command -v host >/dev/null 2>&1; then
+ apt-get update -qq && apt-get install -y -qq host || { echo "Failed to
install host package" >&2; exit 1; }
+fi
+
+# Resolve hive hostname to IP address and add to /etc/hosts
+IP=$(host hive 2>/dev/null | awk '/has address/ {print $4; exit}')
+if [ -z "$IP" ]; then
+ echo "Failed to resolve hostname 'hive'" >&2
+ exit 1
+fi
+echo "$IP hive" >> /etc/hosts
echo "Finish downloading"
echo "Start the Gravitino Server"
/bin/bash /root/gravitino/bin/gravitino.sh start &
diff --git a/init/jupyter/authorization/gravitino-access-control-example.ipynb
b/init/jupyter/authorization/gravitino-access-control-example.ipynb
index 4cda534..e85f80b 100644
--- a/init/jupyter/authorization/gravitino-access-control-example.ipynb
+++ b/init/jupyter/authorization/gravitino-access-control-example.ipynb
@@ -134,7 +134,7 @@
"spark = SparkSession.builder \\\n",
" .appName(\"PySpark SQL Example\") \\\n",
" .config(\"spark.plugins\",
\"org.apache.gravitino.spark.connector.plugin.GravitinoSparkPlugin\") \\\n",
- " .config(\"spark.jars\",
f\"/tmp/gravitino/packages/iceberg-spark-runtime-3.4_2.12-1.5.2.jar,\\\n",
+ " .config(\"spark.jars\",
f\"/tmp/gravitino/packages/iceberg-spark-runtime-3.4_2.12-1.10.0.jar,\\\n",
"
/tmp/gravitino/packages/{gravitino_connector_jar},\\\n",
"
/tmp/gravitino/packages/kyuubi-spark-authz-shaded_2.12-1.9.2.jar\") \\\n",
" .config(\"spark.sql.gravitino.uri\", \"http://gravitino:8090\") \\\n",
@@ -240,51 +240,6 @@
},
"id": "7113e44ad213ff45"
},
- {
- "cell_type": "markdown",
- "source": [
- "### You should click the jupyter button to restart the notebook, we will
start a new spark context with user lisa"
- ],
- "metadata": {
- "collapsed": false
- },
- "id": "ee84f44711c7a939"
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "outputs": [],
- "source": [
- "import pyspark\n",
- "import os\n",
- "from pyspark.sql import SparkSession\n",
- "os.environ['HADOOP_USER_NAME']=\"lisa\"\n",
- "gravitino_connector_jar = os.getenv('SPARK_CONNECTOR_JAR')\n",
- "\n",
- "spark = SparkSession.builder \\\n",
- " .appName(\"PySpark SQL Example\") \\\n",
- " .config(\"spark.plugins\",
\"org.apache.gravitino.spark.connector.plugin.GravitinoSparkPlugin\") \\\n",
- " .config(\"spark.jars\",
f\"/tmp/gravitino/packages/iceberg-spark-runtime-3.4_2.12-1.5.2.jar,\\\n",
- "
/tmp/gravitino/packages/{gravitino_connector_jar},\\\n",
- "
/tmp/gravitino/packages/kyuubi-spark-authz-shaded_2.12-1.9.2.jar\") \\\n",
- " .config(\"spark.sql.gravitino.uri\", \"http://gravitino:8090\") \\\n",
- " .config(\"spark.sql.gravitino.metalake\", \"metalake_demo\") \\\n",
- " .config(\"spark.sql.gravitino.enableIcebergSupport\", \"true\") \\\n",
- " .config(\"spark.sql.catalog.catalog_rest\",
\"org.apache.iceberg.spark.SparkCatalog\") \\\n",
- " .config(\"spark.sql.catalog.catalog_rest.type\", \"rest\") \\\n",
- " .config(\"spark.sql.catalog.catalog_rest.uri\",
\"http://gravitino:9001/iceberg/\") \\\n",
- " .config(\"spark.locality.wait.node\", \"0\") \\\n",
- " .config(\"spark.driver.extraClassPath\", \"/tmp/gravitino\") \\\n",
- " .config(\"spark.sql.extensions\",
\"org.apache.kyuubi.plugin.spark.authz.ranger.RangerSparkExtension\") \\\n",
- " .config(\"spark.sql.warehouse.dir\",
\"hdfs://hive:9000/user/hive/warehouse\") \\\n",
- " .enableHiveSupport() \\\n",
- " .getOrCreate()"
- ],
- "metadata": {
- "collapsed": false
- },
- "id": "81f1b47f026aa59d"
- },
{
"cell_type": "markdown",
"source": [
@@ -323,6 +278,49 @@
},
"id": "25ca43caa7aa5a30"
},
+ {
+ "metadata": {
+ "collapsed": false
+ },
+ "cell_type": "markdown",
+ "source": "### You should click the jupyter button to restart the notebook,
we will start a new spark context with user lisa",
+ "id": "3b6fdd67ac2ea859"
+ },
+ {
+ "metadata": {
+ "collapsed": false
+ },
+ "cell_type": "code",
+ "outputs": [],
+ "execution_count": null,
+ "source": [
+ "import pyspark\n",
+ "import os\n",
+ "from pyspark.sql import SparkSession\n",
+ "os.environ['HADOOP_USER_NAME']=\"lisa\"\n",
+ "gravitino_connector_jar = os.getenv('SPARK_CONNECTOR_JAR')\n",
+ "\n",
+ "spark = SparkSession.builder \\\n",
+ " .appName(\"PySpark SQL Example\") \\\n",
+ " .config(\"spark.plugins\",
\"org.apache.gravitino.spark.connector.plugin.GravitinoSparkPlugin\") \\\n",
+ " .config(\"spark.jars\",
f\"/tmp/gravitino/packages/iceberg-spark-runtime-3.4_2.12-1.10.0.jar,\\\n",
+ "
/tmp/gravitino/packages/{gravitino_connector_jar},\\\n",
+ "
/tmp/gravitino/packages/kyuubi-spark-authz-shaded_2.12-1.9.2.jar\") \\\n",
+ " .config(\"spark.sql.gravitino.uri\", \"http://gravitino:8090\") \\\n",
+ " .config(\"spark.sql.gravitino.metalake\", \"metalake_demo\") \\\n",
+ " .config(\"spark.sql.gravitino.enableIcebergSupport\", \"true\") \\\n",
+ " .config(\"spark.sql.catalog.catalog_rest\",
\"org.apache.iceberg.spark.SparkCatalog\") \\\n",
+ " .config(\"spark.sql.catalog.catalog_rest.type\", \"rest\") \\\n",
+ " .config(\"spark.sql.catalog.catalog_rest.uri\",
\"http://gravitino:9001/iceberg/\") \\\n",
+ " .config(\"spark.locality.wait.node\", \"0\") \\\n",
+ " .config(\"spark.driver.extraClassPath\", \"/tmp/gravitino\") \\\n",
+ " .config(\"spark.sql.extensions\",
\"org.apache.kyuubi.plugin.spark.authz.ranger.RangerSparkExtension\") \\\n",
+ " .config(\"spark.sql.warehouse.dir\",
\"hdfs://hive:9000/user/hive/warehouse\") \\\n",
+ " .enableHiveSupport() \\\n",
+ " .getOrCreate()"
+ ],
+ "id": "888c22229d335a5a"
+ },
{
"cell_type": "markdown",
"source": [
diff --git a/init/jupyter/gravitino-fileset-example.ipynb
b/init/jupyter/gravitino-fileset-example.ipynb
index 4ceb408..7c00543 100644
--- a/init/jupyter/gravitino-fileset-example.ipynb
+++ b/init/jupyter/gravitino-fileset-example.ipynb
@@ -35,9 +35,7 @@
"id": "fcfb73be-e369-4543-b78d-fb1cd061c9e1",
"metadata": {},
"outputs": [],
- "source": [
- "pip install apache-gravitino==1.0.0"
- ]
+ "source": "pip install apache-gravitino==1.1.0"
},
{
"cell_type": "code",
diff --git a/init/jupyter/gravitino-spark-trino-example.ipynb
b/init/jupyter/gravitino-spark-trino-example.ipynb
index c0e0db1..8e8fe14 100644
--- a/init/jupyter/gravitino-spark-trino-example.ipynb
+++ b/init/jupyter/gravitino-spark-trino-example.ipynb
@@ -26,7 +26,7 @@
"spark = SparkSession.builder \\\n",
" .appName(\"PySpark SQL Example\") \\\n",
" .config(\"spark.plugins\",
\"org.apache.gravitino.spark.connector.plugin.GravitinoSparkPlugin\") \\\n",
- " .config(\"spark.jars\",
f\"/tmp/gravitino/packages/iceberg-spark-runtime-3.4_2.12-1.5.2.jar,/tmp/gravitino/packages/{gravitino_connector_jar}\")
\\\n",
+ " .config(\"spark.jars\",
f\"/tmp/gravitino/packages/iceberg-spark-runtime-3.4_2.12-1.10.0.jar,/tmp/gravitino/packages/{gravitino_connector_jar}\")
\\\n",
" .config(\"spark.sql.gravitino.uri\", \"http://gravitino:8090\") \\\n",
" .config(\"spark.sql.gravitino.metalake\", \"metalake_demo\") \\\n",
" .config(\"spark.sql.gravitino.enableIcebergSupport\", \"true\") \\\n",
@@ -90,7 +90,7 @@
"metadata": {},
"outputs": [],
"source": [
- "%pip install trino"
+ "%pip install trino==0.335.0"
]
},
{
diff --git a/init/jupyter/gravitino_llamaIndex_demo.ipynb
b/init/jupyter/gravitino_llamaIndex_demo.ipynb
index 00ec374..a50d261 100644
--- a/init/jupyter/gravitino_llamaIndex_demo.ipynb
+++ b/init/jupyter/gravitino_llamaIndex_demo.ipynb
@@ -50,11 +50,12 @@
},
"outputs": [],
"source": [
- "%pip install apache-gravitino==1.0.0\n",
"%pip install trino==0.335.0\n",
"%pip install llama-index-readers-wikipedia\n",
"%pip install llama-index-llms-openai\n",
"%pip install llama-index\n",
+ "%pip install \"setuptools<61.0\"\n",
+ "%pip install apache-gravitino==1.1.0\n",
"%pip install sqlalchemy-trino"
]
},
diff --git a/init/spark/spark-dependency.sh b/init/spark/spark-dependency.sh
index 1485762..342c124 100755
--- a/init/spark/spark-dependency.sh
+++ b/init/spark/spark-dependency.sh
@@ -29,7 +29,7 @@ if [[ ! -d "${spark_dir}/packages" ]]; then
mkdir -p "${spark_dir}/packages"
fi
-ICEBERG_SPARK_RUNTIME_JAR="https://repo1.maven.org/maven2/org/apache/iceberg/iceberg-spark-runtime-3.4_2.12/1.5.2/iceberg-spark-runtime-3.4_2.12-1.5.2.jar"
+ICEBERG_SPARK_RUNTIME_JAR="https://repo1.maven.org/maven2/org/apache/iceberg/iceberg-spark-runtime-3.4_2.12/1.10.0/iceberg-spark-runtime-3.4_2.12-1.10.0.jar"
ICEBERG_SPARK_RUNTIME_MD5="${ICEBERG_SPARK_RUNTIME_JAR}.md5"
download_and_verify "${ICEBERG_SPARK_RUNTIME_JAR}"
"${ICEBERG_SPARK_RUNTIME_MD5}" "${spark_dir}"
diff --git a/playground.sh b/playground.sh
index d24a3d0..1785a79 100755
--- a/playground.sh
+++ b/playground.sh
@@ -194,11 +194,17 @@ start() {
./init/jupyter/jupyter-dependency.sh
logSuffix=$(date +%Y%m%d%H%M%s)
+
+ # Build docker-compose command with appropriate override files
+ composeFiles="-f docker-compose.yaml"
if [ "${enableRanger}" == true ]; then
- ${dockerComposeCommand} -f docker-compose.yaml -f
docker-enable-ranger-hive-override.yaml -p ${playgroundRuntimeName} up --detach
- else
- ${dockerComposeCommand} -p ${playgroundRuntimeName} up --detach
+ composeFiles="${composeFiles} -f docker-enable-ranger-hive-override.yaml"
+ fi
+ if [ "${enableAuth}" == true ]; then
+ composeFiles="${composeFiles} -f docker-enable-auth-override.yaml"
fi
+
+ ${dockerComposeCommand} ${composeFiles} -p ${playgroundRuntimeName} up
--detach
${dockerComposeCommand} -p ${playgroundRuntimeName} logs -f
>${playground_dir}/playground-${logSuffix}.log 2>&1 &
echo "[INFO] Check log details:
${playground_dir}/playground-${logSuffix}.log"
pruneLegacyLogs
@@ -223,11 +229,35 @@ stop() {
case "$1" in
start)
- if [[ "$2" == "--enable-ranger" ]]; then
- enableRanger=true
- else
- enableRanger=false
+ enableRanger=false
+ enableAuth=false
+
+ # Parse options
+ shift
+ while [[ $# -gt 0 ]]; do
+ case "$1" in
+ --enable-ranger)
+ enableRanger=true
+ shift
+ ;;
+ --enable-auth)
+ enableAuth=true
+ shift
+ ;;
+ *)
+ echo "Unknown option: $1"
+ echo "Usage: playground.sh start [--enable-ranger] [--enable-auth]"
+ exit 1
+ ;;
+ esac
+ done
+
+ # Check for mutually exclusive options
+ if [ "${enableRanger}" == true ] && [ "${enableAuth}" == true ]; then
+ echo "[ERROR] --enable-ranger and --enable-auth cannot be used together.
Please choose one."
+ exit 1
fi
+
start
;;
status)
@@ -237,7 +267,7 @@ stop)
stop
;;
*)
- echo "Usage: $0 <start|status|stop> [--enable-ranger]"
+ echo "Usage: $0 <start|status|stop> [--enable-ranger] [--enable-auth]"
exit 1
;;
esac