This is an automated email from the ASF dual-hosted git repository.
liuxun pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/gravitino-playground.git
The following commit(s) were added to refs/heads/main by this push:
new b269620 feat(helm-chart): remove helm-chart support (#110)
b269620 is described below
commit b269620c804a4135ff65ee10ba6241c311caa1c3
Author: Eric Chang <[email protected]>
AuthorDate: Mon Dec 16 20:15:30 2024 +0800
feat(helm-chart): remove helm-chart support (#110)
Resolve #108
---------
Co-authored-by: JUN <[email protected]>
---
README.md | 67 +------
docker-compose.yaml | 12 --
healthcheck/gravitino-healthcheck.sh | 4 +-
healthcheck/hive-healthcheck.sh | 6 +-
healthcheck/mysql-healthcheck.sh | 3 +-
healthcheck/trino-healthcheck.sh | 2 +-
helm-chart/.helmignore | 29 ----
helm-chart/Chart.yaml | 9 -
helm-chart/healthcheck | 1 -
helm-chart/init | 1 -
helm-chart/templates/NOTES.txt | 24 ---
helm-chart/templates/_helpers.tpl | 62 -------
helm-chart/templates/gravitino.yaml | 84 ---------
helm-chart/templates/hive.yaml | 76 --------
helm-chart/templates/jupyter.yaml | 80 ---------
helm-chart/templates/mysql.yaml | 53 ------
helm-chart/templates/postgresql.yaml | 46 -----
helm-chart/templates/spark.yaml | 54 ------
helm-chart/templates/tests/test-connection.yaml | 15 --
helm-chart/templates/trino.yaml | 76 --------
helm-chart/values.yaml | 212 -----------------------
init/common/init_metalake_catalog.sh | 20 +--
init/gravitino/gravitino.conf | 4 +-
init/gravitino/init.sh | 3 -
init/hive/init.sh | 7 +-
init/jupyter/gravitino-fileset-example.ipynb | 16 +-
init/jupyter/gravitino-spark-trino-example.ipynb | 12 +-
init/jupyter/gravitino-trino-example.ipynb | 12 +-
init/jupyter/gravitino_llamaIndex_demo.ipynb | 8 +-
init/spark/init.sh | 4 -
init/spark/spark-defaults.conf | 6 +-
init/trino/init.sh | 2 +-
playground.sh | 129 +++-----------
33 files changed, 65 insertions(+), 1074 deletions(-)
diff --git a/README.md b/README.md
index feab84a..5f21797 100644
--- a/README.md
+++ b/README.md
@@ -26,7 +26,6 @@ Depending on your network and computer, startup time may take 3-5 minutes. Once
## Prerequisites
Install Git (optional), Docker, Docker Compose.
-Docker Desktop (or Orbstack) with Kubernetes enabled and helm CLI is required if you use helm-chart to deploy services.
## System Resource Requirements
@@ -63,81 +62,21 @@ git clone [email protected]:apache/gravitino-playground.git
cd gravitino-playground
```
-#### Docker
-
-##### Start
+### Start
```
./playground.sh docker start
```
-##### Check status
+### Check status
```shell
./playground.sh docker status
```
-##### Stop playground
+### Stop playground
```shell
./playground.sh docker stop
```
-#### Kubernetes
-
-Enable Kubernetes in Docker Desktop or Orbstack.
-
-In the project root directory, execute this command:
-
-```
-helm upgrade --install gravitino-playground ./helm-chart/ --create-namespace
--namespace gravitino-playground --set projectRoot=$(pwd)
-```
-
-##### Start
-
-```
-./playground.sh k8s start
-```
-
-##### Check status
-```shell
-./playground.sh k8s status
-```
-
-##### Port Forwarding
-
-To access the pods or services at `localhost`, you need to do these steps:
-
-1. Log in to the Gravitino playground Trino pod using the following command:
-
-```
-TRINO_POD=$(kubectl get pods --namespace gravitino-playground -l app=trino -o
jsonpath="{.items[0].metadata.name}")
-kubectl exec $TRINO_POD -n gravitino-playground -it -- /bin/bash
-```
-2. Log in to the Gravitino playground Spark pod using the following command:
-
-```
-SPARK_POD=$(kubectl get pods --namespace gravitino-playground -l app=spark -o
jsonpath="{.items[0].metadata.name}")
-kubectl exec $SPARK_POD -n gravitino-playground -it -- /bin/bash
-```
-
-3. Port-forward the Gravitino service to access it at `localhost:8090`.
-
-```
-kubectl port-forward svc/gravitino -n gravitino-playground 8090:8090
-```
-
-4. Port-forward the Jupyter Notebook service to access it at `localhost:8888`.
-
-```
-kubectl port-forward svc/jupyternotebook -n gravitino-playground 8888:8888
-```
-
-##### Stop playground
-```shell
-./playground.sh k8s stop
-```
-
-
-
-
## Experiencing Apache Gravitino with Trino SQL
### Using Trino CLI in Docker Container
diff --git a/docker-compose.yaml b/docker-compose.yaml
index be97d8d..6020f08 100644
--- a/docker-compose.yaml
+++ b/docker-compose.yaml
@@ -65,9 +65,6 @@ services:
- "8090:8090"
- "9001:9001"
container_name: playground-gravitino
- environment:
- - MYSQL_HOST_IP=mysql
- - HIVE_HOST_IP=hive
depends_on:
ranger :
condition: service_healthy
@@ -160,10 +157,6 @@ services:
entrypoint: /bin/bash /tmp/spark/init.sh
environment:
- HADOOP_USER_NAME=root
- - GRAVITINO_HOST_IP=gravitino
- - GRAVITINO_HOST_PORT=8090
- - HIVE_HOST_IP=hive
- - TRINO_HOST_IP=trino
ports:
- "14040:4040"
volumes:
@@ -173,11 +166,6 @@ services:
jupyter:
image: jupyter/pyspark-notebook:spark-3.4.1
container_name: playground-jupyter
- environment:
- - GRAVITINO_HOST_IP=gravitino
- - HIVE_HOST_IP=hive
- - TRINO_HOST_IP=trino
- - POSTGRES_HOST_IP=postgresql
ports:
- "18888:8888"
volumes:
diff --git a/healthcheck/gravitino-healthcheck.sh b/healthcheck/gravitino-healthcheck.sh
index 48462b3..a825e21 100755
--- a/healthcheck/gravitino-healthcheck.sh
+++ b/healthcheck/gravitino-healthcheck.sh
@@ -23,10 +23,8 @@ max_attempts=3
attempt=0
success=false
-HOST_IP=${GRAVITINO_HOST_IP:-localhost}
-
while [ $attempt -lt $max_attempts ]; do
- response=$(curl -X GET -H "Content-Type: application/json" http://${HOST_IP}:8090/api/version)
+ response=$(curl -X GET -H "Content-Type: application/json" http://127.0.0.1:8090/api/version)
if echo "$response" | grep -q "\"code\":0"; then
success=true
diff --git a/healthcheck/hive-healthcheck.sh b/healthcheck/hive-healthcheck.sh
index f921f45..1876495 100755
--- a/healthcheck/hive-healthcheck.sh
+++ b/healthcheck/hive-healthcheck.sh
@@ -19,12 +19,8 @@
#
set -ex
-# Set Hive connection details
-HOST_IP=${HIVE_HOST_IP:-localhost}
-HIVE_PORT="10000"
-
# Attempt to connect to Hive using curl
-curl -s -o /dev/null -w "%{http_code}" http://${HOST_IP}:${HIVE_PORT}
+curl -s -o /dev/null -w "%{http_code}" http://localhost:10000
# Check the HTTP status code
if [ $? -eq 0 ]; then
diff --git a/healthcheck/mysql-healthcheck.sh b/healthcheck/mysql-healthcheck.sh
index 83f18e6..cd3b066 100755
--- a/healthcheck/mysql-healthcheck.sh
+++ b/healthcheck/mysql-healthcheck.sh
@@ -19,8 +19,7 @@
#
set -ex
-HOST_IP=${MYSQL_HOST_IP:-localhost}
-mysqladmin ping -h ${HOST_IP} -p${MYSQL_ROOT_PASSWORD}
+mysqladmin ping -h localhost -p${MYSQL_ROOT_PASSWORD}
if [ $? -eq 0 ]; then
echo "MySQL container started successfully."
exit 0
diff --git a/healthcheck/trino-healthcheck.sh b/healthcheck/trino-healthcheck.sh
index a0750d8..df741fc 100755
--- a/healthcheck/trino-healthcheck.sh
+++ b/healthcheck/trino-healthcheck.sh
@@ -20,7 +20,7 @@
set -ex
# Because trino-connector must first synchronize a default metalake from the Gravitino server
-response=$(trino --server ${TRINO_HOST_IP}:8080 --execute "SHOW CATALOGS LIKE 'catalog_hive'")
+response=$(trino --server localhost:8080 --execute "SHOW CATALOGS LIKE 'catalog_hive'")
if echo "$response" | grep -q catalog_hive; then
echo "Gravitino Trino connector has finished synchronizing metadata"
else
diff --git a/helm-chart/.helmignore b/helm-chart/.helmignore
deleted file mode 100644
index d90886a..0000000
--- a/helm-chart/.helmignore
+++ /dev/null
@@ -1,29 +0,0 @@
-# Patterns to ignore when building packages.
-# This supports shell glob matching, relative path matching, and
-# negation (prefixed with !). Only one pattern per line.
-.DS_Store
-# Common VCS dirs
-.git/
-.gitignore
-.bzr/
-.bzrignore
-.hg/
-.hgignore
-.svn/
-# Common backup files
-*.swp
-*.bak
-*.tmp
-*.orig
-*~
-# Various IDEs
-.project
-.idea/
-*.tmproj
-.vscode/
-
-# Ignore these directories because they are too large, we use local-path pv to
mount it into Pod
-init/*/data/
-init/*/packages/
-
-
diff --git a/helm-chart/Chart.yaml b/helm-chart/Chart.yaml
deleted file mode 100644
index ef6fbb2..0000000
--- a/helm-chart/Chart.yaml
+++ /dev/null
@@ -1,9 +0,0 @@
-apiVersion: v2
-name: gravitino-playground
-description: A Helm chart for Gravitino Playground
-type: application
-version: 0.1.0
-appVersion: "1.0.0"
-maintainers:
- - name: Your Name
- email: [email protected]
\ No newline at end of file
diff --git a/helm-chart/healthcheck b/helm-chart/healthcheck
deleted file mode 120000
index 0e8d7eb..0000000
--- a/helm-chart/healthcheck
+++ /dev/null
@@ -1 +0,0 @@
-../healthcheck
\ No newline at end of file
diff --git a/helm-chart/init b/helm-chart/init
deleted file mode 120000
index d9db1a1..0000000
--- a/helm-chart/init
+++ /dev/null
@@ -1 +0,0 @@
-../init
\ No newline at end of file
diff --git a/helm-chart/templates/NOTES.txt b/helm-chart/templates/NOTES.txt
deleted file mode 100644
index ed129d9..0000000
--- a/helm-chart/templates/NOTES.txt
+++ /dev/null
@@ -1,24 +0,0 @@
-1. Log in to the Gravitino playground Trino pod using the following command:
-
-```
-TRINO_POD=$(kubectl get pods --namespace gravitino-playground -l app=trino -o
jsonpath="{.items[0].metadata.name}")
-kubectl exec $TRINO_POD -n gravitino-playground -it -- /bin/bash
-```
-2. Log in to the Gravitino playground Spark pod using the following command:
-
-```
-SPARK_POD=$(kubectl get pods --namespace gravitino-playground -l app=spark -o
jsonpath="{.items[0].metadata.name}")
-kubectl exec $SPARK_POD -n gravitino-playground -it -- /bin/bash
-```
-
-3. Port-forwarding Gravitino Service, so that you can access it at
`localhost:8090`.
-
-```
-kubectl port-forward svc/gravitino -n gravitino-playground 8090:8090
-```
-
-4. Port-forwarding Jupyter Notebook Service, so that you can access it at
`localhost:8888`.
-
-```
-kubectl port-forward svc/jupyternotebook -n gravitino-playground 8888:8888
-```
\ No newline at end of file
diff --git a/helm-chart/templates/_helpers.tpl
b/helm-chart/templates/_helpers.tpl
deleted file mode 100644
index e40aca5..0000000
--- a/helm-chart/templates/_helpers.tpl
+++ /dev/null
@@ -1,62 +0,0 @@
-{{/*
-Expand the name of the chart.
-*/}}
-{{- define "gravitino-playground.name" -}}
-{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }}
-{{- end }}
-
-{{/*
-Create a default fully qualified app name.
-We truncate at 63 chars because some Kubernetes name fields are limited to
this (by the DNS naming spec).
-If release name contains chart name it will be used as a full name.
-*/}}
-{{- define "gravitino-playground.fullname" -}}
-{{- if .Values.fullnameOverride }}
-{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }}
-{{- else }}
-{{- $name := default .Chart.Name .Values.nameOverride }}
-{{- if contains $name .Release.Name }}
-{{- .Release.Name | trunc 63 | trimSuffix "-" }}
-{{- else }}
-{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }}
-{{- end }}
-{{- end }}
-{{- end }}
-
-{{/*
-Create chart name and version as used by the chart label.
-*/}}
-{{- define "gravitino-playground.chart" -}}
-{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 |
trimSuffix "-" }}
-{{- end }}
-
-{{/*
-Common labels
-*/}}
-{{- define "gravitino-playground.labels" -}}
-helm.sh/chart: {{ include "gravitino-playground.chart" . }}
-{{ include "gravitino-playground.selectorLabels" . }}
-{{- if .Chart.AppVersion }}
-app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
-{{- end }}
-app.kubernetes.io/managed-by: {{ .Release.Service }}
-{{- end }}
-
-{{/*
-Selector labels
-*/}}
-{{- define "gravitino-playground.selectorLabels" -}}
-app.kubernetes.io/name: {{ include "gravitino-playground.name" . }}
-app.kubernetes.io/instance: {{ .Release.Name }}
-{{- end }}
-
-{{/*
-Create the name of the service account to use
-*/}}
-{{- define "gravitino-playground.serviceAccountName" -}}
-{{- if .Values.serviceAccount.create }}
-{{- default (include "gravitino-playground.fullname" .)
.Values.serviceAccount.name }}
-{{- else }}
-{{- default "default" .Values.serviceAccount.name }}
-{{- end }}
-{{- end }}
diff --git a/helm-chart/templates/gravitino.yaml
b/helm-chart/templates/gravitino.yaml
deleted file mode 100644
index dce35df..0000000
--- a/helm-chart/templates/gravitino.yaml
+++ /dev/null
@@ -1,84 +0,0 @@
----
-apiVersion: apps/v1
-kind: Deployment
-metadata:
- name: {{ .Values.gravitino.serviceName }}
- namespace: {{ .Values.global.namespace }}
- labels:
- app: {{ .Values.gravitino.serviceName }}
-spec:
- replicas: 1
- selector:
- matchLabels:
- app: {{ .Values.gravitino.serviceName }}
- template:
- metadata:
- labels:
- app: {{ .Values.gravitino.serviceName }}
- spec:
- containers:
- - name: gravitino
- image: "{{ .Values.gravitino.image.repository }}:{{
.Values.gravitino.image.tag }}"
- ports:
- - containerPort: 8090
- - containerPort: 9001
- command: ["/bin/bash", "/tmp/gravitino/init.sh"]
- env:
- {{- toYaml .Values.gravitino.env | nindent 12 }}
- volumeMounts:
- - name: gravitino-healthcheck-scripts
- mountPath: /tmp/healthcheck
- - name: gravitino-artifacts
- mountPath: /tmp/gravitino
- readinessProbe:
- exec:
- command:
- - /bin/sh
- - -c
- - /tmp/healthcheck/gravitino-healthcheck.sh
- initialDelaySeconds: 10
- periodSeconds: 5
- failureThreshold: 3
- resources:
- {{- toYaml .Values.gravitino.resources | nindent 12 }}
- initContainers:
- - name: wait-for-hive
- image: "{{ .Values.gravitino.image.repository }}:{{
.Values.gravitino.image.tag }}"
- command: ["/bin/bash", "/tmp/healthcheck/hive-healthcheck.sh"]
- env:
- {{- toYaml .Values.gravitino.env | nindent 12 }}
- volumeMounts:
- - name: gravitino-healthcheck-scripts
- mountPath: /tmp/healthcheck
- - name: wait-for-mysql
- image: "{{ .Values.mysql.image.repository }}:{{
.Values.mysql.image.tag }}"
- command: ["/bin/bash", "/tmp/healthcheck/mysql-healthcheck.sh"]
- env:
- {{- toYaml .Values.mysql.env | nindent 12 }}
- volumeMounts:
- - name: gravitino-healthcheck-scripts
- mountPath: /tmp/healthcheck
- volumes:
- - name: gravitino-artifacts
- hostPath:
- path: {{ printf "%s/init/gravitino/" .Values.projectRoot }}
- type: DirectoryOrCreate
- - name: gravitino-healthcheck-scripts
- hostPath:
- path: {{ printf "%s/healthcheck/" .Values.projectRoot }}
- type: DirectoryOrCreate
----
-apiVersion: v1
-kind: Service
-metadata:
- name: {{ .Values.gravitino.serviceName }}
-spec:
- selector:
- app: {{ .Values.gravitino.serviceName }}
- ports:
- - port: 8090
- targetPort: 8090
- name: api
- - port: 9001
- targetPort: 9001
- name: debug
diff --git a/helm-chart/templates/hive.yaml b/helm-chart/templates/hive.yaml
deleted file mode 100644
index e2df965..0000000
--- a/helm-chart/templates/hive.yaml
+++ /dev/null
@@ -1,76 +0,0 @@
-# templates/hive.yaml
----
-apiVersion: apps/v1
-kind: Deployment
-metadata:
- name: {{ .Values.hive.serviceName }}
- namespace: {{ .Values.global.namespace }}
- labels:
- app: {{ .Values.hive.serviceName }}
-spec:
- replicas: 1
- selector:
- matchLabels:
- app: {{ .Values.hive.serviceName }}
- template:
- metadata:
- labels:
- app: {{ .Values.hive.serviceName }}
- spec:
- # we need this, otherwise, the hive hostname at location of spark table
will
- # be the pod name, not service name:
- # Location|hdfs://hive-6c8fbfcf8f-482r4:9000/...|
- # which cause pyspark unable to connect to hive
- hostname: hive
- containers:
- - name: hive
- image: "{{ .Values.hive.image.repository }}:{{ .Values.hive.image.tag
}}"
- ports:
- - containerPort: 3306
- - containerPort: 9000
- - containerPort: 9083
- - containerPort: 10000
- - containerPort: 50070
- readinessProbe:
- exec:
- command: ["/tmp/check-status.sh"]
- periodSeconds: 10
- timeoutSeconds: 60
- failureThreshold: 5
- env:
- {{- toYaml .Values.hive.env | nindent 12 }}
- volumeMounts:
- - name: hive-artifacts
- mountPath: /tmp/hive
- resources:
- {{- toYaml .Values.hive.resources | nindent 12 }}
- command: ["/bin/bash", "/tmp/hive/init.sh"]
- volumes:
- - name: hive-artifacts
- hostPath:
- path: {{ printf "%s/init/hive/" .Values.projectRoot }}
- type: DirectoryOrCreate
----
-apiVersion: v1
-kind: Service
-metadata:
- name: {{ .Values.hive.serviceName }}
-spec:
- selector:
- app: {{ .Values.hive.serviceName }}
- ports:
- - port: 3306
- targetPort: 3306
- name: mysql
- - port: 9000
- targetPort: 9000
- name: hdfs
- - port: 9083
- targetPort: 9083
- name: metastore
- - port: 10000
- targetPort: 10000
- name: hiveserver2
- - port: 50070
- targetPort: 50070
- name: namenode
diff --git a/helm-chart/templates/jupyter.yaml
b/helm-chart/templates/jupyter.yaml
deleted file mode 100644
index 7322e24..0000000
--- a/helm-chart/templates/jupyter.yaml
+++ /dev/null
@@ -1,80 +0,0 @@
-apiVersion: apps/v1
-kind: Deployment
-metadata:
- name: {{ .Values.jupyter.serviceName }}
- namespace: {{ .Values.global.namespace }}
- labels:
- app: {{ .Values.jupyter.serviceName }}
-spec:
- replicas: 1
- selector:
- matchLabels:
- app: {{ .Values.jupyter.serviceName }}
- template:
- metadata:
- labels:
- app: {{ .Values.jupyter.serviceName }}
- spec:
- containers:
- - name: jupyter
- image: "{{ .Values.jupyter.image.repository }}:{{
.Values.jupyter.image.tag }}"
- ports:
- - containerPort: 8888
- command: ["/bin/bash", "/tmp/gravitino/init.sh"]
- securityContext:
- privileged: true
- runAsUser: 0
- env:
- {{- toYaml .Values.jupyter.env | nindent 12 }}
- volumeMounts:
- - name: jupyter-artifacts
- mountPath: /tmp/gravitino
- - name: jupyter-healthcheck-scripts
- mountPath: /tmp/healthcheck
- resources:
- {{- toYaml .Values.jupyter.resources | nindent 12 }}
- initContainers:
- - name: wait-for-gravitino
- image: "{{ .Values.jupyter.image.repository }}:{{
.Values.jupyter.image.tag }}"
- command: ["/bin/bash", "/tmp/healthcheck/gravitino-healthcheck.sh"]
- env:
- {{- toYaml .Values.jupyter.env | nindent 12 }}
- volumeMounts:
- - name: jupyter-healthcheck-scripts
- mountPath: /tmp/healthcheck
- - name: wait-for-hive
- image: "{{ .Values.hive.image.repository }}:{{
.Values.hive.image.tag }}"
- command: ["/bin/bash", "/tmp/healthcheck/hive-healthcheck.sh"]
- env:
- {{- toYaml .Values.jupyter.env | nindent 12 }}
- volumeMounts:
- - name: jupyter-healthcheck-scripts
- mountPath: /tmp/healthcheck
- - name: wait-for-trino
- image: "{{ .Values.trino.image.repository }}:{{
.Values.trino.image.tag }}"
- command: ["/bin/bash", "/tmp/healthcheck/trino-healthcheck.sh"]
- env:
- {{- toYaml .Values.jupyter.env | nindent 12 }}
- volumeMounts:
- - name: jupyter-healthcheck-scripts
- mountPath: /tmp/healthcheck
- volumes:
- - name: jupyter-artifacts
- hostPath:
- path: {{ printf "%s/init/jupyter/" .Values.projectRoot }}
- type: DirectoryOrCreate
- - name: jupyter-healthcheck-scripts
- hostPath:
- path: {{ printf "%s/healthcheck/" .Values.projectRoot }}
- type: DirectoryOrCreate
----
-apiVersion: v1
-kind: Service
-metadata:
- name: {{ .Values.jupyter.serviceName }}
-spec:
- selector:
- app: {{ .Values.jupyter.serviceName }}
- ports:
- - port: 8888
- targetPort: 8888
\ No newline at end of file
diff --git a/helm-chart/templates/mysql.yaml b/helm-chart/templates/mysql.yaml
deleted file mode 100644
index b7efd76..0000000
--- a/helm-chart/templates/mysql.yaml
+++ /dev/null
@@ -1,53 +0,0 @@
-# templates/mysql.yaml
----
-apiVersion: apps/v1
-kind: Deployment
-metadata:
- name: {{ .Values.mysql.serviceName }}
- namespace: {{ .Values.global.namespace }}
- labels:
- app: {{ .Values.mysql.serviceName }}
-spec:
- replicas: 1
- selector:
- matchLabels:
- app: {{ .Values.mysql.serviceName }}
- template:
- metadata:
- labels:
- app: {{ .Values.mysql.serviceName }}
- spec:
- containers:
- - name: mysql
- image: "{{ .Values.mysql.image.repository }}:{{
.Values.mysql.image.tag }}"
- ports:
- - containerPort: 3306
- env:
- {{- toYaml .Values.mysql.env | nindent 12 }}
- args:
- - --default-authentication-plugin=mysql_native_password
- - --character-set-server=utf8mb4
- - --collation-server=utf8mb4_general_ci
- - --explicit_defaults_for_timestamp=true
- - --lower_case_table_names=1
- volumeMounts:
- - name: mysql-artifacts
- mountPath: /docker-entrypoint-initdb.d
- resources:
- {{- toYaml .Values.mysql.resources | nindent 12 }}
- volumes:
- - name: mysql-artifacts
- hostPath:
- path: {{ printf "%s/init/mysql/" .Values.projectRoot }}
- type: DirectoryOrCreate
----
-apiVersion: v1
-kind: Service
-metadata:
- name: {{ .Values.mysql.serviceName }}
-spec:
- selector:
- app: {{ .Values.mysql.serviceName }}
- ports:
- - port: 3306
- targetPort: 3306
\ No newline at end of file
diff --git a/helm-chart/templates/postgresql.yaml
b/helm-chart/templates/postgresql.yaml
deleted file mode 100644
index 5088f80..0000000
--- a/helm-chart/templates/postgresql.yaml
+++ /dev/null
@@ -1,46 +0,0 @@
----
-apiVersion: apps/v1
-kind: Deployment
-metadata:
- name: {{ .Values.postgresql.serviceName }}
- namespace: {{ .Values.global.namespace }}
- labels:
- app: {{ .Values.postgresql.serviceName }}
-spec:
- replicas: 1
- selector:
- matchLabels:
- app: {{ .Values.postgresql.serviceName }}
- template:
- metadata:
- labels:
- app: {{ .Values.postgresql.serviceName }}
- spec:
- containers:
- - name: postgresql
- image: "{{ .Values.postgresql.image.repository }}:{{
.Values.postgresql.image.tag }}"
- ports:
- - containerPort: 5432
- env:
- {{- toYaml .Values.postgresql.env | nindent 12 }}
- volumeMounts:
- - name: postgresql-artifacts
- mountPath: /docker-entrypoint-initdb.d
- resources:
- {{- toYaml .Values.postgresql.resources | nindent 12 }}
- volumes:
- - name: postgresql-artifacts
- hostPath:
- path: {{ printf "%s/init/postgres/" .Values.projectRoot }}
- type: DirectoryOrCreate
----
-apiVersion: v1
-kind: Service
-metadata:
- name: {{ .Values.postgresql.serviceName }}
-spec:
- selector:
- app: {{ .Values.postgresql.serviceName }}
- ports:
- - port: 5432
- targetPort: 5432
diff --git a/helm-chart/templates/spark.yaml b/helm-chart/templates/spark.yaml
deleted file mode 100644
index 9f1610a..0000000
--- a/helm-chart/templates/spark.yaml
+++ /dev/null
@@ -1,54 +0,0 @@
-# templates/spark.yaml
----
-apiVersion: apps/v1
-kind: Deployment
-metadata:
- name: {{ .Values.spark.serviceName }}
- namespace: {{ .Values.global.namespace }}
- labels:
- app: {{ .Values.spark.serviceName }}
-spec:
- replicas: 1
- selector:
- matchLabels:
- app: {{ .Values.spark.serviceName }}
- template:
- metadata:
- labels:
- app: {{ .Values.spark.serviceName }}
- spec:
- containers:
- - name: spark
- image: "{{ .Values.spark.image.repository }}:{{
.Values.spark.image.tag }}"
- ports:
- - containerPort: 4040
- command: ["/bin/bash", "/tmp/spark/init.sh"]
- env:
- {{- toYaml .Values.spark.env | nindent 12 }}
- volumeMounts:
- - name: spark-artifacts
- mountPath: /tmp/spark
- - name: spark-common-artifacts
- mountPath: /tmp/common
- resources:
- {{- toYaml .Values.spark.resources | nindent 12 }}
- volumes:
- - name: spark-artifacts
- hostPath:
- path: {{ printf "%s/init/spark/" .Values.projectRoot }}
- type: DirectoryOrCreate
- - name: spark-common-artifacts
- hostPath:
- path: {{ printf "%s/init/common/" .Values.projectRoot }}
- type: DirectoryOrCreate
----
-apiVersion: v1
-kind: Service
-metadata:
- name: {{ .Values.spark.serviceName }}
-spec:
- selector:
- app: {{ .Values.spark.serviceName }}
- ports:
- - port: 4040
- targetPort: 4040
\ No newline at end of file
diff --git a/helm-chart/templates/tests/test-connection.yaml
b/helm-chart/templates/tests/test-connection.yaml
deleted file mode 100644
index 7ed158b..0000000
--- a/helm-chart/templates/tests/test-connection.yaml
+++ /dev/null
@@ -1,15 +0,0 @@
-apiVersion: v1
-kind: Pod
-metadata:
- name: "{{ include "gravitino-playground.fullname" . }}-test-connection"
- labels:
- {{- include "gravitino-playground.labels" . | nindent 4 }}
- annotations:
- "helm.sh/hook": test
-spec:
- containers:
- - name: wget
- image: busybox
- command: ['wget']
- args: ['{{ include "gravitino-playground.fullname" . }}:{{
.Values.service.port }}']
- restartPolicy: Never
diff --git a/helm-chart/templates/trino.yaml b/helm-chart/templates/trino.yaml
deleted file mode 100644
index b6d46a9..0000000
--- a/helm-chart/templates/trino.yaml
+++ /dev/null
@@ -1,76 +0,0 @@
-# templates/trino.yaml
----
-apiVersion: apps/v1
-kind: Deployment
-metadata:
- name: {{ .Values.trino.serviceName }}
- namespace: {{ .Values.global.namespace }}
- labels:
- app: {{ .Values.trino.serviceName }}
-spec:
- replicas: 1
- selector:
- matchLabels:
- app: {{ .Values.trino.serviceName }}
- template:
- metadata:
- labels:
- app: {{ .Values.trino.serviceName }}
- spec:
- containers:
- - name: trino
- image: "{{ .Values.trino.image.repository }}:{{
.Values.trino.image.tag }}"
- ports:
- - containerPort: 8080
- command: ["/bin/bash", "/tmp/trino/init.sh"]
- env:
- {{- toYaml .Values.trino.env | nindent 12 }}
- volumeMounts:
- - name: trino-artifacts
- mountPath: /tmp/trino
- - name: trino-common-init-scripts
- mountPath: /tmp/common
- resources:
- {{- toYaml .Values.trino.resources | nindent 12 }}
- initContainers:
- - name: wait-for-hive
- image: "{{ .Values.hive.image.repository }}:{{ .Values.hive.image.tag
}}"
- command: ["/bin/bash", "/tmp/healthcheck/hive-healthcheck.sh"]
- env:
- {{- toYaml .Values.trino.env | nindent 12 }}
- volumeMounts:
- - name: trino-healthcheck-scripts
- mountPath: /tmp/healthcheck
- - name: wait-for-gravitino
- image: "{{ .Values.gravitino.image.repository }}:{{
.Values.gravitino.image.tag }}"
- command: ["/bin/bash", "/tmp/healthcheck/gravitino-healthcheck.sh"]
- env:
- {{- toYaml .Values.trino.env | nindent 12 }}
- volumeMounts:
- - name: trino-healthcheck-scripts
- mountPath: /tmp/healthcheck
- volumes:
- - name: trino-artifacts
- hostPath:
- path: {{ printf "%s/init/trino/" .Values.projectRoot }}
- type: DirectoryOrCreate
- - name: trino-common-init-scripts
- hostPath:
- path: {{ printf "%s/init/common/" .Values.projectRoot }}
- type: DirectoryOrCreate
- - name: trino-healthcheck-scripts
- hostPath:
- path: {{ printf "%s/healthcheck/" .Values.projectRoot }}
- type: DirectoryOrCreate
----
-apiVersion: v1
-kind: Service
-metadata:
- name: {{ .Values.trino.serviceName }}
-spec:
- selector:
- app: {{ .Values.trino.serviceName }}
- ports:
- - port: 8080
- targetPort: 8080
- name: http
\ No newline at end of file
diff --git a/helm-chart/values.yaml b/helm-chart/values.yaml
deleted file mode 100644
index c87211f..0000000
--- a/helm-chart/values.yaml
+++ /dev/null
@@ -1,212 +0,0 @@
-# Global settings
-global:
- storageClass: ""
- namespace: gravitino-playground
- projectRoot: ""
-
-# PostgreSQL settings
-postgresql:
- serviceName: &postgres_host_ip postgresql
- image:
- repository: postgres
- tag: "13"
- pullPolicy: IfNotPresent
- resources:
- limits:
- cpu: 1
- memory: 500Mi
- requests:
- cpu: 200m
- memory: 200Mi
- env:
- - name: POSTGRES_USER
- value: postgres
- - name: POSTGRES_PASSWORD
- value: postgres
- - name: ALLOW_IP_RANGE
- value: 0.0.0.0/0
-
-# MySQL settings
-mysql:
- serviceName: &mysql_host_ip mysql
- image:
- repository: mysql
- tag: "8.0"
- pullPolicy: IfNotPresent
- resources:
- limits:
- cpu: 1
- memory: 500Mi
- requests:
- cpu: 200m
- memory: 200Mi
- env:
- - name: MYSQL_HOST_IP
- value: *mysql_host_ip
- - name: MYSQL_ROOT_PASSWORD
- value: mysql
- - name: MYSQL_USER
- value: mysql
- - name: MYSQL_PASSWORD
- value: mysql
- - name: MYSQL_DATABASE
- value: db
-
-# Hive settings
-hive:
- serviceName: &hive_host_ip hive
- image:
- repository: apache/gravitino-playground
- tag: hive-2.7.3
- pullPolicy: IfNotPresent
- resources:
- limits:
- cpu: 3
- memory: 3Gi
- requests:
- cpu: 1
- memory: 1Gi
- env:
- - name: HIVE_HOST_IP
- value: *hive_host_ip
- - name: HADOOP_USER_NAME
- value: &hadoop_user_name root
-
-# Gravitino settings
-gravitino:
- serviceName: &gravitino_host_ip gravitino
- image:
- repository: apache/gravitino
- tag: 0.7.0-incubating
- pullPolicy: IfNotPresent
- resources:
- limits:
- cpu: 500m
- memory: 1.5Gi
- requests:
- cpu: 300m
- memory: 1.5Gi
- env:
- - name: HIVE_HOST_IP
- value: *hive_host_ip
- - name: MYSQL_HOST_IP
- value: *mysql_host_ip
-
-# Trino settings
-trino:
- serviceName: &trino_host_ip trino
- image:
- repository: apache/gravitino-playground
- tag: trino-435-gravitino-0.7.0-incubating
- pullPolicy: IfNotPresent
- resources:
- limits:
- cpu: 1
- memory: 2Gi
- requests:
- cpu: 500m
- memory: 1Gi
- env:
- - name: HADOOP_USER_NAME
- value: *hadoop_user_name
- - name: GRAVITINO_HOST_IP
- value: *gravitino_host_ip
- - name: GRAVITINO_HOST_PORT
- value: "8090"
- - name: GRAVITINO_METALAKE_NAME
- value: metalake_demo
- - name: HIVE_HOST_IP
- value: *hive_host_ip
- - name: MYSQL_HOST_IP
- value: *mysql_host_ip
- - name: POSTGRES_HOST_IP
- value: *postgres_host_ip
-
-# Spark settings
-spark:
- serviceName: &spark_host_ip spark
- image:
- repository: spark
- tag: 3.4.3-scala2.12-java11-python3-r-ubuntu
- pullPolicy: IfNotPresent
- resources:
- limits:
- cpu: 1
- memory: 1Gi
- requests:
- cpu: 500m
- memory: 500Mi
- env:
- - name: HADOOP_USER_NAME
- value: *hadoop_user_name
- - name: GRAVITINO_HOST_IP
- value: *gravitino_host_ip
- - name: GRAVITINO_HOST_PORT
- value: "8090"
- - name: HIVE_HOST_IP
- value: *hive_host_ip
- - name: TRINO_HOST_IP
- value: *trino_host_ip
-
-# Jupyter settings
-jupyter:
- # service can not be set to jupyter, because we will get this error in
jupyter container:
- # ValueError: invalid literal for int() with base 10:
'tcp://192.168.194.169:8888'
- serviceName: jupyternotebook
- image:
- repository: jupyter/pyspark-notebook
- tag: spark-3.4.1
- pullPolicy: IfNotPresent
- resources:
- limits:
- cpu: 500m
- memory: 1Gi
- requests:
- cpu: 200m
- memory: 200Mi
- env:
- - name: GRAVITINO_HOST_IP
- value: *gravitino_host_ip
- - name: HIVE_HOST_IP
- value: *hive_host_ip
- - name: TRINO_HOST_IP
- value: *trino_host_ip
- - name: POSTGRES_HOST_IP
- value: *postgres_host_ip
- - name: SPARK_HOST_IP
- value: *spark_host_ip
-
-# Service specific settings
-service:
- type: ClusterIP
- # If you want to expose services externally, you can use LoadBalancer
- # type: LoadBalancer
-
-# Ingress settings
-ingress:
- enabled: false
- # If you want to use ingress, uncomment and adjust the following:
- # annotations: {}
- # hosts:
- # - host: chart-example.local
- # paths: []
- # tls: []
-
-# Security settings
-securityContext:
- {}
- # capabilities:
- # drop:
- # - ALL
- # readOnlyRootFilesystem: true
- # runAsNonRoot: true
- # runAsUser: 1000
-
-# Node selector
-nodeSelector: {}
-
-# Tolerations
-tolerations: []
-
-# Affinity
-affinity: {}
diff --git a/init/common/init_metalake_catalog.sh
b/init/common/init_metalake_catalog.sh
index 9430f8d..48c43f0 100644
--- a/init/common/init_metalake_catalog.sh
+++ b/init/common/init_metalake_catalog.sh
@@ -17,11 +17,11 @@
# under the License.
#
-response=$(curl http://${GRAVITINO_HOST_IP}:${GRAVITINO_HOST_PORT}/api/metalakes/metalake_demo)
+response=$(curl http://gravitino:8090/api/metalakes/metalake_demo)
if echo "$response" | grep -q "\"code\":0"; then
true
else
- response=$(curl -X POST -H "Content-Type: application/json" -d '{"name":"metalake_demo","comment":"comment","properties":{}}' http://${GRAVITINO_HOST_IP}:${GRAVITINO_HOST_PORT}/api/metalakes)
+ response=$(curl -X POST -H "Content-Type: application/json" -d '{"name":"metalake_demo","comment":"comment","properties":{}}' http://gravitino:8090/api/metalakes)
if echo "$response" | grep -q "\"code\":0"; then
true # Placeholder, do nothing
else
@@ -30,12 +30,12 @@ else
fi
fi
-response=$(curl
http://${GRAVITINO_HOST_IP}:${GRAVITINO_HOST_PORT}/api/metalakes/metalake_demo/catalogs/catalog_hive)
+response=$(curl
http://gravitino:8090/api/metalakes/metalake_demo/catalogs/catalog_hive)
if echo "$response" | grep -q "\"code\":0"; then
true
else
# Create Hive catalog for experience Gravitino service
- response=$(curl -X POST -H "Content-Type: application/json" -d
'{"name":"catalog_hive","type":"RELATIONAL", "provider":"hive",
"comment":"comment","properties":{"metastore.uris":"thrift://'${HIVE_HOST_IP}':9083"
}}'
http://${GRAVITINO_HOST_IP}:${GRAVITINO_HOST_PORT}/api/metalakes/metalake_demo/catalogs)
+ response=$(curl -X POST -H "Content-Type: application/json" -d
'{"name":"catalog_hive","type":"RELATIONAL", "provider":"hive",
"comment":"comment","properties":{"metastore.uris":"thrift://'${HIVE_HOST_IP}':9083"
}}' http://gravitino:8090/api/metalakes/metalake_demo/catalogs)
if echo "$response" | grep -q "\"code\":0"; then
true # Placeholder, do nothing
else
@@ -44,12 +44,12 @@ else
fi
fi
-response=$(curl http://${GRAVITINO_HOST_IP}:${GRAVITINO_HOST_PORT}/api/metalakes/metalake_demo/catalogs/catalog_postgres)
+response=$(curl http://gravitino:8090/api/metalakes/metalake_demo/catalogs/catalog_postgres)
if echo "$response" | grep -q "\"code\":0"; then
true
else
# Create Postgresql catalog for experience Gravitino service
- response=$(curl -X POST -H "Accept: application/vnd.gravitino.v1+json" -H "Content-Type: application/json" -d '{ "name":"catalog_postgres", "type":"RELATIONAL", "provider":"jdbc-postgresql", "comment":"comment", "properties":{ "jdbc-url":"jdbc:postgresql://postgresql/db", "jdbc-user":"postgres", "jdbc-password":"postgres", "jdbc-database":"db", "jdbc-driver": "org.postgresql.Driver" } }' http://${GRAVITINO_HOST_IP}:${GRAVITINO_HOST_PORT}/api/metalakes/metalake_demo/catalogs)
+ response=$(curl -X POST -H "Accept: application/vnd.gravitino.v1+json" -H "Content-Type: application/json" -d '{ "name":"catalog_postgres", "type":"RELATIONAL", "provider":"jdbc-postgresql", "comment":"comment", "properties":{ "jdbc-url":"jdbc:postgresql://postgresql/db", "jdbc-user":"postgres", "jdbc-password":"postgres", "jdbc-database":"db", "jdbc-driver": "org.postgresql.Driver" } }' http://gravitino:8090/api/metalakes/metalake_demo/catalogs)
if echo "$response" | grep -q "\"code\":0"; then
true # Placeholder, do nothing
else
@@ -58,12 +58,12 @@ else
fi
fi
-response=$(curl http://${GRAVITINO_HOST_IP}:${GRAVITINO_HOST_PORT}/api/metalakes/metalake_demo/catalogs/catalog_mysql)
+response=$(curl http://gravitino:8090/api/metalakes/metalake_demo/catalogs/catalog_mysql)
if echo "$response" | grep -q "\"code\":0"; then
true
else
# Create Mysql catalog for experience Gravitino service
- response=$(curl -X POST -H "Accept: application/vnd.gravitino.v1+json" -H "Content-Type: application/json" -d '{ "name":"catalog_mysql", "type":"RELATIONAL", "provider":"jdbc-mysql", "comment":"comment", "properties":{ "jdbc-url":"jdbc:mysql://'${MYSQL_HOST_IP}':3306", "jdbc-user":"mysql", "jdbc-password":"mysql", "jdbc-driver": "com.mysql.cj.jdbc.Driver" } }' http://${GRAVITINO_HOST_IP}:8090/api/metalakes/metalake_demo/catalogs)
+ response=$(curl -X POST -H "Accept: application/vnd.gravitino.v1+json" -H "Content-Type: application/json" -d '{ "name":"catalog_mysql", "type":"RELATIONAL", "provider":"jdbc-mysql", "comment":"comment", "properties":{ "jdbc-url":"jdbc:mysql://'${MYSQL_HOST_IP}':3306", "jdbc-user":"mysql", "jdbc-password":"mysql", "jdbc-driver": "com.mysql.cj.jdbc.Driver" } }' http://gravitino:8090/api/metalakes/metalake_demo/catalogs)
if echo "$response" | grep -q "catalog_mysql"; then
true # Placeholder, do nothing
@@ -73,12 +73,12 @@ else
fi
fi
-response=$(curl http://${GRAVITINO_HOST_IP}:${GRAVITINO_HOST_PORT}/api/metalakes/metalake_demo/catalogs/catalog_iceberg)
+response=$(curl http://gravitino:8090/api/metalakes/metalake_demo/catalogs/catalog_iceberg)
if echo "$response" | grep -q "\"code\":0"; then
true
else
# Create Iceberg catalog for experience Gravitino service
- response=$(curl -X POST -H "Accept: application/vnd.gravitino.v1+json" -H
"Content-Type: application/json" -d '{ "name":"catalog_iceberg",
"type":"RELATIONAL", "provider":"lakehouse-iceberg", "comment":"comment",
"properties":{ "uri":"jdbc:mysql://'${MYSQL_HOST_IP}':3306/db",
"catalog-backend":"jdbc",
"warehouse":"hdfs://'${HIVE_HOST_IP}':9000/user/iceberg/warehouse/",
"jdbc-user":"mysql", "jdbc-password":"mysql",
"jdbc-driver":"com.mysql.cj.jdbc.Driver"} }' http://${GRAVITINO_HOST_IP} [...]
+ response=$(curl -X POST -H "Accept: application/vnd.gravitino.v1+json" -H
"Content-Type: application/json" -d '{ "name":"catalog_iceberg",
"type":"RELATIONAL", "provider":"lakehouse-iceberg", "comment":"comment",
"properties":{ "uri":"jdbc:mysql://'${MYSQL_HOST_IP}':3306/db",
"catalog-backend":"jdbc",
"warehouse":"hdfs://'${HIVE_HOST_IP}':9000/user/iceberg/warehouse/",
"jdbc-user":"mysql", "jdbc-password":"mysql",
"jdbc-driver":"com.mysql.cj.jdbc.Driver"} }' http://gravitino:8090/api/m [...]
if echo "$response" | grep -q "\"code\":0"; then
true # Placeholder, do nothing
else
diff --git a/init/gravitino/gravitino.conf b/init/gravitino/gravitino.conf
index 4cafe21..a155591 100755
--- a/init/gravitino/gravitino.conf
+++ b/init/gravitino/gravitino.conf
@@ -75,8 +75,8 @@ gravitino.auxService.iceberg-rest.host = 0.0.0.0
# Iceberg REST service http port
gravitino.auxService.iceberg-rest.httpPort = 9001
gravitino.auxService.iceberg-rest.catalog-backend = jdbc
-gravitino.auxService.iceberg-rest.uri = jdbc:mysql://__MYSQL_HOST_IP__:3306/db
-gravitino.auxService.iceberg-rest.warehouse = hdfs://__HIVE_HOST_IP__:9000/user/iceberg/warehouse/
+gravitino.auxService.iceberg-rest.uri = jdbc:mysql://mysql:3306/db
+gravitino.auxService.iceberg-rest.warehouse = hdfs://hive:9000/user/iceberg/warehouse/
gravitino.auxService.iceberg-rest.jdbc.user = mysql
gravitino.auxService.iceberg-rest.jdbc.password = mysql
gravitino.auxService.iceberg-rest.jdbc-driver = com.mysql.cj.jdbc.Driver
diff --git a/init/gravitino/init.sh b/init/gravitino/init.sh
index a427c6f..2d5a850 100644
--- a/init/gravitino/init.sh
+++ b/init/gravitino/init.sh
@@ -26,9 +26,6 @@ cp /root/gravitino/catalogs/jdbc-postgresql/libs/postgresql-42.2.7.jar /root/gra
cp /root/gravitino/catalogs/jdbc-mysql/libs/mysql-connector-java-8.0.27.jar /root/gravitino/iceberg-rest-server/libs
cp /tmp/gravitino/gravitino.conf /root/gravitino/conf
-# set env var
-sed -i 's/__MYSQL_HOST_IP__/'"$MYSQL_HOST_IP"'/g' /root/gravitino/conf/gravitino.conf
-sed -i 's/__HIVE_HOST_IP__/'"$HIVE_HOST_IP"'/g' /root/gravitino/conf/gravitino.conf
echo "Finish downloading"
echo "Start the Gravitino Server"
/bin/bash /root/gravitino/bin/gravitino.sh start &
diff --git a/init/hive/init.sh b/init/hive/init.sh
index 9508833..3e7baa2 100644
--- a/init/hive/init.sh
+++ b/init/hive/init.sh
@@ -17,11 +17,10 @@
# under the License.
#
-# remove command line `tail -f /dev/null` in the `/usr/local/sbin/start.sh`
-sed -i '$d' /usr/local/sbin/start.sh
-sed -i '$d' /usr/local/sbin/start.sh
+# remove command `tail -f /dev/null` in `/usr/local/sbin/start.sh`, so we can run subsequent commands
+sed -i -E 's/tail -f \/dev\/null/\s/g' /usr/local/sbin/start.sh
+
cp /tmp/hive/core-site.xml /tmp/hadoop-conf
-sed -i "s|hdfs://localhost:9000|hdfs://${HIVE_HOST_IP}:9000|g" /usr/local/hive/conf/hive-site.xml
/bin/bash /usr/local/sbin/start.sh
hdfs dfs -mkdir -p /user/gravitino
hdfs dfs -mkdir -p /user/iceberg/warehouse
diff --git a/init/jupyter/gravitino-fileset-example.ipynb b/init/jupyter/gravitino-fileset-example.ipynb
index daa2a85..7f45ae8 100644
--- a/init/jupyter/gravitino-fileset-example.ipynb
+++ b/init/jupyter/gravitino-fileset-example.ipynb
@@ -20,10 +20,8 @@
"from hdfs import InsecureClient\n",
"import os\n",
"\n",
- "hive_host_ip=os.getenv('HIVE_HOST_IP')\n",
- "\n",
"# Create a HDFS connector client\n",
- "hdfs_client = InsecureClient(f\"http://{hive_host_ip}:50070\",
user='root')\n",
+ "hdfs_client = InsecureClient(\"http://hive:50070\", user='root')\n",
"\n",
"# List HDFS file and directories\n",
"print(hdfs_client.list('/user/gravitino'))\n",
@@ -52,10 +50,8 @@
"from gravitino import NameIdentifier, GravitinoAdminClient,
GravitinoClient, Catalog, Fileset, FilesetChange\n",
"import os \n",
"\n",
- "gravitino_host_ip=os.getenv('GRAVITINO_HOST_IP')\n",
- "\n",
"# Create Gravitino admin client\n",
- "gravitino_admin_client =
GravitinoAdminClient(uri=f\"http://{gravitino_host_ip}:8090\")\n",
+ "gravitino_admin_client =
GravitinoAdminClient(uri=\"http://gravitino:8090\")\n",
"\n",
"# Create metalake via Gravitino admin client\n",
"metalake_name=\"default\"\n",
@@ -73,7 +69,7 @@
"outputs": [],
"source": [
"# Create Gravitino client\n",
- "gravitino_client =
GravitinoClient(uri=f\"http://{gravitino_host_ip}:8090\",
metalake_name=metalake_name)"
+ "gravitino_client = GravitinoClient(uri=\"http://gravitino:8090\",
metalake_name=metalake_name)"
]
},
{
@@ -131,7 +127,7 @@
"# Create schema entity via Gravition client\n",
"schema_name=\"schema\"\n",
"schema_path=\"/user/gravitino/\"+schema_name\n",
- "schema_hdfs_path=f\"hdfs://{hive_host_ip}:9000{schema_path}\"\n",
+ "schema_hdfs_path=f\"hdfs://hive:9000{schema_path}\"\n",
"\n",
"catalog.as_schemas().create_schema(schema_name=schema_name, \n",
" comment=\"\", \n",
@@ -157,7 +153,7 @@
"# Create a managed type of Fileset\n",
"managed_fileset_name=\"managed_fileset\"\n",
"managed_fileset_path=\"/user/gravitino/\"+schema_name+\"/\"+managed_fileset_name\n",
-
"managed_fileset_hdfs_path=f\"hdfs://{hive_host_ip}:9000{managed_fileset_path}\"\n",
+ "managed_fileset_hdfs_path=f\"hdfs://hive:9000{managed_fileset_path}\"\n",
"\n",
"managed_fileset_ident: NameIdentifier = NameIdentifier.of(schema_name,
managed_fileset_name)\n",
"catalog.as_fileset_catalog().create_fileset(ident=managed_fileset_ident,\n",
@@ -184,7 +180,7 @@
"source": [
"external_fileset_name=\"external_fileset\"\n",
"external_fileset_path=\"/user/gravitino/\"+schema_name+\"/\"+external_fileset_name\n",
-
"external_fileset_hdfs_path=f\"hdfs://{hive_host_ip}:9000{external_fileset_path}\"\n",
+
"external_fileset_hdfs_path=f\"hdfs://hive:9000{external_fileset_path}\"\n",
"\n",
"# Create a fileset path in HDFS in advance\n",
"hdfs_client.makedirs(external_fileset_path)\n",
diff --git a/init/jupyter/gravitino-spark-trino-example.ipynb b/init/jupyter/gravitino-spark-trino-example.ipynb
index ac88c49..4cbf078 100644
--- a/init/jupyter/gravitino-spark-trino-example.ipynb
+++ b/init/jupyter/gravitino-spark-trino-example.ipynb
@@ -19,10 +19,6 @@
"import os\n",
"from pyspark.sql import SparkSession\n",
"\n",
- "gravitino_host_ip=os.getenv('GRAVITINO_HOST_IP')\n",
- "hive_host_ip = os.getenv('HIVE_HOST_IP')\n",
- "trino_host_ip = os.getenv('TRINO_HOST_IP')\n",
- "spark_host_ip = os.getenv('SPARK_HOST_IP')\n",
"spark_home = os.getenv('SPARK_HOME')\n",
"os.environ['HADOOP_USER_NAME']=\"anonymous\"\n",
"\n",
@@ -30,14 +26,14 @@
" .appName(\"PySpark SQL Example\") \\\n",
" .config(\"spark.plugins\",
\"org.apache.gravitino.spark.connector.plugin.GravitinoSparkPlugin\") \\\n",
" .config(\"spark.jars\",
\"/tmp/gravitino/packages/iceberg-spark-runtime-3.4_2.12-1.5.2.jar,/tmp/gravitino/packages/gravitino-spark-connector-runtime-3.4_2.12-0.7.0-incubating.jar\")
\\\n",
- " .config(\"spark.sql.gravitino.uri\",
f\"http://{gravitino_host_ip}:8090\") \\\n",
+ " .config(\"spark.sql.gravitino.uri\", \"http://gravitino:8090\") \\\n",
" .config(\"spark.sql.gravitino.metalake\", \"metalake_demo\") \\\n",
" .config(\"spark.sql.gravitino.enableIcebergSupport\", \"true\") \\\n",
" .config(\"spark.sql.catalog.catalog_rest\",
\"org.apache.iceberg.spark.SparkCatalog\") \\\n",
" .config(\"spark.sql.catalog.catalog_rest.type\", \"rest\") \\\n",
- " .config(\"spark.sql.catalog.catalog_rest.uri\",
f\"http://{gravitino_host_ip}:9001/iceberg/\") \\\n",
+ " .config(\"spark.sql.catalog.catalog_rest.uri\",
\"http://gravitino:9001/iceberg/\") \\\n",
" .config(\"spark.locality.wait.node\", \"0\") \\\n",
- " .config(\"spark.sql.warehouse.dir\",
f\"hdfs://{hive_host_ip}:9000/user/hive/warehouse\") \\\n",
+ " .config(\"spark.sql.warehouse.dir\",
\"hdfs://hive:9000/user/hive/warehouse\") \\\n",
" .enableHiveSupport() \\\n",
" .getOrCreate()"
]
@@ -107,7 +103,7 @@
"\n",
"# Create a Trino connector client\n",
"conn = connect(\n",
- " host=trino_host_ip,\n",
+ " host=\"trino\",\n",
" port=8080,\n",
" user=\"admin\",\n",
" catalog=\"catalog_hive\",\n",
diff --git a/init/jupyter/gravitino-trino-example.ipynb b/init/jupyter/gravitino-trino-example.ipynb
index 7130d0b..edd4e51 100644
--- a/init/jupyter/gravitino-trino-example.ipynb
+++ b/init/jupyter/gravitino-trino-example.ipynb
@@ -31,11 +31,9 @@
"from trino.dbapi import connect\n",
"import os\n",
"\n",
- "trino_host_ip=os.getenv('TRINO_HOST_IP')\n",
- "\n",
"# Create a Trino connector client\n",
"conn = connect(\n",
- " host=trino_host_ip,\n",
+ " host=\"trino\",\n",
" port=8080,\n",
" user=\"admin\",\n",
" catalog=\"catalog_hive\",\n",
@@ -52,7 +50,7 @@
"source": [
"## Prepare\n",
"\n",
- "Creates a schema named `catalog_hive.company` in Hive, with its location
set to`hdfs://{hive_host_ip}:9000/user/hive/warehouse/company.db` on HDFS."
+ "Creates a schema named `catalog_hive.company` in Hive, with its location
set to`hdfs://hive:9000/user/hive/warehouse/company.db` on HDFS."
]
},
{
@@ -64,11 +62,9 @@
"source": [
"import os\n",
"\n",
- "hive_host_ip=os.getenv('HIVE_HOST_IP')\n",
- "\n",
- "trino_client.execute(f\"\"\"\n",
+ "trino_client.execute(\"\"\"\n",
"CREATE SCHEMA catalog_hive.company\n",
- " WITH (location =
'hdfs://{hive_host_ip}:9000/user/hive/warehouse/company.db')\n",
+ " WITH (location = 'hdfs://hive:9000/user/hive/warehouse/company.db')\n",
"\"\"\").fetchall()"
]
},
diff --git a/init/jupyter/gravitino_llamaIndex_demo.ipynb b/init/jupyter/gravitino_llamaIndex_demo.ipynb
index 3386a10..ccd5d27 100644
--- a/init/jupyter/gravitino_llamaIndex_demo.ipynb
+++ b/init/jupyter/gravitino_llamaIndex_demo.ipynb
@@ -85,9 +85,7 @@
"from gravitino import NameIdentifier, GravitinoClient, Catalog, Fileset,
GravitinoAdminClient\n",
"import os \n",
"\n",
- "gravitino_host_ip=os.getenv('GRAVITINO_HOST_IP')\n",
- "\n",
- "gravitino_url = f\"http://{gravitino_host_ip}:8090\"\n",
+ "gravitino_url = \"http://gravitino:8090\"\n",
"metalake_name = \"metalake_demo\"\n",
"\n",
"catalog_name = \"catalog_fileset\"\n",
@@ -234,9 +232,7 @@
"from sqlalchemy.sql.expression import select, text\n",
"import os \n",
"\n",
- "trino_host_ip=os.getenv('TRINO_HOST_IP')\n",
- "\n",
- "trino_engine =
create_engine(f\"trino://admin@{trino_host_ip}:8080/catalog_mysql/demo_llamaindex\")\n",
+ "trino_engine =
create_engine(\"trino://admin@trino:8080/catalog_mysql/demo_llamaindex\")\n",
"\n",
"connection = trino_engine.connect();\n",
"\n",
diff --git a/init/spark/init.sh b/init/spark/init.sh
index e3e7aba..49b8f8b 100644
--- a/init/spark/init.sh
+++ b/init/spark/init.sh
@@ -19,10 +19,6 @@
mkdir -p /opt/spark/conf
cp /tmp/spark/spark-defaults.conf /opt/spark/conf
-# inject host information
-sed -i 's/__GRAVITINO_HOST_IP__/'"$GRAVITINO_HOST_IP"'/g' /opt/spark/conf/spark-defaults.conf
-sed -i 's/__HIVE_HOST_IP__/'"$HIVE_HOST_IP"'/g' /opt/spark/conf/spark-defaults.conf
-
cp /tmp/spark/packages/iceberg-spark-runtime-3.4_2.12-1.5.2.jar /opt/spark/jars/iceberg-spark-runtime-3.4_2.12-1.5.2.jar
cp /tmp/spark/packages/gravitino-spark-connector-runtime-3.4_2.12-0.7.0-incubating.jar /opt/spark/jars/gravitino-spark-connector-runtime-3.4_2.12-0.7.0-incubating.jar
cp /tmp/spark/packages/mysql-connector-java-8.0.27.jar /opt/spark/jars/mysql-connector-java-8.0.27.jar
diff --git a/init/spark/spark-defaults.conf b/init/spark/spark-defaults.conf
index fdf1a2c..446f865 100644
--- a/init/spark/spark-defaults.conf
+++ b/init/spark/spark-defaults.conf
@@ -18,14 +18,14 @@
#
spark.plugins org.apache.gravitino.spark.connector.plugin.GravitinoSparkPlugin
-spark.sql.gravitino.uri http://__GRAVITINO_HOST_IP__:8090
+spark.sql.gravitino.uri http://gravitino:8090
spark.sql.gravitino.metalake metalake_demo
spark.sql.gravitino.enableIcebergSupport true
spark.sql.extensions
org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions
spark.sql.catalog.catalog_rest org.apache.iceberg.spark.SparkCatalog
spark.sql.catalog.catalog_rest.type rest
-spark.sql.catalog.catalog_rest.uri http://__GRAVITINO_HOST_IP__:9001/iceberg/
+spark.sql.catalog.catalog_rest.uri http://gravitino:9001/iceberg/
spark.locality.wait.node 0
-spark.sql.warehouse.dir hdfs://__HIVE_HOST_IP__:9000/user/hive/warehouse
+spark.sql.warehouse.dir hdfs://hive:9000/user/hive/warehouse
spark.sql.hive.metastore.jars path
spark.sql.hive.metastore.jars.path file:///opt/spark/jars/*
diff --git a/init/trino/init.sh b/init/trino/init.sh
index a74f609..7433c67 100644
--- a/init/trino/init.sh
+++ b/init/trino/init.sh
@@ -30,7 +30,7 @@ while [ $counter -le 240 ]; do
echo "Wait for the initialization of services"
sleep 5
else
- trino --execute "create schema catalog_hive.sales with (location = 'hdfs://${HIVE_HOST_IP}:9000/user/hive/warehouse/sales.db');"
+ trino --execute "create schema catalog_hive.sales with (location = 'hdfs://hive:9000/user/hive/warehouse/sales.db');"
echo "Import the data of the Hive warehouse"
trino </tmp/trino/init.sql
echo "Import ends"
diff --git a/playground.sh b/playground.sh
index 3b1d537..7a0359f 100755
--- a/playground.sh
+++ b/playground.sh
@@ -37,18 +37,6 @@ testDocker() {
fi
}
-testK8s() {
- echo "Testing K8s environment ..."
- kubectl cluster-info
- if [ $? -eq 0 ]; then
- echo "INFO: K8s is working correctly!"
- else
- echo "ERROR: There was an issue running kubectl cluster-info, please check you K8s cluster."
- exit 1
- fi
-}
-
-
checkCompose() {
isExist=$(which docker-compose)
if [ $isExist ]; then
@@ -59,33 +47,6 @@ checkCompose() {
fi
}
-checkHelm() {
- isExist=$(which helm)
- if [ $isExist ]; then
- true # Placeholder, do nothing
- else
- echo "ERROR: Helm command not found, Please install helm v3."
- exit
- fi
- # check version
- # version will be like:
- # Version:"v3.15.2"
- regex="Version:\"(v[0-9]\.[0-9]+\.[0-9])\""
- version=$(helm version)
- echo "$version"
- if [[ $version =~ $regex ]]; then
- major_version="${BASH_REMATCH[1]}"
- echo "$major_version"
- if [[ $major_version =~ "v3" ]]; then
- echo "INFO: helm check PASS."
- return
- else
- echo "ERROR: Please install helm v3"
- exit
- fi
- fi
-}
-
checkPortInUse() {
local port=$1
if [[ "$(uname)" == "Darwin" ]]; then
@@ -104,20 +65,12 @@ checkPortInUse() {
start() {
echo "INFO: Starting the playground..."
- case "$runtime" in
- k8s)
- testK8s
- checkHelm
- ;;
- docker)
- testDocker
- checkCompose
- ports=(8090 9001 3307 19000 19083 60070 13306 15342 18080 18888 19090 13000)
- for port in "${ports[@]}"; do
- checkPortInUse ${port}
- done
- ;;
- esac
+ testDocker
+ checkCompose
+ ports=(8090 9001 3307 19000 19083 60070 13306 15342 18080 18888 19090 13000)
+ for port in "${ports[@]}"; do
+ checkPortInUse ${port}
+ done
cd ${playground_dir}
echo "Preparing packages..."
@@ -125,76 +78,40 @@ start() {
./init/gravitino/gravitino-dependency.sh
./init/jupyter/jupyter-dependency.sh
- case "$runtime" in
- k8s)
- helm upgrade --install gravitino-playground ./helm-chart/ \
- --create-namespace --namespace gravitino-playground \
- --set projectRoot=$(pwd)
- ;;
- docker)
- logSuffix=$(date +%Y%m%d%H%m%s)
- if [ "$enableRanger" == true ]; then
- docker-compose -f docker-compose.yaml -f docker-enable-ranger-hive-override.yaml up --detach
- else
- docker-compose up --detach
- fi
-
- docker compose logs -f >${playground_dir}/playground-${logSuffix}.log 2>&1 &
- echo "Check log details: ${playground_dir}/playground-${logSuffix}.log"
- ;;
- esac
+ logSuffix=$(date +%Y%m%d%H%m%s)
+ if [ "$enableRanger" == true ]; then
+ docker-compose -f docker-compose.yaml -f docker-enable-ranger-hive-override.yaml up --detach
+ else
+ docker-compose up --detach
+ fi
+
+ docker compose logs -f >${playground_dir}/playground-${logSuffix}.log 2>&1 &
+ echo "Check log details: ${playground_dir}/playground-${logSuffix}.log"
}
status() {
- case "$runtime" in
- k8s)
- kubectl -n gravitino-playground get pods -o wide
- ;;
- docker)
- docker-compose ps -a
- ;;
- esac
+ docker-compose ps -a
}
stop() {
echo "INFO: Stopping the playground..."
- case "$runtime" in
- k8s)
- helm uninstall --namespace gravitino-playground gravitino-playground
- ;;
- docker)
- docker-compose down
- if [ $? -eq 0 ]; then
- echo "INFO: Playground stopped!"
- fi
- ;;
- esac
+ docker-compose down
+ if [ $? -eq 0 ]; then
+ echo "INFO: Playground stopped!"
+ fi
}
-runtime=""
-
case "$1" in
-k8s)
- runtime="k8s";
- ;;
-docker)
- runtime="docker";
- ;;
-*)
- echo "ERROR: please specify which runtime you want to use, available runtime: [docker|k8s]"
-esac
-
-case "$2" in
start)
- if [[ "$3" == "-y" ]]; then
+ if [[ "$2" == "-y" ]]; then
input="y"
else
echo "The playground requires 2 CPU cores, 8 GB of RAM, and 25 GB of disk storage to operate efficiently."
read -r -p "Confirm the requirement is available in your OS [Y/n]:" input
fi
- if [[ "$4" == "--enable-ranger" || "$3" == "--enable-ranger" ]]; then
+ if [[ "$2" == "--enable-ranger" || "$3" == "--enable-ranger" ]]; then
enableRanger=true
else
enableRanger=false
@@ -219,7 +136,7 @@ stop)
stop
;;
*)
- echo "Usage: $0 [k8s|docker] [start | status | stop]"
+ echo "Usage: $0 [start | status | stop]"
exit 1
;;
esac