This is an automated email from the ASF dual-hosted git repository.
yuqi4733 pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/gravitino-playground.git
The following commit(s) were added to refs/heads/main by this push:
new 0e8b873 Update for 0.6.0-rc4 (#68)
0e8b873 is described below
commit 0e8b8739440de243cc8e4143eb7a15573117bb3c
Author: roryqi <[email protected]>
AuthorDate: Wed Aug 28 20:54:05 2024 +0800
Update for 0.6.0-rc4 (#68)
Co-authored-by: Shaofeng Shi <[email protected]>
---
README.md | 10 ++++-----
docker-compose.yaml | 6 ++---
init/gravitino/gravitino.conf | 20 ++++++++++++-----
init/gravitino/init.sh | 3 +++
init/jupyter/gravitino-fileset-example.ipynb | 33 +++++++++++++---------------
init/jupyter/gravitino_llamaIndex_demo.ipynb | 13 +++++------
6 files changed, 45 insertions(+), 40 deletions(-)
diff --git a/README.md b/README.md
index bb77f27..722abab 100644
--- a/README.md
+++ b/README.md
@@ -19,7 +19,7 @@
## Playground introduction
-The playground is a complete Apache Gravitino Docker runtime environment with
`Hive`, `HDFS`, `Trino`, `MySQL`, `PostgreSQL`, `Jupter`, and a `Gravitino`
server.
+The playground is a complete Apache Gravitino Docker runtime environment with
`Hive`, `HDFS`, `Trino`, `MySQL`, `PostgreSQL`, `Jupyter`, and a `Gravitino`
server.
Depending on your network and computer, startup time may take 3-5 minutes.
Once the playground environment has started, you can open
[http://localhost:8090](http://localhost:8090) in a browser to access the
Gravitino Web UI.
@@ -156,7 +156,7 @@ WHERE e.employee_id = p.employee_id AND p.employee_id =
s.employee_id
GROUP BY e.employee_id, given_name, family_name;
```
-### Using Iceberg REST service
+### Using Apache Iceberg REST service
If you want to migrate your business from Hive to Iceberg, some tables will
use Hive, and the other tables will use Iceberg.
Gravitino provides an Iceberg REST catalog service, too. You can use Spark to
access REST catalog to write the table data.
@@ -179,7 +179,7 @@ docker exec -it playground-spark bash
```
```shell
-spark@container_id:/$ cd /opt/spark && /bin/bash bin/spark-sql
+spark@container_id:/$ cd /opt/spark && /bin/bash bin/spark-sql
```
```SQL
@@ -187,7 +187,7 @@ use catalog_iceberg;
create database sales;
use sales;
create table customers (customer_id int, customer_name varchar(100),
customer_email varchar(100));
-describe extended customers;
+describe extended customers;
insert into customers (customer_id, customer_name, customer_email) values
(11,'Rory Brown','[email protected]');
insert into customers (customer_id, customer_name, customer_email) values
(12,'Jerry Washington','[email protected]');
```
@@ -200,7 +200,7 @@ docker exec -it playground-trino bash
```
```shell
-trino@container_id:/$ trino
+trino@container_id:/$ trino
```
```SQL
diff --git a/docker-compose.yaml b/docker-compose.yaml
index 648ea27..8ac75e5 100644
--- a/docker-compose.yaml
+++ b/docker-compose.yaml
@@ -19,7 +19,7 @@
version: '3.0'
services:
hive:
- image: datastrato/hive:2.7.3-no-yarn
+ image: apache/gravitino-playground:hive-2.7.3
ports:
- "3307:3306"
- "19000:9000"
@@ -43,7 +43,7 @@ services:
memory: 3G
gravitino:
- image: datastrato/gravitino:0.5.1
+ image: apache/gravitino:0.6.0-incubating-rc4
entrypoint: /bin/bash /tmp/gravitino/init.sh
ports:
- "8090:8090"
@@ -69,7 +69,7 @@ services:
retries: 5
trino:
- image: datastrato/trino:435-gravitino-0.5.1
+ image: apache/gravitino-playground:trino-435-gravitino-0.6.0-incubating-rc4
ports:
- "18080:8080"
container_name: playground-trino
diff --git a/init/gravitino/gravitino.conf b/init/gravitino/gravitino.conf
index 1435404..1adc75f 100755
--- a/init/gravitino/gravitino.conf
+++ b/init/gravitino/gravitino.conf
@@ -42,11 +42,18 @@ gravitino.server.webserver.responseHeaderSize = 131072
# THE CONFIGURATION FOR Gravitino ENTITY STORE
# The entity store to use
-gravitino.entity.store = kv
-# The RocksDB entity store
-gravitino.entity.store.kv = RocksDBKvBackend
-# The RocksDB backend path for entity store
-# gravitino.entity.store.kv.rocksdbPath = /tmp/gravitino
+gravitino.entity.store = relational
+# The backend for the entity store; only JDBC is supported
+gravitino.entity.store.relational = JDBCBackend
+
+# The JDBC URL for the entity store
+gravitino.entity.store.relational.jdbcUrl = jdbc:h2
+# The JDBC driver class name
+gravitino.entity.store.relational.jdbcDriver = org.h2.Driver
+# The JDBC user name
+gravitino.entity.store.relational.jdbcUser = gravitino
+# The JDBC password
+gravitino.entity.store.relational.jdbcPassword = gravitino
# THE CONFIGURATION FOR Gravitino CATALOG
# The interval in milliseconds to evict the catalog cache
@@ -56,12 +63,13 @@ gravitino.catalog.cache.evictionIntervalMs = 3600000
# Auxiliary service names, separate by ','
gravitino.auxService.names = iceberg-rest
# Iceberg REST service classpath
-gravitino.auxService.iceberg-rest.classpath = catalogs/lakehouse-iceberg/libs,
catalogs/lakehouse-iceberg/conf
+gravitino.auxService.iceberg-rest.classpath = iceberg-rest-server/libs,
iceberg-rest-server/conf
# Iceberg REST service host
gravitino.auxService.iceberg-rest.host = 0.0.0.0
# Iceberg REST service http port
gravitino.auxService.iceberg-rest.httpPort = 9001
gravitino.auxService.iceberg-rest.catalog-backend = jdbc
+gravitino.auxService.iceberg-rest.catalog-backend-name = catalog_iceberg
gravitino.auxService.iceberg-rest.uri = jdbc:mysql://mysql:3306/db
gravitino.auxService.iceberg-rest.warehouse =
hdfs://hive:9000/user/iceberg/warehouse/
gravitino.auxService.iceberg-rest.jdbc.user = mysql
diff --git a/init/gravitino/init.sh b/init/gravitino/init.sh
index 66f34b6..1c01423 100644
--- a/init/gravitino/init.sh
+++ b/init/gravitino/init.sh
@@ -21,6 +21,9 @@ wget
https://repo1.maven.org/maven2/mysql/mysql-connector-java/8.0.27/mysql-conn
wget https://jdbc.postgresql.org/download/postgresql-42.7.0.jar -O
/root/gravitino/catalogs/jdbc-postgresql/libs/postgresql-42.7.0.jar
cp /root/gravitino/catalogs/jdbc-postgresql/libs/postgresql-42.7.0.jar
/root/gravitino/catalogs/lakehouse-iceberg/libs
cp /root/gravitino/catalogs/jdbc-mysql/libs/mysql-connector-java-8.0.27.jar
/root/gravitino/catalogs/lakehouse-iceberg/libs
+
+cp /root/gravitino/catalogs/jdbc-postgresql/libs/postgresql-42.7.0.jar
/root/gravitino/iceberg-rest-server/libs
+cp /root/gravitino/catalogs/jdbc-mysql/libs/mysql-connector-java-8.0.27.jar
/root/gravitino/iceberg-rest-server/libs
cp /tmp/gravitino/gravitino.conf /root/gravitino/conf
echo "Finish downloading"
echo "Start the Gravitino Server"
diff --git a/init/jupyter/gravitino-fileset-example.ipynb
b/init/jupyter/gravitino-fileset-example.ipynb
index 60c3ae2..a399093 100644
--- a/init/jupyter/gravitino-fileset-example.ipynb
+++ b/init/jupyter/gravitino-fileset-example.ipynb
@@ -35,7 +35,7 @@
"metadata": {},
"outputs": [],
"source": [
- "pip install gravitino==0.5.1"
+ "pip install apache-gravitino==0.6.0rc4"
]
},
{
@@ -53,8 +53,7 @@
"\n",
"# Create metalake via Gravitino admin client\n",
"metalake_name=\"default\"\n",
- "metalake_ident=NameIdentifier.of(metalake_name)\n",
- "metalake =
gravitino_admin_client.create_metalake(ident=metalake_ident,\n",
+ "metalake = gravitino_admin_client.create_metalake(name=metalake_name,\n",
" comment=\"metalake
comment\", \n",
" properties={})\n",
"print(metalake)"
@@ -95,9 +94,8 @@
"source": [
"# Create catalog via Gravition client\n",
"catalog_name=\"catalog\"\n",
- "catalog_ident=NameIdentifier.of_catalog(metalake_name, catalog_name)\n",
"\n",
- "catalog = gravitino_client.create_catalog(ident=catalog_ident,\n",
+ "catalog = gravitino_client.create_catalog(name=catalog_name,\n",
"
catalog_type=Catalog.Type.FILESET,\n",
" provider=\"hadoop\", \n",
" comment=\"\",\n",
@@ -113,7 +111,7 @@
"outputs": [],
"source": [
"# Load catalog entity via Gravition client\n",
- "catalog = gravitino_client.load_catalog(ident=catalog_ident)\n",
+ "catalog = gravitino_client.load_catalog(name=catalog_name)\n",
"print(catalog)"
]
},
@@ -129,8 +127,7 @@
"schema_path=\"/user/datastrato/\"+schema_name\n",
"schema_hdfs_path=\"hdfs://hive:9000\"+schema_path\n",
"\n",
- "schema_ident: NameIdentifier = NameIdentifier.of_schema(metalake_name,
catalog_name, schema_name)\n",
- "catalog.as_schemas().create_schema(ident=schema_ident, \n",
+ "catalog.as_schemas().create_schema(schema_name=schema_name, \n",
" comment=\"\", \n",
"
properties={\"location\":schema_hdfs_path})\n",
"\n",
@@ -155,7 +152,7 @@
"managed_fileset_path=\"/user/datastrato/\"+schema_name+\"/\"+managed_fileset_name\n",
"managed_fileset_hdfs_path=\"hdfs://hive:9000\"+managed_fileset_path\n",
"\n",
- "managed_fileset_ident: NameIdentifier =
NameIdentifier.of_fileset(metalake_name, catalog_name, schema_name,
managed_fileset_name)\n",
+ "managed_fileset_ident: NameIdentifier = NameIdentifier.of(schema_name,
managed_fileset_name)\n",
"catalog.as_fileset_catalog().create_fileset(ident=managed_fileset_ident,\n",
"
fileset_type=Fileset.Type.MANAGED,\n",
" comment=\"\",\n",
@@ -187,12 +184,12 @@
"try:\n",
" info = hdfs_client.status(external_fileset_path)\n",
" print(f\"Success: The storage location {external_fileset_path} was
successfully created.\")\n",
- " print(\"Details:\", info) # print hdfs path detail informations\n",
+ " print(\"Details:\", info) # print hdfs path detail information\n",
"except Exception:\n",
" print(f\"Faild: The storage location {external_fileset_path} was not
successfully created.\")\n",
"\n",
- "# Create a external type of fileset\n",
- "external_fileset_ident: NameIdentifier =
NameIdentifier.of_fileset(metalake_name, catalog_name, schema_name,
external_fileset_name)\n",
+ "# Create an external type of fileset\n",
+ "external_fileset_ident: NameIdentifier = NameIdentifier.of(schema_name,
external_fileset_name)\n",
"catalog.as_fileset_catalog().create_fileset(ident=external_fileset_ident,\n",
"
fileset_type=Fileset.Type.EXTERNAL,\n",
" comment=\"\",\n",
@@ -208,7 +205,7 @@
"outputs": [],
"source": [
"# List all fileset\n",
- "catalog = gravitino_client.load_catalog(ident=catalog_ident)\n",
+ "catalog = gravitino_client.load_catalog(name=catalog_name)\n",
"fileset_list: List[NameIdentifier] =
catalog.as_fileset_catalog().list_filesets(namespace=managed_fileset_ident.namespace())\n",
"print(fileset_list)"
]
@@ -221,7 +218,7 @@
"outputs": [],
"source": [
"# Load managed fileset\n",
-
"managed_fileset=gravitino_client.load_catalog(ident=catalog_ident).as_fileset_catalog().load_fileset(ident=managed_fileset_ident)\n",
+
"managed_fileset=gravitino_client.load_catalog(name=catalog_name).as_fileset_catalog().load_fileset(ident=managed_fileset_ident)\n",
"print(managed_fileset)"
]
},
@@ -233,7 +230,7 @@
"outputs": [],
"source": [
"# Load external fileset\n",
-
"external_fileset=gravitino_client.load_catalog(ident=catalog_ident).as_fileset_catalog().load_fileset(ident=external_fileset_ident)\n",
+
"external_fileset=gravitino_client.load_catalog(name=catalog_name).as_fileset_catalog().load_fileset(ident=external_fileset_ident)\n",
"print(external_fileset)"
]
},
@@ -281,7 +278,7 @@
"outputs": [],
"source": [
"# Drop schema\n",
- "catalog.as_schemas().drop_schema(ident=schema_ident, cascade=True)\n",
+ "catalog.as_schemas().drop_schema(schema_name=schema_name,
cascade=True)\n",
"\n",
"# Check schema location if successfully deleted\n",
"try:\n",
@@ -299,7 +296,7 @@
"outputs": [],
"source": [
"# Drop catalog\n",
- "result=gravitino_client.drop_catalog(ident=catalog_ident)\n",
+ "result=gravitino_client.drop_catalog(name=catalog_name)\n",
"print(result)"
]
},
@@ -311,7 +308,7 @@
"outputs": [],
"source": [
"# Drop metalake\n",
- "result=gravitino_admin_client.drop_metalake(metalake_ident)\n",
+ "result=gravitino_admin_client.drop_metalake(metalake_name)\n",
"print(result)"
]
}
diff --git a/init/jupyter/gravitino_llamaIndex_demo.ipynb
b/init/jupyter/gravitino_llamaIndex_demo.ipynb
index 138a631..19ba0ad 100644
--- a/init/jupyter/gravitino_llamaIndex_demo.ipynb
+++ b/init/jupyter/gravitino_llamaIndex_demo.ipynb
@@ -53,7 +53,7 @@
"%pip install llama-index-readers-wikipedia\n",
"%pip install llama-index-llms-openai\n",
"%pip install llama-index\n",
- "%pip install gravitino==0.5.0.dev25\n",
+ "%pip install apache-gravitino==0.6.0rc4\n",
"%pip install sqlalchemy-trino"
]
},
@@ -86,16 +86,13 @@
"\n",
"gravitino_url = \"http://gravitino:8090\"\n",
"metalake_name = \"metalake_demo\"\n",
- "metalake_ident = NameIdentifier.of_metalake(metalake_name)\n",
"\n",
"catalog_name = \"catalog_fileset\"\n",
- "catalog_ident = NameIdentifier.of_catalog(metalake_name, catalog_name)\n",
"\n",
"schema_name = \"countries\"\n",
- "schema_ident = NameIdentifier.of_schema(metalake_name, catalog_name,
schema_name)\n",
"\n",
"fileset_name = \"cities\"\n",
- "fileset_ident = NameIdentifier.of_fileset(metalake_name, catalog_name,
schema_name, fileset_name)\n",
+ "fileset_ident = NameIdentifier.of(schema_name, fileset_name)\n",
"\n",
"gravitino_admin_client = GravitinoAdminClient(uri=gravitino_url)\n",
"gravitino_client = GravitinoClient(uri=gravitino_url,
metalake_name=metalake_name)"
@@ -154,9 +151,9 @@
"# Create schema and fileset\n",
"schema_countries = None\n",
"try:\n",
- " schema_countries =
demo_catalog.as_schemas().load_schema(ident=schema_ident)\n",
+ " schema_countries =
demo_catalog.as_schemas().load_schema(schema_name=schema_name)\n",
"except Exception as e:\n",
- " schema_countries =
demo_catalog.as_schemas().create_schema(ident=schema_ident,\n",
+ " schema_countries =
demo_catalog.as_schemas().create_schema(schema_name=schema_name,\n",
"
comment=\"countries\",\n",
"
properties={})\n",
"print(schema_countries)\n",
@@ -199,7 +196,7 @@
"loaded_catalog_demo = gravitino_client.load_catalog(name=catalog_name)\n",
"print(loaded_catalog_demo)\n",
"\n",
- "loaded_schema_countries =
loaded_catalog_demo.as_schemas().load_schema(ident=schema_ident)\n",
+ "loaded_schema_countries =
loaded_catalog_demo.as_schemas().load_schema(schema_name=schema_name)\n",
"print(loaded_schema_countries)\n",
"\n",
"loaded_fileset_cities =
loaded_catalog_demo.as_fileset_catalog().load_fileset(ident=fileset_ident)\n",