This is an automated email from the ASF dual-hosted git repository.
morningman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 60d57d3ab6f [opt](iceberg docker)Use PostgreSQL as the backend for the
Iceberg REST server. (#46289)
60d57d3ab6f is described below
commit 60d57d3ab6fb6b2d1baae00280770905379b2dc8
Author: wuwenchi <[email protected]>
AuthorDate: Wed Jan 8 00:07:59 2025 +0800
[opt](iceberg docker)Use PostgreSQL as the backend for the Iceberg REST
server. (#46289)
### What problem does this PR solve?
Problem Summary:
Use PostgreSQL as the backend for the Iceberg REST server. SQLite does
not support concurrent writes, so when multiple engines operate on the
same database simultaneously, a database lock error occurs.
---
.../docker-compose/iceberg/entrypoint.sh.tpl | 37 +++++++++++-----------
.../docker-compose/iceberg/iceberg.yaml.tpl | 19 +++++++++++
2 files changed, 38 insertions(+), 18 deletions(-)
diff --git a/docker/thirdparties/docker-compose/iceberg/entrypoint.sh.tpl
b/docker/thirdparties/docker-compose/iceberg/entrypoint.sh.tpl
index a4b27bdd6c0..45d9bbf3592 100644
--- a/docker/thirdparties/docker-compose/iceberg/entrypoint.sh.tpl
+++ b/docker/thirdparties/docker-compose/iceberg/entrypoint.sh.tpl
@@ -23,24 +23,25 @@ start-worker.sh spark://doris--spark-iceberg:7077
start-history-server.sh
start-thriftserver.sh --driver-java-options "-Dderby.system.home=/tmp/derby"
-
-
-ls /mnt/scripts/create_preinstalled_scripts/iceberg/*.sql | xargs -n 1 -I {}
bash -c '
- START_TIME=$(date +%s)
- spark-sql --master spark://doris--spark-iceberg:7077 --conf
spark.sql.extensions=org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions
-f {}
- END_TIME=$(date +%s)
- EXECUTION_TIME=$((END_TIME - START_TIME))
- echo "Script: {} executed in $EXECUTION_TIME seconds"
-'
-
-ls /mnt/scripts/create_preinstalled_scripts/paimon/*.sql | xargs -n 1 -I {}
bash -c '
- START_TIME=$(date +%s)
- spark-sql --master spark://doris--spark-iceberg:7077 --conf
spark.sql.extensions=org.apache.paimon.spark.extensions.PaimonSparkSessionExtensions
-f {}
- END_TIME=$(date +%s)
- EXECUTION_TIME=$((END_TIME - START_TIME))
- echo "Script: {} executed in $EXECUTION_TIME seconds"
-'
-
+# The creation of a Spark SQL client is time-consuming,
+# and reopening a new client for each SQL file execution leads to significant
overhead.
+# To reduce the time spent on creating clients,
+# we group these files together and execute them using a single client.
+# This approach can reduce the time from 150s to 40s.
+
+START_TIME1=$(date +%s)
+find /mnt/scripts/create_preinstalled_scripts/iceberg -name '*.sql' | sed
's|^|source |' | sed 's|$|;|'> iceberg_total.sql
+spark-sql --master spark://doris--spark-iceberg:7077 --conf
spark.sql.extensions=org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions
-f iceberg_total.sql
+END_TIME1=$(date +%s)
+EXECUTION_TIME1=$((END_TIME1 - START_TIME1))
+echo "Script iceberg total: {} executed in $EXECUTION_TIME1 seconds"
+
+START_TIME2=$(date +%s)
+find /mnt/scripts/create_preinstalled_scripts/paimon -name '*.sql' | sed
's|^|source |' | sed 's|$|;|'> paimon_total.sql
+spark-sql --master spark://doris--spark-iceberg:7077 --conf
spark.sql.extensions=org.apache.paimon.spark.extensions.PaimonSparkSessionExtensions
-f paimon_total.sql
+END_TIME2=$(date +%s)
+EXECUTION_TIME2=$((END_TIME2 - START_TIME2))
+echo "Script paimon total: {} executed in $EXECUTION_TIME2 seconds"
touch /mnt/SUCCESS;
diff --git a/docker/thirdparties/docker-compose/iceberg/iceberg.yaml.tpl
b/docker/thirdparties/docker-compose/iceberg/iceberg.yaml.tpl
index 38491f645a9..fa4f8d1cca4 100644
--- a/docker/thirdparties/docker-compose/iceberg/iceberg.yaml.tpl
+++ b/docker/thirdparties/docker-compose/iceberg/iceberg.yaml.tpl
@@ -18,6 +18,7 @@
version: "3"
services:
+
spark-iceberg:
image: tabulario/spark-iceberg
container_name: doris--spark-iceberg
@@ -47,6 +48,19 @@ services:
interval: 5s
timeout: 120s
retries: 120
+
+ postgres:
+ image: postgis/postgis:14-3.3
+ container_name: doris--postgres
+ environment:
+ POSTGRES_PASSWORD: 123456
+ POSTGRES_USER: root
+ POSTGRES_DB: iceberg
+ volumes:
+ - ./data/input/pgdata:/var/lib/postgresql/data
+ networks:
+ - doris--iceberg
+
rest:
image: tabulario/iceberg-rest
container_name: doris--iceberg-rest
@@ -54,6 +68,8 @@ services:
- ${REST_CATALOG_PORT}:8181
volumes:
- ./data:/mnt/data
+ depends_on:
+ - postgres
environment:
- AWS_ACCESS_KEY_ID=admin
- AWS_SECRET_ACCESS_KEY=password
@@ -61,6 +77,9 @@ services:
- CATALOG_WAREHOUSE=s3a://warehouse/wh/
- CATALOG_IO__IMPL=org.apache.iceberg.aws.s3.S3FileIO
- CATALOG_S3_ENDPOINT=http://minio:9000
+ - CATALOG_URI=jdbc:postgresql://postgres:5432/iceberg
+ - CATALOG_JDBC_USER=root
+ - CATALOG_JDBC_PASSWORD=123456
networks:
- doris--iceberg
entrypoint: /bin/bash /mnt/data/input/script/rest_init.sh
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org