This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 60d57d3ab6f [opt](iceberg docker)Use PostgreSQL as the backend for the 
Iceberg REST server.  (#46289)
60d57d3ab6f is described below

commit 60d57d3ab6fb6b2d1baae00280770905379b2dc8
Author: wuwenchi <[email protected]>
AuthorDate: Wed Jan 8 00:07:59 2025 +0800

    [opt](iceberg docker)Use PostgreSQL as the backend for the Iceberg REST 
server.  (#46289)
    
    ### What problem does this PR solve?
    
    Problem Summary:
    Use PostgreSQL as the backend for the Iceberg REST server. SQLite does
    not support concurrent writes, so when multiple engines attempt to
    operate on the same database simultaneously, a database lock error
    will occur.
---
 .../docker-compose/iceberg/entrypoint.sh.tpl       | 37 +++++++++++-----------
 .../docker-compose/iceberg/iceberg.yaml.tpl        | 19 +++++++++++
 2 files changed, 38 insertions(+), 18 deletions(-)

diff --git a/docker/thirdparties/docker-compose/iceberg/entrypoint.sh.tpl 
b/docker/thirdparties/docker-compose/iceberg/entrypoint.sh.tpl
index a4b27bdd6c0..45d9bbf3592 100644
--- a/docker/thirdparties/docker-compose/iceberg/entrypoint.sh.tpl
+++ b/docker/thirdparties/docker-compose/iceberg/entrypoint.sh.tpl
@@ -23,24 +23,25 @@ start-worker.sh spark://doris--spark-iceberg:7077
 start-history-server.sh
 start-thriftserver.sh --driver-java-options "-Dderby.system.home=/tmp/derby"
 
-
-
-ls /mnt/scripts/create_preinstalled_scripts/iceberg/*.sql | xargs -n 1 -I {} 
bash -c '
-    START_TIME=$(date +%s)
-    spark-sql --master spark://doris--spark-iceberg:7077 --conf 
spark.sql.extensions=org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions
 -f {} 
-    END_TIME=$(date +%s)
-    EXECUTION_TIME=$((END_TIME - START_TIME))
-    echo "Script: {} executed in $EXECUTION_TIME seconds"
-'
-
-ls /mnt/scripts/create_preinstalled_scripts/paimon/*.sql | xargs -n 1 -I {} 
bash -c '
-    START_TIME=$(date +%s)
-    spark-sql --master  spark://doris--spark-iceberg:7077 --conf 
spark.sql.extensions=org.apache.paimon.spark.extensions.PaimonSparkSessionExtensions
 -f {} 
-    END_TIME=$(date +%s)
-    EXECUTION_TIME=$((END_TIME - START_TIME))
-    echo "Script: {} executed in $EXECUTION_TIME seconds"
-'
-
+# The creation of a Spark SQL client is time-consuming,
+# and reopening a new client for each SQL file execution leads to significant 
overhead.
+# To reduce the time spent on creating clients,
+# we group these files together and execute them using a single client.
+# This approach can reduce the time from 150s to 40s.
+
+START_TIME1=$(date +%s)
+find /mnt/scripts/create_preinstalled_scripts/iceberg -name '*.sql' | sed 
's|^|source |' | sed 's|$|;|'> iceberg_total.sql
+spark-sql --master spark://doris--spark-iceberg:7077 --conf 
spark.sql.extensions=org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions
 -f iceberg_total.sql 
+END_TIME1=$(date +%s)
+EXECUTION_TIME1=$((END_TIME1 - START_TIME1))
+echo "Script iceberg total: {} executed in $EXECUTION_TIME1 seconds"
+
+START_TIME2=$(date +%s)
+find /mnt/scripts/create_preinstalled_scripts/paimon -name '*.sql' | sed 
's|^|source |' | sed 's|$|;|'> paimon_total.sql
+spark-sql --master  spark://doris--spark-iceberg:7077 --conf 
spark.sql.extensions=org.apache.paimon.spark.extensions.PaimonSparkSessionExtensions
 -f paimon_total.sql
+END_TIME2=$(date +%s)
+EXECUTION_TIME2=$((END_TIME2 - START_TIME2))
+echo "Script paimon total: {} executed in $EXECUTION_TIME2 seconds"
 
 touch /mnt/SUCCESS;
 
diff --git a/docker/thirdparties/docker-compose/iceberg/iceberg.yaml.tpl 
b/docker/thirdparties/docker-compose/iceberg/iceberg.yaml.tpl
index 38491f645a9..fa4f8d1cca4 100644
--- a/docker/thirdparties/docker-compose/iceberg/iceberg.yaml.tpl
+++ b/docker/thirdparties/docker-compose/iceberg/iceberg.yaml.tpl
@@ -18,6 +18,7 @@
 version: "3"
 
 services:
+
   spark-iceberg:
     image: tabulario/spark-iceberg
     container_name: doris--spark-iceberg
@@ -47,6 +48,19 @@ services:
       interval: 5s
       timeout: 120s
       retries: 120
+
+  postgres:
+    image: postgis/postgis:14-3.3
+    container_name: doris--postgres
+    environment:
+      POSTGRES_PASSWORD: 123456
+      POSTGRES_USER: root
+      POSTGRES_DB: iceberg
+    volumes:
+      - ./data/input/pgdata:/var/lib/postgresql/data
+    networks:
+      - doris--iceberg
+
   rest:
     image: tabulario/iceberg-rest
     container_name: doris--iceberg-rest
@@ -54,6 +68,8 @@ services:
       - ${REST_CATALOG_PORT}:8181
     volumes:
       - ./data:/mnt/data
+    depends_on:
+      - postgres
     environment:
       - AWS_ACCESS_KEY_ID=admin
       - AWS_SECRET_ACCESS_KEY=password
@@ -61,6 +77,9 @@ services:
       - CATALOG_WAREHOUSE=s3a://warehouse/wh/
       - CATALOG_IO__IMPL=org.apache.iceberg.aws.s3.S3FileIO
       - CATALOG_S3_ENDPOINT=http://minio:9000
+      - CATALOG_URI=jdbc:postgresql://postgres:5432/iceberg
+      - CATALOG_JDBC_USER=root
+      - CATALOG_JDBC_PASSWORD=123456
     networks:
       - doris--iceberg
     entrypoint: /bin/bash /mnt/data/input/script/rest_init.sh


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to