This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch branch-4.1
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-4.1 by this push:
new eed7e9e3aa2 branch-4.1: [upgrade](docker) upgrade iceberg docker spark
version to 4.0 (#61149) (#61838)
eed7e9e3aa2 is described below
commit eed7e9e3aa24f6d6f84959b4a827db5b4862d7ef
Author: Mingyu Chen (Rayner) <[email protected]>
AuthorDate: Fri Mar 27 19:32:55 2026 -0700
branch-4.1: [upgrade](docker) upgrade iceberg docker spark version to 4.0
(#61149) (#61838)
bp #61149
Co-authored-by: daidai <[email protected]>
---
.../docker-compose/iceberg/entrypoint.sh.tpl | 14 +-
.../docker-compose/iceberg/iceberg.yaml.tpl | 14 +-
.../create_preinstalled_scripts/paimon/run02.sql | 190 ++++++++++-----------
.../create_preinstalled_scripts/paimon/run09.sql | 1 +
.../docker-compose/iceberg/spark-defaults.conf | 40 +++--
docker/thirdparties/run-thirdparties-docker.sh | 18 +-
.../paimon/test_paimon_partition_table.out | 38 ++---
.../paimon/test_paimon_partition_table.groovy | 14 +-
.../paimon/test_paimon_schema_change.groovy | 39 +++--
9 files changed, 189 insertions(+), 179 deletions(-)
diff --git a/docker/thirdparties/docker-compose/iceberg/entrypoint.sh.tpl
b/docker/thirdparties/docker-compose/iceberg/entrypoint.sh.tpl
index a722514bbb4..4232b4f3cc1 100644
--- a/docker/thirdparties/docker-compose/iceberg/entrypoint.sh.tpl
+++ b/docker/thirdparties/docker-compose/iceberg/entrypoint.sh.tpl
@@ -25,9 +25,16 @@ done
set -ex
-# remove
/opt/spark/jars/iceberg-aws-bundle-1.5.0.jar\:/opt/spark/jars/iceberg-spark-runtime-3.5_2.12-1.5.0.jar
-rm /opt/spark/jars/iceberg-aws-bundle-1.5.0.jar
-rm /opt/spark/jars/iceberg-spark-runtime-3.5_2.12-1.5.0.jar
+mkdir -p /opt/spark/events
+
+for f in /opt/spark/sbin/*; do
+ ln -s $f /usr/local/bin/$(basename $f)
+done
+
+for f in /opt/spark/bin/*; do
+ ln -s $f /usr/local/bin/$(basename $f)
+done
+
start-master.sh -p 7077
start-worker.sh spark://doris--spark-iceberg:7077
@@ -54,7 +61,6 @@ END_TIME2=$(date +%s)
EXECUTION_TIME2=$((END_TIME2 - START_TIME2))
echo "Script paimon total: {} executed in $EXECUTION_TIME2 seconds"
-
START_TIME3=$(date +%s)
find /mnt/scripts/create_preinstalled_scripts/iceberg_load -name '*.sql' | sed
's|^|source |' | sed 's|$|;|'> iceberg_load_total.sql
spark-sql --master spark://doris--spark-iceberg:7077 --conf
spark.sql.extensions=org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions
-f iceberg_load_total.sql
diff --git a/docker/thirdparties/docker-compose/iceberg/iceberg.yaml.tpl
b/docker/thirdparties/docker-compose/iceberg/iceberg.yaml.tpl
index bb86c5742ba..83c1ee6d031 100644
--- a/docker/thirdparties/docker-compose/iceberg/iceberg.yaml.tpl
+++ b/docker/thirdparties/docker-compose/iceberg/iceberg.yaml.tpl
@@ -20,7 +20,7 @@ version: "3"
services:
spark-iceberg:
- image: tabulario/spark-iceberg:3.5.1_1.5.0
+ image: apache/spark:4.0.0
container_name: doris--spark-iceberg
hostname: doris--spark-iceberg
depends_on:
@@ -29,20 +29,20 @@ services:
mc:
condition: service_completed_successfully
volumes:
- - ./data/output/spark-warehouse:/home/iceberg/warehouse
- - ./data/output/spark-notebooks:/home/iceberg/notebooks/notebooks
+ - ./data/output/spark-warehouse:/opt/spark/warehouse
- ./data:/mnt/data
- ./scripts:/mnt/scripts
- ./spark-defaults.conf:/opt/spark/conf/spark-defaults.conf
- -
./data/input/jars/paimon-spark-3.5-1.0.1.jar:/opt/spark/jars/paimon-spark-3.5-1.0.1.jar
- -
./data/input/jars/paimon-s3-1.0.1.jar:/opt/spark/jars/paimon-s3-1.0.1.jar
- -
./data/input/jars/iceberg-aws-bundle-1.10.0.jar:/opt/spark/jars/iceberg-aws-bundle-1.10.0.jar
- -
./data/input/jars/iceberg-spark-runtime-3.5_2.12-1.10.0.jar:/opt/spark/jars/iceberg-spark-runtime-3.5_2.12-1.10.0.jar
+ -
./data/input/jars/iceberg-aws-bundle-1.10.1.jar:/opt/spark/jars/iceberg-aws-bundle-1.10.1.jar
+ -
./data/input/jars/iceberg-spark-runtime-4.0_2.13-1.10.1.jar:/opt/spark/jars/iceberg-spark-runtime-4.0_2.13-1.10.1.jar
+ -
./data/input/jars/paimon-s3-1.3.1.jar:/opt/spark/jars/paimon-s3-1.3.1.jar
+ -
./data/input/jars/paimon-spark-4.0-1.3.1.jar:/opt/spark/jars/paimon-spark-4.0-1.3.1.jar
environment:
- AWS_ACCESS_KEY_ID=admin
- AWS_SECRET_ACCESS_KEY=password
- AWS_REGION=us-east-1
entrypoint: /bin/sh /mnt/scripts/entrypoint.sh
+ user: root
networks:
- doris--iceberg
healthcheck:
diff --git
a/docker/thirdparties/docker-compose/iceberg/scripts/create_preinstalled_scripts/paimon/run02.sql
b/docker/thirdparties/docker-compose/iceberg/scripts/create_preinstalled_scripts/paimon/run02.sql
index 1d199a2bec8..a8024a8b660 100644
---
a/docker/thirdparties/docker-compose/iceberg/scripts/create_preinstalled_scripts/paimon/run02.sql
+++
b/docker/thirdparties/docker-compose/iceberg/scripts/create_preinstalled_scripts/paimon/run02.sql
@@ -4,102 +4,100 @@ create database if not exists test_paimon_schema_change;
use test_paimon_schema_change;
-CREATE TABLE sc_orc_pk (
- id INT,
- name STRING,
- age INT
-) USING paimon
-TBLPROPERTIES ('primary-key' = 'id', "file.format" =
"orc",'deletion-vectors.enabled' = 'true');
-
-INSERT INTO sc_orc_pk (id, name, age) VALUES (1, 'Alice', 30), (2, 'Bob', 25);
-INSERT INTO sc_orc_pk (id, name, age) VALUES (3, 'Charlie', 28);
-ALTER TABLE sc_orc_pk ADD COLUMNS (city STRING);
-INSERT INTO sc_orc_pk (id, name, age, city) VALUES (4, 'Charlie', 28, 'New
York');
-INSERT INTO sc_orc_pk (id, name, age, city) VALUES (5, 'David', 32, 'Los
Angeles');
-ALTER TABLE sc_orc_pk RENAME COLUMN name TO full_name;
-INSERT INTO sc_orc_pk (id, full_name, age, city) VALUES (6, 'David', 35, 'Los
Angeles');
-INSERT INTO sc_orc_pk (id, full_name, age, city) VALUES (7, 'Eve', 27, 'San
Francisco');
-ALTER TABLE sc_orc_pk DROP COLUMN age;
-INSERT INTO sc_orc_pk (id, full_name, city) VALUES (8, 'Eve', 'San Francisco');
-INSERT INTO sc_orc_pk (id, full_name, city) VALUES (9, 'Frank', 'Chicago');
-ALTER TABLE sc_orc_pk CHANGE COLUMN id id BIGINT;
-INSERT INTO sc_orc_pk (id, full_name, city) VALUES (10000000000, 'Frank',
'Chicago');
-INSERT INTO sc_orc_pk (id, full_name, city) VALUES (10, 'Grace', 'Seattle');
-
-ALTER TABLE sc_orc_pk ADD COLUMN salary DECIMAL(10,2) FIRST;
-INSERT INTO sc_orc_pk (id, full_name, city, salary) VALUES (11, 'Grace',
'Seattle', 5000.00);
-INSERT INTO sc_orc_pk (id, full_name, city, salary) VALUES (12, 'Heidi',
'Boston', 6000.00);
-
-ALTER TABLE sc_orc_pk RENAME COLUMN city TO location;
-INSERT INTO sc_orc_pk (id, full_name, location, salary) VALUES (13, 'Heidi',
'Boston', 6000.00);
-INSERT INTO sc_orc_pk (id, full_name, location, salary) VALUES (14, 'Ivan',
'Miami', 7000.00);
-
-ALTER TABLE sc_orc_pk CHANGE COLUMN salary salary DECIMAL(12,2);
-INSERT INTO sc_orc_pk (id, full_name, location, salary) VALUES (15, 'Ivan',
'Miami', 7000.00);
-INSERT INTO sc_orc_pk (id, full_name, location, salary) VALUES (16, 'Judy',
'Denver', 8000.00);
-
-ALTER TABLE sc_orc_pk ALTER COLUMN salary AFTER location;
-INSERT INTO sc_orc_pk (id, full_name, location, salary) VALUES (17, 'Stm',
'ttttt', 8000.00);
-INSERT INTO sc_orc_pk (id, full_name, location, salary) VALUES (18, 'Ken',
'Austin', 9000.00);
-
-ALTER TABLE sc_orc_pk ALTER COLUMN full_name FIRST;
-INSERT INTO sc_orc_pk (id, full_name, location, salary) VALUES (19, 'AAAA',
'BBBB', 9000.00);
-INSERT INTO sc_orc_pk (id, full_name, location, salary) VALUES (20, 'Laura',
'Portland', 10000.00);
-
-
-
-
-
-CREATE TABLE sc_parquet_pk (
- id INT,
- name STRING,
- age INT
-) USING paimon
-TBLPROPERTIES ('primary-key' = 'id',"file.format" =
"parquet",'deletion-vectors.enabled' = 'true');
-
-INSERT INTO sc_parquet_pk (id, name, age) VALUES (1, 'Alice', 30), (2, 'Bob',
25);
-INSERT INTO sc_parquet_pk (id, name, age) VALUES (3, 'Charlie', 28);
-
-ALTER TABLE sc_parquet_pk ADD COLUMNS (city STRING);
-INSERT INTO sc_parquet_pk (id, name, age, city) VALUES (3, 'Charlie', 28, 'New
York');
-INSERT INTO sc_parquet_pk (id, name, age, city) VALUES (4, 'David', 32, 'Los
Angeles');
-
-ALTER TABLE sc_parquet_pk RENAME COLUMN name TO full_name;
-INSERT INTO sc_parquet_pk (id, full_name, age, city) VALUES (4, 'David', 35,
'Los Angeles');
-INSERT INTO sc_parquet_pk (id, full_name, age, city) VALUES (5, 'Eve', 27,
'San Francisco');
-
-ALTER TABLE sc_parquet_pk DROP COLUMN age;
-INSERT INTO sc_parquet_pk (id, full_name, city) VALUES (5, 'Eve', 'San
Francisco');
-INSERT INTO sc_parquet_pk (id, full_name, city) VALUES (6, 'Frank', 'Chicago');
-
-ALTER TABLE sc_parquet_pk CHANGE COLUMN id id BIGINT;
-INSERT INTO sc_parquet_pk (id, full_name, city) VALUES (10000000000, 'Frank',
'Chicago');
-INSERT INTO sc_parquet_pk (id, full_name, city) VALUES (7, 'Grace', 'Seattle');
-
-ALTER TABLE sc_parquet_pk ADD COLUMN salary DECIMAL(10,2) FIRST;
-INSERT INTO sc_parquet_pk (id, full_name, city, salary) VALUES (6, 'Grace',
'Seattle', 5000.00);
-INSERT INTO sc_parquet_pk (id, full_name, city, salary) VALUES (8, 'Heidi',
'Boston', 6000.00);
-
-ALTER TABLE sc_parquet_pk RENAME COLUMN city TO location;
-INSERT INTO sc_parquet_pk (id, full_name, location, salary) VALUES (7,
'Heidi', 'Boston', 6000.00);
-INSERT INTO sc_parquet_pk (id, full_name, location, salary) VALUES (9, 'Ivan',
'Miami', 7000.00);
-
-ALTER TABLE sc_parquet_pk CHANGE COLUMN salary salary DECIMAL(12,2);
-INSERT INTO sc_parquet_pk (id, full_name, location, salary) VALUES (8, 'Ivan',
'Miami', 7000.00);
-INSERT INTO sc_parquet_pk (id, full_name, location, salary) VALUES (10,
'Judy', 'Denver', 8000.00);
-
-ALTER TABLE sc_parquet_pk ALTER COLUMN salary AFTER location;
-INSERT INTO sc_parquet_pk (id, full_name, location, salary) VALUES (9, 'Stm',
'ttttt', 8000.00);
-INSERT INTO sc_parquet_pk (id, full_name, location, salary) VALUES (11, 'Ken',
'Austin', 9000.00);
-
-ALTER TABLE sc_parquet_pk ALTER COLUMN full_name FIRST;
-INSERT INTO sc_parquet_pk (id, full_name, location, salary) VALUES (10,
'AAAA', 'BBBB', 9000.00);
-INSERT INTO sc_parquet_pk (id, full_name, location, salary) VALUES (12,
'Laura', 'Portland', 10000.00);
-
-
-
-
-
+-- paimon 1.3 : org.apache.spark.sql.execution.QueryExecutionException:
java.lang.UnsupportedOperationException: Cannot update primary key
+-- https://github.com/apache/incubator-paimon/pull/6264
+
+-- CREATE TABLE sc_orc_pk (
+-- id INT,
+-- name STRING,
+-- age INT
+-- ) USING paimon
+-- TBLPROPERTIES ('primary-key' = 'id', "file.format" =
"orc",'deletion-vectors.enabled' = 'true');
+
+-- INSERT INTO sc_orc_pk (id, name, age) VALUES (1, 'Alice', 30), (2, 'Bob',
25);
+-- INSERT INTO sc_orc_pk (id, name, age) VALUES (3, 'Charlie', 28);
+-- ALTER TABLE sc_orc_pk ADD COLUMNS (city STRING);
+-- INSERT INTO sc_orc_pk (id, name, age, city) VALUES (4, 'Charlie', 28, 'New
York');
+-- INSERT INTO sc_orc_pk (id, name, age, city) VALUES (5, 'David', 32, 'Los
Angeles');
+-- ALTER TABLE sc_orc_pk RENAME COLUMN name TO full_name;
+-- INSERT INTO sc_orc_pk (id, full_name, age, city) VALUES (6, 'David', 35,
'Los Angeles');
+-- INSERT INTO sc_orc_pk (id, full_name, age, city) VALUES (7, 'Eve', 27, 'San
Francisco');
+-- ALTER TABLE sc_orc_pk DROP COLUMN age;
+-- INSERT INTO sc_orc_pk (id, full_name, city) VALUES (8, 'Eve', 'San
Francisco');
+-- INSERT INTO sc_orc_pk (id, full_name, city) VALUES (9, 'Frank', 'Chicago');
+-- ALTER TABLE sc_orc_pk CHANGE COLUMN id id BIGINT;
+-- INSERT INTO sc_orc_pk (id, full_name, city) VALUES (10000000000, 'Frank',
'Chicago');
+-- INSERT INTO sc_orc_pk (id, full_name, city) VALUES (10, 'Grace', 'Seattle');
+
+-- ALTER TABLE sc_orc_pk ADD COLUMN salary DECIMAL(10,2) FIRST;
+-- INSERT INTO sc_orc_pk (id, full_name, city, salary) VALUES (11, 'Grace',
'Seattle', 5000.00);
+-- INSERT INTO sc_orc_pk (id, full_name, city, salary) VALUES (12, 'Heidi',
'Boston', 6000.00);
+
+-- ALTER TABLE sc_orc_pk RENAME COLUMN city TO location;
+-- INSERT INTO sc_orc_pk (id, full_name, location, salary) VALUES (13,
'Heidi', 'Boston', 6000.00);
+-- INSERT INTO sc_orc_pk (id, full_name, location, salary) VALUES (14, 'Ivan',
'Miami', 7000.00);
+
+-- ALTER TABLE sc_orc_pk CHANGE COLUMN salary salary DECIMAL(12,2);
+-- INSERT INTO sc_orc_pk (id, full_name, location, salary) VALUES (15, 'Ivan',
'Miami', 7000.00);
+-- INSERT INTO sc_orc_pk (id, full_name, location, salary) VALUES (16, 'Judy',
'Denver', 8000.00);
+
+-- ALTER TABLE sc_orc_pk ALTER COLUMN salary AFTER location;
+-- INSERT INTO sc_orc_pk (id, full_name, location, salary) VALUES (17, 'Stm',
'ttttt', 8000.00);
+-- INSERT INTO sc_orc_pk (id, full_name, location, salary) VALUES (18, 'Ken',
'Austin', 9000.00);
+
+-- ALTER TABLE sc_orc_pk ALTER COLUMN full_name FIRST;
+-- INSERT INTO sc_orc_pk (id, full_name, location, salary) VALUES (19, 'AAAA',
'BBBB', 9000.00);
+-- INSERT INTO sc_orc_pk (id, full_name, location, salary) VALUES (20,
'Laura', 'Portland', 10000.00);
+
+
+
+
+
+-- CREATE TABLE sc_parquet_pk (
+-- id INT,
+-- name STRING,
+-- age INT
+-- ) USING paimon
+-- TBLPROPERTIES ('primary-key' = 'id', "file.format" =
"parquet",'deletion-vectors.enabled' = 'true');
+
+-- INSERT INTO sc_parquet_pk (id, name, age) VALUES (1, 'Alice', 30), (2,
'Bob', 25);
+-- INSERT INTO sc_parquet_pk (id, name, age) VALUES (3, 'Charlie', 28);
+
+-- ALTER TABLE sc_parquet_pk ADD COLUMNS (city STRING);
+-- INSERT INTO sc_parquet_pk (id, name, age, city) VALUES (3, 'Charlie', 28,
'New York');
+-- INSERT INTO sc_parquet_pk (id, name, age, city) VALUES (4, 'David', 32,
'Los Angeles');
+
+-- ALTER TABLE sc_parquet_pk RENAME COLUMN name TO full_name;
+-- INSERT INTO sc_parquet_pk (id, full_name, age, city) VALUES (4, 'David',
35, 'Los Angeles');
+-- INSERT INTO sc_parquet_pk (id, full_name, age, city) VALUES (5, 'Eve', 27,
'San Francisco');
+
+-- ALTER TABLE sc_parquet_pk DROP COLUMN age;
+-- INSERT INTO sc_parquet_pk (id, full_name, city) VALUES (5, 'Eve', 'San
Francisco');
+-- INSERT INTO sc_parquet_pk (id, full_name, city) VALUES (6, 'Frank',
'Chicago');
+
+-- ALTER TABLE sc_parquet_pk CHANGE COLUMN id id BIGINT;
+-- INSERT INTO sc_parquet_pk (id, full_name, city) VALUES (10000000000,
'Frank', 'Chicago');
+-- INSERT INTO sc_parquet_pk (id, full_name, city) VALUES (7, 'Grace',
'Seattle');
+
+-- ALTER TABLE sc_parquet_pk ADD COLUMN salary DECIMAL(10,2) FIRST;
+-- INSERT INTO sc_parquet_pk (id, full_name, city, salary) VALUES (6, 'Grace',
'Seattle', 5000.00);
+-- INSERT INTO sc_parquet_pk (id, full_name, city, salary) VALUES (8, 'Heidi',
'Boston', 6000.00);
+
+-- ALTER TABLE sc_parquet_pk RENAME COLUMN city TO location;
+-- INSERT INTO sc_parquet_pk (id, full_name, location, salary) VALUES (7,
'Heidi', 'Boston', 6000.00);
+-- INSERT INTO sc_parquet_pk (id, full_name, location, salary) VALUES (9,
'Ivan', 'Miami', 7000.00);
+
+-- ALTER TABLE sc_parquet_pk CHANGE COLUMN salary salary DECIMAL(12,2);
+-- INSERT INTO sc_parquet_pk (id, full_name, location, salary) VALUES (8,
'Ivan', 'Miami', 7000.00);
+-- INSERT INTO sc_parquet_pk (id, full_name, location, salary) VALUES (10,
'Judy', 'Denver', 8000.00);
+
+-- ALTER TABLE sc_parquet_pk ALTER COLUMN salary AFTER location;
+-- INSERT INTO sc_parquet_pk (id, full_name, location, salary) VALUES (9,
'Stm', 'ttttt', 8000.00);
+-- INSERT INTO sc_parquet_pk (id, full_name, location, salary) VALUES (11,
'Ken', 'Austin', 9000.00);
+
+-- ALTER TABLE sc_parquet_pk ALTER COLUMN full_name FIRST;
+-- INSERT INTO sc_parquet_pk (id, full_name, location, salary) VALUES (10,
'AAAA', 'BBBB', 9000.00);
+-- INSERT INTO sc_parquet_pk (id, full_name, location, salary) VALUES (12,
'Laura', 'Portland', 10000.00);
diff --git
a/docker/thirdparties/docker-compose/iceberg/scripts/create_preinstalled_scripts/paimon/run09.sql
b/docker/thirdparties/docker-compose/iceberg/scripts/create_preinstalled_scripts/paimon/run09.sql
index 2c3998c4d90..6e1f71cdb0a 100644
---
a/docker/thirdparties/docker-compose/iceberg/scripts/create_preinstalled_scripts/paimon/run09.sql
+++
b/docker/thirdparties/docker-compose/iceberg/scripts/create_preinstalled_scripts/paimon/run09.sql
@@ -141,4 +141,5 @@ INSERT INTO
test_paimon_time_travel_db.tbl_time_travel_expired_tag VALUES
CALL sys.create_tag(table =>
'test_paimon_time_travel_db.tbl_time_travel_expired_tag', tag => 't_exp_3',
snapshot => 3);
-- expire snapshots so tag points to expired snapshot file
+ ALTER TABLE test_paimon_time_travel_db.tbl_time_travel_expired_tag SET
TBLPROPERTIES ('snapshot.num-retained.min' = '1');
CALL sys.expire_snapshots(table =>
'test_paimon_time_travel_db.tbl_time_travel_expired_tag', retain_max => 1);
diff --git a/docker/thirdparties/docker-compose/iceberg/spark-defaults.conf
b/docker/thirdparties/docker-compose/iceberg/spark-defaults.conf
index a49dc2173b7..8336a2afcf8 100644
--- a/docker/thirdparties/docker-compose/iceberg/spark-defaults.conf
+++ b/docker/thirdparties/docker-compose/iceberg/spark-defaults.conf
@@ -19,23 +19,27 @@
# This is useful for setting default environmental settings.
# Example:
-spark.sql.session.timeZone Asia/Shanghai
-spark.sql.catalog.demo org.apache.iceberg.spark.SparkCatalog
-spark.sql.catalog.demo.type rest
-spark.sql.catalog.demo.uri http://rest:8181
-spark.sql.catalog.demo.io-impl org.apache.iceberg.aws.s3.S3FileIO
-spark.sql.catalog.demo.warehouse s3://warehouse/wh/
-spark.sql.catalog.demo.s3.endpoint http://minio:9000
-spark.sql.defaultCatalog demo
-spark.eventLog.enabled true
-spark.eventLog.dir /home/iceberg/spark-events
-spark.history.fs.logDirectory /home/iceberg/spark-events
-spark.sql.catalogImplementation in-memory
+spark.sql.session.timeZone Asia/Shanghai
+spark.sql.catalog.demo
org.apache.iceberg.spark.SparkCatalog
+spark.sql.catalog.demo.type rest
+spark.sql.catalog.demo.uri http://rest:8181
+spark.sql.catalog.demo.io-impl
org.apache.iceberg.aws.s3.S3FileIO
+spark.sql.catalog.demo.warehouse s3://warehouse/wh/
+spark.sql.catalog.demo.s3.endpoint http://minio:9000
+spark.sql.catalog.demo.s3.path-style-access true
+spark.sql.defaultCatalog demo
+
+
+spark.eventLog.enabled true
+spark.eventLog.dir /opt/spark/events
+spark.history.fs.logDirectory /opt/spark/events
+
+spark.sql.catalogImplementation in-memory
# paimon
-spark.sql.catalog.paimon org.apache.paimon.spark.SparkCatalog
-spark.sql.catalog.paimon.warehouse s3://warehouse/wh
-spark.sql.catalog.paimon.s3.endpoint http://minio:9000
-spark.sql.catalog.paimon.s3.access-key admin
-spark.sql.catalog.paimon.s3.secret-key password
-spark.sql.catalog.paimon.s3.region us-east-1
\ No newline at end of file
+spark.sql.catalog.paimon
org.apache.paimon.spark.SparkCatalog
+spark.sql.catalog.paimon.warehouse s3://warehouse/wh
+spark.sql.catalog.paimon.s3.endpoint http://minio:9000
+spark.sql.catalog.paimon.s3.access-key admin
+spark.sql.catalog.paimon.s3.secret-key password
+spark.sql.catalog.paimon.s3.region us-east-1
\ No newline at end of file
diff --git a/docker/thirdparties/run-thirdparties-docker.sh
b/docker/thirdparties/run-thirdparties-docker.sh
index cd8540d5d32..12e2e9b7ba4 100755
--- a/docker/thirdparties/run-thirdparties-docker.sh
+++ b/docker/thirdparties/run-thirdparties-docker.sh
@@ -584,27 +584,15 @@ start_iceberg() {
echo "${ICEBERG_DIR}/data does not exist"
cd "${ICEBERG_DIR}" \
&& rm -f iceberg_data*.zip \
- && wget -P "${ROOT}"/docker-compose/iceberg
https://"${s3BucketName}.${s3Endpoint}"/regression/datalake/pipeline_data/iceberg_data_paimon_101.zip
\
- && sudo unzip iceberg_data_paimon_101.zip \
+ && wget -P "${ROOT}"/docker-compose/iceberg
https://"${s3BucketName}.${s3Endpoint}"/regression/datalake/pipeline_data/iceberg_data_spark40.zip
\
+ && sudo unzip iceberg_data_spark40.zip \
&& sudo mv iceberg_data data \
- && sudo rm -rf iceberg_data_paimon_101.zip
+ && sudo rm -rf iceberg_data_spark40.zip
cd -
else
echo "${ICEBERG_DIR}/data exist, continue !"
fi
- if [[ ! -f
"${ICEBERG_DIR}/data/input/jars/iceberg-aws-bundle-1.10.0.jar" ]]; then
- echo "iceberg 1.10.0 jars does not exist"
- cd "${ICEBERG_DIR}" \
- && rm -f iceberg_1_10_0*.jars.tar.gz\
- && wget -P "${ROOT}"/docker-compose/iceberg
https://"${s3BucketName}.${s3Endpoint}"/regression/datalake/pipeline_data/iceberg_1_10_0.jars.tar.gz
\
- && sudo tar xzvf iceberg_1_10_0.jars.tar.gz -C "data/input/jars" \
- && sudo rm -rf iceberg_1_10_0.jars.tar.gz
- cd -
- else
- echo "iceberg 1.10.0 jars exist, continue !"
- fi
-
sudo docker compose -f "${ROOT}"/docker-compose/iceberg/iceberg.yaml
--env-file "${ROOT}"/docker-compose/iceberg/iceberg.env up -d --wait
fi
}
diff --git
a/regression-test/data/external_table_p0/paimon/test_paimon_partition_table.out
b/regression-test/data/external_table_p0/paimon/test_paimon_partition_table.out
index f5a5af51ad0..3230ad01841 100644
---
a/regression-test/data/external_table_p0/paimon/test_paimon_partition_table.out
+++
b/regression-test/data/external_table_p0/paimon/test_paimon_partition_table.out
@@ -1,30 +1,30 @@
-- This file is automatically generated. You should know what you did if you
want to edit this
-- !show_partition_sales_by_date --
-sale_date=2024-01-15 sale_date 2 2051 1
-sale_date=2024-01-16 sale_date 2 3899 2
-sale_date=2024-01-17 sale_date 1 1959 1
+sale_date=2024-01-15 sale_date 2 2216 1
+sale_date=2024-01-16 sale_date 2 4217 2
+sale_date=2024-01-17 sale_date 1 2118 1
-- !show_partition_sales_by_region --
-region=China-Beijing region 1 2240 1
-region=Japan-Tokyo region 1 2233 1
-region=USA-California region 1 2268 1
+region=China-Beijing region 1 2425 1
+region=Japan-Tokyo region 1 2420 1
+region=USA-California region 1 2454 1
-- !show_partition_sales_by_date_region --
-sale_date=2024-01-15/region=China-Beijing sale_date,region 1
2426 1
-sale_date=2024-01-15/region=Japan-Tokyo sale_date,region 1
2412 1
-sale_date=2024-01-15/region=USA-California sale_date,region 1
2454 1
-sale_date=2024-01-16/region=China-Shanghai sale_date,region 1
2433 1
-sale_date=2024-01-16/region=Japan-Osaka sale_date,region 1
2433 1
-sale_date=2024-01-16/region=USA-New York sale_date,region 1
2440 1
+sale_date=2024-01-15/region=China-Beijing sale_date,region 1
2627 1
+sale_date=2024-01-15/region=Japan-Tokyo sale_date,region 1
2614 1
+sale_date=2024-01-15/region=USA-California sale_date,region 1
2655 1
+sale_date=2024-01-16/region=China-Shanghai sale_date,region 1
2636 1
+sale_date=2024-01-16/region=Japan-Osaka sale_date,region 1
2636 1
+sale_date=2024-01-16/region=USA-New York sale_date,region 1
2643 1
-- !show_partition_events_by_hour --
-hour_partition=2024-01-15-10 hour_partition 2 2181 1
-hour_partition=2024-01-15-11 hour_partition 2 4170 2
-hour_partition=2024-01-15-14 hour_partition 2 2190 1
+hour_partition=2024-01-15-10 hour_partition 2 2361 1
+hour_partition=2024-01-15-11 hour_partition 2 4515 2
+hour_partition=2024-01-15-14 hour_partition 2 2371 1
-- !show_partition_logs_by_date_hierarchy --
-year_val=2024/month_val=1/day_val=15 year_val,month_val,day_val 2
2628 1
-year_val=2024/month_val=1/day_val=16 year_val,month_val,day_val 2
4918 2
-year_val=2024/month_val=1/day_val=17 year_val,month_val,day_val 1
2456 1
-year_val=2024/month_val=2/day_val=1 year_val,month_val,day_val 1
2485 1
+year_val=2024/month_val=1/day_val=15 year_val,month_val,day_val 2
2841 1
+year_val=2024/month_val=1/day_val=16 year_val,month_val,day_val 2
5323 2
+year_val=2024/month_val=1/day_val=17 year_val,month_val,day_val 1
2658 1
+year_val=2024/month_val=2/day_val=1 year_val,month_val,day_val 1
2686 1
diff --git
a/regression-test/suites/external_table_p0/paimon/test_paimon_partition_table.groovy
b/regression-test/suites/external_table_p0/paimon/test_paimon_partition_table.groovy
index f1073977b1a..b17eb3cb934 100644
---
a/regression-test/suites/external_table_p0/paimon/test_paimon_partition_table.groovy
+++
b/regression-test/suites/external_table_p0/paimon/test_paimon_partition_table.groovy
@@ -52,8 +52,18 @@ suite("test_paimon_partition_table",
"p0,external,doris,external_docker,external
String baseQueryName = "qt_show_partition_${tableName}"
"$baseQueryName" """show partitions from ${tableName};"""
}
-
-
+/*
+mysql> show partitions from sales_by_date;
++----------------------+--------------+-------------+-----------------+-----------+
+| Partition | PartitionKey | RecordCount | FileSizeInBytes |
FileCount |
++----------------------+--------------+-------------+-----------------+-----------+
+| sale_date=2024-01-15 | sale_date | 2 | 2051 | 1
|
+| sale_date=2024-01-16 | sale_date | 2 | 3899 | 2
|
+| sale_date=2024-01-17 | sale_date | 1 | 1959 | 1
|
++----------------------+--------------+-------------+-----------------+-----------+
+3 rows in set (0.01 sec)
+FileSizeInBytes maybe changed, when upgrade paimon version.
+*/
} finally {
sql """drop catalog if exists ${catalog_name}"""
}
diff --git
a/regression-test/suites/external_table_p0/paimon/test_paimon_schema_change.groovy
b/regression-test/suites/external_table_p0/paimon/test_paimon_schema_change.groovy
index 04a7200d6c2..139d34eb491 100644
---
a/regression-test/suites/external_table_p0/paimon/test_paimon_schema_change.groovy
+++
b/regression-test/suites/external_table_p0/paimon/test_paimon_schema_change.groovy
@@ -41,25 +41,28 @@ suite("test_paimon_schema_change",
"p0,external,doris,external_docker,external_d
sql """use test_paimon_schema_change """
- qt_desc_1 """ desc sc_parquet_pk """
- qt_parquet_pk_1 """SELECT * FROM sc_parquet_pk order by id;"""
- qt_parquet_pk_2 """SELECT full_name, location FROM sc_parquet_pk
order by id;"""
- qt_parquet_pk_3 """SELECT * FROM sc_parquet_pk WHERE salary IS NULL
order by id;"""
- qt_parquet_pk_4 """SELECT * FROM sc_parquet_pk WHERE salary IS NOT
NULL order by id;"""
- qt_parquet_pk_5 """SELECT * FROM sc_parquet_pk WHERE location = 'New
York' OR location = 'Los Angeles' order by id;"""
- qt_parquet_pk_6 """SELECT * FROM sc_parquet_pk WHERE id > 5 order by
id;"""
- qt_parquet_pk_7 """SELECT * FROM sc_parquet_pk WHERE salary > 6000
order by id;"""
+ // paimon 1.3 :
org.apache.spark.sql.execution.QueryExecutionException:
java.lang.UnsupportedOperationException: Cannot update primary key
+ // https://github.com/apache/incubator-paimon/pull/6264
+ // qt_desc_1 """ desc sc_parquet_pk """
+ // qt_parquet_pk_1 """SELECT * FROM sc_parquet_pk order by id;"""
+ // qt_parquet_pk_2 """SELECT full_name, location FROM sc_parquet_pk
order by id;"""
+ // qt_parquet_pk_3 """SELECT * FROM sc_parquet_pk WHERE salary IS
NULL order by id;"""
+ // qt_parquet_pk_4 """SELECT * FROM sc_parquet_pk WHERE salary IS NOT
NULL order by id;"""
+ // qt_parquet_pk_5 """SELECT * FROM sc_parquet_pk WHERE location =
'New York' OR location = 'Los Angeles' order by id;"""
+ // qt_parquet_pk_6 """SELECT * FROM sc_parquet_pk WHERE id > 5 order
by id;"""
+ // qt_parquet_pk_7 """SELECT * FROM sc_parquet_pk WHERE salary > 6000
order by id;"""
- qt_desc_2 """ desc sc_orc_pk """
- qt_orc_pk_1 """SELECT * FROM sc_orc_pk order by id;"""
- qt_orc_pk_2 """SELECT full_name, location FROM sc_orc_pk order by
id;"""
- qt_orc_pk_3 """SELECT * FROM sc_orc_pk WHERE salary IS NULL order by
id;"""
- qt_orc_pk_4 """SELECT * FROM sc_orc_pk WHERE salary IS NOT NULL order
by id;"""
- qt_orc_pk_5 """SELECT * FROM sc_orc_pk WHERE location = 'New York' OR
location = 'Los Angeles' order by id;"""
- qt_orc_pk_6 """SELECT * FROM sc_orc_pk WHERE id > 5 order by id;"""
- qt_orc_pk_7 """SELECT * FROM sc_orc_pk WHERE salary > 6000 order by
id;"""
+
+ // qt_desc_2 """ desc sc_orc_pk """
+ // qt_orc_pk_1 """SELECT * FROM sc_orc_pk order by id;"""
+ // qt_orc_pk_2 """SELECT full_name, location FROM sc_orc_pk order by
id;"""
+ // qt_orc_pk_3 """SELECT * FROM sc_orc_pk WHERE salary IS NULL order
by id;"""
+ // qt_orc_pk_4 """SELECT * FROM sc_orc_pk WHERE salary IS NOT NULL
order by id;"""
+ // qt_orc_pk_5 """SELECT * FROM sc_orc_pk WHERE location = 'New York'
OR location = 'Los Angeles' order by id;"""
+ // qt_orc_pk_6 """SELECT * FROM sc_orc_pk WHERE id > 5 order by id;"""
+ // qt_orc_pk_7 """SELECT * FROM sc_orc_pk WHERE salary > 6000 order by
id;"""
@@ -76,8 +79,8 @@ suite("test_paimon_schema_change",
"p0,external,doris,external_docker,external_d
qt_orc_3 """select * from sc_orc where k <= 1;"""
- qt_count_1 """ select count(*) from sc_parquet_pk;"""
- qt_count_2 """ select count(*) from sc_orc_pk;"""
+ // qt_count_1 """ select count(*) from sc_parquet_pk;"""
+ // qt_count_2 """ select count(*) from sc_orc_pk;"""
qt_count_3 """ select count(*) from sc_parquet;"""
qt_count_4 """ select count(*) from sc_orc;"""
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]