This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch branch-4.1
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-4.1 by this push:
     new af7b2cefdac branch-4.1:[fix](iceberg)fix iceberg write special partition error. (#64225)
af7b2cefdac is described below

commit af7b2cefdacab6c25877bd0ed86514e688991e3e
Author: daidai <[email protected]>
AuthorDate: Tue Jun 9 10:07:54 2026 +0800

    branch-4.1:[fix](iceberg)fix iceberg write special partition error. (#64225)
    
    ### What problem does this PR solve?
    
    Issue Number: close #xxx
    
    Related PR: #xxx
    
    Problem Summary:
    
    ### Release note
    
    None
    
    ### Check List (For Author)
    
    - Test <!-- At least one of them must be included. -->
        - [ ] Regression test
        - [ ] Unit Test
        - [ ] Manual test (add detailed scripts or steps below)
        - [ ] No need to test or manual test. Explain why:
    - [ ] This is a refactor/code format and no logic has been changed.
            - [ ] Previous test can cover this change.
            - [ ] No code files have been changed.
            - [ ] Other reason <!-- Add your reason?  -->
    
    - Behavior changed:
        - [ ] No.
        - [ ] Yes. <!-- Explain the behavior change -->
    
    - Does this need documentation?
        - [ ] No.
    - [ ] Yes. <!-- Add document PR link here. eg:
    https://github.com/apache/doris-website/pull/1214 -->
    
    ### Check List (For Reviewer who merge this PR)
    
    - [ ] Confirm the release note
    - [ ] Confirm test cases
    - [ ] Confirm document
    - [ ] Add branch pick label <!-- Add branch pick label that this PR
    should merge into -->
---
 .../sink/writer/iceberg/iceberg_partition_path.cpp | 52 +++++++++++++
 .../sink/writer/iceberg/iceberg_partition_path.h   | 33 ++++++++
 .../sink/writer/iceberg/viceberg_table_writer.cpp  |  4 +-
 .../writer/iceberg/iceberg_partition_path_test.cpp | 40 ++++++++++
 .../write/test_iceberg_write_partition_path.groovy | 88 ++++++++++++++++++++++
 5 files changed, 215 insertions(+), 2 deletions(-)

diff --git a/be/src/exec/sink/writer/iceberg/iceberg_partition_path.cpp b/be/src/exec/sink/writer/iceberg/iceberg_partition_path.cpp
new file mode 100644
index 00000000000..ecde5ffba3f
--- /dev/null
+++ b/be/src/exec/sink/writer/iceberg/iceberg_partition_path.cpp
@@ -0,0 +1,52 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "exec/sink/writer/iceberg/iceberg_partition_path.h"
+
+namespace doris {
+
+namespace {
+
+bool is_unescaped_url_encoder_char(unsigned char ch) { // True for bytes emitted unchanged: ASCII letters, digits, '.', '-', '*', '_'.
+    return ('A' <= ch && ch <= 'Z') || ('a' <= ch && ch <= 'z') || ('0' <= ch && ch <= '9') ||
+           ch == '.' || ch == '-' || ch == '*' || ch == '_';
+}
+
+char hex_digit(unsigned char value) { // Upper-case hex digit for a value in [0, 15]; larger inputs do not yield hex digits.
+    return value < 10 ? static_cast<char>('0' + value) : static_cast<char>('A' + value - 10);
+}
+
+} // namespace
+
+std::string IcebergPartitionPath::escape(const std::string& path) { // Encodes `path` byte by byte: safe chars kept, ' ' -> '+', everything else -> %XX.
+    std::string escaped;
+    escaped.reserve(path.size()); // Lower bound only: every escaped byte expands to three characters.
+    for (unsigned char ch : path) { // Read each byte as unsigned so values >= 0x80 shift and mask as 0..255.
+        if (is_unescaped_url_encoder_char(ch)) {
+            escaped.push_back(static_cast<char>(ch));
+        } else if (ch == ' ') {
+            escaped.push_back('+');
+        } else {
+            escaped.push_back('%');
+            escaped.push_back(hex_digit(ch >> 4)); // High nibble first.
+            escaped.push_back(hex_digit(ch & 0x0F)); // Then the low nibble.
+        }
+    }
+    return escaped;
+}
+
+} // namespace doris
diff --git a/be/src/exec/sink/writer/iceberg/iceberg_partition_path.h b/be/src/exec/sink/writer/iceberg/iceberg_partition_path.h
new file mode 100644
index 00000000000..0b34493e228
--- /dev/null
+++ b/be/src/exec/sink/writer/iceberg/iceberg_partition_path.h
@@ -0,0 +1,33 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <string>
+
+namespace doris {
+
+class IcebergPartitionPath { // Holder for the static partition-path escaping helper; exposes no instance API.
+public:
+    // Match Iceberg Java PartitionSpec.partitionToPath, which uses URLEncoder with UTF-8.
+    static std::string escape(const std::string& path);
+
+private:
+    IcebergPartitionPath() = default; // Private constructor: code outside the class cannot create instances.
+};
+
+} // namespace doris
diff --git a/be/src/exec/sink/writer/iceberg/viceberg_table_writer.cpp b/be/src/exec/sink/writer/iceberg/viceberg_table_writer.cpp
index c79aa3262bf..dc5f2522122 100644
--- a/be/src/exec/sink/writer/iceberg/viceberg_table_writer.cpp
+++ b/be/src/exec/sink/writer/iceberg/viceberg_table_writer.cpp
@@ -25,10 +25,10 @@
 #include "core/column/column_vector.h"
 #include "core/data_type/data_type_nullable.h"
 #include "core/data_type_serde/data_type_serde.h"
+#include "exec/sink/writer/iceberg/iceberg_partition_path.h"
 #include "exec/sink/writer/iceberg/partition_transformers.h"
 #include "exec/sink/writer/iceberg/viceberg_partition_writer.h"
 #include "exec/sink/writer/iceberg/viceberg_sort_writer.h"
-#include "exec/sink/writer/vhive_utils.h"
 #include "exprs/vexpr.h"
 #include "exprs/vexpr_context.h"
 #include "format/table/iceberg/partition_spec_parser.h"
@@ -517,7 +517,7 @@ std::string VIcebergTableWriter::_partition_to_path(const doris::iceberg::Struct
 }
 
 std::string VIcebergTableWriter::_escape(const std::string& path) {
-    return VHiveUtils::escape_path_name(path);
+    return IcebergPartitionPath::escape(path); // Delegate to the Iceberg-specific encoder instead of the Hive path escaper.
 }
 
 std::vector<std::string> VIcebergTableWriter::_partition_values(
diff --git a/be/test/exec/sink/writer/iceberg/iceberg_partition_path_test.cpp b/be/test/exec/sink/writer/iceberg/iceberg_partition_path_test.cpp
new file mode 100644
index 00000000000..fc024bfb213
--- /dev/null
+++ b/be/test/exec/sink/writer/iceberg/iceberg_partition_path_test.cpp
@@ -0,0 +1,40 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "exec/sink/writer/iceberg/iceberg_partition_path.h"
+
+#include <gtest/gtest.h>
+
+namespace doris {
+
+class IcebergPartitionPathTest : public testing::Test {};
+
+TEST_F(IcebergPartitionPathTest, test_escape_matches_iceberg_partition_spec_path_encoding) { // Pins escape() output for each class of input byte.
+    EXPECT_EQ("", IcebergPartitionPath::escape("")); // Empty input stays empty.
+    EXPECT_EQ("abcXYZ012.-*_", IcebergPartitionPath::escape("abcXYZ012.-*_")); // Letters, digits and . - * _ pass through.
+    EXPECT_EQ("with+space", IcebergPartitionPath::escape("with space")); // Space becomes '+'.
+    EXPECT_EQ("slash%2Fcolon%3Aequals%3Dpercent%25question%3F",
+              IcebergPartitionPath::escape("slash/colon:equals=percent%question?"));
+    EXPECT_EQ("quote%22hash%23brackets%5B%5Dcaret%5E",
+              IcebergPartitionPath::escape("quote\"hash#brackets[]caret^"));
+    EXPECT_EQ("tilde%7Ebang%21plus%2B", IcebergPartitionPath::escape("tilde~bang!plus+"));
+    EXPECT_EQ(
+            "with%CC%81combining+character",
+            IcebergPartitionPath::escape(std::string("with") + "\xCC\x81" + "combining character")); // Non-ASCII bytes are escaped one byte at a time.
+}
+
+} // namespace doris
diff --git a/regression-test/suites/external_table_p0/iceberg/write/test_iceberg_write_partition_path.groovy b/regression-test/suites/external_table_p0/iceberg/write/test_iceberg_write_partition_path.groovy
new file mode 100644
index 00000000000..5a1946e20aa
--- /dev/null
+++ b/regression-test/suites/external_table_p0/iceberg/write/test_iceberg_write_partition_path.groovy
@@ -0,0 +1,88 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_iceberg_write_partition_path", "p0,external,iceberg,external_docker,external_docker_iceberg") { // Checks the partition directory names written for special-character values.
+    String enabled = context.config.otherConfigs.get("enableIcebergTest")
+    if (enabled == null || !enabled.equalsIgnoreCase("true")) { // Skip unless enableIcebergTest is set to "true".
+        logger.info("disable iceberg test.")
+        return
+    }
+
+    String catalog_name = "test_iceberg_write_partition_path"
+    String db_name = "test_partition_path_db"
+    String table_name = "test_partition_path_tbl"
+    String rest_port = context.config.otherConfigs.get("iceberg_rest_uri_port")
+    String minio_port = context.config.otherConfigs.get("iceberg_minio_port")
+    String externalEnvIp = context.config.otherConfigs.get("externalEnvIp")
+    sql """drop catalog if exists ${catalog_name}"""
+    sql """
+        create catalog if not exists ${catalog_name} properties (
+            "type" = "iceberg",
+            "iceberg.catalog.type" = "rest",
+            "uri" = "http://${externalEnvIp}:${rest_port}",
+            "s3.access_key" = "admin",
+            "s3.secret_key" = "password",
+            "s3.endpoint" = "http://${externalEnvIp}:${minio_port}",
+            "s3.region" = "us-east-1"
+        )
+    """
+
+    try {
+        sql """switch ${catalog_name}"""
+        sql """drop database if exists ${db_name} force"""
+        sql """create database ${db_name}"""
+        sql """use ${db_name}"""
+
+        sql """drop table if exists ${table_name}"""
+        sql """
+            create table ${table_name} (
+                id bigint,
+                part_col string
+            ) engine=iceberg
+            partition by list (part_col) ()
+            properties (
+                "format-version" = "2",
+                "write-format" = "parquet",
+                "write.format.default" = "parquet"
+            )
+        """
+
+        sql """
+            insert into ${table_name} values
+            (1, concat('with', unhex('CC81'), 'combining character')),
+            (2, 'slash/colon:equals=percent%question?')
+        """
+
+        List<List<Object>> rows = sql """select id, hex(part_col) from ${table_name} order by id""" // Read the values back as hex to confirm the stored bytes are unchanged.
+        assertEquals(2, rows.size())
+        assertEquals("1", rows[0][0].toString())
+        assertEquals("77697468CC81636F6D62696E696E6720636861726163746572", rows[0][1].toString())
+        assertEquals("2", rows[1][0].toString())
+        assertEquals("736C6173682F636F6C6F6E3A657175616C733D70657263656E74257175657374696F6E3F", rows[1][1].toString())
+
+        List<String> filePaths = sql("""select file_path from ${table_name}\$files order by file_path""") // Data file paths from the table's $files metadata.
+                .collect { row -> row[0].toString() }
+        logger.info("Iceberg partition file paths: ${filePaths}")
+        assertTrue(filePaths.any { path -> path.contains("part_col=with%CC%81combining+character") },
+                "Expected Iceberg URL-encoded UTF-8 partition path, actual paths: ${filePaths}")
+        assertTrue(filePaths.any { path -> path.contains("part_col=slash%2Fcolon%3Aequals%3Dpercent%25question%3F") },
+                "Expected Iceberg URL-encoded special-character partition path, actual paths: ${filePaths}")
+    } finally { // Always drop the test database and catalog, even when an assertion fails.
+        sql """drop database if exists ${catalog_name}.${db_name} force"""
+        sql """drop catalog if exists ${catalog_name}"""
+    }
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to