This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 16f2dda352c [fix](paimon) infer manifest format from split file format in cpp reader (#60795)
16f2dda352c is described below

commit 16f2dda352c8b4c573e413e6597067547c379a30
Author: Chenjunwei <[email protected]>
AuthorDate: Sun Mar 8 05:03:39 2026 +0800

    [fix](paimon) infer manifest format from split file format in cpp reader (#60795)
    
    ## Problem
    Follow-up to #60676.
    
    When FE does not pass the full table options in scan ranges, paimon-cpp
    may default manifest.format to avro.
    In non-avro environments, PaimonCppReader initialization can then fail
    with:
    Could not find a FileFormatFactory implementation class for format avro.
    
    ## Solution
    In PaimonCppReader::_build_options, if the split-level file_format is
    set and non-empty, backfill each of the following options when it is
    missing or empty:
    - set file.format from split file_format
    - set manifest.format from split file_format
    
    This keeps paimon-cpp format resolution consistent with the actual split
    format and avoids unintended avro fallback.
    
    ## Verification
    - Incremental BE build succeeded for doris_be target.
    - The BE code change is limited to
    be/src/vec/exec/format/table/paimon_cpp_reader.cpp; a regression test
    (test_paimon_cpp_reader.groovy) is added alongside it.
---
 be/src/vec/exec/format/table/paimon_cpp_reader.cpp | 17 ++++++
 .../paimon/test_paimon_cpp_reader.groovy           | 70 ++++++++++++++++++++++
 2 files changed, 87 insertions(+)

diff --git a/be/src/vec/exec/format/table/paimon_cpp_reader.cpp b/be/src/vec/exec/format/table/paimon_cpp_reader.cpp
index 0bb89d0c73e..756c87c32f0 100644
--- a/be/src/vec/exec/format/table/paimon_cpp_reader.cpp
+++ b/be/src/vec/exec/format/table/paimon_cpp_reader.cpp
@@ -311,6 +311,23 @@ std::map<std::string, std::string> PaimonCppReader::_build_options() const {
     copy_if_missing("fs.s3a.region", "AWS_REGION");
     copy_if_missing("fs.s3a.path.style.access", "use_path_style");
 
+    // FE currently does not pass paimon_options in scan ranges.
+    // Backfill file.format/manifest.format from split file_format to avoid
+    // paimon-cpp falling back to default manifest.format=avro.
+    if (_range.__isset.table_format_params && 
_range.table_format_params.__isset.paimon_params &&
+        _range.table_format_params.paimon_params.__isset.file_format &&
+        !_range.table_format_params.paimon_params.file_format.empty()) {
+        const auto& split_file_format = 
_range.table_format_params.paimon_params.file_format;
+        auto file_format_it = options.find(paimon::Options::FILE_FORMAT);
+        if (file_format_it == options.end() || file_format_it->second.empty()) 
{
+            options[paimon::Options::FILE_FORMAT] = split_file_format;
+        }
+        auto manifest_format_it = 
options.find(paimon::Options::MANIFEST_FORMAT);
+        if (manifest_format_it == options.end() || 
manifest_format_it->second.empty()) {
+            options[paimon::Options::MANIFEST_FORMAT] = split_file_format;
+        }
+    }
+
     options[paimon::Options::FILE_SYSTEM] = "doris";
     return options;
 }
diff --git a/regression-test/suites/external_table_p0/paimon/test_paimon_cpp_reader.groovy b/regression-test/suites/external_table_p0/paimon/test_paimon_cpp_reader.groovy
new file mode 100644
index 00000000000..64ef323add6
--- /dev/null
+++ b/regression-test/suites/external_table_p0/paimon/test_paimon_cpp_reader.groovy
@@ -0,0 +1,70 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_paimon_cpp_reader", "p0,external") { // runs the same queries with enable_paimon_cpp_reader off and on, and asserts identical results
+    String enabled = context.config.otherConfigs.get("enablePaimonTest")
+    if (enabled == null || !enabled.equalsIgnoreCase("true")) { // skip unless enablePaimonTest is "true" (case-insensitive)
+        logger.info("disabled paimon test")
+        return
+    }
+
+    String catalogName = "test_paimon_cpp_reader"
+    String hdfsPort = context.config.otherConfigs.get("hive2HdfsPort")
+    String externalEnvIp = context.config.otherConfigs.get("externalEnvIp")
+
+    try {
+        sql """drop catalog if exists ${catalogName}"""
+        sql """create catalog if not exists ${catalogName} properties (
+            "type" = "paimon",
+            "paimon.catalog.type" = "filesystem",
+            "warehouse" = 
"hdfs://${externalEnvIp}:${hdfsPort}/user/doris/paimon1"
+        );"""
+        sql """switch ${catalogName}"""
+        sql """use db1"""
+        // Do not force the JNI scanner; keep the default scanner selection behavior.
+        sql """set force_jni_scanner=false"""
+
+        def testQueries = [ // each query is executed once per reader setting below
+                """select c1 from complex_all order by c1""",
+                """select c1 from complex_all where c1 >= 2 order by c1""",
+                """select * from all_table order by c1""",
+                """select * from all_table_with_parquet where c13 like '13%' 
order by c1""",
+                """select * from complex_tab order by c1""",
+                """select c3['a_test'], c3['b_test'], c3['bbb'], c3['ccc'] 
from complex_tab order by c3['a_test'], c3['b_test']""",
+                """select array_max(c2) c from complex_tab order by c""",
+                """select c20[0] c from complex_all order by c""",
+                """select * from deletion_vector_orc""",
+                """select * from deletion_vector_parquet"""
+        ]
+
+        // Baseline run: the default path is JNI when enable_paimon_cpp_reader=false.
+        sql """set enable_paimon_cpp_reader=false"""
+        def jniResults = testQueries.collect { query -> sql(query) }
+
+        sql """set enable_paimon_cpp_reader=true""" // second run of the same queries with the cpp reader enabled
+        def cppResults = testQueries.collect { query -> sql(query) }
+
+        assertTrue(cppResults[0].size() > 0) // guard against a vacuous pass on an empty first result
+        for (int i = 0; i < testQueries.size(); i++) {
+            assertEquals(jniResults[i].toString(), cppResults[i].toString()) // compare stringified result sets pairwise
+        }
+    } finally { // always reset the session variables and drop the catalog
+        sql """set enable_paimon_cpp_reader=false"""
+        sql """set force_jni_scanner=false"""
+        sql """drop catalog if exists ${catalogName}"""
+    }
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to