Copilot commented on code in PR #60676:
URL: https://github.com/apache/doris/pull/60676#discussion_r2792056363


##########
be/CMakeLists.txt:
##########
@@ -545,6 +556,89 @@ set(COMMON_THIRDPARTY
     ${COMMON_THIRDPARTY}
 )
 
+if (ENABLE_PAIMON_CPP)
+    if (NOT PAIMON_HOME)
+        set(_paimon_default_home "${THIRDPARTY_DIR}/paimon-cpp")
+        if (EXISTS 
"${_paimon_default_home}/lib/cmake/Paimon/PaimonConfig.cmake")
+            set(PAIMON_HOME "${_paimon_default_home}" CACHE PATH "" FORCE)
+        endif()
+        unset(_paimon_default_home)
+    endif()
+    if (NOT PAIMON_HOME)
+        message(FATAL_ERROR "ENABLE_PAIMON_CPP=ON but PAIMON_HOME is not set")
+    endif()

Review Comment:
   `ENABLE_PAIMON_CPP` is defaulted to ON, but if 
`${THIRDPARTY_DIR}/paimon-cpp` is not present and `PAIMON_HOME` isn’t set, the 
build hard-fails (`FATAL_ERROR`). In this repo `thirdparty/paimon-cpp` does not 
exist, so a default build will break. Consider defaulting this option to OFF 
(or auto-disabling when the config isn’t found) and only failing when the user 
explicitly enables it.



##########
be/CMakeLists.txt:
##########
@@ -545,6 +556,89 @@ set(COMMON_THIRDPARTY
     ${COMMON_THIRDPARTY}
 )
 
+if (ENABLE_PAIMON_CPP)
+    if (NOT PAIMON_HOME)
+        set(_paimon_default_home "${THIRDPARTY_DIR}/paimon-cpp")
+        if (EXISTS 
"${_paimon_default_home}/lib/cmake/Paimon/PaimonConfig.cmake")
+            set(PAIMON_HOME "${_paimon_default_home}" CACHE PATH "" FORCE)
+        endif()
+        unset(_paimon_default_home)
+    endif()
+    if (NOT PAIMON_HOME)
+        message(FATAL_ERROR "ENABLE_PAIMON_CPP=ON but PAIMON_HOME is not set")
+    endif()
+    set(Paimon_DIR "${PAIMON_HOME}/lib/cmake/Paimon" CACHE PATH "" FORCE)
+    find_package(Paimon CONFIG REQUIRED NO_DEFAULT_PATH)
+    include_directories("${PAIMON_HOME}/include")
+    # Link glog after paimon static libs to satisfy RawLog__ in static link.
+    list(REMOVE_ITEM COMMON_THIRDPARTY glog)
+
+    # Paimon static library dependencies
+    set(paimon_fmt_lib "${PAIMON_HOME}/lib64/paimon_deps/libfmtd.a")
+    if (NOT EXISTS "${paimon_fmt_lib}")
+        set(paimon_fmt_lib "${PAIMON_HOME}/lib64/paimon_deps/libfmt.a")
+    endif()
+    add_library(paimon_fmt STATIC IMPORTED)
+    set_target_properties(paimon_fmt PROPERTIES IMPORTED_LOCATION 
"${paimon_fmt_lib}")
+    set(paimon_tbb_lib "${PAIMON_HOME}/lib64/paimon_deps/libtbb_debug.a")
+    if (NOT EXISTS "${paimon_tbb_lib}")
+        set(paimon_tbb_lib "${PAIMON_HOME}/lib64/paimon_deps/libtbb.a")
+    endif()
+    add_library(paimon_tbb STATIC IMPORTED)
+    set_target_properties(paimon_tbb PROPERTIES IMPORTED_LOCATION 
"${paimon_tbb_lib}")
+    add_library(paimon_roaring STATIC IMPORTED)
+    set_target_properties(paimon_roaring PROPERTIES IMPORTED_LOCATION 
"${PAIMON_HOME}/lib64/paimon_deps/libroaring_bitmap.a")
+    add_library(paimon_xxhash STATIC IMPORTED)
+    set_target_properties(paimon_xxhash PROPERTIES IMPORTED_LOCATION 
"${PAIMON_HOME}/lib64/paimon_deps/libxxhash.a")
+    add_library(paimon_arrow_dataset STATIC IMPORTED)
+    set_target_properties(paimon_arrow_dataset PROPERTIES IMPORTED_LOCATION 
"${PAIMON_HOME}/lib64/paimon_deps/libarrow_dataset.a")
+    add_library(paimon_arrow_acero STATIC IMPORTED)
+    set_target_properties(paimon_arrow_acero PROPERTIES IMPORTED_LOCATION 
"${PAIMON_HOME}/lib64/paimon_deps/libarrow_acero.a")
+    add_library(paimon_arrow STATIC IMPORTED)
+    set_target_properties(paimon_arrow PROPERTIES IMPORTED_LOCATION 
"${PAIMON_HOME}/lib64/paimon_deps/libarrow.a")
+    set(paimon_arrow_bundled_deps_lib 
"${PAIMON_HOME}/lib64/paimon_deps/libarrow_bundled_dependencies.a")
+    if (EXISTS "${paimon_arrow_bundled_deps_lib}")
+        add_library(paimon_arrow_bundled_dependencies STATIC IMPORTED)
+        set_target_properties(paimon_arrow_bundled_dependencies PROPERTIES 
IMPORTED_LOCATION "${paimon_arrow_bundled_deps_lib}")
+    endif()
+    add_library(paimon_parquet STATIC IMPORTED)
+    set_target_properties(paimon_parquet PROPERTIES IMPORTED_LOCATION 
"${PAIMON_HOME}/lib64/paimon_deps/libparquet.a")
+    # Always use paimon-cpp bundled ORC to avoid conflicts with Doris ORC.
+    add_library(paimon_orc STATIC IMPORTED)
+    set_target_properties(paimon_orc PROPERTIES IMPORTED_LOCATION 
"${PAIMON_HOME}/lib64/paimon_deps/liborc.a")
+

Review Comment:
   The paimon-cpp linkage setup hardcodes a 
`${PAIMON_HOME}/lib64/paimon_deps/...` layout and creates a number of imported 
static libs manually. This is brittle across platforms/install layouts (lib vs 
lib64, debug vs release naming) and can easily break packaging. Prefer 
consuming targets exported by `PaimonConfig.cmake` (and its transitive deps), 
or at least probe both `lib` and `lib64` and fail with a clearer message 
showing which paths were tried.



##########
fe/be-java-extensions/paimon-scanner/src/main/java/org/apache/doris/paimon/PaimonUtils.java:
##########
@@ -37,9 +38,14 @@ public static List<String> getFieldNames(RowType rowType) {
 
     public static <T> T deserialize(String encodedStr) {
         try {
-            return InstantiationUtil.deserializeObject(
-                    
DECODER.decode(encodedStr.getBytes(java.nio.charset.StandardCharsets.UTF_8)),
-                    PaimonUtils.class.getClassLoader());
+            byte[] decoded;
+            try {
+                decoded = 
URL_DECODER.decode(encodedStr.getBytes(java.nio.charset.StandardCharsets.UTF_8));
+            } catch (IllegalArgumentException e) {
+                // Fallback to standard Base64 for splits encoded by native 
Paimon serialization.

Review Comment:
   The fallback comment is inaccurate: decoding with standard Base64 doesn’t 
make the payload compatible with `InstantiationUtil.deserializeObject`. That 
method still requires Java-serialized bytes, not “native Paimon serialization”. 
Please reword the comment to reflect that this fallback only supports 
alternative Base64 alphabets/encodings for the *same* Java-serialized payload.
   ```suggestion
                   // Fallback to standard Base64 for splits encoded with a 
non-URL-safe Base64 alphabet.
   ```



##########
fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java:
##########
@@ -1295,13 +1297,14 @@ public void checkQuerySlotCount(String slotCnt) {
     public enum IgnoreSplitType {
         NONE,
         IGNORE_JNI,
-        IGNORE_NATIVE
+        IGNORE_NATIVE,
+        IGNORE_PAIMON_CPP
     }
 
     public static final String IGNORE_SPLIT_TYPE = "ignore_split_type";
     @VariableMgr.VarAttr(name = IGNORE_SPLIT_TYPE,
             checker = "checkIgnoreSplitType",
-            options = {"NONE", "IGNORE_JNI", "IGNORE_NATIVE"},
+            options = {"NONE", "IGNORE_JNI", "IGNORE_NATIVE", 
"IGNORE_PAIMON_CPP"},
             description = {"忽略指定类型的 split", "Ignore splits of the specified 
type"})

Review Comment:
   `IGNORE_PAIMON_CPP` was added to `IgnoreSplitType` and exposed as an option, 
but there’s no corresponding handling in the split generation logic (e.g., 
`PaimonScanNode.getSplits` only checks `IGNORE_NATIVE` and `IGNORE_JNI`). 
As-is, setting `ignore_split_type=IGNORE_PAIMON_CPP` will have no effect. 
Either implement the behavior (skip paimon-cpp/JNI-style splits when paimon-cpp 
is enabled) or remove the option to avoid misleading users.



##########
be/src/vec/exec/scan/file_scanner.cpp:
##########
@@ -67,7 +67,9 @@
 #include "vec/exec/format/table/iceberg_reader.h"
 #include "vec/exec/format/table/lakesoul_jni_reader.h"
 #include "vec/exec/format/table/max_compute_jni_reader.h"
+#include "vec/exec/format/table/paimon_cpp_reader.h"
 #include "vec/exec/format/table/paimon_jni_reader.h"
+#include "vec/exec/format/table/paimon_predicate_converter.h"

Review Comment:
   Even with `ENABLE_PAIMON_CPP=OFF`, the current build still compiles all 
`be/src/vec/**/*.cpp` via `file(GLOB_RECURSE VEC_FILES *.cpp)`, and 
`file_scanner.cpp` now unconditionally includes paimon-cpp headers. This means 
disabling the option won’t actually prevent compilation failures when 
paimon-cpp headers/libs aren’t available. To make the option effective, gate 
the includes/implementation with a compile definition (e.g., `#ifdef 
ENABLE_PAIMON_CPP`) and/or exclude the paimon-cpp source files from the Vec 
target when the option is OFF.
   ```suggestion
   #ifdef ENABLE_PAIMON_CPP
   #include "vec/exec/format/table/paimon_cpp_reader.h"
   #include "vec/exec/format/table/paimon_predicate_converter.h"
   #endif
   #include "vec/exec/format/table/paimon_jni_reader.h"
   ```



##########
fe/fe-core/src/main/java/org/apache/doris/datasource/paimon/source/PaimonScanNode.java:
##########
@@ -217,9 +217,19 @@ private void setPaimonParams(TFileRangeDesc rangeDesc, 
PaimonSplit paimonSplit)
 
         String fileFormat = getFileFormat(paimonSplit.getPathString());
         if (split != null) {
-            // use jni reader
+            // use jni reader or paimon-cpp reader
             rangeDesc.setFormatType(TFileFormatType.FORMAT_JNI);
-            fileDesc.setPaimonSplit(PaimonUtil.encodeObjectToString(split));
+            // Use Paimon native serialization for paimon-cpp reader
+            if (sessionVariable.isEnablePaimonCppReader() && split instanceof 
DataSplit) {
+                
fileDesc.setPaimonSplit(PaimonUtil.encodeDataSplitToString((DataSplit) split));
+            } else {
+                
fileDesc.setPaimonSplit(PaimonUtil.encodeObjectToString(split));
+            }
+            // Set table location for paimon-cpp reader
+            String tableLocation = source.getTableLocation();
+            if (tableLocation != null) {

Review Comment:
   When `enable_paimon_cpp_reader` is on, BE will error out if `paimon_table` 
isn’t set. Here `tableLocation` can still be null/empty (e.g., 
non-FileStoreTable without a `path` option), in which case the query will fail 
later with a generic BE internal error. Consider validating `tableLocation` 
when paimon-cpp is enabled and throwing a clear FE-side exception (or ensuring 
a non-empty location is always populated).
   ```suggestion
               if (sessionVariable.isEnablePaimonCppReader() && split 
instanceof DataSplit
                       && (tableLocation == null || tableLocation.isEmpty())) {
                   throw new RuntimeException(
                           "Paimon table location is empty while paimon-cpp 
reader is enabled. "
                           + "Please ensure the Paimon table path is configured 
or disable paimon-cpp reader.");
               }
               if (tableLocation != null && !tableLocation.isEmpty()) {
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to