github-actions[bot] commented on code in PR #61031:
URL: https://github.com/apache/doris/pull/61031#discussion_r2944314203


##########
be/src/format/table/paimon_doris_file_system.cpp:
##########
@@ -79,38 +73,58 @@ ParsedUri parse_uri(const std::string& path) {
     return parsed;
 }
 
-bool is_s3_scheme(const std::string& scheme) {
-    return scheme == "s3" || scheme == "s3a" || scheme == "s3n" || scheme == 
"oss" ||
-           scheme == "obs" || scheme == "cos" || scheme == "cosn" || scheme == 
"gs" ||
-           scheme == "abfs" || scheme == "abfss" || scheme == "wasb" || scheme 
== "wasbs";
-}
-
-bool is_hdfs_scheme(const std::string& scheme) {
-    return scheme == "hdfs" || scheme == "viewfs" || scheme == "local";
+bool parse_scheme_mapping_target(std::string_view raw_target, 
doris::TFileType::type* type) {
+    std::string target = doris::to_lower(std::string(doris::trim(raw_target)));
+    if (target == "local") {
+        *type = doris::TFileType::FILE_LOCAL;
+        return true;
+    }
+    if (target == "hdfs") {
+        *type = doris::TFileType::FILE_HDFS;
+        return true;
+    }
+    if (target == "s3") {
+        *type = doris::TFileType::FILE_S3;
+        return true;
+    }
+    if (target == "http") {
+        *type = doris::TFileType::FILE_HTTP;
+        return true;
+    }
+    if (target == "broker") {
+        *type = doris::TFileType::FILE_BROKER;
+        return true;
+    }
+    return false;
 }
 
-bool is_http_scheme(const std::string& scheme) {
-    return scheme == "http" || scheme == "https";
+bool parse_scheme_mapping_entry(std::string_view raw_entry, std::string* 
scheme,
+                                doris::TFileType::type* type) {
+    size_t separator = raw_entry.find('=');
+    if (separator == std::string_view::npos) {
+        return false;
+    }
+    *scheme = doris::to_lower(std::string(doris::trim(raw_entry.substr(0, 
separator))));
+    if (scheme->empty()) {
+        return false;
+    }
+    return parse_scheme_mapping_target(raw_entry.substr(separator + 1), type);
 }
 
 doris::TFileType::type map_scheme_to_file_type(const std::string& scheme) {
     if (scheme.empty()) {
         return doris::TFileType::FILE_HDFS;
     }
-    if (scheme == "file") {
-        return doris::TFileType::FILE_LOCAL;
-    }
-    if (is_hdfs_scheme(scheme)) {
-        return doris::TFileType::FILE_HDFS;
-    }
-    if (is_s3_scheme(scheme)) {
-        return doris::TFileType::FILE_S3;
-    }
-    if (is_http_scheme(scheme)) {
-        return doris::TFileType::FILE_HTTP;
-    }
-    if (scheme == "ofs" || scheme == "gfs" || scheme == "jfs") {
-        return doris::TFileType::FILE_BROKER;
+    std::string normalized_scheme = doris::to_lower(scheme);
+    for (const auto& mapping_entry : 
doris::config::paimon_file_system_scheme_mappings) {
+        std::string configured_scheme;

Review Comment:
   [Medium] Performance suggestion: `map_scheme_to_file_type` re-parses the 
config vector on every invocation (called per file operation in `resolve_path`, 
`GetFileStatus`, `Exists`, etc.). With ~20 default entries, each call does 
linear scan + `find('=')` + `substr` + `to_lower` + `trim` allocations.
   
   Since the config is immutable (`DEFINE_Strings`, not `DEFINE_mStrings`), 
consider building a `static` `std::unordered_map<std::string, TFileType::type>` 
lazily on first call (e.g., via `std::call_once`). This gives O(1) lookup and 
avoids repeated string allocations on every file operation.



##########
be/src/format/table/paimon_doris_file_system.h:
##########
@@ -17,9 +17,20 @@
 
 #pragma once
 
+#include <gen_cpp/Types_types.h>
+
+#include <string>
+
+namespace paimon {
+
+// Visible for tests: maps a URI scheme to the Doris file type used by 
paimon-cpp.
+doris::TFileType::type map_scheme_to_file_type(const std::string& scheme);
+

Review Comment:
   [Low] This header is missing `compile_check_begin.h` / `compile_check_end.h` 
includes. All other headers in `be/src/format/table/` (e.g., 
`paimon_jni_reader.h`, `paimon_cpp_reader.h`) include these compile-check 
guards. New files should include them for consistency, or document why they are 
excluded (e.g., if paimon-cpp types in the `.cpp` cause conversion warnings).



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to