This is an automated email from the ASF dual-hosted git repository.

taiyangli pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git


The following commit(s) were added to refs/heads/main by this push:
     new d7331bea9a [GLUTEN-8130][CH] Use the actual user instead of yarn user 
to read hdfs file (#8131)
d7331bea9a is described below

commit d7331bea9a5e6ecce2e1a6448a57511c19eb8142
Author: exmy <[email protected]>
AuthorDate: Wed Dec 4 11:53:28 2024 +0800

    [GLUTEN-8130][CH] Use the actual user instead of yarn user to read hdfs file 
(#8131)
---
 .../Storages/SubstraitSource/ReadBufferBuilder.cpp          | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/cpp-ch/local-engine/Storages/SubstraitSource/ReadBufferBuilder.cpp 
b/cpp-ch/local-engine/Storages/SubstraitSource/ReadBufferBuilder.cpp
index c1796f3e3c..daaf6482dd 100644
--- a/cpp-ch/local-engine/Storages/SubstraitSource/ReadBufferBuilder.cpp
+++ b/cpp-ch/local-engine/Storages/SubstraitSource/ReadBufferBuilder.cpp
@@ -258,9 +258,16 @@ public:
         /// Get hdfs_uri
         Poco::URI uri(file_info.uri_file());
         auto hdfs_file_path = uri.getPath();
-        std::string hdfs_uri = "hdfs://" + uri.getHost();
-        if (uri.getPort())
-            hdfs_uri += ":" + std::to_string(uri.getPort());
+
+        std::string new_file_uri = uri.toString();
+        if (uri.getUserInfo().empty() && 
BackendInitializerUtil::spark_user.has_value())
+        {
+            uri.setUserInfo(*BackendInitializerUtil::spark_user);
+            new_file_uri = uri.toString();
+        }
+
+        auto begin_of_path = new_file_uri.find('/', new_file_uri.find("//") + 
2);
+        auto hdfs_uri = new_file_uri.substr(0, begin_of_path);
 
         std::optional<size_t> file_size;
         std::optional<size_t> modified_time;


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to