This is an automated email from the ASF dual-hosted git repository.
taiyangli pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git
The following commit(s) were added to refs/heads/main by this push:
new d7331bea9a [GLUTEN-8130][CH] Use the actual user instead of yarn user
to read hdfs file (#8131)
d7331bea9a is described below
commit d7331bea9a5e6ecce2e1a6448a57511c19eb8142
Author: exmy <[email protected]>
AuthorDate: Wed Dec 4 11:53:28 2024 +0800
[GLUTEN-8130][CH] Use the actual user instead of yarn user to read hdfs file
(#8131)
---
.../Storages/SubstraitSource/ReadBufferBuilder.cpp | 13 ++++++++++---
1 file changed, 10 insertions(+), 3 deletions(-)
diff --git a/cpp-ch/local-engine/Storages/SubstraitSource/ReadBufferBuilder.cpp
b/cpp-ch/local-engine/Storages/SubstraitSource/ReadBufferBuilder.cpp
index c1796f3e3c..daaf6482dd 100644
--- a/cpp-ch/local-engine/Storages/SubstraitSource/ReadBufferBuilder.cpp
+++ b/cpp-ch/local-engine/Storages/SubstraitSource/ReadBufferBuilder.cpp
@@ -258,9 +258,16 @@ public:
/// Get hdfs_uri
Poco::URI uri(file_info.uri_file());
auto hdfs_file_path = uri.getPath();
- std::string hdfs_uri = "hdfs://" + uri.getHost();
- if (uri.getPort())
- hdfs_uri += ":" + std::to_string(uri.getPort());
+
+ std::string new_file_uri = uri.toString();
+ if (uri.getUserInfo().empty() &&
BackendInitializerUtil::spark_user.has_value())
+ {
+ uri.setUserInfo(*BackendInitializerUtil::spark_user);
+ new_file_uri = uri.toString();
+ }
+
+ auto begin_of_path = new_file_uri.find('/', new_file_uri.find("//") +
2);
+ auto hdfs_uri = new_file_uri.substr(0, begin_of_path);
std::optional<size_t> file_size;
std::optional<size_t> modified_time;
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]