zhouyuan commented on code in PR #10540:
URL:
https://github.com/apache/incubator-gluten/pull/10540#discussion_r2303891587
##########
cpp/velox/utils/ConfigExtractor.cc:
##########
@@ -42,176 +50,184 @@ std::string getConfigValue(
}
std::shared_ptr<facebook::velox::config::ConfigBase> getHiveConfig(
- std::shared_ptr<facebook::velox::config::ConfigBase> conf) {
+ std::shared_ptr<facebook::velox::config::ConfigBase> conf,
+ FileSystemType fsType) {
std::unordered_map<std::string, std::string> hiveConfMap;
#ifdef ENABLE_S3
- using namespace facebook::velox::filesystems;
- std::string_view kSparkHadoopS3Prefix = "spark.hadoop.fs.s3a.";
- std::string_view kSparkHadoopS3BucketPrefix = "spark.hadoop.fs.s3a.bucket.";
-
- // Log granularity of AWS C++ SDK
- const std::string kVeloxAwsSdkLogLevel = "spark.gluten.velox.awsSdkLogLevel";
- const std::string kVeloxAwsSdkLogLevelDefault = "FATAL";
-
- // Whether to use proxy from env for s3 c++ client
- const std::string kVeloxS3UseProxyFromEnv =
"spark.gluten.velox.s3UseProxyFromEnv";
- const std::string kVeloxS3UseProxyFromEnvDefault = "false";
-
- // Payload signing policy
- const std::string kVeloxS3PayloadSigningPolicy =
"spark.gluten.velox.s3PayloadSigningPolicy";
- const std::string kVeloxS3PayloadSigningPolicyDefault = "Never";
-
- // Log location of AWS C++ SDK
- const std::string kVeloxS3LogLocation = "spark.gluten.velox.s3LogLocation";
-
- const std::unordered_map<S3Config::Keys, std::pair<std::string,
std::optional<std::string>>> sparkSuffixes = {
- {S3Config::Keys::kAccessKey, std::make_pair("access.key", std::nullopt)},
- {S3Config::Keys::kSecretKey, std::make_pair("secret.key", std::nullopt)},
- {S3Config::Keys::kEndpoint, std::make_pair("endpoint", std::nullopt)},
- {S3Config::Keys::kSSLEnabled, std::make_pair("connection.ssl.enabled",
"false")},
- {S3Config::Keys::kPathStyleAccess, std::make_pair("path.style.access",
"false")},
- {S3Config::Keys::kMaxAttempts, std::make_pair("retry.limit",
std::nullopt)},
- {S3Config::Keys::kRetryMode, std::make_pair("retry.mode", "legacy")},
- {S3Config::Keys::kMaxConnections, std::make_pair("connection.maximum",
"15")},
- {S3Config::Keys::kSocketTimeout, std::make_pair("connection.timeout",
"200s")},
- {S3Config::Keys::kConnectTimeout,
std::make_pair("connection.establish.timeout", "30s")},
- {S3Config::Keys::kUseInstanceCredentials,
std::make_pair("instance.credentials", "false")},
- {S3Config::Keys::kIamRole, std::make_pair("iam.role", std::nullopt)},
- {S3Config::Keys::kIamRoleSessionName,
std::make_pair("iam.role.session.name", "gluten-session")},
- {S3Config::Keys::kEndpointRegion, std::make_pair("endpoint.region",
std::nullopt)},
- };
-
- // get Velox S3 config key from Spark Suffix.
- auto getVeloxKey = [&](std::string_view suffix) {
- for (const auto& [key, value] : sparkSuffixes) {
- if (value.first == suffix) {
- return std::optional<S3Config::Keys>(key);
+ if (contains(fsType, FileSystemType::kS3)) {
Review Comment:
Shall we also add an `else` clause here to guard against corner cases?
##########
cpp/velox/utils/ConfigExtractor.h:
##########
@@ -28,12 +28,21 @@
namespace gluten {
+enum class FileSystemType : uint8_t {
+ kHdfs = 1 << 0,
+ kS3 = 1 << 1,
+ kAbfs = 1 << 2,
+ kGcs = 1 << 3,
+ kAll = (1 << 4) - 1
Review Comment:
The code seems a bit complicated. How about using 0, 1, 2, 3 and
`std::numeric_limits<uint8_t>::max()` for kAll?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]