This is an automated email from the ASF dual-hosted git repository.
philo pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git
The following commit(s) were added to refs/heads/main by this push:
new 9d2a13bff [VL] Move velox related configs to VeloxConfig.h (#5743)
9d2a13bff is described below
commit 9d2a13bffb4292f17714bfbba96638aeadb91062
Author: Yang Zhang <[email protected]>
AuthorDate: Thu May 16 11:06:18 2024 +0800
[VL] Move velox related configs to VeloxConfig.h (#5743)
---
cpp/core/config/GlutenConfig.h | 7 -
cpp/velox/compute/VeloxBackend.cc | 182 ++++++++------------------
cpp/velox/compute/VeloxBackend.h | 12 +-
cpp/velox/compute/VeloxRuntime.cc | 4 +-
cpp/velox/compute/WholeStageResultIterator.cc | 47 +------
cpp/velox/config/VeloxConfig.h | 127 ++++++++++++++++++
6 files changed, 192 insertions(+), 187 deletions(-)
diff --git a/cpp/core/config/GlutenConfig.h b/cpp/core/config/GlutenConfig.h
index 3c47fb547..16a18f6be 100644
--- a/cpp/core/config/GlutenConfig.h
+++ b/cpp/core/config/GlutenConfig.h
@@ -61,13 +61,6 @@ const std::string kShuffleCompressionCodecBackend =
"spark.gluten.sql.columnar.s
const std::string kQatBackendName = "qat";
const std::string kIaaBackendName = "iaa";
-// Velox conf
-const std::string kGlogVerboseLevel =
"spark.gluten.sql.columnar.backend.velox.glogVerboseLevel";
-const uint32_t kGlogVerboseLevelDefault = 0;
-const uint32_t kGlogVerboseLevelMaximum = 99;
-const std::string kGlogSeverityLevel =
"spark.gluten.sql.columnar.backend.velox.glogSeverityLevel";
-const uint32_t kGlogSeverityLevelDefault = 1;
-
std::unordered_map<std::string, std::string>
parseConfMap(JNIEnv* env, const uint8_t* planData, const int32_t
planDataLength);
diff --git a/cpp/velox/compute/VeloxBackend.cc
b/cpp/velox/compute/VeloxBackend.cc
index 044c8aa0e..b2fb1c964 100644
--- a/cpp/velox/compute/VeloxBackend.cc
+++ b/cpp/velox/compute/VeloxBackend.cc
@@ -24,8 +24,6 @@
#include "operators/plannodes/RowVectorStream.h"
#include "utils/ConfigExtractor.h"
-#include "shuffle/VeloxShuffleReader.h"
-
#ifdef GLUTEN_ENABLE_QAT
#include "utils/qat/QatCodec.h"
#endif
@@ -33,7 +31,7 @@
#include "utils/qpl/qpl_codec.h"
#endif
#include "compute/VeloxRuntime.h"
-#include "config/GlutenConfig.h"
+#include "config/VeloxConfig.h"
#include "jni/JniFileSystem.h"
#include "operators/functions/SparkTokenizer.h"
#include "udf/UdfLoader.h"
@@ -54,71 +52,6 @@ DEFINE_int32(gluten_velox_aysnc_timeout_on_task_stopping,
30000, "Aysnc timout w
using namespace facebook;
-namespace {
-
-const std::string kEnableUserExceptionStacktrace =
- "spark.gluten.sql.columnar.backend.velox.enableUserExceptionStacktrace";
-const bool kEnableUserExceptionStacktraceDefault = true;
-
-const std::string kEnableSystemExceptionStacktrace =
- "spark.gluten.sql.columnar.backend.velox.enableSystemExceptionStacktrace";
-const bool kEnableSystemExceptionStacktraceDefault = true;
-
-const std::string kMemoryUseHugePages =
"spark.gluten.sql.columnar.backend.velox.memoryUseHugePages";
-const bool kMemoryUseHugePagesDefault = false;
-
-const std::string kHiveConnectorId = "test-hive";
-const std::string kVeloxCacheEnabled =
"spark.gluten.sql.columnar.backend.velox.cacheEnabled";
-
-// memory cache
-const std::string kVeloxMemCacheSize =
"spark.gluten.sql.columnar.backend.velox.memCacheSize";
-const uint64_t kVeloxMemCacheSizeDefault = 1073741824; // 1G
-
-// ssd cache
-const std::string kVeloxSsdCacheSize =
"spark.gluten.sql.columnar.backend.velox.ssdCacheSize";
-const uint64_t kVeloxSsdCacheSizeDefault = 1073741824; // 1G
-const std::string kVeloxSsdCachePath =
"spark.gluten.sql.columnar.backend.velox.ssdCachePath";
-const std::string kVeloxSsdCachePathDefault = "/tmp/";
-const std::string kVeloxSsdCacheShards =
"spark.gluten.sql.columnar.backend.velox.ssdCacheShards";
-const uint32_t kVeloxSsdCacheShardsDefault = 1;
-const std::string kVeloxSsdCacheIOThreads =
"spark.gluten.sql.columnar.backend.velox.ssdCacheIOThreads";
-const uint32_t kVeloxSsdCacheIOThreadsDefault = 1;
-const std::string kVeloxSsdODirectEnabled =
"spark.gluten.sql.columnar.backend.velox.ssdODirect";
-
-// async
-const std::string kVeloxIOThreads =
"spark.gluten.sql.columnar.backend.velox.IOThreads";
-const uint32_t kVeloxIOThreadsDefault = 0;
-const std::string kVeloxAsyncTimeoutOnTaskStopping =
- "spark.gluten.sql.columnar.backend.velox.asyncTimeoutOnTaskStopping";
-const int32_t kVeloxAsyncTimeoutOnTaskStoppingDefault = 30000; // 30s
-
-// udf
-const std::string kVeloxUdfLibraryPaths =
"spark.gluten.sql.columnar.backend.velox.udfLibraryPaths";
-
-// spill
-const std::string kMaxSpillFileSize =
"spark.gluten.sql.columnar.backend.velox.maxSpillFileSize";
-const uint64_t kMaxSpillFileSizeDefault = 1L * 1024 * 1024 * 1024;
-
-// backtrace allocation
-const std::string kBacktraceAllocation = "spark.gluten.backtrace.allocation";
-
-// VeloxShuffleReader print flag.
-const std::string kVeloxShuffleReaderPrintFlag =
"spark.gluten.velox.shuffleReaderPrintFlag";
-
-const std::string kVeloxFileHandleCacheEnabled =
"spark.gluten.sql.columnar.backend.velox.fileHandleCacheEnabled";
-const bool kVeloxFileHandleCacheEnabledDefault = false;
-
-/* configs for file read in velox*/
-const std::string kDirectorySizeGuess =
"spark.gluten.sql.columnar.backend.velox.directorySizeGuess";
-const std::string kFilePreloadThreshold =
"spark.gluten.sql.columnar.backend.velox.filePreloadThreshold";
-const std::string kPrefetchRowGroups =
"spark.gluten.sql.columnar.backend.velox.prefetchRowGroups";
-const std::string kLoadQuantum =
"spark.gluten.sql.columnar.backend.velox.loadQuantum";
-const std::string kMaxCoalescedDistanceBytes =
"spark.gluten.sql.columnar.backend.velox.maxCoalescedDistanceBytes";
-const std::string kMaxCoalescedBytes =
"spark.gluten.sql.columnar.backend.velox.maxCoalescedBytes";
-const std::string kCachePrefetchMinPct =
"spark.gluten.sql.columnar.backend.velox.cachePrefetchMinPct";
-
-} // namespace
-
namespace gluten {
namespace {
@@ -128,25 +61,22 @@ gluten::Runtime* veloxRuntimeFactory(const
std::unordered_map<std::string, std::
} // namespace
void VeloxBackend::init(const std::unordered_map<std::string, std::string>&
conf) {
- backendConf_ = conf;
+ backendConf_ =
std::make_shared<facebook::velox::core::MemConfigMutable>(conf);
// Register Velox runtime factory
gluten::Runtime::registerFactory(gluten::kVeloxRuntimeKind,
veloxRuntimeFactory);
- std::shared_ptr<const facebook::velox::Config> veloxcfg =
- std::make_shared<facebook::velox::core::MemConfigMutable>(conf);
-
- if (veloxcfg->get<bool>(kDebugModeEnabled, false)) {
- LOG(INFO) << "VeloxBackend config:" << printConfig(veloxcfg->valuesCopy());
+ if (backendConf_->get<bool>(kDebugModeEnabled, false)) {
+ LOG(INFO) << "VeloxBackend config:" <<
printConfig(backendConf_->valuesCopy());
}
// Init glog and log level.
- if (!veloxcfg->get<bool>(kDebugModeEnabled, false)) {
- FLAGS_v = veloxcfg->get<uint32_t>(kGlogVerboseLevel,
kGlogVerboseLevelDefault);
- FLAGS_minloglevel = veloxcfg->get<uint32_t>(kGlogSeverityLevel,
kGlogSeverityLevelDefault);
+ if (!backendConf_->get<bool>(kDebugModeEnabled, false)) {
+ FLAGS_v = backendConf_->get<uint32_t>(kGlogVerboseLevel,
kGlogVerboseLevelDefault);
+ FLAGS_minloglevel = backendConf_->get<uint32_t>(kGlogSeverityLevel,
kGlogSeverityLevelDefault);
} else {
- if (veloxcfg->isValueExists(kGlogVerboseLevel)) {
- FLAGS_v = veloxcfg->get<uint32_t>(kGlogVerboseLevel,
kGlogVerboseLevelDefault);
+ if (backendConf_->isValueExists(kGlogVerboseLevel)) {
+ FLAGS_v = backendConf_->get<uint32_t>(kGlogVerboseLevel,
kGlogVerboseLevelDefault);
} else {
FLAGS_v = kGlogVerboseLevelMaximum;
}
@@ -159,27 +89,27 @@ void VeloxBackend::init(const
std::unordered_map<std::string, std::string>& conf
// Set velox_exception_user_stacktrace_enabled.
FLAGS_velox_exception_user_stacktrace_enabled =
- veloxcfg->get<bool>(kEnableUserExceptionStacktrace,
kEnableUserExceptionStacktraceDefault);
+ backendConf_->get<bool>(kEnableUserExceptionStacktrace,
kEnableUserExceptionStacktraceDefault);
// Set velox_exception_system_stacktrace_enabled.
FLAGS_velox_exception_system_stacktrace_enabled =
- veloxcfg->get<bool>(kEnableSystemExceptionStacktrace,
kEnableSystemExceptionStacktraceDefault);
+ backendConf_->get<bool>(kEnableSystemExceptionStacktrace,
kEnableSystemExceptionStacktraceDefault);
// Set velox_memory_use_hugepages.
- FLAGS_velox_memory_use_hugepages = veloxcfg->get<bool>(kMemoryUseHugePages,
kMemoryUseHugePagesDefault);
+ FLAGS_velox_memory_use_hugepages =
backendConf_->get<bool>(kMemoryUseHugePages, kMemoryUseHugePagesDefault);
// Async timeout.
FLAGS_gluten_velox_aysnc_timeout_on_task_stopping =
- veloxcfg->get<int32_t>(kVeloxAsyncTimeoutOnTaskStopping,
kVeloxAsyncTimeoutOnTaskStoppingDefault);
+ backendConf_->get<int32_t>(kVeloxAsyncTimeoutOnTaskStopping,
kVeloxAsyncTimeoutOnTaskStoppingDefault);
// Set backtrace_allocation
- gluten::backtrace_allocation = veloxcfg->get<bool>(kBacktraceAllocation,
false);
+ gluten::backtrace_allocation = backendConf_->get<bool>(kBacktraceAllocation,
false);
// Setup and register.
velox::filesystems::registerLocalFileSystem();
- initJolFilesystem(veloxcfg);
- initCache(veloxcfg);
- initConnector(veloxcfg);
+ initJolFilesystem();
+ initCache();
+ initConnector();
// Register Velox functions
registerAllFunctions();
@@ -189,7 +119,7 @@ void VeloxBackend::init(const
std::unordered_map<std::string, std::string>& conf
}
velox::exec::Operator::registerOperator(std::make_unique<RowVectorStreamOperatorTranslator>());
- initUdf(veloxcfg);
+ initUdf();
registerSparkTokenizer();
// initialize the global memory manager for current process
@@ -201,8 +131,8 @@ facebook::velox::cache::AsyncDataCache*
VeloxBackend::getAsyncDataCache() const
}
// JNI-or-local filesystem, for spilling-to-heap if we have extra JVM heap
spaces
-void VeloxBackend::initJolFilesystem(const std::shared_ptr<const
facebook::velox::Config>& conf) {
- int64_t maxSpillFileSize = conf->get<int64_t>(kMaxSpillFileSize,
kMaxSpillFileSizeDefault);
+void VeloxBackend::initJolFilesystem() {
+ int64_t maxSpillFileSize = backendConf_->get<int64_t>(kMaxSpillFileSize,
kMaxSpillFileSizeDefault);
// FIXME It's known that if spill compression is disabled, the actual spill
file size may
// in crease beyond this limit a little (maximum 64 rows which is by
default
@@ -210,18 +140,17 @@ void VeloxBackend::initJolFilesystem(const
std::shared_ptr<const facebook::velox
gluten::registerJolFileSystem(maxSpillFileSize);
}
-void VeloxBackend::initCache(const std::shared_ptr<const
facebook::velox::Config>& conf) {
- bool veloxCacheEnabled = conf->get<bool>(kVeloxCacheEnabled, false);
- if (veloxCacheEnabled) {
+void VeloxBackend::initCache() {
+ if (backendConf_->get<bool>(kVeloxCacheEnabled, false)) {
FLAGS_ssd_odirect = true;
- FLAGS_ssd_odirect = conf->get<bool>(kVeloxSsdODirectEnabled, false);
+ FLAGS_ssd_odirect = backendConf_->get<bool>(kVeloxSsdODirectEnabled,
false);
- uint64_t memCacheSize = conf->get<uint64_t>(kVeloxMemCacheSize,
kVeloxMemCacheSizeDefault);
- uint64_t ssdCacheSize = conf->get<uint64_t>(kVeloxSsdCacheSize,
kVeloxSsdCacheSizeDefault);
- int32_t ssdCacheShards = conf->get<int32_t>(kVeloxSsdCacheShards,
kVeloxSsdCacheShardsDefault);
- int32_t ssdCacheIOThreads = conf->get<int32_t>(kVeloxSsdCacheIOThreads,
kVeloxSsdCacheIOThreadsDefault);
- std::string ssdCachePathPrefix =
conf->get<std::string>(kVeloxSsdCachePath, kVeloxSsdCachePathDefault);
+ uint64_t memCacheSize = backendConf_->get<uint64_t>(kVeloxMemCacheSize,
kVeloxMemCacheSizeDefault);
+ uint64_t ssdCacheSize = backendConf_->get<uint64_t>(kVeloxSsdCacheSize,
kVeloxSsdCacheSizeDefault);
+ int32_t ssdCacheShards = backendConf_->get<int32_t>(kVeloxSsdCacheShards,
kVeloxSsdCacheShardsDefault);
+ int32_t ssdCacheIOThreads =
backendConf_->get<int32_t>(kVeloxSsdCacheIOThreads,
kVeloxSsdCacheIOThreadsDefault);
+ std::string ssdCachePathPrefix =
backendConf_->get<std::string>(kVeloxSsdCachePath, kVeloxSsdCachePathDefault);
cachePathPrefix_ = ssdCachePathPrefix;
cacheFilePrefix_ = getCacheFilePrefix();
@@ -257,63 +186,64 @@ void VeloxBackend::initCache(const std::shared_ptr<const
facebook::velox::Config
}
}
-void VeloxBackend::initConnector(const std::shared_ptr<const
facebook::velox::Config>& conf) {
+void VeloxBackend::initConnector() {
// The configs below are used at process level.
- auto mutableConf =
std::make_shared<facebook::velox::core::MemConfigMutable>(conf->valuesCopy());
+ auto connectorConf =
std::make_shared<facebook::velox::core::MemConfigMutable>(backendConf_->valuesCopy());
- auto hiveConf = getHiveConfig(conf);
+ auto hiveConf = getHiveConfig(backendConf_);
for (auto& [k, v] : hiveConf->valuesCopy()) {
- mutableConf->setValue(k, v);
+ connectorConf->setValue(k, v);
}
#ifdef ENABLE_ABFS
- const auto& confValue = conf->valuesCopy();
+ const auto& confValue = backendConf_->valuesCopy();
for (auto& [k, v] : confValue) {
if (k.find("fs.azure.account.key") == 0) {
- mutableConf->setValue(k, v);
+ connectorConf->setValue(k, v);
} else if (k.find("spark.hadoop.fs.azure.account.key") == 0) {
constexpr int32_t accountKeyPrefixLength = 13;
- mutableConf->setValue(k.substr(accountKeyPrefixLength), v);
+ connectorConf->setValue(k.substr(accountKeyPrefixLength), v);
}
}
#endif
- mutableConf->setValue(
+ connectorConf->setValue(
velox::connector::hive::HiveConfig::kEnableFileHandleCache,
- conf->get<bool>(kVeloxFileHandleCacheEnabled,
kVeloxFileHandleCacheEnabledDefault) ? "true" : "false");
+ backendConf_->get<bool>(kVeloxFileHandleCacheEnabled,
kVeloxFileHandleCacheEnabledDefault) ? "true" : "false");
- mutableConf->setValue(
+ connectorConf->setValue(
velox::connector::hive::HiveConfig::kMaxCoalescedBytes,
- conf->get<std::string>(kMaxCoalescedBytes, "67108864")); // 64M
- mutableConf->setValue(
+ backendConf_->get<std::string>(kMaxCoalescedBytes, "67108864")); // 64M
+ connectorConf->setValue(
velox::connector::hive::HiveConfig::kMaxCoalescedDistanceBytes,
- conf->get<std::string>(kMaxCoalescedDistanceBytes, "1048576")); // 1M
- mutableConf->setValue(
- velox::connector::hive::HiveConfig::kPrefetchRowGroups,
conf->get<std::string>(kPrefetchRowGroups, "1"));
- mutableConf->setValue(
- velox::connector::hive::HiveConfig::kLoadQuantum,
conf->get<std::string>(kLoadQuantum, "268435456")); // 256M
- mutableConf->setValue(
+ backendConf_->get<std::string>(kMaxCoalescedDistanceBytes, "1048576"));
// 1M
+ connectorConf->setValue(
+ velox::connector::hive::HiveConfig::kPrefetchRowGroups,
backendConf_->get<std::string>(kPrefetchRowGroups, "1"));
+ connectorConf->setValue(
+ velox::connector::hive::HiveConfig::kLoadQuantum,
+ backendConf_->get<std::string>(kLoadQuantum, "268435456")); // 256M
+ connectorConf->setValue(
velox::connector::hive::HiveConfig::kFooterEstimatedSize,
- conf->get<std::string>(kDirectorySizeGuess, "32768")); // 32K
- mutableConf->setValue(
+ backendConf_->get<std::string>(kDirectorySizeGuess, "32768")); // 32K
+ connectorConf->setValue(
velox::connector::hive::HiveConfig::kFilePreloadThreshold,
- conf->get<std::string>(kFilePreloadThreshold, "1048576")); // 1M
+ backendConf_->get<std::string>(kFilePreloadThreshold, "1048576")); // 1M
// set cache_prefetch_min_pct default as 0 to force all loads are prefetched
in DirectBufferInput.
- FLAGS_cache_prefetch_min_pct = conf->get<int>(kCachePrefetchMinPct, 0);
+ FLAGS_cache_prefetch_min_pct = backendConf_->get<int>(kCachePrefetchMinPct,
0);
- auto ioThreads = conf->get<int32_t>(kVeloxIOThreads, kVeloxIOThreadsDefault);
+ auto ioThreads = backendConf_->get<int32_t>(kVeloxIOThreads,
kVeloxIOThreadsDefault);
if (ioThreads > 0) {
ioExecutor_ = std::make_unique<folly::IOThreadPoolExecutor>(ioThreads);
}
velox::connector::registerConnector(std::make_shared<velox::connector::hive::HiveConnector>(
kHiveConnectorId,
-
std::make_shared<facebook::velox::core::MemConfig>(mutableConf->valuesCopy()),
+
std::make_shared<facebook::velox::core::MemConfig>(connectorConf->valuesCopy()),
ioExecutor_.get()));
}
-void VeloxBackend::initUdf(const std::shared_ptr<const
facebook::velox::Config>& conf) {
- auto got = conf->get<std::string>(kVeloxUdfLibraryPaths, "");
+void VeloxBackend::initUdf() {
+ auto got = backendConf_->get<std::string>(kVeloxUdfLibraryPaths, "");
if (!got.empty()) {
auto udfLoader = gluten::UdfLoader::getInstance();
udfLoader->loadUdfLibraries(got);
@@ -335,7 +265,7 @@ VeloxBackend* VeloxBackend::get() {
return instance_.get();
}
-const std::unordered_map<std::string, std::string>&
VeloxBackend::getBackendConf() const {
+const std::shared_ptr<const facebook::velox::Config>
VeloxBackend::getBackendConf() const {
return backendConf_;
}
diff --git a/cpp/velox/compute/VeloxBackend.h b/cpp/velox/compute/VeloxBackend.h
index a601d715c..891bdd2cc 100644
--- a/cpp/velox/compute/VeloxBackend.h
+++ b/cpp/velox/compute/VeloxBackend.h
@@ -53,7 +53,7 @@ class VeloxBackend {
facebook::velox::cache::AsyncDataCache* getAsyncDataCache() const;
- const std::unordered_map<std::string, std::string>& getBackendConf() const;
+ const std::shared_ptr<const facebook::velox::Config> getBackendConf() const;
void tearDown() {
// Destruct IOThreadPoolExecutor will join all threads.
@@ -68,11 +68,11 @@ class VeloxBackend {
}
void init(const std::unordered_map<std::string, std::string>& conf);
- void initCache(const std::shared_ptr<const facebook::velox::Config>& conf);
- void initConnector(const std::shared_ptr<const facebook::velox::Config>&
conf);
- void initUdf(const std::shared_ptr<const facebook::velox::Config>& conf);
+ void initCache();
+ void initConnector();
+ void initUdf();
- void initJolFilesystem(const std::shared_ptr<const facebook::velox::Config>&
conf);
+ void initJolFilesystem();
std::string getCacheFilePrefix() {
return "cache." +
boost::lexical_cast<std::string>(boost::uuids::random_generator()()) + ".";
@@ -90,7 +90,7 @@ class VeloxBackend {
std::string cachePathPrefix_;
std::string cacheFilePrefix_;
- std::unordered_map<std::string, std::string> backendConf_{};
+ std::shared_ptr<const facebook::velox::Config> backendConf_;
};
} // namespace gluten
diff --git a/cpp/velox/compute/VeloxRuntime.cc
b/cpp/velox/compute/VeloxRuntime.cc
index 8314d0bd2..a3e8c159c 100644
--- a/cpp/velox/compute/VeloxRuntime.cc
+++ b/cpp/velox/compute/VeloxRuntime.cc
@@ -26,7 +26,7 @@
#include "compute/ResultIterator.h"
#include "compute/Runtime.h"
#include "compute/VeloxPlanConverter.h"
-#include "config/GlutenConfig.h"
+#include "config/VeloxConfig.h"
#include "operators/serializer/VeloxRowToColumnarConverter.h"
#include "shuffle/VeloxShuffleReader.h"
#include "shuffle/VeloxShuffleWriter.h"
@@ -256,7 +256,7 @@ std::unique_ptr<ColumnarBatchSerializer>
VeloxRuntime::createColumnarBatchSerial
}
void VeloxRuntime::dumpConf(const std::string& path) {
- auto backendConf = VeloxBackend::get()->getBackendConf();
+ auto backendConf = VeloxBackend::get()->getBackendConf()->valuesCopy();
auto allConf = backendConf;
for (const auto& pair : confMap_) {
diff --git a/cpp/velox/compute/WholeStageResultIterator.cc
b/cpp/velox/compute/WholeStageResultIterator.cc
index 83749061c..006b37588 100644
--- a/cpp/velox/compute/WholeStageResultIterator.cc
+++ b/cpp/velox/compute/WholeStageResultIterator.cc
@@ -17,13 +17,11 @@
#include "WholeStageResultIterator.h"
#include "VeloxBackend.h"
#include "VeloxRuntime.h"
-#include "config/GlutenConfig.h"
+#include "config/VeloxConfig.h"
#include "velox/connectors/hive/HiveConfig.h"
#include "velox/connectors/hive/HiveConnectorSplit.h"
#include "velox/exec/PlanNodeStats.h"
-#include "utils/ConfigExtractor.h"
-
#ifdef ENABLE_HDFS
#include "utils/HdfsUtils.h"
#endif
@@ -33,49 +31,6 @@ using namespace facebook;
namespace gluten {
namespace {
-// Velox configs
-const std::string kHiveConnectorId = "test-hive";
-
-// memory
-const std::string kSpillStrategy =
"spark.gluten.sql.columnar.backend.velox.spillStrategy";
-const std::string kSpillStrategyDefaultValue = "auto";
-const std::string kSpillThreadNum =
"spark.gluten.sql.columnar.backend.velox.spillThreadNum";
-const uint32_t kSpillThreadNumDefaultValue = 0;
-const std::string kAggregationSpillEnabled =
"spark.gluten.sql.columnar.backend.velox.aggregationSpillEnabled";
-const std::string kJoinSpillEnabled =
"spark.gluten.sql.columnar.backend.velox.joinSpillEnabled";
-const std::string kOrderBySpillEnabled =
"spark.gluten.sql.columnar.backend.velox.orderBySpillEnabled";
-
-// spill config
-// refer to
-//
https://github.com/facebookincubator/velox/blob/95f3e80e77d046c12fbc79dc529366be402e9c2b/velox/docs/configs.rst#spilling
-const std::string kMaxSpillLevel =
"spark.gluten.sql.columnar.backend.velox.maxSpillLevel";
-const std::string kMaxSpillFileSize =
"spark.gluten.sql.columnar.backend.velox.maxSpillFileSize";
-const std::string kSpillStartPartitionBit =
"spark.gluten.sql.columnar.backend.velox.spillStartPartitionBit";
-const std::string kSpillPartitionBits =
"spark.gluten.sql.columnar.backend.velox.spillPartitionBits";
-const std::string kMaxSpillRunRows =
"spark.gluten.sql.columnar.backend.velox.MaxSpillRunRows";
-const std::string kMaxSpillBytes =
"spark.gluten.sql.columnar.backend.velox.MaxSpillBytes";
-const std::string kSpillWriteBufferSize =
"spark.gluten.sql.columnar.backend.velox.spillWriteBufferSize";
-
-const std::string kSpillableReservationGrowthPct =
- "spark.gluten.sql.columnar.backend.velox.spillableReservationGrowthPct";
-const std::string kSpillCompressionKind = "spark.io.compression.codec";
-const std::string kMaxPartialAggregationMemoryRatio =
- "spark.gluten.sql.columnar.backend.velox.maxPartialAggregationMemoryRatio";
-const std::string kMaxExtendedPartialAggregationMemoryRatio =
-
"spark.gluten.sql.columnar.backend.velox.maxExtendedPartialAggregationMemoryRatio";
-const std::string kAbandonPartialAggregationMinPct =
- "spark.gluten.sql.columnar.backend.velox.abandonPartialAggregationMinPct";
-const std::string kAbandonPartialAggregationMinRows =
- "spark.gluten.sql.columnar.backend.velox.abandonPartialAggregationMinRows";
-
-// execution
-const std::string kBloomFilterExpectedNumItems =
"spark.gluten.sql.columnar.backend.velox.bloomFilter.expectedNumItems";
-const std::string kBloomFilterNumBits =
"spark.gluten.sql.columnar.backend.velox.bloomFilter.numBits";
-const std::string kBloomFilterMaxNumBits =
"spark.gluten.sql.columnar.backend.velox.bloomFilter.maxNumBits";
-const std::string kVeloxSplitPreloadPerDriver =
"spark.gluten.sql.columnar.backend.velox.SplitPreloadPerDriver";
-
-// write fies
-const std::string kMaxPartitions =
"spark.gluten.sql.columnar.backend.velox.maxPartitionsPerWritersSession";
// metrics
const std::string kDynamicFiltersProduced = "dynamicFiltersProduced";
diff --git a/cpp/velox/config/VeloxConfig.h b/cpp/velox/config/VeloxConfig.h
new file mode 100644
index 000000000..a3112f83e
--- /dev/null
+++ b/cpp/velox/config/VeloxConfig.h
@@ -0,0 +1,127 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include "config/GlutenConfig.h"
+
+namespace gluten {
+// memory
+const std::string kSpillStrategy =
"spark.gluten.sql.columnar.backend.velox.spillStrategy";
+const std::string kSpillStrategyDefaultValue = "auto";
+const std::string kSpillThreadNum =
"spark.gluten.sql.columnar.backend.velox.spillThreadNum";
+const uint32_t kSpillThreadNumDefaultValue = 0;
+const std::string kAggregationSpillEnabled =
"spark.gluten.sql.columnar.backend.velox.aggregationSpillEnabled";
+const std::string kJoinSpillEnabled =
"spark.gluten.sql.columnar.backend.velox.joinSpillEnabled";
+const std::string kOrderBySpillEnabled =
"spark.gluten.sql.columnar.backend.velox.orderBySpillEnabled";
+
+// spill config
+// refer to
+//
https://github.com/facebookincubator/velox/blob/95f3e80e77d046c12fbc79dc529366be402e9c2b/velox/docs/configs.rst#spilling
+const std::string kMaxSpillLevel =
"spark.gluten.sql.columnar.backend.velox.maxSpillLevel";
+const std::string kMaxSpillFileSize =
"spark.gluten.sql.columnar.backend.velox.maxSpillFileSize";
+const std::string kSpillStartPartitionBit =
"spark.gluten.sql.columnar.backend.velox.spillStartPartitionBit";
+const std::string kSpillPartitionBits =
"spark.gluten.sql.columnar.backend.velox.spillPartitionBits";
+const std::string kMaxSpillRunRows =
"spark.gluten.sql.columnar.backend.velox.MaxSpillRunRows";
+const std::string kMaxSpillBytes =
"spark.gluten.sql.columnar.backend.velox.MaxSpillBytes";
+const std::string kSpillWriteBufferSize =
"spark.gluten.sql.columnar.backend.velox.spillWriteBufferSize";
+const uint64_t kMaxSpillFileSizeDefault = 1L * 1024 * 1024 * 1024;
+
+const std::string kSpillableReservationGrowthPct =
+ "spark.gluten.sql.columnar.backend.velox.spillableReservationGrowthPct";
+const std::string kSpillCompressionKind = "spark.io.compression.codec";
+const std::string kMaxPartialAggregationMemoryRatio =
+ "spark.gluten.sql.columnar.backend.velox.maxPartialAggregationMemoryRatio";
+const std::string kMaxExtendedPartialAggregationMemoryRatio =
+
"spark.gluten.sql.columnar.backend.velox.maxExtendedPartialAggregationMemoryRatio";
+const std::string kAbandonPartialAggregationMinPct =
+ "spark.gluten.sql.columnar.backend.velox.abandonPartialAggregationMinPct";
+const std::string kAbandonPartialAggregationMinRows =
+ "spark.gluten.sql.columnar.backend.velox.abandonPartialAggregationMinRows";
+
+// execution
+const std::string kBloomFilterExpectedNumItems =
"spark.gluten.sql.columnar.backend.velox.bloomFilter.expectedNumItems";
+const std::string kBloomFilterNumBits =
"spark.gluten.sql.columnar.backend.velox.bloomFilter.numBits";
+const std::string kBloomFilterMaxNumBits =
"spark.gluten.sql.columnar.backend.velox.bloomFilter.maxNumBits";
+const std::string kVeloxSplitPreloadPerDriver =
"spark.gluten.sql.columnar.backend.velox.SplitPreloadPerDriver";
+
+const std::string kEnableUserExceptionStacktrace =
+ "spark.gluten.sql.columnar.backend.velox.enableUserExceptionStacktrace";
+const bool kEnableUserExceptionStacktraceDefault = true;
+
+const std::string kEnableSystemExceptionStacktrace =
+ "spark.gluten.sql.columnar.backend.velox.enableSystemExceptionStacktrace";
+const bool kEnableSystemExceptionStacktraceDefault = true;
+
+const std::string kMemoryUseHugePages =
"spark.gluten.sql.columnar.backend.velox.memoryUseHugePages";
+const bool kMemoryUseHugePagesDefault = false;
+
+const std::string kHiveConnectorId = "test-hive";
+const std::string kVeloxCacheEnabled =
"spark.gluten.sql.columnar.backend.velox.cacheEnabled";
+
+// memory cache
+const std::string kVeloxMemCacheSize =
"spark.gluten.sql.columnar.backend.velox.memCacheSize";
+const uint64_t kVeloxMemCacheSizeDefault = 1073741824; // 1G
+
+// ssd cache
+const std::string kVeloxSsdCacheSize =
"spark.gluten.sql.columnar.backend.velox.ssdCacheSize";
+const uint64_t kVeloxSsdCacheSizeDefault = 1073741824; // 1G
+const std::string kVeloxSsdCachePath =
"spark.gluten.sql.columnar.backend.velox.ssdCachePath";
+const std::string kVeloxSsdCachePathDefault = "/tmp/";
+const std::string kVeloxSsdCacheShards =
"spark.gluten.sql.columnar.backend.velox.ssdCacheShards";
+const uint32_t kVeloxSsdCacheShardsDefault = 1;
+const std::string kVeloxSsdCacheIOThreads =
"spark.gluten.sql.columnar.backend.velox.ssdCacheIOThreads";
+const uint32_t kVeloxSsdCacheIOThreadsDefault = 1;
+const std::string kVeloxSsdODirectEnabled =
"spark.gluten.sql.columnar.backend.velox.ssdODirect";
+
+// async
+const std::string kVeloxIOThreads =
"spark.gluten.sql.columnar.backend.velox.IOThreads";
+const uint32_t kVeloxIOThreadsDefault = 0;
+const std::string kVeloxAsyncTimeoutOnTaskStopping =
+ "spark.gluten.sql.columnar.backend.velox.asyncTimeoutOnTaskStopping";
+const int32_t kVeloxAsyncTimeoutOnTaskStoppingDefault = 30000; // 30s
+
+// udf
+const std::string kVeloxUdfLibraryPaths =
"spark.gluten.sql.columnar.backend.velox.udfLibraryPaths";
+
+// backtrace allocation
+const std::string kBacktraceAllocation = "spark.gluten.backtrace.allocation";
+
+// VeloxShuffleReader print flag.
+const std::string kVeloxShuffleReaderPrintFlag =
"spark.gluten.velox.shuffleReaderPrintFlag";
+
+const std::string kVeloxFileHandleCacheEnabled =
"spark.gluten.sql.columnar.backend.velox.fileHandleCacheEnabled";
+const bool kVeloxFileHandleCacheEnabledDefault = false;
+
+/* configs for file read in velox*/
+const std::string kDirectorySizeGuess =
"spark.gluten.sql.columnar.backend.velox.directorySizeGuess";
+const std::string kFilePreloadThreshold =
"spark.gluten.sql.columnar.backend.velox.filePreloadThreshold";
+const std::string kPrefetchRowGroups =
"spark.gluten.sql.columnar.backend.velox.prefetchRowGroups";
+const std::string kLoadQuantum =
"spark.gluten.sql.columnar.backend.velox.loadQuantum";
+const std::string kMaxCoalescedDistanceBytes =
"spark.gluten.sql.columnar.backend.velox.maxCoalescedDistanceBytes";
+const std::string kMaxCoalescedBytes =
"spark.gluten.sql.columnar.backend.velox.maxCoalescedBytes";
+const std::string kCachePrefetchMinPct =
"spark.gluten.sql.columnar.backend.velox.cachePrefetchMinPct";
+
+// write fies
+const std::string kMaxPartitions =
"spark.gluten.sql.columnar.backend.velox.maxPartitionsPerWritersSession";
+
+const std::string kGlogVerboseLevel =
"spark.gluten.sql.columnar.backend.velox.glogVerboseLevel";
+const uint32_t kGlogVerboseLevelDefault = 0;
+const uint32_t kGlogVerboseLevelMaximum = 99;
+const std::string kGlogSeverityLevel =
"spark.gluten.sql.columnar.backend.velox.glogSeverityLevel";
+const uint32_t kGlogSeverityLevelDefault = 1;
+} // namespace gluten
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]