This is an automated email from the ASF dual-hosted git repository.

changchen pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git


The following commit(s) were added to refs/heads/main by this push:
     new c5af284db5 [GLUTEN-1632][CH]Daily Update Clickhouse Version (20240920) (#7299)
c5af284db5 is described below

commit c5af284db5ceeaa16ce62375c2a9045ed9de38e0
Author: Kyligence Git <[email protected]>
AuthorDate: Fri Sep 20 09:40:28 2024 -0500

    [GLUTEN-1632][CH]Daily Update Clickhouse Version (20240920) (#7299)
    
    * [GLUTEN-1632][CH]Daily Update Clickhouse Version (20240920)
    
    * Fix build due to https://github.com/ClickHouse/ClickHouse/pull/69213
    ---------
    
    Co-authored-by: kyligence-git <[email protected]>
    Co-authored-by: Chang Chen <[email protected]>
---
 cpp-ch/clickhouse.version                          |   4 +-
 cpp-ch/local-engine/Common/CHUtil.cpp              |  13 ++-
 cpp-ch/local-engine/Common/CHUtil.h                |   7 +-
 cpp-ch/local-engine/Common/GlutenConfig.h          |   1 +
 cpp-ch/local-engine/Common/GlutenSettings.cpp      |  41 +++++++
 cpp-ch/local-engine/Common/GlutenSettings.h        |  11 +-
 cpp-ch/local-engine/Common/GlutenSignalHandler.cpp |   2 +
 .../Disks/ObjectStorages/GlutenDiskHDFS.h          |   5 +-
 .../registerGlutenDiskObjectStorage.cpp            |   7 +-
 .../Functions/SparkFunctionGetJsonObject.h         |  16 ++-
 cpp-ch/local-engine/Parser/LocalExecutor.cpp       |   6 +-
 .../Parser/RelParsers/AggregateRelParser.cpp       | 123 ++++++++++++---------
 .../Parser/RelParsers/JoinRelParser.cpp            |   8 +-
 .../Parser/RelParsers/MergeTreeRelParser.cpp       |  14 ++-
 .../Parser/RelParsers/ProjectRelParser.cpp         |  11 +-
 .../Parser/RelParsers/ReadRelParser.cpp            |  12 +-
 .../local-engine/Parser/SerializedPlanParser.cpp   |  13 ++-
 cpp-ch/local-engine/Shuffle/PartitionWriter.h      |   6 +-
 .../local-engine/Storages/Cache/CacheManager.cpp   |   6 +-
 .../Storages/MergeTree/MetaDataHelper.cpp          |  11 +-
 .../Storages/MergeTree/SparkMergeTreeWriter.cpp    |   9 +-
 .../SubstraitSource/ExcelTextFormatFile.cpp        |  29 +++--
 .../Storages/SubstraitSource/ExcelTextFormatFile.h |   9 ++
 .../Storages/SubstraitSource/FormatFile.cpp        |   7 +-
 .../Storages/SubstraitSource/ORCFormatFile.cpp     |  17 +--
 .../Storages/SubstraitSource/ReadBufferBuilder.cpp |  37 +++----
 .../Storages/SubstraitSource/TextFormatFile.cpp    |   1 +
 .../tests/gtest_clickhouse_pr_verify.cpp           |   6 +-
 cpp-ch/local-engine/tests/gtest_write_pipeline.cpp |  20 +++-
 29 files changed, 304 insertions(+), 148 deletions(-)

diff --git a/cpp-ch/clickhouse.version b/cpp-ch/clickhouse.version
index 3ce0469938..32d2acdbdf 100644
--- a/cpp-ch/clickhouse.version
+++ b/cpp-ch/clickhouse.version
@@ -1,3 +1,3 @@
 CH_ORG=Kyligence
-CH_BRANCH=rebase_ch/20240918
-CH_COMMIT=cc6de0f1995
\ No newline at end of file
+CH_BRANCH=rebase_ch/20240920
+CH_COMMIT=14c2da664d7
\ No newline at end of file
diff --git a/cpp-ch/local-engine/Common/CHUtil.cpp 
b/cpp-ch/local-engine/Common/CHUtil.cpp
index 5ad0b1b973..fd2030f71c 100644
--- a/cpp-ch/local-engine/Common/CHUtil.cpp
+++ b/cpp-ch/local-engine/Common/CHUtil.cpp
@@ -77,6 +77,11 @@
 
 namespace DB
 {
+namespace Setting
+{
+extern const SettingsUInt64 prefer_external_sort_block_bytes;
+extern const SettingsUInt64 max_bytes_before_external_sort;
+}
 namespace ErrorCodes
 {
 extern const int BAD_ARGUMENTS;
@@ -701,12 +706,12 @@ void 
BackendInitializerUtil::initEnvs(DB::Context::ConfigurationPtr config)
         spark_user = spark_user_c_str;
 }
 
-DB::Field BackendInitializerUtil::toField(const String key, const String value)
+DB::Field BackendInitializerUtil::toField(const String & key, const String & 
value)
 {
     if (BOOL_VALUE_SETTINGS.contains(key))
         return DB::Field(value == "true" || value == "1");
     else if (LONG_VALUE_SETTINGS.contains(key))
-        return DB::Field(std::strtoll(value.c_str(), NULL, 10));
+        return DB::Field(std::strtoll(value.c_str(), nullptr, 10));
     else
         return DB::Field(value);
 }
@@ -797,13 +802,13 @@ void 
BackendInitializerUtil::initSettings(std::map<std::string, std::string> & b
         auto task_memory = 
std::stoull(backend_conf_map.at(GLUTEN_TASK_OFFHEAP));
         if (!backend_conf_map.contains(CH_RUNTIME_SETTINGS_PREFIX + 
"max_bytes_before_external_sort"))
         {
-            settings.max_bytes_before_external_sort = static_cast<size_t>(0.8 
* task_memory);
+            settings[Setting::max_bytes_before_external_sort] = 
static_cast<size_t>(0.8 * task_memory);
         }
         if (!backend_conf_map.contains(CH_RUNTIME_SETTINGS_PREFIX + 
"prefer_external_sort_block_bytes"))
         {
             auto mem_gb = task_memory / static_cast<double>(1_GiB);
             // 2.8x+5, Heuristics calculate the block size of external sort, 
[8,16]
-            settings.prefer_external_sort_block_bytes = 
std::max(std::min(static_cast<size_t>(2.8 * mem_gb + 5), 16ul), 8ul) * 1024 * 
1024;
+            settings[Setting::prefer_external_sort_block_bytes] = 
std::max(std::min(static_cast<size_t>(2.8 * mem_gb + 5), 16ul), 8ul) * 1024 * 
1024;
         }
     }
 }
diff --git a/cpp-ch/local-engine/Common/CHUtil.h 
b/cpp-ch/local-engine/Common/CHUtil.h
index 2e0b7266cd..23e319eb56 100644
--- a/cpp-ch/local-engine/Common/CHUtil.h
+++ b/cpp-ch/local-engine/Common/CHUtil.h
@@ -154,7 +154,7 @@ class JNIUtils;
 class BackendInitializerUtil
 {
 public:
-    static DB::Field toField(const String key, const String value);
+    static DB::Field toField(const String & key, const String & value);
 
     /// Initialize two kinds of resources
     /// 1. global level resources like global_context/shared_context, notice 
that they can only be initialized once in process lifetime
@@ -162,11 +162,6 @@ public:
     static void init(const std::string_view plan);
     static void updateConfig(const DB::ContextMutablePtr &, std::string_view);
 
-    // use excel text parser
-    inline static const std::string USE_EXCEL_PARSER = 
"use_excel_serialization";
-    inline static const std::string EXCEL_EMPTY_AS_NULL = 
"use_excel_serialization.empty_as_null";
-    inline static const std::string EXCEL_NUMBER_FORCE = 
"use_excel_serialization.number_force";
-    inline static const std::string EXCEL_QUOTE_STRICT = 
"use_excel_serialization.quote_strict";
     inline static const String CH_BACKEND_PREFIX = 
"spark.gluten.sql.columnar.backend.ch";
 
     inline static const String CH_RUNTIME_CONFIG = "runtime_config";
diff --git a/cpp-ch/local-engine/Common/GlutenConfig.h 
b/cpp-ch/local-engine/Common/GlutenConfig.h
index feded16e46..ab3a6295ae 100644
--- a/cpp-ch/local-engine/Common/GlutenConfig.h
+++ b/cpp-ch/local-engine/Common/GlutenConfig.h
@@ -20,6 +20,7 @@
 #include <Interpreters/Context.h>
 #include <base/types.h>
 #include <base/unit.h>
+#include <Poco/Util/AbstractConfiguration.h>
 #include <Common/logger_useful.h>
 
 namespace local_engine
diff --git a/cpp-ch/local-engine/Common/GlutenSettings.cpp 
b/cpp-ch/local-engine/Common/GlutenSettings.cpp
new file mode 100644
index 0000000000..85e5658226
--- /dev/null
+++ b/cpp-ch/local-engine/Common/GlutenSettings.cpp
@@ -0,0 +1,41 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#pragma once
+#include "GlutenSettings.h"
+#include <Core/Settings.h>
+
+using namespace DB;
+namespace local_engine
+{
+
+bool tryGetString(const DB::Settings & settings, std::string_view name, 
std::string & value)
+{
+    Field field;
+    if (settings.tryGet(name, field))
+    {
+        value = field.safeGet<String>();
+        return true;
+    }
+    return false;
+}
+bool settingsEqual(const DB::Settings & settings, std::string_view name, const 
std::string & value)
+{
+    if (DB::Field field; settings.tryGet(name, field))
+        return field.safeGet<String>() == value;
+    return false;
+}
+}
\ No newline at end of file
diff --git a/cpp-ch/local-engine/Common/GlutenSettings.h 
b/cpp-ch/local-engine/Common/GlutenSettings.h
index 13c0a1327b..d87ff45041 100644
--- a/cpp-ch/local-engine/Common/GlutenSettings.h
+++ b/cpp-ch/local-engine/Common/GlutenSettings.h
@@ -15,8 +15,13 @@
  * limitations under the License.
  */
 #pragma once
+
 #include <Interpreters/Context_fwd.h>
 
+namespace DB
+{
+struct Settings;
+}
 namespace local_engine
 {
 
@@ -59,5 +64,9 @@ namespace local_engine
         LIST_OF_SETTINGS_MACRO(IMPLEMENT_GLUTEN_SET_, SKIP_ALIAS, _) \
     }
 
+// workaround for tryGetString
+
+bool tryGetString(const DB::Settings & settings, std::string_view name, 
std::string & value);
+bool settingsEqual(const DB::Settings & settings, std::string_view name, const 
std::string & value);
 
-}
\ No newline at end of file
+} // namespace local_engine
diff --git a/cpp-ch/local-engine/Common/GlutenSignalHandler.cpp 
b/cpp-ch/local-engine/Common/GlutenSignalHandler.cpp
index d04c67d71b..44c43fcb65 100644
--- a/cpp-ch/local-engine/Common/GlutenSignalHandler.cpp
+++ b/cpp-ch/local-engine/Common/GlutenSignalHandler.cpp
@@ -26,6 +26,8 @@
 #include <base/phdr_cache.h>
 #include <base/sleep.h>
 #include <Poco/Exception.h>
+#include <Poco/Runnable.h>
+#include <Poco/Thread.h>
 #include <Common/CurrentThread.h>
 #include <Common/GlutenSignalHandler.h>
 #include <Common/MemoryTracker.h>
diff --git a/cpp-ch/local-engine/Disks/ObjectStorages/GlutenDiskHDFS.h 
b/cpp-ch/local-engine/Disks/ObjectStorages/GlutenDiskHDFS.h
index b0f82a340b..68942c6a47 100644
--- a/cpp-ch/local-engine/Disks/ObjectStorages/GlutenDiskHDFS.h
+++ b/cpp-ch/local-engine/Disks/ObjectStorages/GlutenDiskHDFS.h
@@ -19,10 +19,11 @@
 
 #include <config.h>
 
-#include <Common/Throttler.h>
-#include <Disks/ObjectStorages/DiskObjectStorage.h>
 #include <Disks/ObjectStorages/Cached/CachedObjectStorage.h>
+#include <Disks/ObjectStorages/DiskObjectStorage.h>
 #include <Interpreters/Cache/FileCacheFactory.h>
+#include <Common/Throttler.h>
+#include <Common/typeid_cast.h>
 #if USE_HDFS
 #include <Disks/ObjectStorages/GlutenHDFSObjectStorage.h>
 #endif
diff --git 
a/cpp-ch/local-engine/Disks/ObjectStorages/registerGlutenDiskObjectStorage.cpp 
b/cpp-ch/local-engine/Disks/ObjectStorages/registerGlutenDiskObjectStorage.cpp
index 7d4d06a123..b4073d0f39 100644
--- 
a/cpp-ch/local-engine/Disks/ObjectStorages/registerGlutenDiskObjectStorage.cpp
+++ 
b/cpp-ch/local-engine/Disks/ObjectStorages/registerGlutenDiskObjectStorage.cpp
@@ -33,6 +33,11 @@
 
 namespace DB
 {
+namespace Setting
+{
+extern const SettingsUInt64 hdfs_replication;
+}
+
 namespace ErrorCodes
 {
 extern const int BAD_ARGUMENTS;
@@ -120,7 +125,7 @@ void registerGlutenHDFSObjectStorage(ObjectStorageFactory & 
factory)
 
             std::unique_ptr<HDFSObjectStorageSettings> settings = 
std::make_unique<HDFSObjectStorageSettings>(
                 config.getUInt64(config_prefix + ".min_bytes_for_seek", 1024 * 
1024),
-                context->getSettingsRef().hdfs_replication
+                context->getSettingsRef()[Setting::hdfs_replication]
             );
             return std::make_shared<GlutenHDFSObjectStorage>(uri, 
std::move(settings), config);
         });
diff --git a/cpp-ch/local-engine/Functions/SparkFunctionGetJsonObject.h 
b/cpp-ch/local-engine/Functions/SparkFunctionGetJsonObject.h
index dfc1e1e328..125ab0a394 100644
--- a/cpp-ch/local-engine/Functions/SparkFunctionGetJsonObject.h
+++ b/cpp-ch/local-engine/Functions/SparkFunctionGetJsonObject.h
@@ -18,19 +18,17 @@
 #include <memory>
 #include <string_view>
 #include <Columns/ColumnNullable.h>
+#include <Columns/ColumnTuple.h>
 #include <DataTypes/DataTypeNullable.h>
 #include <DataTypes/DataTypeString.h>
 #include <DataTypes/DataTypeTuple.h>
 #include <DataTypes/IDataType.h>
 #include <Functions/FunctionSQLJSON.h>
 #include <Functions/IFunction.h>
-#include <Functions/JSONPath/ASTs/ASTJSONPath.h>
 #include <Functions/JSONPath/Generator/GeneratorJSONPath.h>
 #include <Functions/JSONPath/Parsers/ParserJSONPath.h>
 #include <Interpreters/Context.h>
-#include <Parsers/IAST.h>
 #include <Parsers/IParser.h>
-#include <Parsers/Lexer.h>
 #include <Parsers/TokenIterator.h>
 #include <base/find_symbols.h>
 #include <base/range.h>
@@ -44,6 +42,12 @@
 
 namespace DB
 {
+namespace Setting
+{
+extern const SettingsBool allow_simdjson;
+extern const SettingsUInt64 max_parser_depth;
+extern const SettingsUInt64 max_parser_backtracks;
+}
 namespace ErrorCodes
 {
     extern const int LOGICAL_ERROR;
@@ -526,7 +530,7 @@ public:
         const DB::ColumnsWithTypeAndName & arguments, const DB::DataTypePtr & 
/*result_type*/, size_t /*input_rows_count*/) const override
     {
 #if USE_SIMDJSON
-        if (context->getSettingsRef().allow_simdjson)
+        if (context->getSettingsRef()[DB::Setting::allow_simdjson])
         {
             return innerExecuteImpl<
                 DB::SimdJSONParser,
@@ -600,8 +604,8 @@ private:
                 const char * query_begin = reinterpret_cast<const char 
*>(required_fields.back().c_str());
                 const char * query_end = required_fields.back().c_str() + 
required_fields.back().size();
                 DB::Tokens tokens(query_begin, query_end);
-                UInt32 max_parser_depth = 
static_cast<UInt32>(context->getSettingsRef().max_parser_depth);
-                UInt32 max_parser_backtracks = 
static_cast<UInt32>(context->getSettingsRef().max_parser_backtracks);
+                UInt32 max_parser_depth = 
static_cast<UInt32>(context->getSettingsRef()[DB::Setting::max_parser_depth]);
+                UInt32 max_parser_backtracks = 
static_cast<UInt32>(context->getSettingsRef()[DB::Setting::max_parser_backtracks]);
                 DB::IParser::Pos token_iterator(tokens, max_parser_depth, 
max_parser_backtracks);
                 DB::ASTPtr json_path_ast;
                 DB::ParserJSONPath path_parser;
diff --git a/cpp-ch/local-engine/Parser/LocalExecutor.cpp 
b/cpp-ch/local-engine/Parser/LocalExecutor.cpp
index 58c29b53c1..f781556c91 100644
--- a/cpp-ch/local-engine/Parser/LocalExecutor.cpp
+++ b/cpp-ch/local-engine/Parser/LocalExecutor.cpp
@@ -23,6 +23,10 @@
 #include <QueryPipeline/printPipeline.h>
 #include <Common/QueryContext.h>
 
+namespace DB::Setting
+{
+extern const SettingsMaxThreads max_threads;
+}
 using namespace DB;
 namespace local_engine
 {
@@ -123,7 +127,7 @@ void LocalExecutor::execute()
 {
     chassert(query_pipeline_builder);
     push_executor = query_pipeline_builder->execute();
-    
push_executor->execute(local_engine::QueryContext::instance().currentQueryContext()->getSettingsRef().max_threads,
 false);
+    
push_executor->execute(QueryContext::instance().currentQueryContext()->getSettingsRef()[Setting::max_threads],
 false);
 }
 
 Block LocalExecutor::getHeader()
diff --git a/cpp-ch/local-engine/Parser/RelParsers/AggregateRelParser.cpp 
b/cpp-ch/local-engine/Parser/RelParsers/AggregateRelParser.cpp
index 05f63bfc5b..7313d00df7 100644
--- a/cpp-ch/local-engine/Parser/RelParsers/AggregateRelParser.cpp
+++ b/cpp-ch/local-engine/Parser/RelParsers/AggregateRelParser.cpp
@@ -33,6 +33,23 @@
 
 namespace DB
 {
+namespace Setting
+{
+extern const SettingsUInt64 max_bytes_before_external_group_by;
+extern const SettingsBool optimize_group_by_constant_keys;
+extern const SettingsUInt64 min_free_disk_space_for_temporary_data;
+extern const SettingsMaxThreads max_threads;
+extern const SettingsBool empty_result_for_aggregation_by_empty_set;
+extern const SettingsUInt64 group_by_two_level_threshold_bytes;
+extern const SettingsOverflowModeGroupBy group_by_overflow_mode;
+extern const SettingsUInt64 max_rows_to_group_by;
+extern const SettingsBool enable_memory_bound_merging_of_aggregation_results;
+extern const SettingsUInt64 aggregation_in_order_max_block_bytes;
+extern const SettingsUInt64 group_by_two_level_threshold;
+extern const SettingsFloat min_hit_rate_to_use_consecutive_keys_optimization;
+extern const SettingsMaxThreads max_threads;
+extern const SettingsUInt64 max_block_size;
+}
 namespace ErrorCodes
 {
     extern const int LOGICAL_ERROR;
@@ -292,13 +309,13 @@ void AggregateRelParser::addMergingAggregatedStep()
         grouping_keys,
         aggregate_descriptions,
         false,
-        settings.max_threads,
-        PODArrayUtil::adjustMemoryEfficientSize(settings.max_block_size),
-        settings.min_hit_rate_to_use_consecutive_keys_optimization);
+        settings[Setting::max_threads],
+        
PODArrayUtil::adjustMemoryEfficientSize(settings[Setting::max_block_size]),
+        settings[Setting::min_hit_rate_to_use_consecutive_keys_optimization]);
     auto config = StreamingAggregateConfig::loadFromContext(getContext());
     if (config.enable_streaming_aggregating)
     {
-        params.group_by_two_level_threshold = 
settings.group_by_two_level_threshold;
+        params.group_by_two_level_threshold = 
settings[Setting::group_by_two_level_threshold];
         auto merging_step = 
std::make_unique<GraceMergingAggregatedStep>(getContext(), 
plan->getCurrentDataStream(), params, false);
         steps.emplace_back(merging_step.get());
         plan->addStep(std::move(merging_step));
@@ -316,10 +333,10 @@ void AggregateRelParser::addMergingAggregatedStep()
             1,
             1,
             false,
-            settings.max_block_size,
-            settings.aggregation_in_order_max_block_bytes,
+            settings[Setting::max_block_size],
+            settings[Setting::aggregation_in_order_max_block_bytes],
             SortDescription(),
-            settings.enable_memory_bound_merging_of_aggregation_results);
+            
settings[Setting::enable_memory_bound_merging_of_aggregation_results]);
         steps.emplace_back(merging_step.get());
         plan->addStep(std::move(merging_step));
     }
@@ -337,22 +354,22 @@ void AggregateRelParser::addCompleteModeAggregatedStep()
             grouping_keys,
             aggregate_descriptions,
             false,
-            settings.max_rows_to_group_by,
-            settings.group_by_overflow_mode,
-            settings.group_by_two_level_threshold,
-            settings.group_by_two_level_threshold_bytes,
-            0, /*settings.max_bytes_before_external_group_by*/
-            settings.empty_result_for_aggregation_by_empty_set,
+            settings[Setting::max_rows_to_group_by],
+            settings[Setting::group_by_overflow_mode],
+            settings[Setting::group_by_two_level_threshold],
+            settings[Setting::group_by_two_level_threshold_bytes],
+            0, /*settings[Setting::max_bytes_before_external_group_by]*/
+            settings[Setting::empty_result_for_aggregation_by_empty_set],
             getContext()->getTempDataOnDisk(),
-            settings.max_threads,
-            settings.min_free_disk_space_for_temporary_data,
+            settings[Setting::max_threads],
+            settings[Setting::min_free_disk_space_for_temporary_data],
             true,
             3,
-            PODArrayUtil::adjustMemoryEfficientSize(settings.max_block_size),
+            
PODArrayUtil::adjustMemoryEfficientSize(settings[Setting::max_block_size]),
             /*enable_prefetch*/ true,
             /*only_merge*/ false,
-            settings.optimize_group_by_constant_keys,
-            settings.min_hit_rate_to_use_consecutive_keys_optimization,
+            settings[Setting::optimize_group_by_constant_keys],
+            
settings[Setting::min_hit_rate_to_use_consecutive_keys_optimization],
             /*StatsCollectingParams*/{});
         auto merging_step = 
std::make_unique<GraceMergingAggregatedStep>(getContext(), 
plan->getCurrentDataStream(), params, true);
         steps.emplace_back(merging_step.get());
@@ -364,22 +381,22 @@ void AggregateRelParser::addCompleteModeAggregatedStep()
             grouping_keys,
             aggregate_descriptions,
             false,
-            settings.max_rows_to_group_by,
-            settings.group_by_overflow_mode,
-            settings.group_by_two_level_threshold,
-            settings.group_by_two_level_threshold_bytes,
-            settings.max_bytes_before_external_group_by,
-            settings.empty_result_for_aggregation_by_empty_set,
+            settings[Setting::max_rows_to_group_by],
+            settings[Setting::group_by_overflow_mode],
+            settings[Setting::group_by_two_level_threshold],
+            settings[Setting::group_by_two_level_threshold_bytes],
+            settings[Setting::max_bytes_before_external_group_by],
+            settings[Setting::empty_result_for_aggregation_by_empty_set],
             getContext()->getTempDataOnDisk(),
-            settings.max_threads,
-            settings.min_free_disk_space_for_temporary_data,
+            settings[Setting::max_threads],
+            settings[Setting::min_free_disk_space_for_temporary_data],
             true,
             3,
-            PODArrayUtil::adjustMemoryEfficientSize(settings.max_block_size),
+            
PODArrayUtil::adjustMemoryEfficientSize(settings[Setting::max_block_size]),
             /*enable_prefetch*/ true,
             /*only_merge*/ false,
-            settings.optimize_group_by_constant_keys,
-            settings.min_hit_rate_to_use_consecutive_keys_optimization,
+            settings[Setting::optimize_group_by_constant_keys],
+            
settings[Setting::min_hit_rate_to_use_consecutive_keys_optimization],
             /*StatsCollectingParams*/{});
 
         auto aggregating_step = std::make_unique<AggregatingStep>(
@@ -387,8 +404,8 @@ void AggregateRelParser::addCompleteModeAggregatedStep()
             params,
             GroupingSetsParamsList(),
             true,
-            settings.max_block_size,
-            settings.aggregation_in_order_max_block_bytes,
+            settings[Setting::max_block_size],
+            settings[Setting::aggregation_in_order_max_block_bytes],
             1,
             1,
             false,
@@ -422,22 +439,22 @@ void AggregateRelParser::addAggregatingStep()
             grouping_keys,
             aggregate_descriptions,
             false,
-            settings.max_rows_to_group_by,
-            settings.group_by_overflow_mode,
-            settings.group_by_two_level_threshold,
+            settings[Setting::max_rows_to_group_by],
+            settings[Setting::group_by_overflow_mode],
+            settings[Setting::group_by_two_level_threshold],
             0, // group_by_two_level_threshold_bytes
             0,
-            settings.empty_result_for_aggregation_by_empty_set,
+            settings[Setting::empty_result_for_aggregation_by_empty_set],
             nullptr,
-            settings.max_threads,
-            settings.min_free_disk_space_for_temporary_data,
+            settings[Setting::max_threads],
+            settings[Setting::min_free_disk_space_for_temporary_data],
             true,
             3,
-            PODArrayUtil::adjustMemoryEfficientSize(settings.max_block_size),
+            
PODArrayUtil::adjustMemoryEfficientSize(settings[Setting::max_block_size]),
             /*enable_prefetch*/ true,
             /*only_merge*/ false,
-            settings.optimize_group_by_constant_keys,
-            settings.min_hit_rate_to_use_consecutive_keys_optimization,
+            settings[Setting::optimize_group_by_constant_keys],
+            
settings[Setting::min_hit_rate_to_use_consecutive_keys_optimization],
             /*StatsCollectingParams*/{});
         auto aggregating_step = 
std::make_unique<StreamingAggregatingStep>(getContext(), 
plan->getCurrentDataStream(), params);
         steps.emplace_back(aggregating_step.get());
@@ -449,22 +466,22 @@ void AggregateRelParser::addAggregatingStep()
             grouping_keys,
             aggregate_descriptions,
             false,
-            settings.max_rows_to_group_by,
-            settings.group_by_overflow_mode,
-            settings.group_by_two_level_threshold,
-            settings.group_by_two_level_threshold_bytes,
-            settings.max_bytes_before_external_group_by,
-            settings.empty_result_for_aggregation_by_empty_set,
+            settings[Setting::max_rows_to_group_by],
+            settings[Setting::group_by_overflow_mode],
+            settings[Setting::group_by_two_level_threshold],
+            settings[Setting::group_by_two_level_threshold_bytes],
+            settings[Setting::max_bytes_before_external_group_by],
+            settings[Setting::empty_result_for_aggregation_by_empty_set],
             getContext()->getTempDataOnDisk(),
-            settings.max_threads,
-            settings.min_free_disk_space_for_temporary_data,
+            settings[Setting::max_threads],
+            settings[Setting::min_free_disk_space_for_temporary_data],
             true,
             3,
-            PODArrayUtil::adjustMemoryEfficientSize(settings.max_block_size),
+            
PODArrayUtil::adjustMemoryEfficientSize(settings[Setting::max_block_size]),
             /*enable_prefetch*/ true,
             /*only_merge*/ false,
-            settings.optimize_group_by_constant_keys,
-            settings.min_hit_rate_to_use_consecutive_keys_optimization,
+            settings[Setting::optimize_group_by_constant_keys],
+            
settings[Setting::min_hit_rate_to_use_consecutive_keys_optimization],
             /*StatsCollectingParams*/{});
 
         auto aggregating_step = std::make_unique<AggregatingStep>(
@@ -472,8 +489,8 @@ void AggregateRelParser::addAggregatingStep()
             params,
             GroupingSetsParamsList(),
             false,
-            settings.max_block_size,
-            settings.aggregation_in_order_max_block_bytes,
+            settings[Setting::max_block_size],
+            settings[Setting::aggregation_in_order_max_block_bytes],
             1,
             1,
             false,
diff --git a/cpp-ch/local-engine/Parser/RelParsers/JoinRelParser.cpp 
b/cpp-ch/local-engine/Parser/RelParsers/JoinRelParser.cpp
index a5ed605ed0..cc70b8e0f9 100644
--- a/cpp-ch/local-engine/Parser/RelParsers/JoinRelParser.cpp
+++ b/cpp-ch/local-engine/Parser/RelParsers/JoinRelParser.cpp
@@ -43,6 +43,10 @@
 
 namespace DB
 {
+namespace Setting
+{
+extern const SettingsJoinAlgorithm join_algorithm;
+}
 namespace ErrorCodes
 {
 extern const int LOGICAL_ERROR;
@@ -329,7 +333,7 @@ DB::QueryPlanPtr JoinRelParser::parseJoin(const 
substrait::JoinRel & join, DB::Q
         }
 
         JoinPtr smj_join = std::make_shared<FullSortingMergeJoin>(table_join, 
right->getCurrentDataStream().header.cloneEmpty(), -1);
-        MultiEnum<DB::JoinAlgorithm> join_algorithm = 
context->getSettingsRef().join_algorithm;
+        MultiEnum<DB::JoinAlgorithm> join_algorithm = 
context->getSettingsRef()[Setting::join_algorithm];
         QueryPlanStepPtr join_step
             = std::make_unique<DB::JoinStep>(left->getCurrentDataStream(), 
right->getCurrentDataStream(), smj_join, 8192, 1, false);
 
@@ -825,7 +829,7 @@ DB::QueryPlanPtr JoinRelParser::buildSingleOnClauseHashJoin(
     ///   data will be spilled to disk. Don't set the limitation too small, 
otherwise the buckets number
     ///   will be too large and the performance will be bad.
     JoinPtr hash_join = nullptr;
-    MultiEnum<DB::JoinAlgorithm> join_algorithm = 
context->getSettingsRef().join_algorithm;
+    MultiEnum<DB::JoinAlgorithm> join_algorithm = 
context->getSettingsRef()[Setting::join_algorithm];
     if (join_algorithm.isSet(DB::JoinAlgorithm::GRACE_HASH))
     {
         hash_join = std::make_shared<GraceHashJoin>(
diff --git a/cpp-ch/local-engine/Parser/RelParsers/MergeTreeRelParser.cpp 
b/cpp-ch/local-engine/Parser/RelParsers/MergeTreeRelParser.cpp
index e467042bda..adafc97ed8 100644
--- a/cpp-ch/local-engine/Parser/RelParsers/MergeTreeRelParser.cpp
+++ b/cpp-ch/local-engine/Parser/RelParsers/MergeTreeRelParser.cpp
@@ -16,17 +16,23 @@
  */
 
 #include "MergeTreeRelParser.h"
+#include <Core/Settings.h>
 #include <Parser/FunctionParser.h>
+#include <Parser/InputFileNameParser.h>
 #include <Parser/SubstraitParserUtils.h>
 #include <Parser/TypeParser.h>
 #include <Storages/MergeTree/StorageMergeTreeFactory.h>
 #include <google/protobuf/wrappers.pb.h>
 #include <Poco/StringTokenizer.h>
 #include <Common/CHUtil.h>
-#include <Parser/InputFileNameParser.h>
+#include <Common/GlutenSettings.h>
 
 namespace DB
 {
+namespace Setting
+{
+extern const SettingsUInt64 max_block_size;
+}
 namespace ErrorCodes
 {
 extern const int NO_SUCH_DATA_PART;
@@ -126,7 +132,7 @@ DB::QueryPlanPtr MergeTreeRelParser::parseReadRel(
         storage_snapshot,
         *query_info,
         context,
-        context->getSettingsRef().max_block_size,
+        context->getSettingsRef()[Setting::max_block_size],
         1);
 
     auto * source_step_with_filter = static_cast<SourceStepWithFilter 
*>(read_step.get());
@@ -138,7 +144,7 @@ DB::QueryPlanPtr MergeTreeRelParser::parseReadRel(
 
     auto ranges = merge_tree_table.extractRange(selected_parts);
     std::string ret;
-    if 
(context->getSettingsRef().tryGetString("enabled_driver_filter_mergetree_index",
 ret) && ret == "'true'")
+    if (settingsEqual(context->getSettingsRef(), 
"enabled_driver_filter_mergetree_index", "true"))
         
SparkStorageMergeTree::analysisPartsByRanges(*reinterpret_cast<ReadFromMergeTree
 *>(read_step.get()), ranges);
     else
         
SparkStorageMergeTree::wrapRangesInDataParts(*reinterpret_cast<ReadFromMergeTree
 *>(read_step.get()), ranges);
@@ -368,7 +374,7 @@ String MergeTreeRelParser::filterRangesOnDriver(const 
substrait::ReadRel & read_
         storage_snapshot,
         *query_info,
         context,
-        context->getSettingsRef().max_block_size,
+        context->getSettingsRef()[Setting::max_block_size],
         10); // TODO: Expect use driver cores.
 
     auto * read_from_mergetree = static_cast<ReadFromMergeTree 
*>(read_step.get());
diff --git a/cpp-ch/local-engine/Parser/RelParsers/ProjectRelParser.cpp 
b/cpp-ch/local-engine/Parser/RelParsers/ProjectRelParser.cpp
index ed040a3626..637c4361f5 100644
--- a/cpp-ch/local-engine/Parser/RelParsers/ProjectRelParser.cpp
+++ b/cpp-ch/local-engine/Parser/RelParsers/ProjectRelParser.cpp
@@ -18,12 +18,19 @@
 
 #include <Interpreters/ArrayJoin.h>
 #include <Operator/EmptyProjectStep.h>
+#include <Operator/ReplicateRowsStep.h>
 #include <Processors/QueryPlan/ArrayJoinStep.h>
 #include <Processors/QueryPlan/ExpressionStep.h>
 #include <Rewriter/ExpressionRewriter.h>
 #include <Common/CHUtil.h>
-#include <Operator/ReplicateRowsStep.h>
 
+namespace DB
+{
+namespace Setting
+{
+extern const SettingsUInt64 max_block_size;
+}
+}
 using namespace DB;
 
 namespace local_engine
@@ -204,7 +211,7 @@ ProjectRelParser::parseGenerate(DB::QueryPlanPtr 
query_plan, const substrait::Re
         array_join.columns = std::move(array_joined_columns);
         array_join.is_left = generate_rel.outer();
         auto array_join_step = std::make_unique<ArrayJoinStep>(
-            query_plan->getCurrentDataStream(), std::move(array_join), false, 
getContext()->getSettingsRef().max_block_size);
+            query_plan->getCurrentDataStream(), std::move(array_join), false, 
getContext()->getSettingsRef()[Setting::max_block_size]);
         array_join_step->setStepDescription("ARRAY JOIN In Generate");
         steps.emplace_back(array_join_step.get());
         query_plan->addStep(std::move(array_join_step));
diff --git a/cpp-ch/local-engine/Parser/RelParsers/ReadRelParser.cpp 
b/cpp-ch/local-engine/Parser/RelParsers/ReadRelParser.cpp
index 1f623ced57..e49a51b6c7 100644
--- a/cpp-ch/local-engine/Parser/RelParsers/ReadRelParser.cpp
+++ b/cpp-ch/local-engine/Parser/RelParsers/ReadRelParser.cpp
@@ -32,11 +32,17 @@
 #include <Common/BlockTypeUtils.h>
 
 
-namespace DB::ErrorCodes
+namespace DB
+{
+namespace Setting
+{
+extern const SettingsMaxThreads max_threads;
+}
+namespace ErrorCodes
 {
 extern const int LOGICAL_ERROR;
 }
-
+}
 namespace local_engine
 {
 DB::QueryPlanPtr ReadRelParser::parse(DB::QueryPlanPtr query_plan, const 
substrait::Rel & rel, std::list<const substrait::Rel *> &)
@@ -56,7 +62,7 @@ DB::QueryPlanPtr ReadRelParser::parse(DB::QueryPlanPtr 
query_plan, const substra
         steps.emplace_back(read_step.get());
         query_plan->addStep(std::move(read_step));
 
-        if (getContext()->getSettingsRef().max_threads > 1)
+        if (getContext()->getSettingsRef()[Setting::max_threads] > 1)
         {
             auto buffer_step = 
std::make_unique<BlocksBufferPoolStep>(query_plan->getCurrentDataStream());
             steps.emplace_back(buffer_step.get());
diff --git a/cpp-ch/local-engine/Parser/SerializedPlanParser.cpp 
b/cpp-ch/local-engine/Parser/SerializedPlanParser.cpp
index c1f00c3348..75ba2a1152 100644
--- a/cpp-ch/local-engine/Parser/SerializedPlanParser.cpp
+++ b/cpp-ch/local-engine/Parser/SerializedPlanParser.cpp
@@ -88,6 +88,11 @@
 
 namespace DB
 {
+namespace Setting
+{
+extern const SettingsBool query_plan_enable_optimizations;
+extern const SettingsUInt64 priority;
+}
 namespace ErrorCodes
 {
 extern const int LOGICAL_ERROR;
@@ -418,7 +423,7 @@ QueryPlanPtr SerializedPlanParser::parseOp(const 
substrait::Rel & rel, std::list
 
     std::vector<DB::IQueryPlanStep *> steps = rel_parser->getSteps();
 
-    if (!context->getSettingsRef().query_plan_enable_optimizations)
+    if (!context->getSettingsRef()[Setting::query_plan_enable_optimizations])
     {
         if (rel.rel_type_case() == substrait::Rel::RelTypeCase::kRead)
         {
@@ -1177,12 +1182,12 @@ DB::QueryPipelineBuilderPtr 
SerializedPlanParser::buildQueryPipeline(DB::QueryPl
         context,
         "",
         context->getClientInfo(),
-        priorities.insert(settings.priority),
+        priorities.insert(settings[Setting::priority]),
         CurrentThread::getGroup(),
         IAST::QueryKind::Select,
         settings,
         0);
-    const QueryPlanOptimizationSettings optimization_settings{.optimize_plan = 
settings.query_plan_enable_optimizations};
+    const QueryPlanOptimizationSettings optimization_settings{.optimize_plan = 
settings[Setting::query_plan_enable_optimizations]};
     return query_plan.buildQueryPipeline(
         optimization_settings,
         BuildQueryPipelineSettings{
@@ -1213,7 +1218,7 @@ std::unique_ptr<LocalExecutor> 
SerializedPlanParser::createExecutor(DB::QueryPla
     auto * logger = &Poco::Logger::get("SerializedPlanParser");
     LOG_INFO(logger, "build pipeline {} ms", stopwatch.elapsedMicroseconds() / 
1000.0);
     LOG_DEBUG(
-        logger, "clickhouse plan [optimization={}]:\n{}", 
settings.query_plan_enable_optimizations, PlanUtil::explainPlan(*query_plan));
+        logger, "clickhouse plan [optimization={}]:\n{}", 
settings[Setting::query_plan_enable_optimizations], 
PlanUtil::explainPlan(*query_plan));
 
     auto config = ExecutorConfig::loadFromContext(context);
     return std::make_unique<LocalExecutor>(std::move(query_plan), 
std::move(builder), config.dump_pipeline);
diff --git a/cpp-ch/local-engine/Shuffle/PartitionWriter.h 
b/cpp-ch/local-engine/Shuffle/PartitionWriter.h
index 80bb43b8d1..fa388d08ba 100644
--- a/cpp-ch/local-engine/Shuffle/PartitionWriter.h
+++ b/cpp-ch/local-engine/Shuffle/PartitionWriter.h
@@ -29,6 +29,10 @@
 namespace DB
 {
 class MergingSortedAlgorithm;
+namespace Setting
+{
+extern const SettingsUInt64 prefer_external_sort_block_bytes;
+}
 }
 
 namespace local_engine
@@ -156,7 +160,7 @@ protected:
     {
         max_merge_block_size = options.split_size;
         max_sort_buffer_size = options.max_sort_buffer_size;
-        max_merge_block_bytes = 
QueryContext::globalContext()->getSettingsRef().prefer_external_sort_block_bytes;
+        max_merge_block_bytes = 
QueryContext::globalContext()->getSettingsRef()[DB::Setting::prefer_external_sort_block_bytes];
     }
 public:
     String getName() const override { return "SortBasedPartitionWriter"; }
diff --git a/cpp-ch/local-engine/Storages/Cache/CacheManager.cpp 
b/cpp-ch/local-engine/Storages/Cache/CacheManager.cpp
index daa3c0e305..25726dc24f 100644
--- a/cpp-ch/local-engine/Storages/Cache/CacheManager.cpp
+++ b/cpp-ch/local-engine/Storages/Cache/CacheManager.cpp
@@ -35,6 +35,10 @@
 
 namespace DB
 {
+namespace Setting
+{
+extern const SettingsUInt64 max_block_size;
+}
 namespace ErrorCodes
 {
 extern const int INVALID_STATE;
@@ -107,7 +111,7 @@ Task CacheManager::cachePart(const MergeTreeTableInstance & 
table, const MergeTr
                 storage_snapshot,
                 *query_info,
                 context,
-                context->getSettingsRef().max_block_size,
+                context->getSettingsRef()[Setting::max_block_size],
                 1);
             QueryPlan plan;
             plan.addStep(std::move(read_step));
diff --git a/cpp-ch/local-engine/Storages/MergeTree/MetaDataHelper.cpp 
b/cpp-ch/local-engine/Storages/MergeTree/MetaDataHelper.cpp
index 621374377d..0e6f5b102f 100644
--- a/cpp-ch/local-engine/Storages/MergeTree/MetaDataHelper.cpp
+++ b/cpp-ch/local-engine/Storages/MergeTree/MetaDataHelper.cpp
@@ -31,6 +31,11 @@ extern const Metric LocalThreadScheduled;
 
 namespace DB
 {
+namespace Setting
+{
+extern const SettingsSeconds lock_acquire_timeout;
+extern const SettingsMaxThreads max_threads;
+}
 namespace ErrorCodes
 {
 extern const int NOT_IMPLEMENTED;
@@ -89,7 +94,7 @@ void restoreMetaData<ROCKSDB>(const SparkStorageMergeTreePtr 
& storage, const Me
             not_exists_part.emplace(part);
     }
 
-    if (auto lock = 
storage->lockForAlter(context.getSettingsRef().lock_acquire_timeout))
+    if (auto lock = 
storage->lockForAlter(context.getSettingsRef()[Setting::lock_acquire_timeout]))
     {
         // put this return clause in lockForAlter
         // so that it will not return until other thread finishes restoring
@@ -140,7 +145,7 @@ void restoreMetaData<LOCAL>(
             not_exists_part.emplace(part);
     }
 
-    if (auto lock = 
storage->lockForAlter(context.getSettingsRef().lock_acquire_timeout))
+    if (auto lock = 
storage->lockForAlter(context.getSettingsRef()[Setting::lock_acquire_timeout]))
     {
         // put this return clause in lockForAlter
         // so that it will not return until other thread finishes restoring
@@ -148,7 +153,7 @@ void restoreMetaData<LOCAL>(
             return;
 
         // Increase the speed of metadata recovery
-        auto max_concurrency = std::max(10UL, 
QueryContext::globalContext()->getSettingsRef().max_threads.value);
+        auto max_concurrency = std::max(10UL, 
QueryContext::globalContext()->getSettingsRef()[Setting::max_threads].value);
         auto max_threads = std::min(max_concurrency, not_exists_part.size());
         FreeThreadPool thread_pool(
             CurrentMetrics::LocalThread,
diff --git a/cpp-ch/local-engine/Storages/MergeTree/SparkMergeTreeWriter.cpp 
b/cpp-ch/local-engine/Storages/MergeTree/SparkMergeTreeWriter.cpp
index 1ede4960aa..3a55c965b6 100644
--- a/cpp-ch/local-engine/Storages/MergeTree/SparkMergeTreeWriter.cpp
+++ b/cpp-ch/local-engine/Storages/MergeTree/SparkMergeTreeWriter.cpp
@@ -27,6 +27,11 @@
 #include <Common/CHUtil.h>
 #include <Common/QueryContext.h>
 
+namespace DB::Setting
+{
+extern const SettingsUInt64 min_insert_block_size_rows;
+extern const SettingsUInt64 min_insert_block_size_bytes;
+}
 using namespace DB;
 namespace
 {
@@ -62,9 +67,9 @@ std::unique_ptr<SparkMergeTreeWriter> 
SparkMergeTreeWriter::create(
     auto sink = dest_storage->write(none, metadata_snapshot, context, false);
     chain.addSink(sink);
     chain.addSource(
-        std::make_shared<ApplySquashingTransform>(header, 
settings.min_insert_block_size_rows, settings.min_insert_block_size_bytes));
+        std::make_shared<ApplySquashingTransform>(header, 
settings[Setting::min_insert_block_size_rows], 
settings[Setting::min_insert_block_size_bytes]));
     chain.addSource(
-        std::make_shared<PlanSquashingTransform>(header, 
settings.min_insert_block_size_rows, settings.min_insert_block_size_bytes));
+        std::make_shared<PlanSquashingTransform>(header, 
settings[Setting::min_insert_block_size_rows], 
settings[Setting::min_insert_block_size_bytes]));
 
     std::unordered_map<String, String> partition_values;
     if (!write_settings_.partition_dir.empty())
diff --git 
a/cpp-ch/local-engine/Storages/SubstraitSource/ExcelTextFormatFile.cpp 
b/cpp-ch/local-engine/Storages/SubstraitSource/ExcelTextFormatFile.cpp
index 3bb73a856e..7e87d11dc7 100644
--- a/cpp-ch/local-engine/Storages/SubstraitSource/ExcelTextFormatFile.cpp
+++ b/cpp-ch/local-engine/Storages/SubstraitSource/ExcelTextFormatFile.cpp
@@ -17,11 +17,12 @@
 #include "ExcelTextFormatFile.h"
 #include <memory>
 #include <string>
-
 #include <Columns/ColumnNullable.h>
+#include <Core/Settings.h>
 #include <DataTypes/DataTypeDecimalBase.h>
 #include <DataTypes/DataTypeNullable.h>
 #include <DataTypes/Serializations/SerializationNullable.h>
+#include <Formats/FormatFactory.h>
 #include <Formats/FormatSettings.h>
 #include <IO/PeekableReadBuffer.h>
 #include <Processors/Formats/IRowInputFormat.h>
@@ -29,7 +30,7 @@
 #include <Storages/Serializations/ExcelDecimalSerialization.h>
 #include <Storages/Serializations/ExcelSerialization.h>
 #include <Storages/Serializations/ExcelStringReader.h>
-#include <Common/CHUtil.h>
+#include <Common/GlutenSettings.h>
 
 namespace DB
 {
@@ -56,6 +57,11 @@ void skipErrorChars(DB::ReadBuffer & buf, bool has_quote, 
char quote, String & e
             ++buf.position();
 }
 
+bool ExcelTextFormatFile::useThis(const DB::ContextPtr & context)
+{
+    return settingsEqual(context->getSettingsRef(), USE_EXCEL_PARSER, "true");
+}
+
 FormatFile::InputFormatPtr ExcelTextFormatFile::createInputFormat(const 
DB::Block & header)
 {
     auto res = std::make_shared<FormatFile::InputFormat>();
@@ -99,15 +105,15 @@ DB::FormatSettings 
ExcelTextFormatFile::createFormatSettings()
         format_settings.csv.null_representation = 
file_info.text().null_value();
 
     bool empty_as_null = true;
-    if 
(context->getSettingsRef().has(BackendInitializerUtil::EXCEL_EMPTY_AS_NULL))
-        empty_as_null = 
context->getSettingsRef().getString(BackendInitializerUtil::EXCEL_EMPTY_AS_NULL)
 == "'true'";
+    if (context->getSettingsRef().has(EXCEL_EMPTY_AS_NULL))
+        empty_as_null = settingsEqual(context->getSettingsRef(), 
EXCEL_EMPTY_AS_NULL, "true");
+
+    format_settings.try_infer_integers = false;
+    if (!context->getSettingsRef().has(EXCEL_NUMBER_FORCE))
+        format_settings.try_infer_integers = true;
 
-    format_settings.try_infer_integers = 0;
-    if 
(!context->getSettingsRef().has(BackendInitializerUtil::EXCEL_NUMBER_FORCE))
-        format_settings.try_infer_integers = 1;
-    if 
(context->getSettingsRef().has(BackendInitializerUtil::EXCEL_NUMBER_FORCE)
-        && 
context->getSettingsRef().getString(BackendInitializerUtil::EXCEL_NUMBER_FORCE) 
== "'true'")
-        format_settings.try_infer_integers = 1;
+    if (settingsEqual(context->getSettingsRef(), EXCEL_NUMBER_FORCE, "true"))
+        format_settings.try_infer_integers = true;
 
     if (format_settings.csv.null_representation.empty() || empty_as_null)
         format_settings.csv.empty_as_default = true;
@@ -131,8 +137,7 @@ DB::FormatSettings 
ExcelTextFormatFile::createFormatSettings()
     {
         format_settings.csv.allow_single_quotes = false;
 
-        if 
(context->getSettingsRef().has(BackendInitializerUtil::EXCEL_QUOTE_STRICT)
-            && 
context->getSettingsRef().getString(BackendInitializerUtil::EXCEL_QUOTE_STRICT) 
== "'true'")
+        if (settingsEqual(context->getSettingsRef(), EXCEL_QUOTE_STRICT, 
"true"))
             format_settings.csv.allow_double_quotes = false;
         else
             format_settings.csv.allow_double_quotes = true;
diff --git a/cpp-ch/local-engine/Storages/SubstraitSource/ExcelTextFormatFile.h 
b/cpp-ch/local-engine/Storages/SubstraitSource/ExcelTextFormatFile.h
index 4ec69f33b9..6fc4183de0 100644
--- a/cpp-ch/local-engine/Storages/SubstraitSource/ExcelTextFormatFile.h
+++ b/cpp-ch/local-engine/Storages/SubstraitSource/ExcelTextFormatFile.h
@@ -29,6 +29,15 @@ namespace local_engine
 
 class ExcelTextFormatFile : public FormatFile
 {
+    // use excel text parser
+    static constexpr std::string_view USE_EXCEL_PARSER = 
"use_excel_serialization";
+    static constexpr std::string_view EXCEL_EMPTY_AS_NULL = 
"use_excel_serialization.empty_as_null";
+    static constexpr std::string_view EXCEL_NUMBER_FORCE = 
"use_excel_serialization.number_force";
+    static constexpr std::string_view EXCEL_QUOTE_STRICT = 
"use_excel_serialization.quote_strict";
+
+public:
+    static bool useThis(const DB::ContextPtr & context);
+
 public:
     explicit ExcelTextFormatFile(
         DB::ContextPtr context_, const 
substrait::ReadRel::LocalFiles::FileOrFiles & file_info_, ReadBufferBuilderPtr 
read_buffer_builder_)
diff --git a/cpp-ch/local-engine/Storages/SubstraitSource/FormatFile.cpp 
b/cpp-ch/local-engine/Storages/SubstraitSource/FormatFile.cpp
index 4499a9a559..42b399abe4 100644
--- a/cpp-ch/local-engine/Storages/SubstraitSource/FormatFile.cpp
+++ b/cpp-ch/local-engine/Storages/SubstraitSource/FormatFile.cpp
@@ -18,7 +18,6 @@
 
 #include <memory>
 #include <IO/ReadBufferFromFile.h>
-#include <Common/CHUtil.h>
 #include <Common/Exception.h>
 #include <Common/GlutenStringUtils.h>
 #include <Common/logger_useful.h>
@@ -36,8 +35,8 @@
 #include <Storages/SubstraitSource/TextFormatFile.h>
 #endif
 
-#include <Common/GlutenConfig.h>
 #include <Storages/SubstraitSource/JSONFormatFile.h>
+#include <Common/GlutenConfig.h>
 
 namespace DB
 {
@@ -91,8 +90,7 @@ FormatFilePtr FormatFileUtil::createFile(
 #if USE_HIVE
     if (file.has_text())
     {
-        if 
(context->getSettingsRef().has(BackendInitializerUtil::USE_EXCEL_PARSER)
-            && 
context->getSettingsRef().getString(BackendInitializerUtil::USE_EXCEL_PARSER) 
== "'true'")
+        if (ExcelTextFormatFile::useThis(context))
             return std::make_shared<ExcelTextFormatFile>(context, file, 
read_buffer_builder);
         else
             return std::make_shared<TextFormatFile>(context, file, 
read_buffer_builder);
@@ -102,6 +100,5 @@ FormatFilePtr FormatFileUtil::createFile(
     if (file.has_json())
         return std::make_shared<JSONFormatFile>(context, file, 
read_buffer_builder);
     throw DB::Exception(DB::ErrorCodes::NOT_IMPLEMENTED, "Format not 
supported:{}", file.DebugString());
-    __builtin_unreachable();
 }
 }
diff --git a/cpp-ch/local-engine/Storages/SubstraitSource/ORCFormatFile.cpp 
b/cpp-ch/local-engine/Storages/SubstraitSource/ORCFormatFile.cpp
index 66556e237f..4751088df5 100644
--- a/cpp-ch/local-engine/Storages/SubstraitSource/ORCFormatFile.cpp
+++ b/cpp-ch/local-engine/Storages/SubstraitSource/ORCFormatFile.cpp
@@ -17,14 +17,15 @@
 #include "ORCFormatFile.h"
 
 #if USE_ORC
-#    include <memory>
-#    include <numeric>
-#    include <Formats/FormatFactory.h>
-#    include <IO/SeekableReadBuffer.h>
-#    include <Processors/Formats/Impl/ArrowBufferedStreams.h>
-#    include <Processors/Formats/Impl/NativeORCBlockInputFormat.h>
-#    include <Storages/SubstraitSource/OrcUtil.h>
-#    include <Common/CHUtil.h>
+#include <memory>
+#include <numeric>
+#include <Formats/FormatFactory.h>
+#include <IO/SeekableReadBuffer.h>
+#include <Processors/Formats/Impl/ArrowBufferedStreams.h>
+#include <Processors/Formats/Impl/NativeORCBlockInputFormat.h>
+#include <Storages/SubstraitSource/OrcUtil.h>
+#include <Poco/Util/AbstractConfiguration.h>
+#include <Common/CHUtil.h>
 
 namespace local_engine
 {
diff --git a/cpp-ch/local-engine/Storages/SubstraitSource/ReadBufferBuilder.cpp 
b/cpp-ch/local-engine/Storages/SubstraitSource/ReadBufferBuilder.cpp
index b32073db53..b1884af1d4 100644
--- a/cpp-ch/local-engine/Storages/SubstraitSource/ReadBufferBuilder.cpp
+++ b/cpp-ch/local-engine/Storages/SubstraitSource/ReadBufferBuilder.cpp
@@ -49,6 +49,7 @@
 #include <Common/CHUtil.h>
 #include <Common/FileCacheConcurrentMap.h>
 #include <Common/GlutenConfig.h>
+#include <Common/GlutenSettings.h>
 #include <Common/logger_useful.h>
 #include <Common/safe_cast.h>
 
@@ -66,6 +67,12 @@
 
 namespace DB
 {
+namespace Setting
+{
+extern const SettingsUInt64 s3_max_redirects;
+extern const SettingsBool s3_disable_checksum;
+extern const SettingsUInt64 s3_retry_attempts;
+}
 namespace ErrorCodes
 {
     extern const int BAD_ARGUMENTS;
@@ -511,13 +518,7 @@ private:
     static FileCacheConcurrentMap files_cache_time_map;
     DB::FileCachePtr file_cache;
 
-    std::string & stripQuote(std::string & s)
-    {
-        s.erase(remove(s.begin(), s.end(), '\''), s.end());
-        return s;
-    }
-
-    std::string toBucketNameSetting(const std::string & bucket_name, const 
std::string & config_name)
+    static std::string toBucketNameSetting(const std::string & bucket_name, 
const std::string & config_name)
     {
         if (!config_name.starts_with(BackendInitializerUtil::S3A_PREFIX))
         {
@@ -529,7 +530,7 @@ private:
             + config_name.substr(BackendInitializerUtil::S3A_PREFIX.size());
     }
 
-    std::string getSetting(
+    static std::string getSetting(
         const DB::Settings & settings,
         const std::string & bucket_name,
         const std::string & config_name,
@@ -538,11 +539,11 @@ private:
     {
         std::string ret;
         // if there's a bucket specific config, prefer it to non per bucket 
config
-        if (settings.tryGetString(toBucketNameSetting(bucket_name, 
config_name), ret))
-            return stripQuote(ret);
+        if (tryGetString(settings, toBucketNameSetting(bucket_name, 
config_name), ret))
+            return ret;
 
-        if (!require_per_bucket && settings.tryGetString(config_name, ret))
-            return stripQuote(ret);
+        if (!require_per_bucket && tryGetString(settings, config_name, ret))
+            return ret;
 
         return default_value;
     }
@@ -614,8 +615,8 @@ private:
         DB::S3::PocoHTTPClientConfiguration client_configuration = 
DB::S3::ClientFactory::instance().createClientConfiguration(
             region_name,
             context->getRemoteHostFilter(),
-            static_cast<unsigned>(context->getSettingsRef().s3_max_redirects),
-            static_cast<unsigned>(context->getSettingsRef().s3_retry_attempts),
+            
static_cast<unsigned>(context->getSettingsRef()[DB::Setting::s3_max_redirects]),
+            
static_cast<unsigned>(context->getSettingsRef()[DB::Setting::s3_retry_attempts]),
             false,
             false,
             nullptr,
@@ -631,15 +632,13 @@ private:
 
         std::string ak;
         std::string sk;
-        settings.tryGetString(BackendInitializerUtil::HADOOP_S3_ACCESS_KEY, 
ak);
-        settings.tryGetString(BackendInitializerUtil::HADOOP_S3_SECRET_KEY, 
sk);
-        stripQuote(ak);
-        stripQuote(sk);
+        tryGetString(settings, BackendInitializerUtil::HADOOP_S3_ACCESS_KEY, 
ak);
+        tryGetString(settings, BackendInitializerUtil::HADOOP_S3_SECRET_KEY, 
sk);
         const DB::Settings & global_settings = 
context->getGlobalContext()->getSettingsRef();
         const DB::Settings & local_settings = context->getSettingsRef();
         DB::S3::ClientSettings client_settings{
             .use_virtual_addressing = false,
-            .disable_checksum = local_settings.s3_disable_checksum,
+            .disable_checksum = 
local_settings[DB::Setting::s3_disable_checksum],
             .gcs_issue_compose_request = 
context->getConfigRef().getBool("s3.gcs_issue_compose_request", false),
         };
         if (use_assumed_role)
diff --git a/cpp-ch/local-engine/Storages/SubstraitSource/TextFormatFile.cpp 
b/cpp-ch/local-engine/Storages/SubstraitSource/TextFormatFile.cpp
index 5cae962a7f..b22f883f5d 100644
--- a/cpp-ch/local-engine/Storages/SubstraitSource/TextFormatFile.cpp
+++ b/cpp-ch/local-engine/Storages/SubstraitSource/TextFormatFile.cpp
@@ -18,6 +18,7 @@
 
 #include <memory>
 
+#include <Formats/FormatFactory.h>
 #include <Formats/FormatSettings.h>
 #include <Processors/Formats/Impl/HiveTextRowInputFormat.h>
 #include <Poco/URI.h>
diff --git a/cpp-ch/local-engine/tests/gtest_clickhouse_pr_verify.cpp 
b/cpp-ch/local-engine/tests/gtest_clickhouse_pr_verify.cpp
index b44fe5eae0..44ba371029 100644
--- a/cpp-ch/local-engine/tests/gtest_clickhouse_pr_verify.cpp
+++ b/cpp-ch/local-engine/tests/gtest_clickhouse_pr_verify.cpp
@@ -25,6 +25,10 @@
 #include <Common/DebugUtils.h>
 #include <Common/QueryContext.h>
 
+namespace DB::Setting
+{
+extern const SettingsBool enable_named_columns_in_function_tuple;
+}
 using namespace local_engine;
 
 using namespace DB;
@@ -36,7 +40,7 @@ TEST(Clickhouse, PR54881)
     const auto context1 = 
DB::Context::createCopy(QueryContext::globalContext());
     // context1->setSetting("enable_named_columns_in_function_tuple", 
DB::Field(true));
     auto settings = context1->getSettingsRef();
-    EXPECT_FALSE(settings.enable_named_columns_in_function_tuple) << "GLUTEN 
NEED set enable_named_columns_in_function_tuple to false";
+    EXPECT_FALSE(settings[Setting::enable_named_columns_in_function_tuple]) << 
"GLUTEN NEED set enable_named_columns_in_function_tuple to false";
 
     constexpr std::string_view split_template
         = 
R"({"items":[{"uriFile":"{replace_local_files}","partitionIndex":"0","length":"1529","parquet":{},"schema":{},"metadataColumns":[{}]}]})";
diff --git a/cpp-ch/local-engine/tests/gtest_write_pipeline.cpp 
b/cpp-ch/local-engine/tests/gtest_write_pipeline.cpp
index 18a18b0e2c..b6293137d6 100644
--- a/cpp-ch/local-engine/tests/gtest_write_pipeline.cpp
+++ b/cpp-ch/local-engine/tests/gtest_write_pipeline.cpp
@@ -47,6 +47,16 @@
 #include <Common/DebugUtils.h>
 #include <Common/QueryContext.h>
 
+namespace DB::Setting
+{
+extern const SettingsUInt64 max_parser_depth;
+extern const SettingsUInt64 max_parser_backtracks;
+extern const SettingsBool allow_settings_after_format_in_insert;
+extern const SettingsUInt64 max_query_size;
+extern const SettingsUInt64 min_insert_block_size_rows;
+extern const SettingsUInt64 min_insert_block_size_bytes;
+}
+
 using namespace local_engine;
 using namespace DB;
 
@@ -85,8 +95,8 @@ TEST(LocalExecutor, StorageObjectStorageSink)
         "QUERY TEST",
         /* allow_multi_statements = */ false,
         0,
-        settings.max_parser_depth,
-        settings.max_parser_backtracks,
+        settings[Setting::max_parser_depth],
+        settings[Setting::max_parser_backtracks],
         true);
     auto & create = ast->as<ASTCreateQuery &>();
     auto arg = create.storage->children[0];
@@ -312,9 +322,9 @@ TEST(WritePipeline, MergeTree)
 
     const char * begin = query.data();
     const char * end = query.data() + query.size();
-    ParserQuery parser(end, settings.allow_settings_after_format_in_insert);
+    ParserQuery parser(end, 
settings[Setting::allow_settings_after_format_in_insert]);
 
-    ASTPtr ast = parseQuery(parser, begin, end, "", settings.max_query_size, 
settings.max_parser_depth, settings.max_parser_backtracks);
+    ASTPtr ast = parseQuery(parser, begin, end, "", 
settings[Setting::max_query_size], settings[Setting::max_parser_depth], 
settings[Setting::max_parser_backtracks]);
 
     EXPECT_TRUE(ast->as<ASTCreateQuery>());
     auto & create = ast->as<ASTCreateQuery &>();
@@ -361,7 +371,7 @@ TEST(WritePipeline, MergeTree)
         std::move(storage_settings));
 
     Block header{{INT(), "id"}, {STRING(), "Name"}, {makeNullable(INT()), 
"Age"}};
-    DB::Squashing squashing(header, settings.min_insert_block_size_rows, 
settings.min_insert_block_size_bytes);
+    DB::Squashing squashing(header, 
settings[Setting::min_insert_block_size_rows], 
settings[Setting::min_insert_block_size_bytes]);
     squashing.add(person_chunk());
     auto x = Squashing::squash(squashing.flush());
     x.getChunkInfos().add(std::make_shared<DeduplicationToken::TokenInfo>());


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to