This is an automated email from the ASF dual-hosted git repository.
changchen pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git
The following commit(s) were added to refs/heads/main by this push:
new c5af284db5 [GLUTEN-1632][CH]Daily Update Clickhouse Version (20240920) (#7299)
c5af284db5 is described below
commit c5af284db5ceeaa16ce62375c2a9045ed9de38e0
Author: Kyligence Git <[email protected]>
AuthorDate: Fri Sep 20 09:40:28 2024 -0500
[GLUTEN-1632][CH]Daily Update Clickhouse Version (20240920) (#7299)
* [GLUTEN-1632][CH]Daily Update Clickhouse Version (20240920)
* Fix build due to https://github.com/ClickHouse/ClickHouse/pull/69213
---------
Co-authored-by: kyligence-git <[email protected]>
Co-authored-by: Chang Chen <[email protected]>
---
cpp-ch/clickhouse.version | 4 +-
cpp-ch/local-engine/Common/CHUtil.cpp | 13 ++-
cpp-ch/local-engine/Common/CHUtil.h | 7 +-
cpp-ch/local-engine/Common/GlutenConfig.h | 1 +
cpp-ch/local-engine/Common/GlutenSettings.cpp | 41 +++++++
cpp-ch/local-engine/Common/GlutenSettings.h | 11 +-
cpp-ch/local-engine/Common/GlutenSignalHandler.cpp | 2 +
.../Disks/ObjectStorages/GlutenDiskHDFS.h | 5 +-
.../registerGlutenDiskObjectStorage.cpp | 7 +-
.../Functions/SparkFunctionGetJsonObject.h | 16 ++-
cpp-ch/local-engine/Parser/LocalExecutor.cpp | 6 +-
.../Parser/RelParsers/AggregateRelParser.cpp | 123 ++++++++++++---------
.../Parser/RelParsers/JoinRelParser.cpp | 8 +-
.../Parser/RelParsers/MergeTreeRelParser.cpp | 14 ++-
.../Parser/RelParsers/ProjectRelParser.cpp | 11 +-
.../Parser/RelParsers/ReadRelParser.cpp | 12 +-
.../local-engine/Parser/SerializedPlanParser.cpp | 13 ++-
cpp-ch/local-engine/Shuffle/PartitionWriter.h | 6 +-
.../local-engine/Storages/Cache/CacheManager.cpp | 6 +-
.../Storages/MergeTree/MetaDataHelper.cpp | 11 +-
.../Storages/MergeTree/SparkMergeTreeWriter.cpp | 9 +-
.../SubstraitSource/ExcelTextFormatFile.cpp | 29 +++--
.../Storages/SubstraitSource/ExcelTextFormatFile.h | 9 ++
.../Storages/SubstraitSource/FormatFile.cpp | 7 +-
.../Storages/SubstraitSource/ORCFormatFile.cpp | 17 +--
.../Storages/SubstraitSource/ReadBufferBuilder.cpp | 37 +++----
.../Storages/SubstraitSource/TextFormatFile.cpp | 1 +
.../tests/gtest_clickhouse_pr_verify.cpp | 6 +-
cpp-ch/local-engine/tests/gtest_write_pipeline.cpp | 20 +++-
29 files changed, 304 insertions(+), 148 deletions(-)
diff --git a/cpp-ch/clickhouse.version b/cpp-ch/clickhouse.version
index 3ce0469938..32d2acdbdf 100644
--- a/cpp-ch/clickhouse.version
+++ b/cpp-ch/clickhouse.version
@@ -1,3 +1,3 @@
CH_ORG=Kyligence
-CH_BRANCH=rebase_ch/20240918
-CH_COMMIT=cc6de0f1995
\ No newline at end of file
+CH_BRANCH=rebase_ch/20240920
+CH_COMMIT=14c2da664d7
\ No newline at end of file
diff --git a/cpp-ch/local-engine/Common/CHUtil.cpp
b/cpp-ch/local-engine/Common/CHUtil.cpp
index 5ad0b1b973..fd2030f71c 100644
--- a/cpp-ch/local-engine/Common/CHUtil.cpp
+++ b/cpp-ch/local-engine/Common/CHUtil.cpp
@@ -77,6 +77,11 @@
namespace DB
{
+namespace Setting
+{
+extern const SettingsUInt64 prefer_external_sort_block_bytes;
+extern const SettingsUInt64 max_bytes_before_external_sort;
+}
namespace ErrorCodes
{
extern const int BAD_ARGUMENTS;
@@ -701,12 +706,12 @@ void
BackendInitializerUtil::initEnvs(DB::Context::ConfigurationPtr config)
spark_user = spark_user_c_str;
}
-DB::Field BackendInitializerUtil::toField(const String key, const String value)
+DB::Field BackendInitializerUtil::toField(const String & key, const String & value)
{
if (BOOL_VALUE_SETTINGS.contains(key))
return DB::Field(value == "true" || value == "1");
else if (LONG_VALUE_SETTINGS.contains(key))
- return DB::Field(std::strtoll(value.c_str(), NULL, 10));
+ return DB::Field(std::strtoll(value.c_str(), nullptr, 10));
else
return DB::Field(value);
}
@@ -797,13 +802,13 @@ void
BackendInitializerUtil::initSettings(std::map<std::string, std::string> & b
auto task_memory =
std::stoull(backend_conf_map.at(GLUTEN_TASK_OFFHEAP));
if (!backend_conf_map.contains(CH_RUNTIME_SETTINGS_PREFIX +
"max_bytes_before_external_sort"))
{
-        settings.max_bytes_before_external_sort = static_cast<size_t>(0.8 * task_memory);
+        settings[Setting::max_bytes_before_external_sort] = static_cast<size_t>(0.8 * task_memory);
}
if (!backend_conf_map.contains(CH_RUNTIME_SETTINGS_PREFIX +
"prefer_external_sort_block_bytes"))
{
auto mem_gb = task_memory / static_cast<double>(1_GiB);
// 2.8x+5, Heuristics calculate the block size of external sort,
[8,16]
-        settings.prefer_external_sort_block_bytes = std::max(std::min(static_cast<size_t>(2.8 * mem_gb + 5), 16ul), 8ul) * 1024 * 1024;
+        settings[Setting::prefer_external_sort_block_bytes] = std::max(std::min(static_cast<size_t>(2.8 * mem_gb + 5), 16ul), 8ul) * 1024 * 1024;
}
}
}
diff --git a/cpp-ch/local-engine/Common/CHUtil.h
b/cpp-ch/local-engine/Common/CHUtil.h
index 2e0b7266cd..23e319eb56 100644
--- a/cpp-ch/local-engine/Common/CHUtil.h
+++ b/cpp-ch/local-engine/Common/CHUtil.h
@@ -154,7 +154,7 @@ class JNIUtils;
class BackendInitializerUtil
{
public:
- static DB::Field toField(const String key, const String value);
+ static DB::Field toField(const String & key, const String & value);
/// Initialize two kinds of resources
/// 1. global level resources like global_context/shared_context, notice
that they can only be initialized once in process lifetime
@@ -162,11 +162,6 @@ public:
static void init(const std::string_view plan);
static void updateConfig(const DB::ContextMutablePtr &, std::string_view);
- // use excel text parser
- inline static const std::string USE_EXCEL_PARSER =
"use_excel_serialization";
- inline static const std::string EXCEL_EMPTY_AS_NULL =
"use_excel_serialization.empty_as_null";
- inline static const std::string EXCEL_NUMBER_FORCE =
"use_excel_serialization.number_force";
- inline static const std::string EXCEL_QUOTE_STRICT =
"use_excel_serialization.quote_strict";
inline static const String CH_BACKEND_PREFIX =
"spark.gluten.sql.columnar.backend.ch";
inline static const String CH_RUNTIME_CONFIG = "runtime_config";
diff --git a/cpp-ch/local-engine/Common/GlutenConfig.h
b/cpp-ch/local-engine/Common/GlutenConfig.h
index feded16e46..ab3a6295ae 100644
--- a/cpp-ch/local-engine/Common/GlutenConfig.h
+++ b/cpp-ch/local-engine/Common/GlutenConfig.h
@@ -20,6 +20,7 @@
#include <Interpreters/Context.h>
#include <base/types.h>
#include <base/unit.h>
+#include <Poco/Util/AbstractConfiguration.h>
#include <Common/logger_useful.h>
namespace local_engine
diff --git a/cpp-ch/local-engine/Common/GlutenSettings.cpp
b/cpp-ch/local-engine/Common/GlutenSettings.cpp
new file mode 100644
index 0000000000..85e5658226
--- /dev/null
+++ b/cpp-ch/local-engine/Common/GlutenSettings.cpp
@@ -0,0 +1,41 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#pragma once
+#include "GlutenSettings.h"
+#include <Core/Settings.h>
+
+using namespace DB;
+namespace local_engine
+{
+
+bool tryGetString(const DB::Settings & settings, std::string_view name, std::string & value)
+{
+ Field field;
+ if (settings.tryGet(name, field))
+ {
+ value = field.safeGet<String>();
+ return true;
+ }
+ return false;
+}
+bool settingsEqual(const DB::Settings & settings, std::string_view name, const std::string & value)
+{
+ if (DB::Field field; settings.tryGet(name, field))
+ return field.safeGet<String>() == value;
+ return false;
+}
+}
\ No newline at end of file
diff --git a/cpp-ch/local-engine/Common/GlutenSettings.h
b/cpp-ch/local-engine/Common/GlutenSettings.h
index 13c0a1327b..d87ff45041 100644
--- a/cpp-ch/local-engine/Common/GlutenSettings.h
+++ b/cpp-ch/local-engine/Common/GlutenSettings.h
@@ -15,8 +15,13 @@
* limitations under the License.
*/
#pragma once
+
#include <Interpreters/Context_fwd.h>
+namespace DB
+{
+struct Settings;
+}
namespace local_engine
{
@@ -59,5 +64,9 @@ namespace local_engine
LIST_OF_SETTINGS_MACRO(IMPLEMENT_GLUTEN_SET_, SKIP_ALIAS, _) \
}
+// workaround for tryGetString
+
+bool tryGetString(const DB::Settings & settings, std::string_view name,
std::string & value);
+bool settingsEqual(const DB::Settings & settings, std::string_view name, const
std::string & value);
-}
\ No newline at end of file
+} // namespace local_engine
diff --git a/cpp-ch/local-engine/Common/GlutenSignalHandler.cpp
b/cpp-ch/local-engine/Common/GlutenSignalHandler.cpp
index d04c67d71b..44c43fcb65 100644
--- a/cpp-ch/local-engine/Common/GlutenSignalHandler.cpp
+++ b/cpp-ch/local-engine/Common/GlutenSignalHandler.cpp
@@ -26,6 +26,8 @@
#include <base/phdr_cache.h>
#include <base/sleep.h>
#include <Poco/Exception.h>
+#include <Poco/Runnable.h>
+#include <Poco/Thread.h>
#include <Common/CurrentThread.h>
#include <Common/GlutenSignalHandler.h>
#include <Common/MemoryTracker.h>
diff --git a/cpp-ch/local-engine/Disks/ObjectStorages/GlutenDiskHDFS.h
b/cpp-ch/local-engine/Disks/ObjectStorages/GlutenDiskHDFS.h
index b0f82a340b..68942c6a47 100644
--- a/cpp-ch/local-engine/Disks/ObjectStorages/GlutenDiskHDFS.h
+++ b/cpp-ch/local-engine/Disks/ObjectStorages/GlutenDiskHDFS.h
@@ -19,10 +19,11 @@
#include <config.h>
-#include <Common/Throttler.h>
-#include <Disks/ObjectStorages/DiskObjectStorage.h>
#include <Disks/ObjectStorages/Cached/CachedObjectStorage.h>
+#include <Disks/ObjectStorages/DiskObjectStorage.h>
#include <Interpreters/Cache/FileCacheFactory.h>
+#include <Common/Throttler.h>
+#include <Common/typeid_cast.h>
#if USE_HDFS
#include <Disks/ObjectStorages/GlutenHDFSObjectStorage.h>
#endif
diff --git
a/cpp-ch/local-engine/Disks/ObjectStorages/registerGlutenDiskObjectStorage.cpp
b/cpp-ch/local-engine/Disks/ObjectStorages/registerGlutenDiskObjectStorage.cpp
index 7d4d06a123..b4073d0f39 100644
---
a/cpp-ch/local-engine/Disks/ObjectStorages/registerGlutenDiskObjectStorage.cpp
+++
b/cpp-ch/local-engine/Disks/ObjectStorages/registerGlutenDiskObjectStorage.cpp
@@ -33,6 +33,11 @@
namespace DB
{
+namespace Setting
+{
+extern const SettingsUInt64 hdfs_replication;
+}
+
namespace ErrorCodes
{
extern const int BAD_ARGUMENTS;
@@ -120,7 +125,7 @@ void registerGlutenHDFSObjectStorage(ObjectStorageFactory &
factory)
std::unique_ptr<HDFSObjectStorageSettings> settings =
std::make_unique<HDFSObjectStorageSettings>(
config.getUInt64(config_prefix + ".min_bytes_for_seek", 1024 *
1024),
- context->getSettingsRef().hdfs_replication
+ context->getSettingsRef()[Setting::hdfs_replication]
);
return std::make_shared<GlutenHDFSObjectStorage>(uri,
std::move(settings), config);
});
diff --git a/cpp-ch/local-engine/Functions/SparkFunctionGetJsonObject.h
b/cpp-ch/local-engine/Functions/SparkFunctionGetJsonObject.h
index dfc1e1e328..125ab0a394 100644
--- a/cpp-ch/local-engine/Functions/SparkFunctionGetJsonObject.h
+++ b/cpp-ch/local-engine/Functions/SparkFunctionGetJsonObject.h
@@ -18,19 +18,17 @@
#include <memory>
#include <string_view>
#include <Columns/ColumnNullable.h>
+#include <Columns/ColumnTuple.h>
#include <DataTypes/DataTypeNullable.h>
#include <DataTypes/DataTypeString.h>
#include <DataTypes/DataTypeTuple.h>
#include <DataTypes/IDataType.h>
#include <Functions/FunctionSQLJSON.h>
#include <Functions/IFunction.h>
-#include <Functions/JSONPath/ASTs/ASTJSONPath.h>
#include <Functions/JSONPath/Generator/GeneratorJSONPath.h>
#include <Functions/JSONPath/Parsers/ParserJSONPath.h>
#include <Interpreters/Context.h>
-#include <Parsers/IAST.h>
#include <Parsers/IParser.h>
-#include <Parsers/Lexer.h>
#include <Parsers/TokenIterator.h>
#include <base/find_symbols.h>
#include <base/range.h>
@@ -44,6 +42,12 @@
namespace DB
{
+namespace Setting
+{
+extern const SettingsBool allow_simdjson;
+extern const SettingsUInt64 max_parser_depth;
+extern const SettingsUInt64 max_parser_backtracks;
+}
namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
@@ -526,7 +530,7 @@ public:
const DB::ColumnsWithTypeAndName & arguments, const DB::DataTypePtr &
/*result_type*/, size_t /*input_rows_count*/) const override
{
#if USE_SIMDJSON
- if (context->getSettingsRef().allow_simdjson)
+ if (context->getSettingsRef()[DB::Setting::allow_simdjson])
{
return innerExecuteImpl<
DB::SimdJSONParser,
@@ -600,8 +604,8 @@ private:
const char * query_begin = reinterpret_cast<const char
*>(required_fields.back().c_str());
const char * query_end = required_fields.back().c_str() +
required_fields.back().size();
DB::Tokens tokens(query_begin, query_end);
-        UInt32 max_parser_depth = static_cast<UInt32>(context->getSettingsRef().max_parser_depth);
-        UInt32 max_parser_backtracks = static_cast<UInt32>(context->getSettingsRef().max_parser_backtracks);
+        UInt32 max_parser_depth = static_cast<UInt32>(context->getSettingsRef()[DB::Setting::max_parser_depth]);
+        UInt32 max_parser_backtracks = static_cast<UInt32>(context->getSettingsRef()[DB::Setting::max_parser_backtracks]);
DB::IParser::Pos token_iterator(tokens, max_parser_depth,
max_parser_backtracks);
DB::ASTPtr json_path_ast;
DB::ParserJSONPath path_parser;
diff --git a/cpp-ch/local-engine/Parser/LocalExecutor.cpp
b/cpp-ch/local-engine/Parser/LocalExecutor.cpp
index 58c29b53c1..f781556c91 100644
--- a/cpp-ch/local-engine/Parser/LocalExecutor.cpp
+++ b/cpp-ch/local-engine/Parser/LocalExecutor.cpp
@@ -23,6 +23,10 @@
#include <QueryPipeline/printPipeline.h>
#include <Common/QueryContext.h>
+namespace DB::Setting
+{
+extern const SettingsMaxThreads max_threads;
+}
using namespace DB;
namespace local_engine
{
@@ -123,7 +127,7 @@ void LocalExecutor::execute()
{
chassert(query_pipeline_builder);
push_executor = query_pipeline_builder->execute();
-
push_executor->execute(local_engine::QueryContext::instance().currentQueryContext()->getSettingsRef().max_threads,
false);
+
push_executor->execute(QueryContext::instance().currentQueryContext()->getSettingsRef()[Setting::max_threads],
false);
}
Block LocalExecutor::getHeader()
diff --git a/cpp-ch/local-engine/Parser/RelParsers/AggregateRelParser.cpp
b/cpp-ch/local-engine/Parser/RelParsers/AggregateRelParser.cpp
index 05f63bfc5b..7313d00df7 100644
--- a/cpp-ch/local-engine/Parser/RelParsers/AggregateRelParser.cpp
+++ b/cpp-ch/local-engine/Parser/RelParsers/AggregateRelParser.cpp
@@ -33,6 +33,23 @@
namespace DB
{
+namespace Setting
+{
+extern const SettingsUInt64 max_bytes_before_external_group_by;
+extern const SettingsBool optimize_group_by_constant_keys;
+extern const SettingsUInt64 min_free_disk_space_for_temporary_data;
+extern const SettingsMaxThreads max_threads;
+extern const SettingsBool empty_result_for_aggregation_by_empty_set;
+extern const SettingsUInt64 group_by_two_level_threshold_bytes;
+extern const SettingsOverflowModeGroupBy group_by_overflow_mode;
+extern const SettingsUInt64 max_rows_to_group_by;
+extern const SettingsBool enable_memory_bound_merging_of_aggregation_results;
+extern const SettingsUInt64 aggregation_in_order_max_block_bytes;
+extern const SettingsUInt64 group_by_two_level_threshold;
+extern const SettingsFloat min_hit_rate_to_use_consecutive_keys_optimization;
+extern const SettingsMaxThreads max_threads;
+extern const SettingsUInt64 max_block_size;
+}
namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
@@ -292,13 +309,13 @@ void AggregateRelParser::addMergingAggregatedStep()
grouping_keys,
aggregate_descriptions,
false,
- settings.max_threads,
- PODArrayUtil::adjustMemoryEfficientSize(settings.max_block_size),
- settings.min_hit_rate_to_use_consecutive_keys_optimization);
+ settings[Setting::max_threads],
+
PODArrayUtil::adjustMemoryEfficientSize(settings[Setting::max_block_size]),
+ settings[Setting::min_hit_rate_to_use_consecutive_keys_optimization]);
auto config = StreamingAggregateConfig::loadFromContext(getContext());
if (config.enable_streaming_aggregating)
{
- params.group_by_two_level_threshold =
settings.group_by_two_level_threshold;
+ params.group_by_two_level_threshold =
settings[Setting::group_by_two_level_threshold];
auto merging_step =
std::make_unique<GraceMergingAggregatedStep>(getContext(),
plan->getCurrentDataStream(), params, false);
steps.emplace_back(merging_step.get());
plan->addStep(std::move(merging_step));
@@ -316,10 +333,10 @@ void AggregateRelParser::addMergingAggregatedStep()
1,
1,
false,
- settings.max_block_size,
- settings.aggregation_in_order_max_block_bytes,
+ settings[Setting::max_block_size],
+ settings[Setting::aggregation_in_order_max_block_bytes],
SortDescription(),
- settings.enable_memory_bound_merging_of_aggregation_results);
+
settings[Setting::enable_memory_bound_merging_of_aggregation_results]);
steps.emplace_back(merging_step.get());
plan->addStep(std::move(merging_step));
}
@@ -337,22 +354,22 @@ void AggregateRelParser::addCompleteModeAggregatedStep()
grouping_keys,
aggregate_descriptions,
false,
- settings.max_rows_to_group_by,
- settings.group_by_overflow_mode,
- settings.group_by_two_level_threshold,
- settings.group_by_two_level_threshold_bytes,
- 0, /*settings.max_bytes_before_external_group_by*/
- settings.empty_result_for_aggregation_by_empty_set,
+ settings[Setting::max_rows_to_group_by],
+ settings[Setting::group_by_overflow_mode],
+ settings[Setting::group_by_two_level_threshold],
+ settings[Setting::group_by_two_level_threshold_bytes],
+ 0, /*settings[Setting::max_bytes_before_external_group_by]*/
+ settings[Setting::empty_result_for_aggregation_by_empty_set],
getContext()->getTempDataOnDisk(),
- settings.max_threads,
- settings.min_free_disk_space_for_temporary_data,
+ settings[Setting::max_threads],
+ settings[Setting::min_free_disk_space_for_temporary_data],
true,
3,
- PODArrayUtil::adjustMemoryEfficientSize(settings.max_block_size),
+
PODArrayUtil::adjustMemoryEfficientSize(settings[Setting::max_block_size]),
/*enable_prefetch*/ true,
/*only_merge*/ false,
- settings.optimize_group_by_constant_keys,
- settings.min_hit_rate_to_use_consecutive_keys_optimization,
+ settings[Setting::optimize_group_by_constant_keys],
+
settings[Setting::min_hit_rate_to_use_consecutive_keys_optimization],
/*StatsCollectingParams*/{});
auto merging_step =
std::make_unique<GraceMergingAggregatedStep>(getContext(),
plan->getCurrentDataStream(), params, true);
steps.emplace_back(merging_step.get());
@@ -364,22 +381,22 @@ void AggregateRelParser::addCompleteModeAggregatedStep()
grouping_keys,
aggregate_descriptions,
false,
- settings.max_rows_to_group_by,
- settings.group_by_overflow_mode,
- settings.group_by_two_level_threshold,
- settings.group_by_two_level_threshold_bytes,
- settings.max_bytes_before_external_group_by,
- settings.empty_result_for_aggregation_by_empty_set,
+ settings[Setting::max_rows_to_group_by],
+ settings[Setting::group_by_overflow_mode],
+ settings[Setting::group_by_two_level_threshold],
+ settings[Setting::group_by_two_level_threshold_bytes],
+ settings[Setting::max_bytes_before_external_group_by],
+ settings[Setting::empty_result_for_aggregation_by_empty_set],
getContext()->getTempDataOnDisk(),
- settings.max_threads,
- settings.min_free_disk_space_for_temporary_data,
+ settings[Setting::max_threads],
+ settings[Setting::min_free_disk_space_for_temporary_data],
true,
3,
- PODArrayUtil::adjustMemoryEfficientSize(settings.max_block_size),
+
PODArrayUtil::adjustMemoryEfficientSize(settings[Setting::max_block_size]),
/*enable_prefetch*/ true,
/*only_merge*/ false,
- settings.optimize_group_by_constant_keys,
- settings.min_hit_rate_to_use_consecutive_keys_optimization,
+ settings[Setting::optimize_group_by_constant_keys],
+
settings[Setting::min_hit_rate_to_use_consecutive_keys_optimization],
/*StatsCollectingParams*/{});
auto aggregating_step = std::make_unique<AggregatingStep>(
@@ -387,8 +404,8 @@ void AggregateRelParser::addCompleteModeAggregatedStep()
params,
GroupingSetsParamsList(),
true,
- settings.max_block_size,
- settings.aggregation_in_order_max_block_bytes,
+ settings[Setting::max_block_size],
+ settings[Setting::aggregation_in_order_max_block_bytes],
1,
1,
false,
@@ -422,22 +439,22 @@ void AggregateRelParser::addAggregatingStep()
grouping_keys,
aggregate_descriptions,
false,
- settings.max_rows_to_group_by,
- settings.group_by_overflow_mode,
- settings.group_by_two_level_threshold,
+ settings[Setting::max_rows_to_group_by],
+ settings[Setting::group_by_overflow_mode],
+ settings[Setting::group_by_two_level_threshold],
0, // group_by_two_level_threshold_bytes
0,
- settings.empty_result_for_aggregation_by_empty_set,
+ settings[Setting::empty_result_for_aggregation_by_empty_set],
nullptr,
- settings.max_threads,
- settings.min_free_disk_space_for_temporary_data,
+ settings[Setting::max_threads],
+ settings[Setting::min_free_disk_space_for_temporary_data],
true,
3,
- PODArrayUtil::adjustMemoryEfficientSize(settings.max_block_size),
+
PODArrayUtil::adjustMemoryEfficientSize(settings[Setting::max_block_size]),
/*enable_prefetch*/ true,
/*only_merge*/ false,
- settings.optimize_group_by_constant_keys,
- settings.min_hit_rate_to_use_consecutive_keys_optimization,
+ settings[Setting::optimize_group_by_constant_keys],
+
settings[Setting::min_hit_rate_to_use_consecutive_keys_optimization],
/*StatsCollectingParams*/{});
auto aggregating_step =
std::make_unique<StreamingAggregatingStep>(getContext(),
plan->getCurrentDataStream(), params);
steps.emplace_back(aggregating_step.get());
@@ -449,22 +466,22 @@ void AggregateRelParser::addAggregatingStep()
grouping_keys,
aggregate_descriptions,
false,
- settings.max_rows_to_group_by,
- settings.group_by_overflow_mode,
- settings.group_by_two_level_threshold,
- settings.group_by_two_level_threshold_bytes,
- settings.max_bytes_before_external_group_by,
- settings.empty_result_for_aggregation_by_empty_set,
+ settings[Setting::max_rows_to_group_by],
+ settings[Setting::group_by_overflow_mode],
+ settings[Setting::group_by_two_level_threshold],
+ settings[Setting::group_by_two_level_threshold_bytes],
+ settings[Setting::max_bytes_before_external_group_by],
+ settings[Setting::empty_result_for_aggregation_by_empty_set],
getContext()->getTempDataOnDisk(),
- settings.max_threads,
- settings.min_free_disk_space_for_temporary_data,
+ settings[Setting::max_threads],
+ settings[Setting::min_free_disk_space_for_temporary_data],
true,
3,
- PODArrayUtil::adjustMemoryEfficientSize(settings.max_block_size),
+
PODArrayUtil::adjustMemoryEfficientSize(settings[Setting::max_block_size]),
/*enable_prefetch*/ true,
/*only_merge*/ false,
- settings.optimize_group_by_constant_keys,
- settings.min_hit_rate_to_use_consecutive_keys_optimization,
+ settings[Setting::optimize_group_by_constant_keys],
+
settings[Setting::min_hit_rate_to_use_consecutive_keys_optimization],
/*StatsCollectingParams*/{});
auto aggregating_step = std::make_unique<AggregatingStep>(
@@ -472,8 +489,8 @@ void AggregateRelParser::addAggregatingStep()
params,
GroupingSetsParamsList(),
false,
- settings.max_block_size,
- settings.aggregation_in_order_max_block_bytes,
+ settings[Setting::max_block_size],
+ settings[Setting::aggregation_in_order_max_block_bytes],
1,
1,
false,
diff --git a/cpp-ch/local-engine/Parser/RelParsers/JoinRelParser.cpp
b/cpp-ch/local-engine/Parser/RelParsers/JoinRelParser.cpp
index a5ed605ed0..cc70b8e0f9 100644
--- a/cpp-ch/local-engine/Parser/RelParsers/JoinRelParser.cpp
+++ b/cpp-ch/local-engine/Parser/RelParsers/JoinRelParser.cpp
@@ -43,6 +43,10 @@
namespace DB
{
+namespace Setting
+{
+extern const SettingsJoinAlgorithm join_algorithm;
+}
namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
@@ -329,7 +333,7 @@ DB::QueryPlanPtr JoinRelParser::parseJoin(const
substrait::JoinRel & join, DB::Q
}
JoinPtr smj_join = std::make_shared<FullSortingMergeJoin>(table_join,
right->getCurrentDataStream().header.cloneEmpty(), -1);
- MultiEnum<DB::JoinAlgorithm> join_algorithm =
context->getSettingsRef().join_algorithm;
+ MultiEnum<DB::JoinAlgorithm> join_algorithm =
context->getSettingsRef()[Setting::join_algorithm];
QueryPlanStepPtr join_step
= std::make_unique<DB::JoinStep>(left->getCurrentDataStream(),
right->getCurrentDataStream(), smj_join, 8192, 1, false);
@@ -825,7 +829,7 @@ DB::QueryPlanPtr JoinRelParser::buildSingleOnClauseHashJoin(
/// data will be spilled to disk. Don't set the limitation too small,
otherwise the buckets number
/// will be too large and the performance will be bad.
JoinPtr hash_join = nullptr;
- MultiEnum<DB::JoinAlgorithm> join_algorithm =
context->getSettingsRef().join_algorithm;
+ MultiEnum<DB::JoinAlgorithm> join_algorithm =
context->getSettingsRef()[Setting::join_algorithm];
if (join_algorithm.isSet(DB::JoinAlgorithm::GRACE_HASH))
{
hash_join = std::make_shared<GraceHashJoin>(
diff --git a/cpp-ch/local-engine/Parser/RelParsers/MergeTreeRelParser.cpp
b/cpp-ch/local-engine/Parser/RelParsers/MergeTreeRelParser.cpp
index e467042bda..adafc97ed8 100644
--- a/cpp-ch/local-engine/Parser/RelParsers/MergeTreeRelParser.cpp
+++ b/cpp-ch/local-engine/Parser/RelParsers/MergeTreeRelParser.cpp
@@ -16,17 +16,23 @@
*/
#include "MergeTreeRelParser.h"
+#include <Core/Settings.h>
#include <Parser/FunctionParser.h>
+#include <Parser/InputFileNameParser.h>
#include <Parser/SubstraitParserUtils.h>
#include <Parser/TypeParser.h>
#include <Storages/MergeTree/StorageMergeTreeFactory.h>
#include <google/protobuf/wrappers.pb.h>
#include <Poco/StringTokenizer.h>
#include <Common/CHUtil.h>
-#include <Parser/InputFileNameParser.h>
+#include <Common/GlutenSettings.h>
namespace DB
{
+namespace Setting
+{
+extern const SettingsUInt64 max_block_size;
+}
namespace ErrorCodes
{
extern const int NO_SUCH_DATA_PART;
@@ -126,7 +132,7 @@ DB::QueryPlanPtr MergeTreeRelParser::parseReadRel(
storage_snapshot,
*query_info,
context,
- context->getSettingsRef().max_block_size,
+ context->getSettingsRef()[Setting::max_block_size],
1);
auto * source_step_with_filter = static_cast<SourceStepWithFilter
*>(read_step.get());
@@ -138,7 +144,7 @@ DB::QueryPlanPtr MergeTreeRelParser::parseReadRel(
auto ranges = merge_tree_table.extractRange(selected_parts);
std::string ret;
- if
(context->getSettingsRef().tryGetString("enabled_driver_filter_mergetree_index",
ret) && ret == "'true'")
+ if (settingsEqual(context->getSettingsRef(),
"enabled_driver_filter_mergetree_index", "true"))
SparkStorageMergeTree::analysisPartsByRanges(*reinterpret_cast<ReadFromMergeTree
*>(read_step.get()), ranges);
else
SparkStorageMergeTree::wrapRangesInDataParts(*reinterpret_cast<ReadFromMergeTree
*>(read_step.get()), ranges);
@@ -368,7 +374,7 @@ String MergeTreeRelParser::filterRangesOnDriver(const
substrait::ReadRel & read_
storage_snapshot,
*query_info,
context,
- context->getSettingsRef().max_block_size,
+ context->getSettingsRef()[Setting::max_block_size],
10); // TODO: Expect use driver cores.
auto * read_from_mergetree = static_cast<ReadFromMergeTree
*>(read_step.get());
diff --git a/cpp-ch/local-engine/Parser/RelParsers/ProjectRelParser.cpp
b/cpp-ch/local-engine/Parser/RelParsers/ProjectRelParser.cpp
index ed040a3626..637c4361f5 100644
--- a/cpp-ch/local-engine/Parser/RelParsers/ProjectRelParser.cpp
+++ b/cpp-ch/local-engine/Parser/RelParsers/ProjectRelParser.cpp
@@ -18,12 +18,19 @@
#include <Interpreters/ArrayJoin.h>
#include <Operator/EmptyProjectStep.h>
+#include <Operator/ReplicateRowsStep.h>
#include <Processors/QueryPlan/ArrayJoinStep.h>
#include <Processors/QueryPlan/ExpressionStep.h>
#include <Rewriter/ExpressionRewriter.h>
#include <Common/CHUtil.h>
-#include <Operator/ReplicateRowsStep.h>
+namespace DB
+{
+namespace Setting
+{
+extern const SettingsUInt64 max_block_size;
+}
+}
using namespace DB;
namespace local_engine
@@ -204,7 +211,7 @@ ProjectRelParser::parseGenerate(DB::QueryPlanPtr
query_plan, const substrait::Re
array_join.columns = std::move(array_joined_columns);
array_join.is_left = generate_rel.outer();
auto array_join_step = std::make_unique<ArrayJoinStep>(
- query_plan->getCurrentDataStream(), std::move(array_join), false,
getContext()->getSettingsRef().max_block_size);
+ query_plan->getCurrentDataStream(), std::move(array_join), false,
getContext()->getSettingsRef()[Setting::max_block_size]);
array_join_step->setStepDescription("ARRAY JOIN In Generate");
steps.emplace_back(array_join_step.get());
query_plan->addStep(std::move(array_join_step));
diff --git a/cpp-ch/local-engine/Parser/RelParsers/ReadRelParser.cpp
b/cpp-ch/local-engine/Parser/RelParsers/ReadRelParser.cpp
index 1f623ced57..e49a51b6c7 100644
--- a/cpp-ch/local-engine/Parser/RelParsers/ReadRelParser.cpp
+++ b/cpp-ch/local-engine/Parser/RelParsers/ReadRelParser.cpp
@@ -32,11 +32,17 @@
#include <Common/BlockTypeUtils.h>
-namespace DB::ErrorCodes
+namespace DB
+{
+namespace Setting
+{
+extern const SettingsMaxThreads max_threads;
+}
+namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
}
-
+}
namespace local_engine
{
DB::QueryPlanPtr ReadRelParser::parse(DB::QueryPlanPtr query_plan, const
substrait::Rel & rel, std::list<const substrait::Rel *> &)
@@ -56,7 +62,7 @@ DB::QueryPlanPtr ReadRelParser::parse(DB::QueryPlanPtr
query_plan, const substra
steps.emplace_back(read_step.get());
query_plan->addStep(std::move(read_step));
- if (getContext()->getSettingsRef().max_threads > 1)
+ if (getContext()->getSettingsRef()[Setting::max_threads] > 1)
{
auto buffer_step =
std::make_unique<BlocksBufferPoolStep>(query_plan->getCurrentDataStream());
steps.emplace_back(buffer_step.get());
diff --git a/cpp-ch/local-engine/Parser/SerializedPlanParser.cpp
b/cpp-ch/local-engine/Parser/SerializedPlanParser.cpp
index c1f00c3348..75ba2a1152 100644
--- a/cpp-ch/local-engine/Parser/SerializedPlanParser.cpp
+++ b/cpp-ch/local-engine/Parser/SerializedPlanParser.cpp
@@ -88,6 +88,11 @@
namespace DB
{
+namespace Setting
+{
+extern const SettingsBool query_plan_enable_optimizations;
+extern const SettingsUInt64 priority;
+}
namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
@@ -418,7 +423,7 @@ QueryPlanPtr SerializedPlanParser::parseOp(const
substrait::Rel & rel, std::list
std::vector<DB::IQueryPlanStep *> steps = rel_parser->getSteps();
- if (!context->getSettingsRef().query_plan_enable_optimizations)
+ if (!context->getSettingsRef()[Setting::query_plan_enable_optimizations])
{
if (rel.rel_type_case() == substrait::Rel::RelTypeCase::kRead)
{
@@ -1177,12 +1182,12 @@ DB::QueryPipelineBuilderPtr
SerializedPlanParser::buildQueryPipeline(DB::QueryPl
context,
"",
context->getClientInfo(),
- priorities.insert(settings.priority),
+ priorities.insert(settings[Setting::priority]),
CurrentThread::getGroup(),
IAST::QueryKind::Select,
settings,
0);
- const QueryPlanOptimizationSettings optimization_settings{.optimize_plan =
settings.query_plan_enable_optimizations};
+ const QueryPlanOptimizationSettings optimization_settings{.optimize_plan =
settings[Setting::query_plan_enable_optimizations]};
return query_plan.buildQueryPipeline(
optimization_settings,
BuildQueryPipelineSettings{
@@ -1213,7 +1218,7 @@ std::unique_ptr<LocalExecutor>
SerializedPlanParser::createExecutor(DB::QueryPla
auto * logger = &Poco::Logger::get("SerializedPlanParser");
LOG_INFO(logger, "build pipeline {} ms", stopwatch.elapsedMicroseconds() /
1000.0);
LOG_DEBUG(
- logger, "clickhouse plan [optimization={}]:\n{}",
settings.query_plan_enable_optimizations, PlanUtil::explainPlan(*query_plan));
+ logger, "clickhouse plan [optimization={}]:\n{}",
settings[Setting::query_plan_enable_optimizations],
PlanUtil::explainPlan(*query_plan));
auto config = ExecutorConfig::loadFromContext(context);
return std::make_unique<LocalExecutor>(std::move(query_plan),
std::move(builder), config.dump_pipeline);
diff --git a/cpp-ch/local-engine/Shuffle/PartitionWriter.h
b/cpp-ch/local-engine/Shuffle/PartitionWriter.h
index 80bb43b8d1..fa388d08ba 100644
--- a/cpp-ch/local-engine/Shuffle/PartitionWriter.h
+++ b/cpp-ch/local-engine/Shuffle/PartitionWriter.h
@@ -29,6 +29,10 @@
namespace DB
{
class MergingSortedAlgorithm;
+namespace Setting
+{
+extern const SettingsUInt64 prefer_external_sort_block_bytes;
+}
}
namespace local_engine
@@ -156,7 +160,7 @@ protected:
{
max_merge_block_size = options.split_size;
max_sort_buffer_size = options.max_sort_buffer_size;
- max_merge_block_bytes =
QueryContext::globalContext()->getSettingsRef().prefer_external_sort_block_bytes;
+ max_merge_block_bytes =
QueryContext::globalContext()->getSettingsRef()[DB::Setting::prefer_external_sort_block_bytes];
}
public:
String getName() const override { return "SortBasedPartitionWriter"; }
diff --git a/cpp-ch/local-engine/Storages/Cache/CacheManager.cpp
b/cpp-ch/local-engine/Storages/Cache/CacheManager.cpp
index daa3c0e305..25726dc24f 100644
--- a/cpp-ch/local-engine/Storages/Cache/CacheManager.cpp
+++ b/cpp-ch/local-engine/Storages/Cache/CacheManager.cpp
@@ -35,6 +35,10 @@
namespace DB
{
+namespace Setting
+{
+extern const SettingsUInt64 max_block_size;
+}
namespace ErrorCodes
{
extern const int INVALID_STATE;
@@ -107,7 +111,7 @@ Task CacheManager::cachePart(const MergeTreeTableInstance &
table, const MergeTr
storage_snapshot,
*query_info,
context,
- context->getSettingsRef().max_block_size,
+ context->getSettingsRef()[Setting::max_block_size],
1);
QueryPlan plan;
plan.addStep(std::move(read_step));
diff --git a/cpp-ch/local-engine/Storages/MergeTree/MetaDataHelper.cpp
b/cpp-ch/local-engine/Storages/MergeTree/MetaDataHelper.cpp
index 621374377d..0e6f5b102f 100644
--- a/cpp-ch/local-engine/Storages/MergeTree/MetaDataHelper.cpp
+++ b/cpp-ch/local-engine/Storages/MergeTree/MetaDataHelper.cpp
@@ -31,6 +31,11 @@ extern const Metric LocalThreadScheduled;
namespace DB
{
+namespace Setting
+{
+extern const SettingsSeconds lock_acquire_timeout;
+extern const SettingsMaxThreads max_threads;
+}
namespace ErrorCodes
{
extern const int NOT_IMPLEMENTED;
@@ -89,7 +94,7 @@ void restoreMetaData<ROCKSDB>(const SparkStorageMergeTreePtr
& storage, const Me
not_exists_part.emplace(part);
}
- if (auto lock =
storage->lockForAlter(context.getSettingsRef().lock_acquire_timeout))
+ if (auto lock =
storage->lockForAlter(context.getSettingsRef()[Setting::lock_acquire_timeout]))
{
// put this return clause in lockForAlter
// so that it will not return until other thread finishes restoring
@@ -140,7 +145,7 @@ void restoreMetaData<LOCAL>(
not_exists_part.emplace(part);
}
- if (auto lock =
storage->lockForAlter(context.getSettingsRef().lock_acquire_timeout))
+ if (auto lock =
storage->lockForAlter(context.getSettingsRef()[Setting::lock_acquire_timeout]))
{
// put this return clause in lockForAlter
// so that it will not return until other thread finishes restoring
@@ -148,7 +153,7 @@ void restoreMetaData<LOCAL>(
return;
// Increase the speed of metadata recovery
- auto max_concurrency = std::max(10UL,
QueryContext::globalContext()->getSettingsRef().max_threads.value);
+ auto max_concurrency = std::max(10UL,
QueryContext::globalContext()->getSettingsRef()[Setting::max_threads].value);
auto max_threads = std::min(max_concurrency, not_exists_part.size());
FreeThreadPool thread_pool(
CurrentMetrics::LocalThread,
diff --git a/cpp-ch/local-engine/Storages/MergeTree/SparkMergeTreeWriter.cpp
b/cpp-ch/local-engine/Storages/MergeTree/SparkMergeTreeWriter.cpp
index 1ede4960aa..3a55c965b6 100644
--- a/cpp-ch/local-engine/Storages/MergeTree/SparkMergeTreeWriter.cpp
+++ b/cpp-ch/local-engine/Storages/MergeTree/SparkMergeTreeWriter.cpp
@@ -27,6 +27,11 @@
#include <Common/CHUtil.h>
#include <Common/QueryContext.h>
+namespace DB::Setting
+{
+extern const SettingsUInt64 min_insert_block_size_rows;
+extern const SettingsUInt64 min_insert_block_size_bytes;
+}
using namespace DB;
namespace
{
@@ -62,9 +67,9 @@ std::unique_ptr<SparkMergeTreeWriter>
SparkMergeTreeWriter::create(
auto sink = dest_storage->write(none, metadata_snapshot, context, false);
chain.addSink(sink);
chain.addSource(
- std::make_shared<ApplySquashingTransform>(header,
settings.min_insert_block_size_rows, settings.min_insert_block_size_bytes));
+ std::make_shared<ApplySquashingTransform>(header,
settings[Setting::min_insert_block_size_rows],
settings[Setting::min_insert_block_size_bytes]));
chain.addSource(
- std::make_shared<PlanSquashingTransform>(header,
settings.min_insert_block_size_rows, settings.min_insert_block_size_bytes));
+ std::make_shared<PlanSquashingTransform>(header,
settings[Setting::min_insert_block_size_rows],
settings[Setting::min_insert_block_size_bytes]));
std::unordered_map<String, String> partition_values;
if (!write_settings_.partition_dir.empty())
diff --git
a/cpp-ch/local-engine/Storages/SubstraitSource/ExcelTextFormatFile.cpp
b/cpp-ch/local-engine/Storages/SubstraitSource/ExcelTextFormatFile.cpp
index 3bb73a856e..7e87d11dc7 100644
--- a/cpp-ch/local-engine/Storages/SubstraitSource/ExcelTextFormatFile.cpp
+++ b/cpp-ch/local-engine/Storages/SubstraitSource/ExcelTextFormatFile.cpp
@@ -17,11 +17,12 @@
#include "ExcelTextFormatFile.h"
#include <memory>
#include <string>
-
#include <Columns/ColumnNullable.h>
+#include <Core/Settings.h>
#include <DataTypes/DataTypeDecimalBase.h>
#include <DataTypes/DataTypeNullable.h>
#include <DataTypes/Serializations/SerializationNullable.h>
+#include <Formats/FormatFactory.h>
#include <Formats/FormatSettings.h>
#include <IO/PeekableReadBuffer.h>
#include <Processors/Formats/IRowInputFormat.h>
@@ -29,7 +30,7 @@
#include <Storages/Serializations/ExcelDecimalSerialization.h>
#include <Storages/Serializations/ExcelSerialization.h>
#include <Storages/Serializations/ExcelStringReader.h>
-#include <Common/CHUtil.h>
+#include <Common/GlutenSettings.h>
namespace DB
{
@@ -56,6 +57,11 @@ void skipErrorChars(DB::ReadBuffer & buf, bool has_quote,
char quote, String & e
++buf.position();
}
+bool ExcelTextFormatFile::useThis(const DB::ContextPtr & context)
+{
+ return settingsEqual(context->getSettingsRef(), USE_EXCEL_PARSER, "true");
+}
+
FormatFile::InputFormatPtr ExcelTextFormatFile::createInputFormat(const
DB::Block & header)
{
auto res = std::make_shared<FormatFile::InputFormat>();
@@ -99,15 +105,15 @@ DB::FormatSettings
ExcelTextFormatFile::createFormatSettings()
format_settings.csv.null_representation =
file_info.text().null_value();
bool empty_as_null = true;
- if
(context->getSettingsRef().has(BackendInitializerUtil::EXCEL_EMPTY_AS_NULL))
- empty_as_null =
context->getSettingsRef().getString(BackendInitializerUtil::EXCEL_EMPTY_AS_NULL)
== "'true'";
+ if (context->getSettingsRef().has(EXCEL_EMPTY_AS_NULL))
+ empty_as_null = settingsEqual(context->getSettingsRef(),
EXCEL_EMPTY_AS_NULL, "true");
+
+ format_settings.try_infer_integers = false;
+ if (!context->getSettingsRef().has(EXCEL_NUMBER_FORCE))
+ format_settings.try_infer_integers = true;
- format_settings.try_infer_integers = 0;
- if
(!context->getSettingsRef().has(BackendInitializerUtil::EXCEL_NUMBER_FORCE))
- format_settings.try_infer_integers = 1;
- if
(context->getSettingsRef().has(BackendInitializerUtil::EXCEL_NUMBER_FORCE)
- &&
context->getSettingsRef().getString(BackendInitializerUtil::EXCEL_NUMBER_FORCE)
== "'true'")
- format_settings.try_infer_integers = 1;
+ if (settingsEqual(context->getSettingsRef(), EXCEL_NUMBER_FORCE, "true"))
+ format_settings.try_infer_integers = true;
if (format_settings.csv.null_representation.empty() || empty_as_null)
format_settings.csv.empty_as_default = true;
@@ -131,8 +137,7 @@ DB::FormatSettings
ExcelTextFormatFile::createFormatSettings()
{
format_settings.csv.allow_single_quotes = false;
- if
(context->getSettingsRef().has(BackendInitializerUtil::EXCEL_QUOTE_STRICT)
- &&
context->getSettingsRef().getString(BackendInitializerUtil::EXCEL_QUOTE_STRICT)
== "'true'")
+ if (settingsEqual(context->getSettingsRef(), EXCEL_QUOTE_STRICT,
"true"))
format_settings.csv.allow_double_quotes = false;
else
format_settings.csv.allow_double_quotes = true;
diff --git a/cpp-ch/local-engine/Storages/SubstraitSource/ExcelTextFormatFile.h
b/cpp-ch/local-engine/Storages/SubstraitSource/ExcelTextFormatFile.h
index 4ec69f33b9..6fc4183de0 100644
--- a/cpp-ch/local-engine/Storages/SubstraitSource/ExcelTextFormatFile.h
+++ b/cpp-ch/local-engine/Storages/SubstraitSource/ExcelTextFormatFile.h
@@ -29,6 +29,15 @@ namespace local_engine
class ExcelTextFormatFile : public FormatFile
{
+ // use excel text parser
+ static constexpr std::string_view USE_EXCEL_PARSER =
"use_excel_serialization";
+ static constexpr std::string_view EXCEL_EMPTY_AS_NULL =
"use_excel_serialization.empty_as_null";
+ static constexpr std::string_view EXCEL_NUMBER_FORCE =
"use_excel_serialization.number_force";
+ static constexpr std::string_view EXCEL_QUOTE_STRICT =
"use_excel_serialization.quote_strict";
+
+public:
+ static bool useThis(const DB::ContextPtr & context);
+
public:
explicit ExcelTextFormatFile(
DB::ContextPtr context_, const
substrait::ReadRel::LocalFiles::FileOrFiles & file_info_, ReadBufferBuilderPtr
read_buffer_builder_)
diff --git a/cpp-ch/local-engine/Storages/SubstraitSource/FormatFile.cpp
b/cpp-ch/local-engine/Storages/SubstraitSource/FormatFile.cpp
index 4499a9a559..42b399abe4 100644
--- a/cpp-ch/local-engine/Storages/SubstraitSource/FormatFile.cpp
+++ b/cpp-ch/local-engine/Storages/SubstraitSource/FormatFile.cpp
@@ -18,7 +18,6 @@
#include <memory>
#include <IO/ReadBufferFromFile.h>
-#include <Common/CHUtil.h>
#include <Common/Exception.h>
#include <Common/GlutenStringUtils.h>
#include <Common/logger_useful.h>
@@ -36,8 +35,8 @@
#include <Storages/SubstraitSource/TextFormatFile.h>
#endif
-#include <Common/GlutenConfig.h>
#include <Storages/SubstraitSource/JSONFormatFile.h>
+#include <Common/GlutenConfig.h>
namespace DB
{
@@ -91,8 +90,7 @@ FormatFilePtr FormatFileUtil::createFile(
#if USE_HIVE
if (file.has_text())
{
- if
(context->getSettingsRef().has(BackendInitializerUtil::USE_EXCEL_PARSER)
- &&
context->getSettingsRef().getString(BackendInitializerUtil::USE_EXCEL_PARSER)
== "'true'")
+ if (ExcelTextFormatFile::useThis(context))
return std::make_shared<ExcelTextFormatFile>(context, file,
read_buffer_builder);
else
return std::make_shared<TextFormatFile>(context, file,
read_buffer_builder);
@@ -102,6 +100,5 @@ FormatFilePtr FormatFileUtil::createFile(
if (file.has_json())
return std::make_shared<JSONFormatFile>(context, file,
read_buffer_builder);
throw DB::Exception(DB::ErrorCodes::NOT_IMPLEMENTED, "Format not
supported:{}", file.DebugString());
- __builtin_unreachable();
}
}
diff --git a/cpp-ch/local-engine/Storages/SubstraitSource/ORCFormatFile.cpp
b/cpp-ch/local-engine/Storages/SubstraitSource/ORCFormatFile.cpp
index 66556e237f..4751088df5 100644
--- a/cpp-ch/local-engine/Storages/SubstraitSource/ORCFormatFile.cpp
+++ b/cpp-ch/local-engine/Storages/SubstraitSource/ORCFormatFile.cpp
@@ -17,14 +17,15 @@
#include "ORCFormatFile.h"
#if USE_ORC
-# include <memory>
-# include <numeric>
-# include <Formats/FormatFactory.h>
-# include <IO/SeekableReadBuffer.h>
-# include <Processors/Formats/Impl/ArrowBufferedStreams.h>
-# include <Processors/Formats/Impl/NativeORCBlockInputFormat.h>
-# include <Storages/SubstraitSource/OrcUtil.h>
-# include <Common/CHUtil.h>
+#include <memory>
+#include <numeric>
+#include <Formats/FormatFactory.h>
+#include <IO/SeekableReadBuffer.h>
+#include <Processors/Formats/Impl/ArrowBufferedStreams.h>
+#include <Processors/Formats/Impl/NativeORCBlockInputFormat.h>
+#include <Storages/SubstraitSource/OrcUtil.h>
+#include <Poco/Util/AbstractConfiguration.h>
+#include <Common/CHUtil.h>
namespace local_engine
{
diff --git a/cpp-ch/local-engine/Storages/SubstraitSource/ReadBufferBuilder.cpp
b/cpp-ch/local-engine/Storages/SubstraitSource/ReadBufferBuilder.cpp
index b32073db53..b1884af1d4 100644
--- a/cpp-ch/local-engine/Storages/SubstraitSource/ReadBufferBuilder.cpp
+++ b/cpp-ch/local-engine/Storages/SubstraitSource/ReadBufferBuilder.cpp
@@ -49,6 +49,7 @@
#include <Common/CHUtil.h>
#include <Common/FileCacheConcurrentMap.h>
#include <Common/GlutenConfig.h>
+#include <Common/GlutenSettings.h>
#include <Common/logger_useful.h>
#include <Common/safe_cast.h>
@@ -66,6 +67,12 @@
namespace DB
{
+namespace Setting
+{
+extern const SettingsUInt64 s3_max_redirects;
+extern const SettingsBool s3_disable_checksum;
+extern const SettingsUInt64 s3_retry_attempts;
+}
namespace ErrorCodes
{
extern const int BAD_ARGUMENTS;
@@ -511,13 +518,7 @@ private:
static FileCacheConcurrentMap files_cache_time_map;
DB::FileCachePtr file_cache;
- std::string & stripQuote(std::string & s)
- {
- s.erase(remove(s.begin(), s.end(), '\''), s.end());
- return s;
- }
-
- std::string toBucketNameSetting(const std::string & bucket_name, const
std::string & config_name)
+ static std::string toBucketNameSetting(const std::string & bucket_name,
const std::string & config_name)
{
if (!config_name.starts_with(BackendInitializerUtil::S3A_PREFIX))
{
@@ -529,7 +530,7 @@ private:
+ config_name.substr(BackendInitializerUtil::S3A_PREFIX.size());
}
- std::string getSetting(
+ static std::string getSetting(
const DB::Settings & settings,
const std::string & bucket_name,
const std::string & config_name,
@@ -538,11 +539,11 @@ private:
{
std::string ret;
// if there's a bucket specific config, prefer it to non per bucket
config
- if (settings.tryGetString(toBucketNameSetting(bucket_name,
config_name), ret))
- return stripQuote(ret);
+ if (tryGetString(settings, toBucketNameSetting(bucket_name,
config_name), ret))
+ return ret;
- if (!require_per_bucket && settings.tryGetString(config_name, ret))
- return stripQuote(ret);
+ if (!require_per_bucket && tryGetString(settings, config_name, ret))
+ return ret;
return default_value;
}
@@ -614,8 +615,8 @@ private:
DB::S3::PocoHTTPClientConfiguration client_configuration =
DB::S3::ClientFactory::instance().createClientConfiguration(
region_name,
context->getRemoteHostFilter(),
- static_cast<unsigned>(context->getSettingsRef().s3_max_redirects),
- static_cast<unsigned>(context->getSettingsRef().s3_retry_attempts),
+
static_cast<unsigned>(context->getSettingsRef()[DB::Setting::s3_max_redirects]),
+
static_cast<unsigned>(context->getSettingsRef()[DB::Setting::s3_retry_attempts]),
false,
false,
nullptr,
@@ -631,15 +632,13 @@ private:
std::string ak;
std::string sk;
- settings.tryGetString(BackendInitializerUtil::HADOOP_S3_ACCESS_KEY,
ak);
- settings.tryGetString(BackendInitializerUtil::HADOOP_S3_SECRET_KEY,
sk);
- stripQuote(ak);
- stripQuote(sk);
+ tryGetString(settings, BackendInitializerUtil::HADOOP_S3_ACCESS_KEY,
ak);
+ tryGetString(settings, BackendInitializerUtil::HADOOP_S3_SECRET_KEY,
sk);
const DB::Settings & global_settings =
context->getGlobalContext()->getSettingsRef();
const DB::Settings & local_settings = context->getSettingsRef();
DB::S3::ClientSettings client_settings{
.use_virtual_addressing = false,
- .disable_checksum = local_settings.s3_disable_checksum,
+ .disable_checksum =
local_settings[DB::Setting::s3_disable_checksum],
.gcs_issue_compose_request =
context->getConfigRef().getBool("s3.gcs_issue_compose_request", false),
};
if (use_assumed_role)
diff --git a/cpp-ch/local-engine/Storages/SubstraitSource/TextFormatFile.cpp
b/cpp-ch/local-engine/Storages/SubstraitSource/TextFormatFile.cpp
index 5cae962a7f..b22f883f5d 100644
--- a/cpp-ch/local-engine/Storages/SubstraitSource/TextFormatFile.cpp
+++ b/cpp-ch/local-engine/Storages/SubstraitSource/TextFormatFile.cpp
@@ -18,6 +18,7 @@
#include <memory>
+#include <Formats/FormatFactory.h>
#include <Formats/FormatSettings.h>
#include <Processors/Formats/Impl/HiveTextRowInputFormat.h>
#include <Poco/URI.h>
diff --git a/cpp-ch/local-engine/tests/gtest_clickhouse_pr_verify.cpp
b/cpp-ch/local-engine/tests/gtest_clickhouse_pr_verify.cpp
index b44fe5eae0..44ba371029 100644
--- a/cpp-ch/local-engine/tests/gtest_clickhouse_pr_verify.cpp
+++ b/cpp-ch/local-engine/tests/gtest_clickhouse_pr_verify.cpp
@@ -25,6 +25,10 @@
#include <Common/DebugUtils.h>
#include <Common/QueryContext.h>
+namespace DB::Setting
+{
+extern const SettingsBool enable_named_columns_in_function_tuple;
+}
using namespace local_engine;
using namespace DB;
@@ -36,7 +40,7 @@ TEST(Clickhouse, PR54881)
const auto context1 =
DB::Context::createCopy(QueryContext::globalContext());
// context1->setSetting("enable_named_columns_in_function_tuple",
DB::Field(true));
auto settings = context1->getSettingsRef();
- EXPECT_FALSE(settings.enable_named_columns_in_function_tuple) << "GLUTEN
NEED set enable_named_columns_in_function_tuple to false";
+ EXPECT_FALSE(settings[Setting::enable_named_columns_in_function_tuple]) <<
"GLUTEN NEED set enable_named_columns_in_function_tuple to false";
constexpr std::string_view split_template
=
R"({"items":[{"uriFile":"{replace_local_files}","partitionIndex":"0","length":"1529","parquet":{},"schema":{},"metadataColumns":[{}]}]})";
diff --git a/cpp-ch/local-engine/tests/gtest_write_pipeline.cpp
b/cpp-ch/local-engine/tests/gtest_write_pipeline.cpp
index 18a18b0e2c..b6293137d6 100644
--- a/cpp-ch/local-engine/tests/gtest_write_pipeline.cpp
+++ b/cpp-ch/local-engine/tests/gtest_write_pipeline.cpp
@@ -47,6 +47,16 @@
#include <Common/DebugUtils.h>
#include <Common/QueryContext.h>
+namespace DB::Setting
+{
+extern const SettingsUInt64 max_parser_depth;
+extern const SettingsUInt64 max_parser_backtracks;
+extern const SettingsBool allow_settings_after_format_in_insert;
+extern const SettingsUInt64 max_query_size;
+extern const SettingsUInt64 min_insert_block_size_rows;
+extern const SettingsUInt64 min_insert_block_size_bytes;
+}
+
using namespace local_engine;
using namespace DB;
@@ -85,8 +95,8 @@ TEST(LocalExecutor, StorageObjectStorageSink)
"QUERY TEST",
/* allow_multi_statements = */ false,
0,
- settings.max_parser_depth,
- settings.max_parser_backtracks,
+ settings[Setting::max_parser_depth],
+ settings[Setting::max_parser_backtracks],
true);
auto & create = ast->as<ASTCreateQuery &>();
auto arg = create.storage->children[0];
@@ -312,9 +322,9 @@ TEST(WritePipeline, MergeTree)
const char * begin = query.data();
const char * end = query.data() + query.size();
- ParserQuery parser(end, settings.allow_settings_after_format_in_insert);
+ ParserQuery parser(end,
settings[Setting::allow_settings_after_format_in_insert]);
- ASTPtr ast = parseQuery(parser, begin, end, "", settings.max_query_size,
settings.max_parser_depth, settings.max_parser_backtracks);
+ ASTPtr ast = parseQuery(parser, begin, end, "",
settings[Setting::max_query_size], settings[Setting::max_parser_depth],
settings[Setting::max_parser_backtracks]);
EXPECT_TRUE(ast->as<ASTCreateQuery>());
auto & create = ast->as<ASTCreateQuery &>();
@@ -361,7 +371,7 @@ TEST(WritePipeline, MergeTree)
std::move(storage_settings));
Block header{{INT(), "id"}, {STRING(), "Name"}, {makeNullable(INT()),
"Age"}};
- DB::Squashing squashing(header, settings.min_insert_block_size_rows,
settings.min_insert_block_size_bytes);
+ DB::Squashing squashing(header,
settings[Setting::min_insert_block_size_rows],
settings[Setting::min_insert_block_size_bytes]);
squashing.add(person_chunk());
auto x = Squashing::squash(squashing.flush());
x.getChunkInfos().add(std::make_shared<DeduplicationToken::TokenInfo>());
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]