This is an automated email from the ASF dual-hosted git repository.
zhangzc pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git
The following commit(s) were added to refs/heads/main by this push:
new ed0f6ea665 [CH] Fix crash in static initialization of MergeTreeRelParser (#9664)
ed0f6ea665 is described below
commit ed0f6ea665e880d545479a420295798d9f78d543
Author: exmy <[email protected]>
AuthorDate: Fri May 16 17:58:26 2025 +0800
[CH] Fix crash in static initialization of MergeTreeRelParser (#9664)
* [CH] Fix crash in static initialization of MergeTreeRelParser
* add comment
---
.../Parser/RelParsers/MergeTreeRelParser.cpp | 33 +++++++++++++++++++---
.../Parser/RelParsers/MergeTreeRelParser.h | 20 +------------
2 files changed, 30 insertions(+), 23 deletions(-)
diff --git a/cpp-ch/local-engine/Parser/RelParsers/MergeTreeRelParser.cpp b/cpp-ch/local-engine/Parser/RelParsers/MergeTreeRelParser.cpp
index 11ea8bbdb5..17fd4f4af9 100644
--- a/cpp-ch/local-engine/Parser/RelParsers/MergeTreeRelParser.cpp
+++ b/cpp-ch/local-engine/Parser/RelParsers/MergeTreeRelParser.cpp
@@ -156,6 +156,31 @@ static Int64 findMinPosition(const NameSet & condition_table_columns, const Name
return min_position;
}
+/// Initialize DELTA_META_COLUMN_MAP only upon its first use to avoid static initialization order fiasco.
+const std::unordered_map<String, std::tuple<std::optional<String>,
DB::DataTypePtr, ReplaceDeltaNodeFunc>> & getDeltaMetaColumnMap()
+{
+ static const std::unordered_map<String, std::tuple<std::optional<String>,
DB::DataTypePtr, ReplaceDeltaNodeFunc>> DELTA_META_COLUMN_MAP
+ = {{FileMetaColumns::INPUT_FILE_NAME, std::tuple("_part",
std::make_shared<DB::DataTypeString>(), replaceInputFileNameNode)},
+ {FileMetaColumns::INPUT_FILE_BLOCK_START,
+ std::tuple(std::nullopt, std::make_shared<DB::DataTypeInt64>(),
replaceInputFileBlockStartNode)},
+ {FileMetaColumns::INPUT_FILE_BLOCK_LENGTH,
+ std::tuple(std::nullopt, std::make_shared<DB::DataTypeInt64>(),
replaceInputFileBlockLengthNode)},
+ {ParquetVirtualMeta::TMP_ROWINDEX,
+ std::tuple(DB::BlockOffsetColumn::name,
std::make_shared<DB::DataTypeUInt64>(), replaceTmpRowIndexNode)},
+ {FileMetaColumns::FILE_PATH, std::tuple("_part",
std::make_shared<DB::DataTypeString>(), replaceFilePathNode)},
+ {FileMetaColumns::FILE_NAME, std::tuple("_part",
std::make_shared<DB::DataTypeString>(), replaceFileNameNode)},
+ {FileMetaColumns::FILE_BLOCK_START,
+ std::tuple(std::nullopt, std::make_shared<DB::DataTypeInt64>(),
replaceFileBlockStartNode)},
+ {FileMetaColumns::FILE_BLOCK_LENGTH,
+ std::tuple(std::nullopt, std::make_shared<DB::DataTypeInt64>(),
replaceFileBlockLengthNode)},
+ {FileMetaColumns::FILE_SIZE, std::tuple(std::nullopt,
std::make_shared<DB::DataTypeInt64>(), replaceFileSizeNode)},
+ {FileMetaColumns::FILE_MODIFICATION_TIME, std::tuple(std::nullopt,
std::make_shared<DB::DataTypeDateTime64>(6), replaceFileModificationTimeNode)},
+ {DeltaVirtualMeta::DELTA_INTERNAL_IS_ROW_DELETED,
+ std::tuple("_part",
std::make_shared<DB::DataTypeNullable>(std::make_shared<DB::DataTypeInt8>()),
replaceDeltaInternalRowDeletedNode)} // make sure there is a '_part' column
+ };
+ return DELTA_META_COLUMN_MAP;
+}
+
DB::Block MergeTreeRelParser::parseMergeTreeOutput(const substrait::ReadRel &
rel, SparkStorageMergeTreePtr storage)
{
@@ -175,9 +200,9 @@ DB::Block MergeTreeRelParser::replaceDeltaNameIfNeeded(const DB::Block & output)
NameSet names;
for (const auto & column : output)
{
- if (DELTA_META_COLUMN_MAP.contains(column.name))
+ if (getDeltaMetaColumnMap().contains(column.name))
{
- if (auto tuple = DELTA_META_COLUMN_MAP.at(column.name);
std::get<0>(tuple).has_value())
+ if (auto tuple = getDeltaMetaColumnMap().at(column.name);
std::get<0>(tuple).has_value())
{
if (!names.contains(std::get<0>(tuple).value()))
{
@@ -206,10 +231,10 @@ void MergeTreeRelParser::recoverDeltaNameIfNeeded(
bool need_recover = false;
for (const auto & column : output)
{
- if (DELTA_META_COLUMN_MAP.contains(column.name))
+ if (getDeltaMetaColumnMap().contains(column.name))
{
need_recover = true;
- auto tuple = DELTA_META_COLUMN_MAP.at(column.name);
+ auto tuple = getDeltaMetaColumnMap().at(column.name);
ReplaceDeltaNodeFunc func = std::get<2>(tuple);
func(actions_dag, merge_tree_table, context);
}
diff --git a/cpp-ch/local-engine/Parser/RelParsers/MergeTreeRelParser.h b/cpp-ch/local-engine/Parser/RelParsers/MergeTreeRelParser.h
index fde6c332f9..e8892e64f6 100644
--- a/cpp-ch/local-engine/Parser/RelParsers/MergeTreeRelParser.h
+++ b/cpp-ch/local-engine/Parser/RelParsers/MergeTreeRelParser.h
@@ -55,25 +55,7 @@ void replaceInputFileBlockStartNode(DB::ActionsDAG & actions_dag, const MergeTre
void replaceInputFileBlockLengthNode(DB::ActionsDAG & actions_dag, const
MergeTreeTableInstance & merge_tree_table, DB::ContextPtr context);
void replaceTmpRowIndexNode(DB::ActionsDAG & actions_dag, const
MergeTreeTableInstance & merge_tree_table, DB::ContextPtr context);
-static const std::unordered_map<String, std::tuple<std::optional<String>,
DB::DataTypePtr, ReplaceDeltaNodeFunc>> DELTA_META_COLUMN_MAP
- = {{FileMetaColumns::INPUT_FILE_NAME, std::tuple("_part",
std::make_shared<DB::DataTypeString>(), replaceInputFileNameNode)},
- {FileMetaColumns::INPUT_FILE_BLOCK_START,
- std::tuple(std::nullopt, std::make_shared<DB::DataTypeInt64>(),
replaceInputFileBlockStartNode)},
- {FileMetaColumns::INPUT_FILE_BLOCK_LENGTH,
- std::tuple(std::nullopt, std::make_shared<DB::DataTypeInt64>(),
replaceInputFileBlockLengthNode)},
- {ParquetVirtualMeta::TMP_ROWINDEX,
- std::tuple(DB::BlockOffsetColumn::name,
std::make_shared<DB::DataTypeUInt64>(), replaceTmpRowIndexNode)},
- {FileMetaColumns::FILE_PATH, std::tuple("_part",
std::make_shared<DB::DataTypeString>(), replaceFilePathNode)},
- {FileMetaColumns::FILE_NAME, std::tuple("_part",
std::make_shared<DB::DataTypeString>(), replaceFileNameNode)},
- {FileMetaColumns::FILE_BLOCK_START,
- std::tuple(std::nullopt, std::make_shared<DB::DataTypeInt64>(),
replaceFileBlockStartNode)},
- {FileMetaColumns::FILE_BLOCK_LENGTH,
- std::tuple(std::nullopt, std::make_shared<DB::DataTypeInt64>(),
replaceFileBlockLengthNode)},
- {FileMetaColumns::FILE_SIZE, std::tuple(std::nullopt,
std::make_shared<DB::DataTypeInt64>(), replaceFileSizeNode)},
- {FileMetaColumns::FILE_MODIFICATION_TIME, std::tuple(std::nullopt,
std::make_shared<DB::DataTypeDateTime64>(6), replaceFileModificationTimeNode)},
- {DeltaVirtualMeta::DELTA_INTERNAL_IS_ROW_DELETED,
- std::tuple("_part",
std::make_shared<DB::DataTypeNullable>(std::make_shared<DB::DataTypeInt8>()),
replaceDeltaInternalRowDeletedNode)} // make sure there is a '_part' column
- };
+const std::unordered_map<String, std::tuple<std::optional<String>,
DB::DataTypePtr, ReplaceDeltaNodeFunc>> & getDeltaMetaColumnMap();
class MergeTreeRelParser : public RelParser
{
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]