This is an automated email from the ASF dual-hosted git repository.
exmy pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git
The following commit(s) were added to refs/heads/main by this push:
new f0b8ebe2aa [GLUTEN-9083][CH]Fix the nullability mismatch of nothing
type (#9091)
f0b8ebe2aa is described below
commit f0b8ebe2aa4afa27e8a6bb8b39b905cd4956d8c1
Author: lgbo <[email protected]>
AuthorDate: Wed Mar 26 12:01:34 2025 +0800
[GLUTEN-9083][CH]Fix the nullability mismatch of nothing type (#9091)
* resolve nothing type nullability
* debug
* update
---
cpp-ch/local-engine/Common/CHUtil.cpp | 27 +++++++++++++++++++++++++
cpp-ch/local-engine/Common/CHUtil.h | 6 ++++++
cpp-ch/local-engine/Parser/ExpressionParser.cpp | 9 +++++++--
cpp-ch/local-engine/Parser/FunctionParser.cpp | 17 ++++++++--------
cpp-ch/local-engine/Parser/TypeParser.cpp | 1 -
cpp-ch/local-engine/Parser/TypeParser.h | 3 ++-
6 files changed, 50 insertions(+), 13 deletions(-)
diff --git a/cpp-ch/local-engine/Common/CHUtil.cpp
b/cpp-ch/local-engine/Common/CHUtil.cpp
index f6eeecfdbc..adbcfee8f8 100644
--- a/cpp-ch/local-engine/Common/CHUtil.cpp
+++ b/cpp-ch/local-engine/Common/CHUtil.cpp
@@ -36,6 +36,7 @@
#include <Core/Settings.h>
#include <DataTypes/DataTypeArray.h>
#include <DataTypes/DataTypeNullable.h>
+#include <DataTypes/DataTypeMap.h>
#include <DataTypes/DataTypeString.h>
#include <DataTypes/DataTypeTuple.h>
#include <DataTypes/DataTypesDecimal.h>
@@ -317,6 +318,32 @@ DB::Block
BlockUtil::concatenateBlocksMemoryEfficiently(std::vector<DB::Block> &
return out;
}
+bool TypeUtil::hasNothingType(DB::DataTypePtr data_type)
+{
+ if (DB::isNothing(data_type))
+ return true;
+ else if (data_type->isNullable())
+ return hasNothingType(typeid_cast<const DB::DataTypeNullable
*>(data_type.get())->getNestedType());
+ else if (DB::isArray(data_type))
+ return hasNothingType(typeid_cast<const DB::DataTypeArray
*>(data_type.get())->getNestedType());
+ else if (DB::isMap(data_type))
+ {
+ const auto * type_map = typeid_cast<const DB::DataTypeMap
*>(data_type.get());
+ return hasNothingType(type_map->getKeyType()) ||
hasNothingType(type_map->getValueType());
+ }
+ else if (DB::isTuple(data_type))
+ {
+ const auto * type_tuple = typeid_cast<const DB::DataTypeTuple
*>(data_type.get());
+ for (size_t i = 0; i < type_tuple->getElements().size(); ++i)
+ {
+ if (hasNothingType(type_tuple->getElements()[i]))
+ return true;
+ }
+ }
+ return false;
+
+}
+
size_t PODArrayUtil::adjustMemoryEfficientSize(size_t n)
{
/// According to definition of DEFUALT_BLOCK_SIZE
diff --git a/cpp-ch/local-engine/Common/CHUtil.h
b/cpp-ch/local-engine/Common/CHUtil.h
index b7cd75524b..925daec17b 100644
--- a/cpp-ch/local-engine/Common/CHUtil.h
+++ b/cpp-ch/local-engine/Common/CHUtil.h
@@ -85,6 +85,12 @@ public:
convertColumnAsNecessary(const DB::ColumnWithTypeAndName & column, const
DB::ColumnWithTypeAndName & sample_column);
};
+class TypeUtil
+{
+public:
+ static bool hasNothingType(DB::DataTypePtr data_type);
+};
+
class PODArrayUtil
{
public:
diff --git a/cpp-ch/local-engine/Parser/ExpressionParser.cpp
b/cpp-ch/local-engine/Parser/ExpressionParser.cpp
index 53dec15464..82e8abbf90 100644
--- a/cpp-ch/local-engine/Parser/ExpressionParser.cpp
+++ b/cpp-ch/local-engine/Parser/ExpressionParser.cpp
@@ -247,6 +247,7 @@ std::pair<DB::DataTypePtr, DB::Field>
LiteralParser::parse(const substrait::Expr
}
case substrait::Expression_Literal::kNull: {
type = TypeParser::parseType(literal.null());
+ type =
TypeParser::tryWrapNullable(substrait::Type_Nullability::Type_Nullability_NULLABILITY_NULLABLE,
type);
field = DB::Field{};
break;
}
@@ -281,7 +282,6 @@ ExpressionParser::addConstColumn(DB::ActionsDAG &
actions_dag, const DB::DataTyp
return res_node;
}
-
ExpressionParser::NodeRawConstPtr ExpressionParser::parseExpression(ActionsDAG
& actions_dag, const substrait::Expression & rel) const
{
switch (rel.rex_type_case())
@@ -374,7 +374,12 @@ ExpressionParser::NodeRawConstPtr
ExpressionParser::parseExpression(ActionsDAG &
{
/// Common process: CAST(input, type)
args.emplace_back(addConstColumn(actions_dag,
std::make_shared<DataTypeString>(), output_type->getName()));
- result_node = toFunctionNode(actions_dag, "CAST", args);
+ if (TypeUtil::hasNothingType(args[0]->result_type))
+ {
+ result_node = toFunctionNode(actions_dag,
"accurateCastOrNull", args);
+ }
+ else
+ result_node = toFunctionNode(actions_dag, "CAST", args);
}
actions_dag.addOrReplaceInOutputs(*result_node);
diff --git a/cpp-ch/local-engine/Parser/FunctionParser.cpp
b/cpp-ch/local-engine/Parser/FunctionParser.cpp
index 3751547428..4bd35cb8c9 100644
--- a/cpp-ch/local-engine/Parser/FunctionParser.cpp
+++ b/cpp-ch/local-engine/Parser/FunctionParser.cpp
@@ -25,6 +25,7 @@
#include <Parser/TypeParser.h>
#include <Common/BlockTypeUtils.h>
#include <Common/CHUtil.h>
+#include <Common/logger_useful.h>
#include "ExpressionParser.h"
namespace DB
@@ -41,7 +42,8 @@ namespace local_engine
{
using namespace DB;
-FunctionParser::FunctionParser(ParserContextPtr ctx) : parser_context(ctx)
+FunctionParser::FunctionParser(ParserContextPtr ctx)
+ : parser_context(ctx)
{
expression_parser = std::make_unique<ExpressionParser>(parser_context);
}
@@ -121,7 +123,7 @@ const ActionsDAG::Node *
FunctionParser::convertNodeTypeIfNeeded(
{
if (!TypeParser::isTypeMatched(output_type, func_node->result_type))
{
- auto result_type =
TypeParser::parseType(substrait_func.output_type());
+ auto result_type = TypeParser::parseType(output_type);
if (DB::isDecimalOrNullableDecimal(result_type))
{
return ActionsDAGUtil::convertNodeType(
@@ -135,13 +137,10 @@ const ActionsDAG::Node *
FunctionParser::convertNodeTypeIfNeeded(
}
else
{
- return ActionsDAGUtil::convertNodeType(
- actions_dag,
- func_node,
- // as stated in isTypeMatched, currently we don't change
nullability of the result type
- func_node->result_type->isNullable() ?
local_engine::wrapNullableType(true, TypeParser::parseType(output_type))
- :
DB::removeNullable(TypeParser::parseType(output_type)),
- func_node->result_name);
+ // as stated in isTypeMatched, currently we don't change
nullability of the result type
+ auto target_type = func_node->result_type->isNullable() ?
local_engine::wrapNullableType(true, result_type)
+ :
local_engine::removeNullable(result_type);
+ return ActionsDAGUtil::convertNodeType(actions_dag, func_node,
target_type, func_node->result_name);
}
}
else
diff --git a/cpp-ch/local-engine/Parser/TypeParser.cpp
b/cpp-ch/local-engine/Parser/TypeParser.cpp
index 7b5ab7452d..d20a4b7a4e 100644
--- a/cpp-ch/local-engine/Parser/TypeParser.cpp
+++ b/cpp-ch/local-engine/Parser/TypeParser.cpp
@@ -240,7 +240,6 @@ DB::DataTypePtr TypeParser::parseType(const substrait::Type
& substrait_type, st
return ch_type;
}
-
DB::Block TypeParser::buildBlockFromNamedStruct(const substrait::NamedStruct &
struct_, const std::string & low_card_cols)
{
std::unordered_set<std::string> low_card_columns;
diff --git a/cpp-ch/local-engine/Parser/TypeParser.h
b/cpp-ch/local-engine/Parser/TypeParser.h
index 2a498989e2..4a0b4f7385 100644
--- a/cpp-ch/local-engine/Parser/TypeParser.h
+++ b/cpp-ch/local-engine/Parser/TypeParser.h
@@ -49,11 +49,12 @@ namespace local_engine
static DB::Block buildBlockFromNamedStructWithoutDFS(const
substrait::NamedStruct& struct_);
static bool isTypeMatched(const substrait::Type & substrait_type,
const DB::DataTypePtr & ch_type, bool ignore_nullability = true);
+ static DB::DataTypePtr tryWrapNullable(substrait::Type_Nullability
nullable, DB::DataTypePtr nested_type);
private:
/// Mapping spark type names to CH type names.
static std::unordered_map<String, String> type_names_mapping;
- static DB::DataTypePtr tryWrapNullable(substrait::Type_Nullability
nullable, DB::DataTypePtr nested_type);
+
};
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]