This is an automated email from the ASF dual-hosted git repository.

exmy pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git


The following commit(s) were added to refs/heads/main by this push:
     new f0b8ebe2aa [GLUTEN-9083][CH]Fix the nullability mismatch of nothing 
type (#9091)
f0b8ebe2aa is described below

commit f0b8ebe2aa4afa27e8a6bb8b39b905cd4956d8c1
Author: lgbo <[email protected]>
AuthorDate: Wed Mar 26 12:01:34 2025 +0800

    [GLUTEN-9083][CH]Fix the nullability mismatch of nothing type (#9091)
    
    * resolve nothing type nullability
    
    * debug
    
    * update
---
 cpp-ch/local-engine/Common/CHUtil.cpp           | 27 +++++++++++++++++++++++++
 cpp-ch/local-engine/Common/CHUtil.h             |  6 ++++++
 cpp-ch/local-engine/Parser/ExpressionParser.cpp |  9 +++++++--
 cpp-ch/local-engine/Parser/FunctionParser.cpp   | 17 ++++++++--------
 cpp-ch/local-engine/Parser/TypeParser.cpp       |  1 -
 cpp-ch/local-engine/Parser/TypeParser.h         |  3 ++-
 6 files changed, 50 insertions(+), 13 deletions(-)

diff --git a/cpp-ch/local-engine/Common/CHUtil.cpp 
b/cpp-ch/local-engine/Common/CHUtil.cpp
index f6eeecfdbc..adbcfee8f8 100644
--- a/cpp-ch/local-engine/Common/CHUtil.cpp
+++ b/cpp-ch/local-engine/Common/CHUtil.cpp
@@ -36,6 +36,7 @@
 #include <Core/Settings.h>
 #include <DataTypes/DataTypeArray.h>
 #include <DataTypes/DataTypeNullable.h>
+#include <DataTypes/DataTypeMap.h>
 #include <DataTypes/DataTypeString.h>
 #include <DataTypes/DataTypeTuple.h>
 #include <DataTypes/DataTypesDecimal.h>
@@ -317,6 +318,32 @@ DB::Block 
BlockUtil::concatenateBlocksMemoryEfficiently(std::vector<DB::Block> &
     return out;
 }
 
+bool TypeUtil::hasNothingType(DB::DataTypePtr data_type)
+{
+    if (DB::isNothing(data_type))
+        return true;
+    else if (data_type->isNullable())
+        return hasNothingType(typeid_cast<const DB::DataTypeNullable 
*>(data_type.get())->getNestedType());
+    else if (DB::isArray(data_type))
+        return hasNothingType(typeid_cast<const DB::DataTypeArray 
*>(data_type.get())->getNestedType());
+    else if (DB::isMap(data_type))
+    {
+        const auto * type_map = typeid_cast<const DB::DataTypeMap 
*>(data_type.get());
+        return hasNothingType(type_map->getKeyType()) || 
hasNothingType(type_map->getValueType());
+    }
+    else if (DB::isTuple(data_type))
+    {
+        const auto * type_tuple = typeid_cast<const DB::DataTypeTuple 
*>(data_type.get());
+        for (size_t i = 0; i < type_tuple->getElements().size(); ++i)
+        {
+            if (hasNothingType(type_tuple->getElements()[i]))
+                return true;
+        }
+    }
+    return false;
+
+}
+
 size_t PODArrayUtil::adjustMemoryEfficientSize(size_t n)
 {
     /// According to definition of DEFUALT_BLOCK_SIZE
diff --git a/cpp-ch/local-engine/Common/CHUtil.h 
b/cpp-ch/local-engine/Common/CHUtil.h
index b7cd75524b..925daec17b 100644
--- a/cpp-ch/local-engine/Common/CHUtil.h
+++ b/cpp-ch/local-engine/Common/CHUtil.h
@@ -85,6 +85,12 @@ public:
     convertColumnAsNecessary(const DB::ColumnWithTypeAndName & column, const 
DB::ColumnWithTypeAndName & sample_column);
 };
 
+class TypeUtil
+{
+public:
+    static bool hasNothingType(DB::DataTypePtr data_type);
+};
+
 class PODArrayUtil
 {
 public:
diff --git a/cpp-ch/local-engine/Parser/ExpressionParser.cpp 
b/cpp-ch/local-engine/Parser/ExpressionParser.cpp
index 53dec15464..82e8abbf90 100644
--- a/cpp-ch/local-engine/Parser/ExpressionParser.cpp
+++ b/cpp-ch/local-engine/Parser/ExpressionParser.cpp
@@ -247,6 +247,7 @@ std::pair<DB::DataTypePtr, DB::Field> 
LiteralParser::parse(const substrait::Expr
         }
         case substrait::Expression_Literal::kNull: {
             type = TypeParser::parseType(literal.null());
+            type = 
TypeParser::tryWrapNullable(substrait::Type_Nullability::Type_Nullability_NULLABILITY_NULLABLE,
 type);
             field = DB::Field{};
             break;
         }
@@ -281,7 +282,6 @@ ExpressionParser::addConstColumn(DB::ActionsDAG & 
actions_dag, const DB::DataTyp
     return res_node;
 }
 
-
 ExpressionParser::NodeRawConstPtr ExpressionParser::parseExpression(ActionsDAG 
& actions_dag, const substrait::Expression & rel) const
 {
     switch (rel.rex_type_case())
@@ -374,7 +374,12 @@ ExpressionParser::NodeRawConstPtr 
ExpressionParser::parseExpression(ActionsDAG &
             {
                 /// Common process: CAST(input, type)
                 args.emplace_back(addConstColumn(actions_dag, 
std::make_shared<DataTypeString>(), output_type->getName()));
-                result_node = toFunctionNode(actions_dag, "CAST", args);
+                if (TypeUtil::hasNothingType(args[0]->result_type))
+                {
+                    result_node = toFunctionNode(actions_dag, 
"accurateCastOrNull", args);
+                }
+                else
+                    result_node = toFunctionNode(actions_dag, "CAST", args);
             }
 
             actions_dag.addOrReplaceInOutputs(*result_node);
diff --git a/cpp-ch/local-engine/Parser/FunctionParser.cpp 
b/cpp-ch/local-engine/Parser/FunctionParser.cpp
index 3751547428..4bd35cb8c9 100644
--- a/cpp-ch/local-engine/Parser/FunctionParser.cpp
+++ b/cpp-ch/local-engine/Parser/FunctionParser.cpp
@@ -25,6 +25,7 @@
 #include <Parser/TypeParser.h>
 #include <Common/BlockTypeUtils.h>
 #include <Common/CHUtil.h>
+#include <Common/logger_useful.h>
 #include "ExpressionParser.h"
 
 namespace DB
@@ -41,7 +42,8 @@ namespace local_engine
 {
 using namespace DB;
 
-FunctionParser::FunctionParser(ParserContextPtr ctx) : parser_context(ctx)
+FunctionParser::FunctionParser(ParserContextPtr ctx)
+    : parser_context(ctx)
 {
     expression_parser = std::make_unique<ExpressionParser>(parser_context);
 }
@@ -121,7 +123,7 @@ const ActionsDAG::Node * 
FunctionParser::convertNodeTypeIfNeeded(
     {
         if (!TypeParser::isTypeMatched(output_type, func_node->result_type))
         {
-            auto result_type = 
TypeParser::parseType(substrait_func.output_type());
+            auto result_type = TypeParser::parseType(output_type);
             if (DB::isDecimalOrNullableDecimal(result_type))
             {
                 return ActionsDAGUtil::convertNodeType(
@@ -135,13 +137,10 @@ const ActionsDAG::Node * 
FunctionParser::convertNodeTypeIfNeeded(
             }
             else
             {
-                return ActionsDAGUtil::convertNodeType(
-                    actions_dag,
-                    func_node,
-                    // as stated in isTypeMatched, currently we don't change 
nullability of the result type
-                    func_node->result_type->isNullable() ? 
local_engine::wrapNullableType(true, TypeParser::parseType(output_type))
-                                                         : 
DB::removeNullable(TypeParser::parseType(output_type)),
-                    func_node->result_name);
+                // as stated in isTypeMatched, currently we don't change 
nullability of the result type
+                auto target_type = func_node->result_type->isNullable() ? 
local_engine::wrapNullableType(true, result_type)
+                                                                        : 
local_engine::removeNullable(result_type);
+                return ActionsDAGUtil::convertNodeType(actions_dag, func_node, 
target_type, func_node->result_name);
             }
         }
         else
diff --git a/cpp-ch/local-engine/Parser/TypeParser.cpp 
b/cpp-ch/local-engine/Parser/TypeParser.cpp
index 7b5ab7452d..d20a4b7a4e 100644
--- a/cpp-ch/local-engine/Parser/TypeParser.cpp
+++ b/cpp-ch/local-engine/Parser/TypeParser.cpp
@@ -240,7 +240,6 @@ DB::DataTypePtr TypeParser::parseType(const substrait::Type 
& substrait_type, st
     return ch_type;
 }
 
-
 DB::Block TypeParser::buildBlockFromNamedStruct(const substrait::NamedStruct & 
struct_, const std::string & low_card_cols)
 {
     std::unordered_set<std::string> low_card_columns;
diff --git a/cpp-ch/local-engine/Parser/TypeParser.h 
b/cpp-ch/local-engine/Parser/TypeParser.h
index 2a498989e2..4a0b4f7385 100644
--- a/cpp-ch/local-engine/Parser/TypeParser.h
+++ b/cpp-ch/local-engine/Parser/TypeParser.h
@@ -49,11 +49,12 @@ namespace local_engine
         static DB::Block buildBlockFromNamedStructWithoutDFS(const 
substrait::NamedStruct& struct_);
 
         static bool isTypeMatched(const substrait::Type & substrait_type, 
const DB::DataTypePtr & ch_type, bool ignore_nullability = true);
+        static DB::DataTypePtr tryWrapNullable(substrait::Type_Nullability 
nullable, DB::DataTypePtr nested_type);
 
     private:
         /// Mapping spark type names to CH type names.
         static std::unordered_map<String, String> type_names_mapping;
 
-        static DB::DataTypePtr tryWrapNullable(substrait::Type_Nullability 
nullable, DB::DataTypePtr nested_type);
+
     };
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to