(doris) 12/12: [fix](RF) fix 'Invalid value' error of RF of decimal type (#32749)

yiguolei Mon, 25 Mar 2024 07:34:44 -0700

This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/doris.git


commit ff0da8108bd8f78c8cb08058ca5cf90d35716e8f
Author: TengJianPing <[email protected]>
AuthorDate: Mon Mar 25 22:27:31 2024 +0800

    [fix](RF) fix 'Invalid value' error of RF of decimal type (#32749)
---
 be/src/vec/core/types.h                            |  13 +++
 be/src/vec/exprs/vexpr.h                           |  15 ++-
 be/test/vec/data_types/decimal_test.cpp            | 104 +++++++++++++++++++++
 .../join/test_runtimefilter_on_decimal.out         |   8 ++
 .../join/test_runtimefilter_on_decimal.groovy      |  67 +++++++++++++
 5 files changed, 203 insertions(+), 4 deletions(-)

diff --git a/be/src/vec/core/types.h b/be/src/vec/core/types.h
index 8899b6ce017..fd3e16e0a2b 100644
--- a/be/src/vec/core/types.h
+++ b/be/src/vec/core/types.h
@@ -422,6 +422,13 @@ std::string decimal_to_string(const T& value, UInt32 
scale) {
     return str;
 }
 
+template <typename T>
+std::string decimal_to_string(const T& orig_value, UInt32 trunc_precision, 
UInt32 scale) {
+    T multiplier = decimal_scale_multiplier<T>(trunc_precision);
+    T value = orig_value % multiplier;
+    return decimal_to_string(value, scale);
+}
+
 template <typename T>
 size_t decimal_to_string(const T& value, char* dst, UInt32 scale, const T& 
scale_multiplier) {
     if (UNLIKELY(value == std::numeric_limits<T>::min())) {
@@ -621,6 +628,12 @@ struct Decimal {
 
     std::string to_string(UInt32 scale) const { return 
decimal_to_string(value, scale); }
 
+    // truncate to specified precision and scale,
+    // used by runtime filter only for now.
+    std::string to_string(UInt32 precision, UInt32 scale) const {
+        return decimal_to_string(value, precision, scale);
+    }
+
     /**
      * Got the string representation of a decimal.
      * @param dst Store the result, should be pre-allocated.
diff --git a/be/src/vec/exprs/vexpr.h b/be/src/vec/exprs/vexpr.h
index 4c4f0aa6740..42a46d8a8f3 100644
--- a/be/src/vec/exprs/vexpr.h
+++ b/be/src/vec/exprs/vexpr.h
@@ -374,28 +374,35 @@ Status create_texpr_literal_node(const void* data, 
TExprNode* node, int precisio
         const auto* origin_value = reinterpret_cast<const 
vectorized::Decimal<int32_t>*>(data);
         (*node).__set_node_type(TExprNodeType::DECIMAL_LITERAL);
         TDecimalLiteral decimal_literal;
-        decimal_literal.__set_value(origin_value->to_string(scale));
+        decimal_literal.__set_value(origin_value->to_string(precision, scale));
         (*node).__set_decimal_literal(decimal_literal);
         (*node).__set_type(create_type_desc(PrimitiveType::TYPE_DECIMAL32, 
precision, scale));
     } else if constexpr (T == TYPE_DECIMAL64) {
         const auto* origin_value = reinterpret_cast<const 
vectorized::Decimal<int64_t>*>(data);
         (*node).__set_node_type(TExprNodeType::DECIMAL_LITERAL);
         TDecimalLiteral decimal_literal;
-        decimal_literal.__set_value(origin_value->to_string(scale));
+        decimal_literal.__set_value(origin_value->to_string(precision, scale));
         (*node).__set_decimal_literal(decimal_literal);
         (*node).__set_type(create_type_desc(PrimitiveType::TYPE_DECIMAL64, 
precision, scale));
     } else if constexpr (T == TYPE_DECIMAL128I) {
         const auto* origin_value = reinterpret_cast<const 
vectorized::Decimal<int128_t>*>(data);
         (*node).__set_node_type(TExprNodeType::DECIMAL_LITERAL);
         TDecimalLiteral decimal_literal;
-        decimal_literal.__set_value(origin_value->to_string(scale));
+        // e.g. For a decimal(26,6) column, the initial value of the _min of 
the MinMax RF
+        // on the RF producer side is an int128 value with 38 digits of 9, and 
this is the
+        // final min value of the MinMax RF if the fragment instance has no 
data.
+        // Need to truncate the value to the right precision and scale here, 
to avoid
+        // error when casting string back to decimal later.
+        // TODO: this is a temporary solution, the best solution is to produce 
the
+        // right min max value at the producer side.
+        decimal_literal.__set_value(origin_value->to_string(precision, scale));
         (*node).__set_decimal_literal(decimal_literal);
         (*node).__set_type(create_type_desc(PrimitiveType::TYPE_DECIMAL128I, 
precision, scale));
     } else if constexpr (T == TYPE_DECIMAL256) {
         const auto* origin_value = reinterpret_cast<const 
vectorized::Decimal<wide::Int256>*>(data);
         (*node).__set_node_type(TExprNodeType::DECIMAL_LITERAL);
         TDecimalLiteral decimal_literal;
-        decimal_literal.__set_value(origin_value->to_string(scale));
+        decimal_literal.__set_value(origin_value->to_string(precision, scale));
         (*node).__set_decimal_literal(decimal_literal);
         (*node).__set_type(create_type_desc(PrimitiveType::TYPE_DECIMAL256, 
precision, scale));
     } else if constexpr (T == TYPE_FLOAT) {
diff --git a/be/test/vec/data_types/decimal_test.cpp 
b/be/test/vec/data_types/decimal_test.cpp
index 0f4b9502014..7f12876e148 100644
--- a/be/test/vec/data_types/decimal_test.cpp
+++ b/be/test/vec/data_types/decimal_test.cpp
@@ -23,6 +23,7 @@
 #include <memory>
 
 #include "gtest/gtest_pred_impl.h"
+#include "runtime/define_primitive_type.h"
 #include "runtime/raw_value.h"
 #include "runtime/type_limit.h"
 #include "util/string_parser.hpp"
@@ -209,4 +210,107 @@ TEST(DecimalTest, hash) {
         EXPECT_EQ(hash_val, 12344);
     }
 }
+
+TEST(DecimalTest, to_string) {
+    {
+        Decimal32 dec(999999999);
+        auto dec_str = dec.to_string(9, 0);
+        EXPECT_EQ(dec_str, "999999999");
+        dec_str = dec.to_string(9, 6);
+        EXPECT_EQ(dec_str, "999.999999");
+        dec_str = dec.to_string(9, 9);
+        EXPECT_EQ(dec_str, "0.999999999");
+
+        dec_str = dec.to_string(8, 0);
+        EXPECT_EQ(dec_str, "99999999");
+        dec_str = dec.to_string(8, 6);
+        EXPECT_EQ(dec_str, "99.999999");
+        dec_str = dec.to_string(8, 8);
+        EXPECT_EQ(dec_str, "0.99999999");
+
+        dec_str = dec.to_string(10, 0);
+        EXPECT_EQ(dec_str, "999999999");
+        dec_str = dec.to_string(10, 6);
+        EXPECT_EQ(dec_str, "999.999999");
+        dec_str = dec.to_string(10, 9);
+        EXPECT_EQ(dec_str, "0.999999999");
+    }
+    {
+        Decimal32 dec(-999999999);
+        auto dec_str = dec.to_string(9, 0);
+        EXPECT_EQ(dec_str, "-999999999");
+        dec_str = dec.to_string(9, 6);
+        EXPECT_EQ(dec_str, "-999.999999");
+        dec_str = dec.to_string(9, 9);
+        EXPECT_EQ(dec_str, "-0.999999999");
+
+        dec_str = dec.to_string(8, 0);
+        EXPECT_EQ(dec_str, "-99999999");
+        dec_str = dec.to_string(8, 6);
+        EXPECT_EQ(dec_str, "-99.999999");
+        dec_str = dec.to_string(8, 8);
+        EXPECT_EQ(dec_str, "-0.99999999");
+
+        dec_str = dec.to_string(10, 0);
+        EXPECT_EQ(dec_str, "-999999999");
+        dec_str = dec.to_string(10, 6);
+        EXPECT_EQ(dec_str, "-999.999999");
+        dec_str = dec.to_string(10, 9);
+        EXPECT_EQ(dec_str, "-0.999999999");
+    }
+    {
+        std::string val_str("999999999999999999999999999999"); // 30 digits
+        StringParser::ParseResult parse_result;
+        Decimal128V3 dec = StringParser::string_to_decimal<TYPE_DECIMAL128I>(
+                val_str.data(), val_str.size(), val_str.size(), 0, 
&parse_result);
+        EXPECT_EQ(parse_result, StringParser::ParseResult::PARSE_SUCCESS);
+        auto dec_str = dec.to_string(30, 0);
+        EXPECT_EQ(dec_str, "999999999999999999999999999999");
+        dec_str = dec.to_string(30, 6);
+        EXPECT_EQ(dec_str, "999999999999999999999999.999999");
+        dec_str = dec.to_string(30, 30);
+        EXPECT_EQ(dec_str, "0.999999999999999999999999999999");
+
+        dec_str = dec.to_string(20, 0);
+        EXPECT_EQ(dec_str, "99999999999999999999");
+        dec_str = dec.to_string(20, 6);
+        EXPECT_EQ(dec_str, "99999999999999.999999");
+        dec_str = dec.to_string(20, 20);
+        EXPECT_EQ(dec_str, "0.99999999999999999999");
+    }
+    {
+        std::string val_str("-999999999999999999999999999999"); // 30 digits
+        StringParser::ParseResult parse_result;
+        Decimal128V3 dec = StringParser::string_to_decimal<TYPE_DECIMAL128I>(
+                val_str.data(), val_str.size(), val_str.size(), 0, 
&parse_result);
+        EXPECT_EQ(parse_result, StringParser::ParseResult::PARSE_SUCCESS);
+        auto dec_str = dec.to_string(30, 0);
+        EXPECT_EQ(dec_str, "-999999999999999999999999999999");
+        dec_str = dec.to_string(30, 6);
+        EXPECT_EQ(dec_str, "-999999999999999999999999.999999");
+        dec_str = dec.to_string(30, 30);
+        EXPECT_EQ(dec_str, "-0.999999999999999999999999999999");
+
+        dec_str = dec.to_string(20, 0);
+        EXPECT_EQ(dec_str, "-99999999999999999999");
+        dec_str = dec.to_string(20, 6);
+        EXPECT_EQ(dec_str, "-99999999999999.999999");
+        dec_str = dec.to_string(20, 20);
+        EXPECT_EQ(dec_str, "-0.99999999999999999999");
+    }
+
+    {
+        Decimal256 dec(type_limit<vectorized::Decimal256>::max());
+        // Decimal256 dec_min(type_limit<vectorized::Decimal256>::min());
+        auto dec_str = dec.to_string(76, 0);
+        EXPECT_EQ(dec_str,
+                  
"9999999999999999999999999999999999999999999999999999999999999999999999999999");
+        dec_str = dec.to_string(76, 6);
+        EXPECT_EQ(dec_str,
+                  
"9999999999999999999999999999999999999999999999999999999999999999999999.999999");
+        dec_str = dec.to_string(76, 76);
+        EXPECT_EQ(dec_str,
+                  
"0.9999999999999999999999999999999999999999999999999999999999999999999999999999");
+    }
+}
 } // namespace doris::vectorized
\ No newline at end of file
diff --git 
a/regression-test/data/nereids_p0/join/test_runtimefilter_on_decimal.out 
b/regression-test/data/nereids_p0/join/test_runtimefilter_on_decimal.out
new file mode 100644
index 00000000000..bdcd7847029
--- /dev/null
+++ b/regression-test/data/nereids_p0/join/test_runtimefilter_on_decimal.out
@@ -0,0 +1,8 @@
+-- This file is automatically generated. You should know what you did if you 
want to edit this
+-- !dec_rftest_1 --
+
+-- !dec_rftest_2 --
+-99999999999999999999.999999   -99999999999999999999.999999
+12345678901234567890.123456    12345678901234567890.123456
+99999999999999999999.999999    99999999999999999999.999999
+
diff --git 
a/regression-test/suites/nereids_p0/join/test_runtimefilter_on_decimal.groovy 
b/regression-test/suites/nereids_p0/join/test_runtimefilter_on_decimal.groovy
new file mode 100644
index 00000000000..f1bef86bbb2
--- /dev/null
+++ 
b/regression-test/suites/nereids_p0/join/test_runtimefilter_on_decimal.groovy
@@ -0,0 +1,67 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_runtimefilter_on_decimal", "nereids_p0") {
+    sql "SET enable_nereids_planner=true"
+    sql "SET enable_fallback_to_original_planner=false"
+
+    // bug fix
+    sql "set disable_join_reorder=true;"
+    sql "set enable_runtime_filter_prune=false;"
+    sql "set runtime_filter_type='MIN_MAX';"
+    sql "set runtime_filter_wait_infinitely=true;"
+
+    sql "drop table if exists decimal_rftest_l";
+    sql "drop table if exists decimal_rftest_r";
+    sql """
+        CREATE TABLE `decimal_rftest_l` (
+          `k1_dec_l` decimalv3(26, 6)
+        )
+        DISTRIBUTED BY HASH(`k1_dec_l`) buckets 16
+        PROPERTIES (
+        "replication_allocation" = "tag.location.default: 1"
+        );
+    """
+    sql """
+        CREATE TABLE `decimal_rftest_r` (
+          `k1_dec_r` decimalv3(27, 6)
+        )
+        DISTRIBUTED BY HASH(`k1_dec_r`) buckets 16
+        PROPERTIES (
+        "replication_allocation" = "tag.location.default: 1"
+        );
+    """
+    sql """
+        insert into decimal_rftest_l values ("12345678901234567890.123456");
+    """
+    sql """
+        insert into decimal_rftest_r values (null);
+    """
+    qt_dec_rftest_1 """
+        select /*+SET_VAR(parallel_pipeline_task_num=2)*/ * from 
decimal_rftest_l join decimal_rftest_r on k1_dec_l = k1_dec_r order by 1, 2;
+    """
+
+    sql """
+        insert into decimal_rftest_l values ("-99999999999999999999.999999"), 
("99999999999999999999.999999");
+    """
+    sql """
+        insert into decimal_rftest_r values ("-99999999999999999999.999999"), 
("12345678901234567890.123456"), ("99999999999999999999.999999");
+    """
+    qt_dec_rftest_2 """
+        select /*+SET_VAR(parallel_pipeline_task_num=8)*/ * from 
decimal_rftest_l join decimal_rftest_r on k1_dec_l = k1_dec_r order by 1, 2;
+    """
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

(doris) 12/12: [fix](RF) fix 'Invalid value' error of RF of decimal type (#32749)

Reply via email to