This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 4d3097ec980 [fix](be) Handle legacy DecimalV2 segments with missing 
precision/frac (#63569)
4d3097ec980 is described below

commit 4d3097ec9809b9b13fc8e742750af9d01631c765
Author: TengJianPing <[email protected]>
AuthorDate: Wed May 27 18:19:20 2026 +0800

    [fix](be) Handle legacy DecimalV2 segments with missing precision/frac 
(#63569)
    
    ### What problem does this PR solve?
    
    Issue Number: close #xxx
    
    Problem Summary: After upgrading from 3.1.4 to 4.0.4, queries on
    DecimalV2 columns fail with:
    
        [INTERNAL_ERROR]meet invalid precision: real_precision=0,
        max_decimal_precision=27, min_decimal_precision=1
    
    Root cause: Segments written by Doris < 2.1.0 (before #26572) do not
    persist precision/frac in ColumnMetaPB; they default to 0 when read
    back. Since #35222, DataTypeFactory passes those raw values as
    original_precision/original_scale into DataTypeDecimalV2, which calls
    check_type_precision(0) and throws. 3.1.4 was unaffected because the old
    code hardcoded (27, 9).
    
    Fix: In _create_primitive_data_type for OLAP_FIELD_TYPE_DECIMAL, when
    precision is not positive (legacy segment), pass UINT32_MAX to
    DataTypeDecimalV2 to signal that the original precision/scale are
    unknown. DecimalScaleInfo<TYPE_DECIMALV2> already treats UINT32_MAX as
    'unknown' and falls back to the in-memory (27, 9) representation in
    get_original_precision()/get_original_scale().
    
    ### Release note
    
    Fix "meet invalid precision: real_precision=0" when querying DecimalV2
    columns on segments written by Doris versions older than 2.1.0.
    
    ### Check List (For Author)
    
    - Test: Unit Test
    - Added DataTypeDecimalTest.create_decimalv2_from_legacy_tablet_column
    covering: legacy TabletColumn (unset precision/frac), modern
    TabletColumn with decimal(26,6), and legacy segment_v2::ColumnMetaPB
    with default-0 precision/frac.
    - Behavior changed: No
    - Does this need documentation: No
    
    ### What problem does this PR solve?
    
    Issue Number: close #xxx
    
    Related PR: #xxx
    
    Problem Summary:
    
    ### Release note
    
    None
    
    ### Check List (For Author)
    
    - Test <!-- At least one of them must be included. -->
        - [ ] Regression test
        - [ ] Unit Test
        - [ ] Manual test (add detailed scripts or steps below)
        - [ ] No need to test or manual test. Explain why:
    - [ ] This is a refactor/code format and no logic has been changed.
            - [ ] Previous test can cover this change.
            - [ ] No code files have been changed.
            - [ ] Other reason <!-- Add your reason?  -->
    
    - Behavior changed:
        - [ ] No.
        - [ ] Yes. <!-- Explain the behavior change -->
    
    - Does this need documentation?
        - [ ] No.
    - [ ] Yes. <!-- Add document PR link here. eg:
    https://github.com/apache/doris-website/pull/1214 -->
    
    ### Check List (For Reviewer who merge this PR)
    
    - [ ] Confirm the release note
    - [ ] Confirm test cases
    - [ ] Confirm document
    - [ ] Add branch pick label <!-- Add branch pick label that this PR
    should merge into -->
    
    Co-authored-by: Copilot <[email protected]>
---
 be/src/core/data_type/data_type_factory.cpp       |  19 ++-
 be/test/core/data_type/data_type_decimal_test.cpp | 179 ++++++++++++++++++++++
 2 files changed, 196 insertions(+), 2 deletions(-)

diff --git a/be/src/core/data_type/data_type_factory.cpp 
b/be/src/core/data_type/data_type_factory.cpp
index 256493cba04..ab6551d23ad 100644
--- a/be/src/core/data_type/data_type_factory.cpp
+++ b/be/src/core/data_type/data_type_factory.cpp
@@ -379,8 +379,23 @@ DataTypePtr DataTypeFactory::create_data_type(const 
segment_v2::ColumnMetaPB& pc
         nested = std::make_shared<DataTypeStruct>(dataTypes, names);
     } else {
         // TODO add precision and frac
-        nested = 
_create_primitive_data_type(static_cast<FieldType>(pcolumn.type()),
-                                             pcolumn.precision(), 
pcolumn.frac(), -1);
+        auto meta_precision = pcolumn.precision();
+        auto meta_scale = pcolumn.frac();
+        if (pcolumn.type() == 
static_cast<int>(FieldType::OLAP_FIELD_TYPE_DECIMAL)) {
+            // Segments written by Doris < 2.1.0 (before #26572) do not persist
+            // precision/frac in ColumnMetaPB, so they default to 0 when read 
back.
+            // Pass UINT32_MAX to DataTypeDecimalV2 to signal that the original
+            // precision/scale are unknown; otherwise check_type_precision(0) 
throws
+            // "meet invalid precision: real_precision=0".
+            UInt32 orig_precision =
+                    meta_precision > 0 ? static_cast<UInt32>(meta_precision) : 
UINT32_MAX;
+            UInt32 orig_scale = meta_precision > 0 ? 
static_cast<UInt32>(meta_scale) : UINT32_MAX;
+            nested = 
_create_primitive_data_type(static_cast<FieldType>(pcolumn.type()),
+                                                 orig_precision, orig_scale, 
-1);
+        } else {
+            nested = 
_create_primitive_data_type(static_cast<FieldType>(pcolumn.type()),
+                                                 meta_precision, meta_scale, 
-1);
+        }
     }
 
     if (pcolumn.is_nullable() && nested) {
diff --git a/be/test/core/data_type/data_type_decimal_test.cpp 
b/be/test/core/data_type/data_type_decimal_test.cpp
index 312924b5cee..952f8acb366 100644
--- a/be/test/core/data_type/data_type_decimal_test.cpp
+++ b/be/test/core/data_type/data_type_decimal_test.cpp
@@ -28,16 +28,24 @@
 #include <limits>
 #include <type_traits>
 
+#include "agent/be_exec_version_manager.h"
 #include "common/exception.h"
 #include "core/assert_cast.h"
 #include "core/column/column.h"
 #include "core/data_type/common_data_type_serder_test.h"
 #include "core/data_type/common_data_type_test.h"
 #include "core/data_type/data_type.h"
+#include "core/data_type/data_type_agg_state.h"
+#include "core/data_type/data_type_array.h"
 #include "core/data_type/data_type_factory.hpp"
+#include "core/data_type/data_type_map.h"
+#include "core/data_type/data_type_nullable.h"
 #include "core/data_type/data_type_number.h"
+#include "core/data_type/data_type_struct.h"
 #include "core/field.h"
 #include "core/types.h"
+#include "gen_cpp/segment_v2.pb.h"
+#include "storage/tablet/tablet_schema.h"
 #include "testutil/test_util.h"
 
 namespace doris {
@@ -446,6 +454,177 @@ TEST_F(DataTypeDecimalTest, ser_deser) {
     test_func(dt_decimal256_2, *column_decimal256_2, 
BeExecVersionManager::max_be_exec_version);
     test_func(dt_decimal256_3, *column_decimal256_3, 
BeExecVersionManager::max_be_exec_version);
 }
+// Regression for legacy DecimalV2 segments written by Doris < 2.1.0 (before
+// #26572): ColumnMetaPB.precision/frac are absent and default to 0 when read
+// back. DataTypeFactory must not pass 0 as the original precision/scale to
+// DataTypeDecimalV2, otherwise check_type_precision throws
+// "meet invalid precision: real_precision=0".
+TEST_F(DataTypeDecimalTest, create_decimalv2_from_legacy_tablet_column) {
+    // Case 1: legacy segment — precision/frac missing (default 0)
+    {
+        TabletColumn col;
+        col.set_type(FieldType::OLAP_FIELD_TYPE_DECIMAL);
+        // intentionally do not set precision/frac to mimic old segments
+        DataTypePtr dt;
+        EXPECT_NO_THROW(dt = DataTypeFactory::instance().create_data_type(col, 
false));
+        ASSERT_NE(dt, nullptr);
+        EXPECT_EQ(dt->get_primitive_type(), TYPE_DECIMALV2);
+        const auto* decv2 = assert_cast<const DataTypeDecimalV2*>(dt.get());
+        EXPECT_EQ(decv2->get_precision(), BeConsts::MAX_DECIMALV2_PRECISION);
+        EXPECT_EQ(decv2->get_scale(), BeConsts::MAX_DECIMALV2_SCALE);
+        // When original precision/scale are unknown, get_original_* should 
fall
+        // back to the in-memory (27, 9) representation.
+        EXPECT_EQ(decv2->get_original_precision(), 
BeConsts::MAX_DECIMALV2_PRECISION);
+        EXPECT_EQ(decv2->get_original_scale(), BeConsts::MAX_DECIMALV2_SCALE);
+    }
+    // Case 2: new segment — precision/frac are persisted (e.g. decimal(26,6))
+    {
+        TabletColumn col;
+        col.set_type(FieldType::OLAP_FIELD_TYPE_DECIMAL);
+        col.set_precision(26);
+        col.set_frac(6);
+        DataTypePtr dt;
+        EXPECT_NO_THROW(dt = DataTypeFactory::instance().create_data_type(col, 
false));
+        ASSERT_NE(dt, nullptr);
+        EXPECT_EQ(dt->get_primitive_type(), TYPE_DECIMALV2);
+        const auto* decv2 = assert_cast<const DataTypeDecimalV2*>(dt.get());
+        EXPECT_EQ(decv2->get_precision(), BeConsts::MAX_DECIMALV2_PRECISION);
+        EXPECT_EQ(decv2->get_scale(), BeConsts::MAX_DECIMALV2_SCALE);
+        EXPECT_EQ(decv2->get_original_precision(), 26U);
+        EXPECT_EQ(decv2->get_original_scale(), 6U);
+    }
+    // Case 3: same regression via the segment-read path that consumes
+    // ColumnMetaPB directly (this is the exact code path that broke after
+    // PR #35222 when reading old DecimalV2 segments).
+    {
+        segment_v2::ColumnMetaPB meta;
+        meta.set_type(static_cast<int>(FieldType::OLAP_FIELD_TYPE_DECIMAL));
+        // precision/frac intentionally unset -> default 0
+        DataTypePtr dt;
+        EXPECT_NO_THROW(dt = 
DataTypeFactory::instance().create_data_type(meta));
+        ASSERT_NE(dt, nullptr);
+        EXPECT_EQ(dt->get_primitive_type(), TYPE_DECIMALV2);
+        const auto* decv2 = assert_cast<const DataTypeDecimalV2*>(dt.get());
+        EXPECT_EQ(decv2->get_precision(), BeConsts::MAX_DECIMALV2_PRECISION);
+        EXPECT_EQ(decv2->get_scale(), BeConsts::MAX_DECIMALV2_SCALE);
+        EXPECT_EQ(decv2->get_original_precision(), 
BeConsts::MAX_DECIMALV2_PRECISION);
+        EXPECT_EQ(decv2->get_original_scale(), BeConsts::MAX_DECIMALV2_SCALE);
+    }
+}
+
+// Regression for complex types wrapping legacy DecimalV2 children whose
+// segment ColumnMetaPB.precision/frac default to 0. The fix in
+// DataTypeFactory::create_data_type(segment_v2::ColumnMetaPB&) must
+// propagate through Array / Map / Struct / AggState recursively.
+TEST_F(DataTypeDecimalTest, create_complex_types_with_legacy_decimalv2) {
+    auto make_legacy_decv2_meta = []() {
+        segment_v2::ColumnMetaPB child;
+        child.set_type(static_cast<int>(FieldType::OLAP_FIELD_TYPE_DECIMAL));
+        // precision/frac intentionally unset -> default 0
+        return child;
+    };
+    auto unwrap_nullable = [](const DataTypePtr& dt) -> DataTypePtr {
+        if (dt && dt->is_nullable()) {
+            return assert_cast<const 
DataTypeNullable*>(dt.get())->get_nested_type();
+        }
+        return dt;
+    };
+
+    // Array<DecimalV2>
+    {
+        segment_v2::ColumnMetaPB meta;
+        meta.set_type(static_cast<int>(FieldType::OLAP_FIELD_TYPE_ARRAY));
+        *meta.add_children_columns() = make_legacy_decv2_meta();
+        DataTypePtr dt;
+        EXPECT_NO_THROW(dt = 
DataTypeFactory::instance().create_data_type(meta));
+        ASSERT_NE(dt, nullptr);
+        EXPECT_EQ(dt->get_primitive_type(), TYPE_ARRAY);
+        const auto* arr = assert_cast<const DataTypeArray*>(dt.get());
+        auto elem = unwrap_nullable(arr->get_nested_type());
+        EXPECT_EQ(elem->get_primitive_type(), TYPE_DECIMALV2);
+        const auto* decv2 = assert_cast<const DataTypeDecimalV2*>(elem.get());
+        EXPECT_EQ(decv2->get_original_precision(), 
BeConsts::MAX_DECIMALV2_PRECISION);
+        EXPECT_EQ(decv2->get_original_scale(), BeConsts::MAX_DECIMALV2_SCALE);
+    }
+
+    // Map<Int, DecimalV2>
+    {
+        segment_v2::ColumnMetaPB meta;
+        meta.set_type(static_cast<int>(FieldType::OLAP_FIELD_TYPE_MAP));
+        auto* k = meta.add_children_columns();
+        k->set_type(static_cast<int>(FieldType::OLAP_FIELD_TYPE_INT));
+        *meta.add_children_columns() = make_legacy_decv2_meta();
+        DataTypePtr dt;
+        EXPECT_NO_THROW(dt = 
DataTypeFactory::instance().create_data_type(meta));
+        ASSERT_NE(dt, nullptr);
+        EXPECT_EQ(dt->get_primitive_type(), TYPE_MAP);
+        const auto* m = assert_cast<const DataTypeMap*>(dt.get());
+        EXPECT_EQ(unwrap_nullable(m->get_key_type())->get_primitive_type(), 
TYPE_INT);
+        auto v = unwrap_nullable(m->get_value_type());
+        EXPECT_EQ(v->get_primitive_type(), TYPE_DECIMALV2);
+        const auto* decv2 = assert_cast<const DataTypeDecimalV2*>(v.get());
+        EXPECT_EQ(decv2->get_original_precision(), 
BeConsts::MAX_DECIMALV2_PRECISION);
+    }
+
+    // Struct<DecimalV2, Int>
+    {
+        segment_v2::ColumnMetaPB meta;
+        meta.set_type(static_cast<int>(FieldType::OLAP_FIELD_TYPE_STRUCT));
+        *meta.add_children_columns() = make_legacy_decv2_meta();
+        auto* second = meta.add_children_columns();
+        second->set_type(static_cast<int>(FieldType::OLAP_FIELD_TYPE_INT));
+        DataTypePtr dt;
+        EXPECT_NO_THROW(dt = 
DataTypeFactory::instance().create_data_type(meta));
+        ASSERT_NE(dt, nullptr);
+        EXPECT_EQ(dt->get_primitive_type(), TYPE_STRUCT);
+        const auto* s = assert_cast<const DataTypeStruct*>(dt.get());
+        ASSERT_EQ(s->get_elements().size(), 2u);
+        EXPECT_EQ(unwrap_nullable(s->get_element(0))->get_primitive_type(), 
TYPE_DECIMALV2);
+        EXPECT_EQ(unwrap_nullable(s->get_element(1))->get_primitive_type(), 
TYPE_INT);
+    }
+
+    // Nested: Array<Map<Int, DecimalV2>>
+    {
+        segment_v2::ColumnMetaPB meta;
+        meta.set_type(static_cast<int>(FieldType::OLAP_FIELD_TYPE_ARRAY));
+        auto* map_child = meta.add_children_columns();
+        map_child->set_type(static_cast<int>(FieldType::OLAP_FIELD_TYPE_MAP));
+        auto* k = map_child->add_children_columns();
+        k->set_type(static_cast<int>(FieldType::OLAP_FIELD_TYPE_INT));
+        *map_child->add_children_columns() = make_legacy_decv2_meta();
+        DataTypePtr dt;
+        EXPECT_NO_THROW(dt = 
DataTypeFactory::instance().create_data_type(meta));
+        ASSERT_NE(dt, nullptr);
+        EXPECT_EQ(dt->get_primitive_type(), TYPE_ARRAY);
+        auto inner =
+                unwrap_nullable(assert_cast<const 
DataTypeArray*>(dt.get())->get_nested_type());
+        EXPECT_EQ(inner->get_primitive_type(), TYPE_MAP);
+        auto v = unwrap_nullable(assert_cast<const 
DataTypeMap*>(inner.get())->get_value_type());
+        EXPECT_EQ(v->get_primitive_type(), TYPE_DECIMALV2);
+    }
+
+    // AggState (count) with legacy DecimalV2 sub-type
+    {
+        segment_v2::ColumnMetaPB meta;
+        meta.set_type(static_cast<int>(FieldType::OLAP_FIELD_TYPE_AGG_STATE));
+        meta.set_function_name("count");
+        meta.set_result_is_nullable(false);
+        meta.set_be_exec_version(BeExecVersionManager::get_newest_version());
+        *meta.add_children_columns() = make_legacy_decv2_meta();
+        DataTypePtr dt;
+        EXPECT_NO_THROW(dt = 
DataTypeFactory::instance().create_data_type(meta));
+        ASSERT_NE(dt, nullptr);
+        EXPECT_EQ(dt->get_primitive_type(), TYPE_AGG_STATE);
+        const auto* agg = assert_cast<const DataTypeAggState*>(dt.get());
+        ASSERT_EQ(agg->get_sub_types().size(), 1u);
+        auto sub = unwrap_nullable(agg->get_sub_types()[0]);
+        EXPECT_EQ(sub->get_primitive_type(), TYPE_DECIMALV2);
+        const auto* decv2 = assert_cast<const DataTypeDecimalV2*>(sub.get());
+        EXPECT_EQ(decv2->get_original_precision(), 
BeConsts::MAX_DECIMALV2_PRECISION);
+        EXPECT_EQ(decv2->get_original_scale(), BeConsts::MAX_DECIMALV2_SCALE);
+    }
+}
+
 TEST_F(DataTypeDecimalTest, to_pb_column_meta) {
     auto test_func = [](auto dt, PGenericType_TypeId expected_type) {
         auto col_meta = std::make_shared<PColumnMeta>();


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to