This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 4d3097ec980 [fix](be) Handle legacy DecimalV2 segments with missing
precision/frac (#63569)
4d3097ec980 is described below
commit 4d3097ec9809b9b13fc8e742750af9d01631c765
Author: TengJianPing <[email protected]>
AuthorDate: Wed May 27 18:19:20 2026 +0800
[fix](be) Handle legacy DecimalV2 segments with missing precision/frac
(#63569)
### What problem does this PR solve?
Issue Number: close #xxx
Problem Summary: After upgrading from 3.1.4 to 4.0.4, queries on
DecimalV2 columns fail with:
[INTERNAL_ERROR]meet invalid precision: real_precision=0,
max_decimal_precision=27, min_decimal_precision=1
Root cause: Segments written by Doris < 2.1.0 (before #26572) do not
persist precision/frac in ColumnMetaPB; they default to 0 when read
back. Since #35222, DataTypeFactory passes those raw values as
original_precision/original_scale into DataTypeDecimalV2, which calls
check_type_precision(0) and throws. 3.1.4 was unaffected because the old
code hardcoded (27, 9).
Fix: In DataTypeFactory::create_data_type(const segment_v2::ColumnMetaPB&),
when the column type is OLAP_FIELD_TYPE_DECIMAL and the stored precision
is not positive (legacy segment), pass UINT32_MAX instead of the raw
precision/frac to _create_primitive_data_type, signalling to
DataTypeDecimalV2 that the original precision/scale are unknown.
DecimalScaleInfo<TYPE_DECIMALV2> already treats UINT32_MAX as 'unknown'
and falls back to the in-memory (27, 9) representation in
get_original_precision()/get_original_scale().
### Release note
Fix "meet invalid precision: real_precision=0" when querying DecimalV2
columns on segments written by Doris versions older than 2.1.0.
### Check List (For Author)
- Test: Unit Test
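- Added DataTypeDecimalTest.create_decimalv2_from_legacy_tablet_column
covering: legacy TabletColumn (unset precision/frac), modern
TabletColumn with decimal(26,6), and legacy segment_v2::ColumnMetaPB
with default-0 precision/frac.
- Added DataTypeDecimalTest.create_complex_types_with_legacy_decimalv2
covering legacy DecimalV2 children (default-0 precision/frac) nested in
Array, Map, Struct, Array<Map> and AggState segment_v2::ColumnMetaPB.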
- Behavior changed: No
- Does this need documentation: No
### What problem does this PR solve?
Issue Number: close #xxx
Related PR: #xxx
Problem Summary:
### Release note
None
### Check List (For Author)
- Test <!-- At least one of them must be included. -->
- [ ] Regression test
- [ ] Unit Test
- [ ] Manual test (add detailed scripts or steps below)
- [ ] No need to test or manual test. Explain why:
- [ ] This is a refactor/code format and no logic has been changed.
- [ ] Previous test can cover this change.
- [ ] No code files have been changed.
- [ ] Other reason <!-- Add your reason? -->
- Behavior changed:
- [ ] No.
- [ ] Yes. <!-- Explain the behavior change -->
- Does this need documentation?
- [ ] No.
- [ ] Yes. <!-- Add document PR link here. eg:
https://github.com/apache/doris-website/pull/1214 -->
### Check List (For Reviewer who merges this PR)
- [ ] Confirm the release note
- [ ] Confirm test cases
- [ ] Confirm document
- [ ] Add branch pick label <!-- Add branch pick label that this PR
should merge into -->
Co-authored-by: Copilot <[email protected]>
---
be/src/core/data_type/data_type_factory.cpp | 19 ++-
be/test/core/data_type/data_type_decimal_test.cpp | 179 ++++++++++++++++++++++
2 files changed, 196 insertions(+), 2 deletions(-)
diff --git a/be/src/core/data_type/data_type_factory.cpp
b/be/src/core/data_type/data_type_factory.cpp
index 256493cba04..ab6551d23ad 100644
--- a/be/src/core/data_type/data_type_factory.cpp
+++ b/be/src/core/data_type/data_type_factory.cpp
@@ -379,8 +379,23 @@ DataTypePtr DataTypeFactory::create_data_type(const
segment_v2::ColumnMetaPB& pc
nested = std::make_shared<DataTypeStruct>(dataTypes, names);
} else {
// TODO add precision and frac
- nested =
_create_primitive_data_type(static_cast<FieldType>(pcolumn.type()),
- pcolumn.precision(),
pcolumn.frac(), -1);
+ auto meta_precision = pcolumn.precision();
+ auto meta_scale = pcolumn.frac();
+ if (pcolumn.type() ==
static_cast<int>(FieldType::OLAP_FIELD_TYPE_DECIMAL)) {
+ // Segments written by Doris < 2.1.0 (before #26572) do not persist
+ // precision/frac in ColumnMetaPB, so they default to 0 when read
back.
+ // Pass UINT32_MAX to DataTypeDecimalV2 to signal that the original
+ // precision/scale are unknown; otherwise check_type_precision(0)
throws
+ // "meet invalid precision: real_precision=0".
+ UInt32 orig_precision =
+ meta_precision > 0 ? static_cast<UInt32>(meta_precision) :
UINT32_MAX;
+ UInt32 orig_scale = meta_precision > 0 ?
static_cast<UInt32>(meta_scale) : UINT32_MAX;
+ nested =
_create_primitive_data_type(static_cast<FieldType>(pcolumn.type()),
+ orig_precision, orig_scale,
-1);
+ } else {
+ nested =
_create_primitive_data_type(static_cast<FieldType>(pcolumn.type()),
+ meta_precision, meta_scale,
-1);
+ }
}
if (pcolumn.is_nullable() && nested) {
diff --git a/be/test/core/data_type/data_type_decimal_test.cpp
b/be/test/core/data_type/data_type_decimal_test.cpp
index 312924b5cee..952f8acb366 100644
--- a/be/test/core/data_type/data_type_decimal_test.cpp
+++ b/be/test/core/data_type/data_type_decimal_test.cpp
@@ -28,16 +28,24 @@
#include <limits>
#include <type_traits>
+#include "agent/be_exec_version_manager.h"
#include "common/exception.h"
#include "core/assert_cast.h"
#include "core/column/column.h"
#include "core/data_type/common_data_type_serder_test.h"
#include "core/data_type/common_data_type_test.h"
#include "core/data_type/data_type.h"
+#include "core/data_type/data_type_agg_state.h"
+#include "core/data_type/data_type_array.h"
#include "core/data_type/data_type_factory.hpp"
+#include "core/data_type/data_type_map.h"
+#include "core/data_type/data_type_nullable.h"
#include "core/data_type/data_type_number.h"
+#include "core/data_type/data_type_struct.h"
#include "core/field.h"
#include "core/types.h"
+#include "gen_cpp/segment_v2.pb.h"
+#include "storage/tablet/tablet_schema.h"
#include "testutil/test_util.h"
namespace doris {
@@ -446,6 +454,177 @@ TEST_F(DataTypeDecimalTest, ser_deser) {
test_func(dt_decimal256_2, *column_decimal256_2,
BeExecVersionManager::max_be_exec_version);
test_func(dt_decimal256_3, *column_decimal256_3,
BeExecVersionManager::max_be_exec_version);
}
+// Regression for legacy DecimalV2 segments written by Doris < 2.1.0 (before
+// #26572): ColumnMetaPB.precision/frac are absent and default to 0 when read
+// back. DataTypeFactory must not pass 0 as the original precision/scale to
+// DataTypeDecimalV2, otherwise check_type_precision throws
+// "meet invalid precision: real_precision=0".
+TEST_F(DataTypeDecimalTest, create_decimalv2_from_legacy_tablet_column) {
+ // Case 1: legacy segment — precision/frac missing (default 0)
+ {
+ TabletColumn col;
+ col.set_type(FieldType::OLAP_FIELD_TYPE_DECIMAL);
+ // intentionally do not set precision/frac to mimic old segments
+ DataTypePtr dt;
+ EXPECT_NO_THROW(dt = DataTypeFactory::instance().create_data_type(col,
false));
+ ASSERT_NE(dt, nullptr);
+ EXPECT_EQ(dt->get_primitive_type(), TYPE_DECIMALV2);
+ const auto* decv2 = assert_cast<const DataTypeDecimalV2*>(dt.get());
+ EXPECT_EQ(decv2->get_precision(), BeConsts::MAX_DECIMALV2_PRECISION);
+ EXPECT_EQ(decv2->get_scale(), BeConsts::MAX_DECIMALV2_SCALE);
+ // When original precision/scale are unknown, get_original_* should
fall
+ // back to the in-memory (27, 9) representation.
+ EXPECT_EQ(decv2->get_original_precision(),
BeConsts::MAX_DECIMALV2_PRECISION);
+ EXPECT_EQ(decv2->get_original_scale(), BeConsts::MAX_DECIMALV2_SCALE);
+ }
+ // Case 2: new segment — precision/frac are persisted (e.g. decimal(26,6))
+ {
+ TabletColumn col;
+ col.set_type(FieldType::OLAP_FIELD_TYPE_DECIMAL);
+ col.set_precision(26);
+ col.set_frac(6);
+ DataTypePtr dt;
+ EXPECT_NO_THROW(dt = DataTypeFactory::instance().create_data_type(col,
false));
+ ASSERT_NE(dt, nullptr);
+ EXPECT_EQ(dt->get_primitive_type(), TYPE_DECIMALV2);
+ const auto* decv2 = assert_cast<const DataTypeDecimalV2*>(dt.get());
+ EXPECT_EQ(decv2->get_precision(), BeConsts::MAX_DECIMALV2_PRECISION);
+ EXPECT_EQ(decv2->get_scale(), BeConsts::MAX_DECIMALV2_SCALE);
+ EXPECT_EQ(decv2->get_original_precision(), 26U);
+ EXPECT_EQ(decv2->get_original_scale(), 6U);
+ }
+ // Case 3: same regression via the segment-read path that consumes
+ // ColumnMetaPB directly (this is the exact code path that broke after
+ // PR #35222 when reading old DecimalV2 segments).
+ {
+ segment_v2::ColumnMetaPB meta;
+ meta.set_type(static_cast<int>(FieldType::OLAP_FIELD_TYPE_DECIMAL));
+ // precision/frac intentionally unset -> default 0
+ DataTypePtr dt;
+ EXPECT_NO_THROW(dt =
DataTypeFactory::instance().create_data_type(meta));
+ ASSERT_NE(dt, nullptr);
+ EXPECT_EQ(dt->get_primitive_type(), TYPE_DECIMALV2);
+ const auto* decv2 = assert_cast<const DataTypeDecimalV2*>(dt.get());
+ EXPECT_EQ(decv2->get_precision(), BeConsts::MAX_DECIMALV2_PRECISION);
+ EXPECT_EQ(decv2->get_scale(), BeConsts::MAX_DECIMALV2_SCALE);
+ EXPECT_EQ(decv2->get_original_precision(),
BeConsts::MAX_DECIMALV2_PRECISION);
+ EXPECT_EQ(decv2->get_original_scale(), BeConsts::MAX_DECIMALV2_SCALE);
+ }
+}
+
+// Regression for complex types wrapping legacy DecimalV2 children whose
+// segment ColumnMetaPB.precision/frac default to 0. The fix in
+// DataTypeFactory::create_data_type(segment_v2::ColumnMetaPB&) must
+// propagate through Array / Map / Struct / AggState recursively.
+TEST_F(DataTypeDecimalTest, create_complex_types_with_legacy_decimalv2) {
+ auto make_legacy_decv2_meta = []() {
+ segment_v2::ColumnMetaPB child;
+ child.set_type(static_cast<int>(FieldType::OLAP_FIELD_TYPE_DECIMAL));
+ // precision/frac intentionally unset -> default 0
+ return child;
+ };
+ auto unwrap_nullable = [](const DataTypePtr& dt) -> DataTypePtr {
+ if (dt && dt->is_nullable()) {
+ return assert_cast<const
DataTypeNullable*>(dt.get())->get_nested_type();
+ }
+ return dt;
+ };
+
+ // Array<DecimalV2>
+ {
+ segment_v2::ColumnMetaPB meta;
+ meta.set_type(static_cast<int>(FieldType::OLAP_FIELD_TYPE_ARRAY));
+ *meta.add_children_columns() = make_legacy_decv2_meta();
+ DataTypePtr dt;
+ EXPECT_NO_THROW(dt =
DataTypeFactory::instance().create_data_type(meta));
+ ASSERT_NE(dt, nullptr);
+ EXPECT_EQ(dt->get_primitive_type(), TYPE_ARRAY);
+ const auto* arr = assert_cast<const DataTypeArray*>(dt.get());
+ auto elem = unwrap_nullable(arr->get_nested_type());
+ EXPECT_EQ(elem->get_primitive_type(), TYPE_DECIMALV2);
+ const auto* decv2 = assert_cast<const DataTypeDecimalV2*>(elem.get());
+ EXPECT_EQ(decv2->get_original_precision(),
BeConsts::MAX_DECIMALV2_PRECISION);
+ EXPECT_EQ(decv2->get_original_scale(), BeConsts::MAX_DECIMALV2_SCALE);
+ }
+
+ // Map<Int, DecimalV2>
+ {
+ segment_v2::ColumnMetaPB meta;
+ meta.set_type(static_cast<int>(FieldType::OLAP_FIELD_TYPE_MAP));
+ auto* k = meta.add_children_columns();
+ k->set_type(static_cast<int>(FieldType::OLAP_FIELD_TYPE_INT));
+ *meta.add_children_columns() = make_legacy_decv2_meta();
+ DataTypePtr dt;
+ EXPECT_NO_THROW(dt =
DataTypeFactory::instance().create_data_type(meta));
+ ASSERT_NE(dt, nullptr);
+ EXPECT_EQ(dt->get_primitive_type(), TYPE_MAP);
+ const auto* m = assert_cast<const DataTypeMap*>(dt.get());
+ EXPECT_EQ(unwrap_nullable(m->get_key_type())->get_primitive_type(),
TYPE_INT);
+ auto v = unwrap_nullable(m->get_value_type());
+ EXPECT_EQ(v->get_primitive_type(), TYPE_DECIMALV2);
+ const auto* decv2 = assert_cast<const DataTypeDecimalV2*>(v.get());
+ EXPECT_EQ(decv2->get_original_precision(),
BeConsts::MAX_DECIMALV2_PRECISION);
+ }
+
+ // Struct<DecimalV2, Int>
+ {
+ segment_v2::ColumnMetaPB meta;
+ meta.set_type(static_cast<int>(FieldType::OLAP_FIELD_TYPE_STRUCT));
+ *meta.add_children_columns() = make_legacy_decv2_meta();
+ auto* second = meta.add_children_columns();
+ second->set_type(static_cast<int>(FieldType::OLAP_FIELD_TYPE_INT));
+ DataTypePtr dt;
+ EXPECT_NO_THROW(dt =
DataTypeFactory::instance().create_data_type(meta));
+ ASSERT_NE(dt, nullptr);
+ EXPECT_EQ(dt->get_primitive_type(), TYPE_STRUCT);
+ const auto* s = assert_cast<const DataTypeStruct*>(dt.get());
+ ASSERT_EQ(s->get_elements().size(), 2u);
+ EXPECT_EQ(unwrap_nullable(s->get_element(0))->get_primitive_type(),
TYPE_DECIMALV2);
+ EXPECT_EQ(unwrap_nullable(s->get_element(1))->get_primitive_type(),
TYPE_INT);
+ }
+
+ // Nested: Array<Map<Int, DecimalV2>>
+ {
+ segment_v2::ColumnMetaPB meta;
+ meta.set_type(static_cast<int>(FieldType::OLAP_FIELD_TYPE_ARRAY));
+ auto* map_child = meta.add_children_columns();
+ map_child->set_type(static_cast<int>(FieldType::OLAP_FIELD_TYPE_MAP));
+ auto* k = map_child->add_children_columns();
+ k->set_type(static_cast<int>(FieldType::OLAP_FIELD_TYPE_INT));
+ *map_child->add_children_columns() = make_legacy_decv2_meta();
+ DataTypePtr dt;
+ EXPECT_NO_THROW(dt =
DataTypeFactory::instance().create_data_type(meta));
+ ASSERT_NE(dt, nullptr);
+ EXPECT_EQ(dt->get_primitive_type(), TYPE_ARRAY);
+ auto inner =
+ unwrap_nullable(assert_cast<const
DataTypeArray*>(dt.get())->get_nested_type());
+ EXPECT_EQ(inner->get_primitive_type(), TYPE_MAP);
+ auto v = unwrap_nullable(assert_cast<const
DataTypeMap*>(inner.get())->get_value_type());
+ EXPECT_EQ(v->get_primitive_type(), TYPE_DECIMALV2);
+ }
+
+ // AggState (count) with legacy DecimalV2 sub-type
+ {
+ segment_v2::ColumnMetaPB meta;
+ meta.set_type(static_cast<int>(FieldType::OLAP_FIELD_TYPE_AGG_STATE));
+ meta.set_function_name("count");
+ meta.set_result_is_nullable(false);
+ meta.set_be_exec_version(BeExecVersionManager::get_newest_version());
+ *meta.add_children_columns() = make_legacy_decv2_meta();
+ DataTypePtr dt;
+ EXPECT_NO_THROW(dt =
DataTypeFactory::instance().create_data_type(meta));
+ ASSERT_NE(dt, nullptr);
+ EXPECT_EQ(dt->get_primitive_type(), TYPE_AGG_STATE);
+ const auto* agg = assert_cast<const DataTypeAggState*>(dt.get());
+ ASSERT_EQ(agg->get_sub_types().size(), 1u);
+ auto sub = unwrap_nullable(agg->get_sub_types()[0]);
+ EXPECT_EQ(sub->get_primitive_type(), TYPE_DECIMALV2);
+ const auto* decv2 = assert_cast<const DataTypeDecimalV2*>(sub.get());
+ EXPECT_EQ(decv2->get_original_precision(),
BeConsts::MAX_DECIMALV2_PRECISION);
+ EXPECT_EQ(decv2->get_original_scale(), BeConsts::MAX_DECIMALV2_SCALE);
+ }
+}
+
TEST_F(DataTypeDecimalTest, to_pb_column_meta) {
auto test_func = [](auto dt, PGenericType_TypeId expected_type) {
auto col_meta = std::make_shared<PColumnMeta>();
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]