This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new b8cd32ab896 [enhancement](zonemap) add beut for olap string convertor
for zonemap (#61277)
b8cd32ab896 is described below
commit b8cd32ab8962eb2e2af54327a4a2b38880166f4a
Author: yiguolei <[email protected]>
AuthorDate: Wed Mar 18 14:15:48 2026 +0800
[enhancement](zonemap) add beut for olap string convertor for zonemap
(#61277)
### What problem does this PR solve?
This PR adds BE unit tests for the OLAP string converters used by zonemap.
There is also a potential bug in zonemap: should it be
opt.ignore_scale = (field_type != FieldType::OLAP_FIELD_TYPE_DECIMAL); ?
### Release note
None
### Check List (For Author)
- Test <!-- At least one of them must be included. -->
- [ ] Regression test
- [ ] Unit Test
- [ ] Manual test (add detailed scripts or steps below)
- [ ] No need to test or manual test. Explain why:
- [ ] This is a refactor/code format and no logic has been changed.
- [ ] Previous test can cover this change.
- [ ] No code files have been changed.
- [ ] Other reason <!-- Add your reason? -->
- Behavior changed:
- [ ] No.
- [ ] Yes. <!-- Explain the behavior change -->
- Does this need documentation?
- [ ] No.
- [ ] Yes. <!-- Add document PR link here. eg:
https://github.com/apache/doris-website/pull/1214 -->
### Check List (For Reviewer who merge this PR)
- [ ] Confirm the release note
- [ ] Confirm test cases
- [ ] Confirm document
- [ ] Add branch pick label <!-- Add branch pick label that this PR
should merge into -->
---
.../core/data_type_serde/data_type_bitmap_serde.h | 6 +-
.../data_type_date_or_datetime_serde.cpp | 20 +
.../data_type_date_or_datetime_serde.h | 6 +-
.../data_type_serde/data_type_datetimev2_serde.cpp | 23 +
.../data_type_serde/data_type_datetimev2_serde.h | 7 +-
.../data_type_serde/data_type_datev2_serde.cpp | 13 +
.../core/data_type_serde/data_type_datev2_serde.h | 7 +-
.../data_type_serde/data_type_decimal_serde.cpp | 26 +-
.../core/data_type_serde/data_type_decimal_serde.h | 7 +-
be/src/core/data_type_serde/data_type_hll_serde.h | 7 +-
.../core/data_type_serde/data_type_ipv4_serde.cpp | 9 +
be/src/core/data_type_serde/data_type_ipv4_serde.h | 6 +-
.../core/data_type_serde/data_type_ipv6_serde.cpp | 9 +
be/src/core/data_type_serde/data_type_ipv6_serde.h | 6 +-
.../data_type_serde/data_type_nullable_serde.cpp | 14 +-
.../data_type_serde/data_type_nullable_serde.h | 4 +-
.../data_type_serde/data_type_number_serde.cpp | 31 +
.../core/data_type_serde/data_type_number_serde.h | 7 +-
.../data_type_quantilestate_serde.h | 7 +-
be/src/core/data_type_serde/data_type_serde.h | 51 +-
.../data_type_serde/data_type_string_serde.cpp | 12 +
.../core/data_type_serde/data_type_string_serde.h | 6 +-
be/src/core/data_type_serde/data_type_time_serde.h | 7 +-
.../data_type_serde/data_type_timestamptz_serde.h | 7 +-
be/src/storage/delete/delete_handler.cpp | 383 +++---------
be/src/storage/index/zone_map/zone_map_index.cpp | 12 +-
be/src/storage/schema_change/schema_change.cpp | 5 +-
be/src/storage/segment/column_reader.cpp | 3 +-
be/src/util/io_helper.h | 8 +
be/test/storage/olap_type_test.cpp | 688 +++++++++++++++++++++
30 files changed, 1055 insertions(+), 342 deletions(-)
diff --git a/be/src/core/data_type_serde/data_type_bitmap_serde.h
b/be/src/core/data_type_serde/data_type_bitmap_serde.h
index 49ed3cd875e..023813b8f7f 100644
--- a/be/src/core/data_type_serde/data_type_bitmap_serde.h
+++ b/be/src/core/data_type_serde/data_type_bitmap_serde.h
@@ -37,8 +37,6 @@ public:
Status from_string(StringRef& str, IColumn& column,
const FormatOptions& options) const override;
- Status from_olap_string(const std::string& str, Field& field,
- const FormatOptions& options) const override;
Status serialize_one_cell_to_json(const IColumn& column, int64_t row_num,
BufferWritable& bw,
FormatOptions& options) const override;
@@ -87,5 +85,9 @@ public:
void to_string(const IColumn& column, size_t row_num, BufferWritable& bw,
const FormatOptions& options) const override;
+
+protected:
+ Status from_olap_string(const std::string& str, Field& field,
+ const FormatOptions& options) const override;
};
} // namespace doris
diff --git a/be/src/core/data_type_serde/data_type_date_or_datetime_serde.cpp
b/be/src/core/data_type_serde/data_type_date_or_datetime_serde.cpp
index 8c055c22626..757fd15fa93 100644
--- a/be/src/core/data_type_serde/data_type_date_or_datetime_serde.cpp
+++ b/be/src/core/data_type_serde/data_type_date_or_datetime_serde.cpp
@@ -395,6 +395,18 @@ Status DataTypeDateSerDe<T>::from_string(StringRef& str,
IColumn& column,
return Status::OK();
}
+// Deserializes a DateV1 or DateTimeV1 value from its OLAP string
representation
+// (e.g. from ZoneMap protobuf). This is the inverse of to_olap_string().
+//
+// Uses CastToDateOrDatetime::from_string_non_strict_mode which accepts
flexible date/time formats.
+//
+// Note: DateTimeV1 (VecDateTimeValue) does NOT support microsecond precision.
+// VecDateTimeValue::microsecond() always returns 0 — the _microsecond field
was removed
+// to reduce memory footprint. So the round-trip format is always
second-level precision.
+//
+// Expected input formats:
+// DateV1: "YYYY-MM-DD" e.g. "2023-10-15"
+// DateTimeV1: "YYYY-MM-DD HH:MM:SS" e.g. "2023-10-15 14:30:00"
template <PrimitiveType T>
Status DataTypeDateSerDe<T>::from_olap_string(const std::string& str, Field&
field,
const FormatOptions& options)
const {
@@ -576,6 +588,14 @@ Status
DataTypeDateSerDe<T>::from_decimal_strict_mode_batch(
return Status::OK();
}
+// Serializes a DateV1 or DateTimeV1 value to its OLAP string representation
for ZoneMap storage.
+// This is the inverse of from_olap_string().
+//
+// Internally calls VecDateTimeValue::to_string(buf) which produces:
+// DateV1: "YYYY-MM-DD" e.g. "2023-10-15"
+// DateTimeV1: "YYYY-MM-DD HH:MM:SS" e.g. "2023-10-15 14:30:00"
+//
+// Note: DateTimeV1 never includes microseconds
(VecDateTimeValue::microsecond() always returns 0).
template <PrimitiveType T>
std::string DataTypeDateSerDe<T>::to_olap_string(const Field& field) const {
char buf[64];
diff --git a/be/src/core/data_type_serde/data_type_date_or_datetime_serde.h
b/be/src/core/data_type_serde/data_type_date_or_datetime_serde.h
index c28677b117f..693bfca7385 100644
--- a/be/src/core/data_type_serde/data_type_date_or_datetime_serde.h
+++ b/be/src/core/data_type_serde/data_type_date_or_datetime_serde.h
@@ -51,9 +51,6 @@ public:
Status from_string(StringRef& str, IColumn& column,
const FormatOptions& options) const override;
- Status from_olap_string(const std::string& str, Field& field,
- const FormatOptions& options) const override;
-
Status from_string_strict_mode(StringRef& str, IColumn& column,
const FormatOptions& options) const
override;
@@ -119,6 +116,9 @@ public:
std::string to_olap_string(const Field& field) const override;
protected:
+ Status from_olap_string(const std::string& str, Field& field,
+ const FormatOptions& options) const override;
+
template <bool is_date>
Status _read_column_from_arrow(IColumn& column, const arrow::Array*
arrow_array, int64_t start,
int64_t end, const cctz::time_zone& ctz)
const;
diff --git a/be/src/core/data_type_serde/data_type_datetimev2_serde.cpp
b/be/src/core/data_type_serde/data_type_datetimev2_serde.cpp
index a6f54024f75..e652c23f64c 100644
--- a/be/src/core/data_type_serde/data_type_datetimev2_serde.cpp
+++ b/be/src/core/data_type_serde/data_type_datetimev2_serde.cpp
@@ -122,6 +122,19 @@ Status DataTypeDateTimeV2SerDe::from_string(StringRef&
str, IColumn& column,
return Status::OK();
}
+// Deserializes a DateTimeV2 value from its OLAP string representation (e.g.
from ZoneMap protobuf).
+// This is the inverse of to_olap_string().
+//
+// Uses from_date_format_str("%Y-%m-%d %H:%i:%s.%f") to parse.
+// DateTimeV2 supports microsecond precision (scale 0-6) via a 20-bit
microsecond_ field.
+//
+// Expected input format: "YYYY-MM-DD HH:MM:SS[.ffffff]"
+// Examples:
+// "2023-10-15 14:30:00" => scale 0, microsecond = 0
+// "2023-10-15 14:30:00.123000" => scale 6, microsecond = 123000
+// "2023-10-15 14:30:00.123" => scale 3, microsecond = 123000
+//
+// On parse failure, falls back to MIN_DATETIME_V2.
Status DataTypeDateTimeV2SerDe::from_olap_string(const std::string& str,
Field& field,
const FormatOptions& options)
const {
CastParameters params {.status = Status::OK(), .is_strict = false};
@@ -512,6 +525,16 @@ void
DataTypeDateTimeV2SerDe::write_one_cell_to_binary(const IColumn& src_column
data_ref.size);
}
+// Serializes a DateTimeV2 value to its OLAP string representation for ZoneMap
storage.
+// This is the inverse of from_olap_string().
+//
+// Delegates to CastToString::from_datetimev2(value, scale) with default
scale=-1,
+// meaning microseconds are only shown when nonzero.
+//
+// Output format: "YYYY-MM-DD HH:MM:SS[.ffffff]"
+// Examples:
+// value with microsecond=0 => "2023-10-15 14:30:00"
+// value with microsecond=123000 => "2023-10-15 14:30:00.123000"
std::string DataTypeDateTimeV2SerDe::to_olap_string(const Field& field) const {
return CastToString::from_datetimev2(field.get<TYPE_DATETIMEV2>());
}
diff --git a/be/src/core/data_type_serde/data_type_datetimev2_serde.h
b/be/src/core/data_type_serde/data_type_datetimev2_serde.h
index 22f6d072428..0389432a621 100644
--- a/be/src/core/data_type_serde/data_type_datetimev2_serde.h
+++ b/be/src/core/data_type_serde/data_type_datetimev2_serde.h
@@ -41,9 +41,6 @@ public:
Status from_string_batch(const ColumnString& str, ColumnNullable& column,
const FormatOptions& options) const final;
- Status from_olap_string(const std::string& str, Field& field,
- const FormatOptions& options) const override;
-
Status from_string_strict_mode_batch(const ColumnString& str, IColumn&
column,
const FormatOptions& options,
const NullMap::value_type* null_map =
nullptr) const final;
@@ -112,6 +109,10 @@ public:
std::string to_olap_string(const Field& field) const override;
+protected:
+ Status from_olap_string(const std::string& str, Field& field,
+ const FormatOptions& options) const override;
+
private:
int _scale;
};
diff --git a/be/src/core/data_type_serde/data_type_datev2_serde.cpp
b/be/src/core/data_type_serde/data_type_datev2_serde.cpp
index 87c577d076b..dc6712acaa8 100644
--- a/be/src/core/data_type_serde/data_type_datev2_serde.cpp
+++ b/be/src/core/data_type_serde/data_type_datev2_serde.cpp
@@ -228,6 +228,14 @@ Status DataTypeDateV2SerDe::from_string_batch(const
ColumnString& col_str, Colum
return Status::OK();
}
+// Deserializes a DateV2 value from its OLAP string representation (e.g. from
ZoneMap protobuf).
+// This is the inverse of to_olap_string().
+//
+// Uses strptime("%Y-%m-%d") to parse, then bit-packs into DateV2 internal
format:
+// uint32_t value = (year << 9) | (month << 5) | day
+//
+// Expected input format: "YYYY-MM-DD", e.g. "2023-10-15"
+// On parse failure, falls back to MIN_DATE_V2.
Status DataTypeDateV2SerDe::from_olap_string(const std::string& str, Field&
field,
const FormatOptions& options)
const {
CastParameters params {.status = Status::OK(), .is_strict = false};
@@ -441,6 +449,11 @@ Status DataTypeDateV2SerDe::from_decimal_strict_mode_batch(
return Status::OK();
}
+// Serializes a DateV2 value to its OLAP string representation for ZoneMap
storage.
+// This is the inverse of from_olap_string().
+//
+// Delegates to CastToString::from_datev2() which calls
DateV2Value::to_string(buf).
+// Output format: "YYYY-MM-DD", e.g. "2023-10-15"
std::string DataTypeDateV2SerDe::to_olap_string(const Field& field) const {
return CastToString::from_datev2(field.get<TYPE_DATEV2>());
}
diff --git a/be/src/core/data_type_serde/data_type_datev2_serde.h
b/be/src/core/data_type_serde/data_type_datev2_serde.h
index 0a33c51c806..0375f9be4b4 100644
--- a/be/src/core/data_type_serde/data_type_datev2_serde.h
+++ b/be/src/core/data_type_serde/data_type_datev2_serde.h
@@ -40,9 +40,6 @@ public:
Status from_string_batch(const ColumnString& str, ColumnNullable& column,
const FormatOptions& options) const final;
- Status from_olap_string(const std::string& str, Field& field,
- const FormatOptions& options) const override;
-
Status from_string_strict_mode_batch(const ColumnString& str, IColumn&
column,
const FormatOptions& options,
const NullMap::value_type* null_map =
nullptr) const final;
@@ -108,5 +105,9 @@ public:
int64_t row_num) const override;
std::string to_olap_string(const Field& field) const override;
+
+protected:
+ Status from_olap_string(const std::string& str, Field& field,
+ const FormatOptions& options) const override;
};
} // namespace doris
diff --git a/be/src/core/data_type_serde/data_type_decimal_serde.cpp
b/be/src/core/data_type_serde/data_type_decimal_serde.cpp
index 10babcd9a54..1e277bda86b 100644
--- a/be/src/core/data_type_serde/data_type_decimal_serde.cpp
+++ b/be/src/core/data_type_serde/data_type_decimal_serde.cpp
@@ -133,8 +133,16 @@ Status DataTypeDecimalSerDe<T>::from_olap_string(const
std::string& str, Field&
CastParameters params;
params.is_strict = false;
- // Decimal string in storage is saved as an integer. The scale is
maintained by data type, so we
- // can just parse the string as an integer here.
+ // DecimalV3 (Decimal32/64/128I/256): zonemap stores the raw unscaled
integer string.
+ // E.g., Decimal(9,2) value 123.45 → to_olap_string() → "12345".
+ // Caller sets ignore_scale=true → parse with scale=0 → internal int
12345. Correct.
+ //
+ // DecimalV2: zonemap stores "integer.fraction" with 9 zero-padded
fractional digits.
+ // E.g., DecimalV2 value 123.456 → to_olap_string() → "123.456000000".
+ // Ideally the caller sets ignore_scale=false → parse with scale=9. Note that
+ // from_zonemap_string() currently passes ignore_scale=true for every type; this is harmless
+ // today because read_decimal_text_impl() hardcodes DecimalV2Value::SCALE=9 for DecimalV2 and
+ // ignores the passed-in scale, but callers should still prefer ignore_scale=false here.
if (!CastToDecimal::from_string(StringRef(str), to,
static_cast<UInt32>(precision),
options.ignore_scale ? 0 :
static_cast<UInt32>(scale),
params)) {
@@ -504,15 +512,29 @@ template <PrimitiveType T>
std::string DataTypeDecimalSerDe<T>::to_olap_string(const Field& field) const {
auto value = field.get<T>();
if constexpr (T == TYPE_DECIMALV2) {
+ // DecimalV2 outputs "integer.fraction" with 9 zero-padded fractional
digits.
+ // E.g., DecimalV2 value 123.456 → int_value=123, frac_value=456000000
+ // → decimal12_t(123, 456000000).to_string() → "123.456000000".
+ // from_zonemap_string() sets ignore_scale=true internally, but
DecimalV2's parser
+ // hardcodes scale=9 regardless, so the round-trip is correct either
way.
decimal12_t decimal_val(value.int_value(), value.frac_value());
return decimal_val.to_string();
} else if constexpr (T == TYPE_DECIMAL256) {
+ // DecimalV3: outputs the raw unscaled integer string.
+ // E.g., Decimal256(76,10) value 123.456 → internal int = 1234560000000
+ // → "1234560000000".
+ // from_zonemap_string() sets ignore_scale=true to parse this as a raw
integer.
return wide::to_string(value.value);
} else if constexpr (T == TYPE_DECIMAL128I) {
+ // Same as Decimal256: raw unscaled integer.
+ // E.g., Decimal(38,6) value 123.456 → internal int128 = 123456000
+ // → "123456000".
fmt::memory_buffer buffer;
fmt::format_to(buffer, "{}", value.value);
return std::string(buffer.data(), buffer.size());
} else {
+ // Decimal32/64: raw unscaled integer.
+ // E.g., Decimal(9,2) value 123.45 → internal int32 = 12345 → "12345".
return std::to_string(value.value);
}
}
diff --git a/be/src/core/data_type_serde/data_type_decimal_serde.h
b/be/src/core/data_type_serde/data_type_decimal_serde.h
index 61349fa6eef..140c8e3a292 100644
--- a/be/src/core/data_type_serde/data_type_decimal_serde.h
+++ b/be/src/core/data_type_serde/data_type_decimal_serde.h
@@ -57,9 +57,6 @@ public:
Status from_string_batch(const ColumnString& str, ColumnNullable& column,
const FormatOptions& options) const override;
- Status from_olap_string(const std::string& str, Field& field,
- const FormatOptions& options) const override;
-
Status from_string_strict_mode_batch(
const ColumnString& str, IColumn& column, const FormatOptions&
options,
const NullMap::value_type* null_map = nullptr) const override;
@@ -142,6 +139,10 @@ public:
static const uint8_t* deserialize_binary_to_field(const uint8_t* data,
Field& field,
FieldInfo& info);
+protected:
+ Status from_olap_string(const std::string& str, Field& field,
+ const FormatOptions& options) const override;
+
private:
int precision;
int scale;
diff --git a/be/src/core/data_type_serde/data_type_hll_serde.h
b/be/src/core/data_type_serde/data_type_hll_serde.h
index 96b5b083da7..5df89f7f4d7 100644
--- a/be/src/core/data_type_serde/data_type_hll_serde.h
+++ b/be/src/core/data_type_serde/data_type_hll_serde.h
@@ -38,9 +38,6 @@ public:
Status from_string(StringRef& str, IColumn& column,
const FormatOptions& options) const override;
- Status from_olap_string(const std::string& str, Field& field,
- const FormatOptions& options) const override;
-
Status serialize_one_cell_to_json(const IColumn& column, int64_t row_num,
BufferWritable& bw,
FormatOptions& options) const override;
Status serialize_column_to_json(const IColumn& column, int64_t start_idx,
int64_t end_idx,
@@ -82,5 +79,9 @@ public:
void to_string(const IColumn& column, size_t row_num, BufferWritable& bw,
const FormatOptions& options) const override;
+
+protected:
+ Status from_olap_string(const std::string& str, Field& field,
+ const FormatOptions& options) const override;
};
} // namespace doris
diff --git a/be/src/core/data_type_serde/data_type_ipv4_serde.cpp
b/be/src/core/data_type_serde/data_type_ipv4_serde.cpp
index d5b4813c1f3..4eb969c138f 100644
--- a/be/src/core/data_type_serde/data_type_ipv4_serde.cpp
+++ b/be/src/core/data_type_serde/data_type_ipv4_serde.cpp
@@ -178,6 +178,11 @@ Status DataTypeIPv4SerDe::from_string(StringRef& str,
IColumn& column,
return Status::OK();
}
+// Deserializes an IPv4 value from its OLAP string representation (e.g. from
ZoneMap protobuf).
+// This is the inverse of to_olap_string().
+//
+// Uses CastToIPv4::from_string to parse standard dotted-decimal notation.
+// Expected input format: "A.B.C.D", e.g. "192.168.1.1"
Status DataTypeIPv4SerDe::from_olap_string(const std::string& str, Field&
field,
const FormatOptions& options) const
{
CastParameters params;
@@ -222,6 +227,10 @@ void DataTypeIPv4SerDe::write_one_cell_to_binary(const
IColumn& src_column,
memcpy(chars.data() + old_size + sizeof(uint8_t), data_ref.data,
data_ref.size);
}
+// Serializes an IPv4 value to its OLAP string representation for ZoneMap
storage.
+// This is the inverse of from_olap_string().
+// Uses CastToString::from_ip() to produce standard dotted-decimal notation.
+// Output format: "A.B.C.D", e.g. "192.168.1.1"
std::string DataTypeIPv4SerDe::to_olap_string(const Field& field) const {
return CastToString::from_ip(field.get<TYPE_IPV4>());
}
diff --git a/be/src/core/data_type_serde/data_type_ipv4_serde.h
b/be/src/core/data_type_serde/data_type_ipv4_serde.h
index a3ff4bf9036..e23695dbca5 100644
--- a/be/src/core/data_type_serde/data_type_ipv4_serde.h
+++ b/be/src/core/data_type_serde/data_type_ipv4_serde.h
@@ -65,8 +65,6 @@ public:
Status from_string(StringRef& str, IColumn& column,
const FormatOptions& options) const override;
- Status from_olap_string(const std::string& str, Field& field,
- const FormatOptions& options) const override;
Status from_string_strict_mode(StringRef& str, IColumn& column,
const FormatOptions& options) const
override;
@@ -75,5 +73,9 @@ public:
int64_t row_num) const override;
std::string to_olap_string(const Field& field) const override;
+
+protected:
+ Status from_olap_string(const std::string& str, Field& field,
+ const FormatOptions& options) const override;
};
} // namespace doris
diff --git a/be/src/core/data_type_serde/data_type_ipv6_serde.cpp
b/be/src/core/data_type_serde/data_type_ipv6_serde.cpp
index f60dc892fbf..ccd5a236807 100644
--- a/be/src/core/data_type_serde/data_type_ipv6_serde.cpp
+++ b/be/src/core/data_type_serde/data_type_ipv6_serde.cpp
@@ -276,6 +276,11 @@ Status DataTypeIPv6SerDe::from_string(StringRef& str,
IColumn& column,
return Status::OK();
}
+// Deserializes an IPv6 value from its OLAP string representation (e.g. from
ZoneMap protobuf).
+// This is the inverse of to_olap_string().
+//
+// Uses CastToIPv6::from_string to parse standard IPv6 notation.
+// Expected input format: standard IPv6, e.g. "::1", "2001:db8::1",
"fe80::1%25eth0"
Status DataTypeIPv6SerDe::from_olap_string(const std::string& str, Field&
field,
const FormatOptions& options) const
{
CastParameters params;
@@ -320,6 +325,10 @@ void DataTypeIPv6SerDe::write_one_cell_to_binary(const
IColumn& src_column,
memcpy(chars.data() + old_size + sizeof(uint8_t), data_ref.data,
data_ref.size);
}
+// Serializes an IPv6 value to its OLAP string representation for ZoneMap
storage.
+// This is the inverse of from_olap_string().
+// Uses CastToString::from_ip() to produce standard IPv6 notation.
+// Output format: standard IPv6, e.g. "::1", "2001:db8::1"
std::string DataTypeIPv6SerDe::to_olap_string(const Field& field) const {
return CastToString::from_ip(field.get<TYPE_IPV6>());
}
diff --git a/be/src/core/data_type_serde/data_type_ipv6_serde.h
b/be/src/core/data_type_serde/data_type_ipv6_serde.h
index 20b6960e61d..226e65663f0 100644
--- a/be/src/core/data_type_serde/data_type_ipv6_serde.h
+++ b/be/src/core/data_type_serde/data_type_ipv6_serde.h
@@ -75,8 +75,6 @@ public:
Status from_string(StringRef& str, IColumn& column,
const FormatOptions& options) const override;
- Status from_olap_string(const std::string& str, Field& field,
- const FormatOptions& options) const override;
Status from_string_strict_mode(StringRef& str, IColumn& column,
const FormatOptions& options) const
override;
@@ -85,5 +83,9 @@ public:
int64_t row_num) const override;
std::string to_olap_string(const Field& field) const override;
+
+protected:
+ Status from_olap_string(const std::string& str, Field& field,
+ const FormatOptions& options) const override;
};
} // namespace doris
diff --git a/be/src/core/data_type_serde/data_type_nullable_serde.cpp
b/be/src/core/data_type_serde/data_type_nullable_serde.cpp
index 175da193187..6ca2e07b7b0 100644
--- a/be/src/core/data_type_serde/data_type_nullable_serde.cpp
+++ b/be/src/core/data_type_serde/data_type_nullable_serde.cpp
@@ -494,9 +494,17 @@ Status DataTypeNullableSerDe::from_string(StringRef& str,
IColumn& column,
return Status::OK();
}
-Status DataTypeNullableSerDe::from_olap_string(const std::string& str, Field&
field,
- const FormatOptions& options)
const {
- if (!nested_serde->from_olap_string(str, field, options).ok()) {
+Status DataTypeNullableSerDe::from_zonemap_string(const std::string& str,
Field& field) const {
+ if (!nested_serde->from_zonemap_string(str, field).ok()) {
+ // fill null if fail
+ field = Field();
+ return Status::OK();
+ }
+ return Status::OK();
+}
+
+Status DataTypeNullableSerDe::from_fe_string(const std::string& str, Field&
field) const {
+ if (!nested_serde->from_fe_string(str, field).ok()) {
// fill null if fail
field = Field();
return Status::OK();
diff --git a/be/src/core/data_type_serde/data_type_nullable_serde.h
b/be/src/core/data_type_serde/data_type_nullable_serde.h
index 49d9d55f9f2..4363e4a573b 100644
--- a/be/src/core/data_type_serde/data_type_nullable_serde.h
+++ b/be/src/core/data_type_serde/data_type_nullable_serde.h
@@ -39,8 +39,8 @@ public:
Status from_string(StringRef& str, IColumn& column,
const FormatOptions& options) const override;
- Status from_olap_string(const std::string& str, Field& field,
- const FormatOptions& options) const override;
+ Status from_zonemap_string(const std::string& str, Field& field) const
override;
+ Status from_fe_string(const std::string& str, Field& field) const override;
Status from_string_strict_mode(StringRef& str, IColumn& column,
const FormatOptions& options) const
override;
diff --git a/be/src/core/data_type_serde/data_type_number_serde.cpp
b/be/src/core/data_type_serde/data_type_number_serde.cpp
index 6acb1bb9a16..80f6234fcbd 100644
--- a/be/src/core/data_type_serde/data_type_number_serde.cpp
+++ b/be/src/core/data_type_serde/data_type_number_serde.cpp
@@ -750,6 +750,20 @@ Status DataTypeNumberSerDe<T>::from_string(StringRef& str,
IColumn& column,
return Status::OK();
}
+// Serializes a numeric value to its OLAP string representation for ZoneMap
index storage.
+// This is the inverse of from_olap_string().
+//
+// Format by type:
+// - BOOLEAN: "0" or "1" (via snprintf "%d")
+// - TINYINT/SMALLINT/INT/BIGINT: standard integer string, e.g. "42", "-100"
+// - FLOAT: fmt::format("{:.7g}", value), e.g. "3.14", "NaN", "Infinity"
+// - DOUBLE: fmt::format("{:.16g}", value), e.g. "3.141592653589793"
+// - LARGEINT: fmt::format("{}", value), e.g.
"170141183460469231731687303715884105727"
+//
+// Examples:
+// to_olap_string(Field(Int32(12345))) => "12345"
+// to_olap_string(Field(Float32(3.14f))) => "3.14"
+// to_olap_string(Field(Float64(1e300))) => "1e+300"
template <PrimitiveType T>
std::string DataTypeNumberSerDe<T>::to_olap_string(const Field& field) const {
if constexpr (T == TYPE_BOOLEAN) {
@@ -770,6 +784,15 @@ std::string DataTypeNumberSerDe<T>::to_olap_string(const
Field& field) const {
}
}
+// Deserializes a numeric value from its OLAP string representation (e.g. from
ZoneMap protobuf).
+// This is the inverse of to_olap_string(). Uses try_parse_impl with
non-strict mode.
+//
+// FormatOptions is unused for numeric types — the string format is always a
standard number literal.
+//
+// Examples:
+// from_olap_string("12345", field, ...) => field = Int32(12345)
+// from_olap_string("3.14", field, ...) => field = Float32(3.14)
+// from_olap_string("NaN", field, ...) => returns InvalidArgument
(NaN/Inf are rejected)
template <PrimitiveType T>
Status DataTypeNumberSerDe<T>::from_olap_string(const std::string& str, Field&
field,
const FormatOptions& options)
const {
@@ -779,6 +802,14 @@ Status DataTypeNumberSerDe<T>::from_olap_string(const
std::string& str, Field& f
if (!try_parse_impl<T, false>(val, StringRef(str), params)) {
return Status::InvalidArgument("parse number fail, string: '{}'", str);
}
+ // NaN and Infinity are not allowed in zonemap strings or in float values passed from FE
+ // (e.g. a column's default value or schema-change-like operations).
+ if constexpr (is_float_or_double(T)) {
+ if (std::isnan(val) || std::isinf(val)) {
+ return Status::InvalidArgument(
+ "parse number fail: NaN/Infinity not allowed in olap
string: '{}'", str);
+ }
+ }
field = Field::create_field<T>(std::move(val));
return Status::OK();
}
diff --git a/be/src/core/data_type_serde/data_type_number_serde.h
b/be/src/core/data_type_serde/data_type_number_serde.h
index 2158919d112..cc33fe2b684 100644
--- a/be/src/core/data_type_serde/data_type_number_serde.h
+++ b/be/src/core/data_type_serde/data_type_number_serde.h
@@ -62,9 +62,6 @@ public:
Status from_string(StringRef& str, IColumn& column,
const FormatOptions& options) const override;
- Status from_olap_string(const std::string& str, Field& field,
- const FormatOptions& options) const override;
-
Status from_string_strict_mode(StringRef& str, IColumn& column,
const FormatOptions& options) const
override;
@@ -152,6 +149,10 @@ public:
static const uint8_t* deserialize_binary_to_field(const uint8_t* data,
Field& field,
FieldInfo& info);
+
+protected:
+ Status from_olap_string(const std::string& str, Field& field,
+ const FormatOptions& options) const override;
};
template <PrimitiveType T>
diff --git a/be/src/core/data_type_serde/data_type_quantilestate_serde.h
b/be/src/core/data_type_serde/data_type_quantilestate_serde.h
index 61cd18fb420..16f99f5f63e 100644
--- a/be/src/core/data_type_serde/data_type_quantilestate_serde.h
+++ b/be/src/core/data_type_serde/data_type_quantilestate_serde.h
@@ -58,9 +58,6 @@ public:
return Status::OK();
}
- Status from_olap_string(const std::string& str, Field& field,
- const FormatOptions& options) const override;
-
Status serialize_column_to_json(const IColumn& column, int64_t start_idx,
int64_t end_idx,
BufferWritable& bw, FormatOptions&
options) const override {
SERIALIZE_COLUMN_TO_JSON();
@@ -191,6 +188,10 @@ public:
data.serialize((uint8_t*)result.data());
bw.write(result.data(), result.size());
}
+
+protected:
+ Status from_olap_string(const std::string& str, Field& field,
+ const FormatOptions& options) const override;
};
#include "common/compile_check_end.h"
} // namespace doris
diff --git a/be/src/core/data_type_serde/data_type_serde.h
b/be/src/core/data_type_serde/data_type_serde.h
index 656ef84b76b..39003e2793b 100644
--- a/be/src/core/data_type_serde/data_type_serde.h
+++ b/be/src/core/data_type_serde/data_type_serde.h
@@ -196,8 +196,23 @@ public:
const cctz::time_zone* timezone = nullptr;
/**
- * Ignore scale when converting decimal to string, because decimal in
zone map is stored in
- * unscaled value.
+ * Controls how the `scale` parameter is passed to decimal parsing in
from_olap_string().
+ *
+ * - true: parse with scale=0 (treat the string as a raw unscaled
integer).
+ * Used for DecimalV3 (Decimal32/64/128I/256) whose zonemap
stores the raw
+ * internal integer. E.g., Decimal(9,2) value 123.45 is
stored as "12345";
+ * parsing with scale=0 yields internal int 12345, which is
correct.
+ *
+ * - false: parse with the data type's actual scale.
+ * Used for DecimalV2 whose zonemap stores a human-readable
string with
+ * decimal point via decimal12_t::to_string().
+ * E.g., DecimalV2 value 123.456 is stored as "123.456000000";
+ * parsing with scale=9 correctly restores the original value.
+ *
+ * Note: for DecimalV2, read_decimal_text_impl() currently hardcodes
+ * DecimalV2Value::SCALE=9 regardless of the passed-in scale, so the
flag
+ * does not actually affect DecimalV2 parsing today. However, callers
should
+ * still set it correctly for semantic clarity and future-proofing.
*/
bool ignore_scale = false;
@@ -309,11 +324,24 @@ public:
const FormatOptions& options) const {
return Status::NotSupported("from_string is not supported");
}
- // Convert string which is read from OLAP table to corresponding type.
- // Only used for basic data types, such as Ip, Date, Number, etc.
- virtual Status from_olap_string(const std::string& str, Field& field,
- const FormatOptions& options) const {
- return Status::NotSupported("from_olap_string is not supported");
+ /// Parse a string stored in ZoneMap index back into a Field.
+ /// This is the inverse of to_olap_string(). For DecimalV3,
to_olap_string() stores the
+ /// raw unscaled integer, so from_zonemap_string() internally sets
ignore_scale=true to
+ /// avoid double-scaling. For DecimalV2 and other types, ignore_scale has
no effect.
+ /// Callers: zone_map_index.cpp (min/max deserialization).
+ virtual Status from_zonemap_string(const std::string& str, Field& field)
const {
+ FormatOptions options;
+ options.ignore_scale = true;
+ return from_olap_string(str, field, options);
+ }
+
+ /// Parse a human-readable string from FE (delete conditions, default
values,
+ /// schema change defaults) into a Field. Uses standard decimal parsing
with full
+ /// scale. Callers: delete_handler.cpp, column_reader.cpp
(DefaultValueColumnIterator),
+ /// schema_change.cpp.
+ virtual Status from_fe_string(const std::string& str, Field& field) const {
+ FormatOptions options;
+ return from_olap_string(str, field, options);
}
// For strict mode, we should not have nullable columns, as we will
directly report errors when string conversion fails instead of handling them
@@ -484,6 +512,15 @@ public:
FieldInfo& info);
protected:
+ /// Internal implementation for parsing OLAP storage strings into Fields.
+ /// Not called directly by external code — use from_zonemap_string() or
from_fe_string()
+ /// instead. Subclasses override this to provide type-specific
deserialization.
+ /// For decimals, options.ignore_scale controls whether scale is applied
during parsing.
+ virtual Status from_olap_string(const std::string& str, Field& field,
+ const FormatOptions& options) const {
+ return Status::NotSupported("from_olap_string is not supported");
+ }
+
bool _return_object_as_string = false;
// This parameter indicates what level the serde belongs to and is mainly
used for complex types
// The default level is 1, and each time you nest, the level increases by
1,
diff --git a/be/src/core/data_type_serde/data_type_string_serde.cpp
b/be/src/core/data_type_serde/data_type_string_serde.cpp
index 492bdf0f811..2621f06842b 100644
--- a/be/src/core/data_type_serde/data_type_string_serde.cpp
+++ b/be/src/core/data_type_serde/data_type_string_serde.cpp
@@ -436,6 +436,8 @@ void DataTypeStringSerDeBase<ColumnType>::to_string(const
IColumn& column, size_
}
}
+// Serializes a STRING/VARCHAR/CHAR value to its OLAP string representation
for ZoneMap storage.
+// This is the inverse of from_olap_string(). Returns the raw string content
directly.
template <typename ColumnType>
std::string DataTypeStringSerDeBase<ColumnType>::to_olap_string(const Field&
field) const {
return field.get<TYPE_STRING>();
@@ -459,6 +461,16 @@ Status
DataTypeStringSerDeBase<ColumnType>::from_string(StringRef& str, IColumn&
return deserialize_one_cell_from_json(column, slice, options);
}
+// Deserializes a STRING/VARCHAR/CHAR value from its OLAP string representation
+// (e.g. from ZoneMap protobuf). This is the inverse of to_olap_string().
+//
+// For CHAR type: if the string is shorter than the declared column length
(_len),
+// pads with '\0' bytes to reach _len. This preserves CHAR's fixed-length
semantics.
+// For STRING/VARCHAR: stores the string as-is.
+//
+// Examples:
+// CHAR(10), str="hello" => field = "hello\0\0\0\0\0" (10 bytes)
+// VARCHAR, str="hello" => field = "hello" (5 bytes)
template <typename ColumnType>
Status DataTypeStringSerDeBase<ColumnType>::from_olap_string(const
std::string& str, Field& field,
const
FormatOptions& options) const {
diff --git a/be/src/core/data_type_serde/data_type_string_serde.h
b/be/src/core/data_type_serde/data_type_string_serde.h
index faa1b149171..3c5716ae661 100644
--- a/be/src/core/data_type_serde/data_type_string_serde.h
+++ b/be/src/core/data_type_serde/data_type_string_serde.h
@@ -101,8 +101,6 @@ public:
Status from_string(StringRef& str, IColumn& column,
const FormatOptions& options) const override;
- Status from_olap_string(const std::string& str, Field& field,
- const FormatOptions& options) const override;
Status serialize_one_cell_to_json(const IColumn& column, int64_t row_num,
BufferWritable& bw,
FormatOptions& options) const override;
@@ -265,6 +263,10 @@ public:
std::string to_olap_string(const Field& field) const override;
+protected:
+ Status from_olap_string(const std::string& str, Field& field,
+ const FormatOptions& options) const override;
+
private:
const PrimitiveType _type;
const int _len = -1;
diff --git a/be/src/core/data_type_serde/data_type_time_serde.h
b/be/src/core/data_type_serde/data_type_time_serde.h
index 924d4b97190..dafaa600eb4 100644
--- a/be/src/core/data_type_serde/data_type_time_serde.h
+++ b/be/src/core/data_type_serde/data_type_time_serde.h
@@ -38,9 +38,6 @@ public:
Status from_string(StringRef& str, IColumn& column,
const FormatOptions& options) const override;
- Status from_olap_string(const std::string& str, Field& field,
- const FormatOptions& options) const override;
-
Status from_string_strict_mode(StringRef& str, IColumn& column,
const FormatOptions& options) const
override;
@@ -73,6 +70,10 @@ public:
IColumn& target_col) const;
int get_scale() const override { return _scale; }
+protected:
+ Status from_olap_string(const std::string& str, Field& field,
+ const FormatOptions& options) const override;
+
private:
int _scale;
};
diff --git a/be/src/core/data_type_serde/data_type_timestamptz_serde.h
b/be/src/core/data_type_serde/data_type_timestamptz_serde.h
index 8048731460f..459003e040f 100644
--- a/be/src/core/data_type_serde/data_type_timestamptz_serde.h
+++ b/be/src/core/data_type_serde/data_type_timestamptz_serde.h
@@ -36,9 +36,6 @@ public:
Status from_string(StringRef& str, IColumn& column,
const FormatOptions& options) const override;
- Status from_olap_string(const std::string& str, Field& field,
- const FormatOptions& options) const override;
-
Status from_string_batch(const ColumnString& str, ColumnNullable& column,
const FormatOptions& options) const override;
@@ -77,6 +74,10 @@ public:
std::string to_olap_string(const Field& field) const override;
+protected:
+ Status from_olap_string(const std::string& str, Field& field,
+ const FormatOptions& options) const override;
+
private:
const UInt32 _scale = 6;
};
diff --git a/be/src/storage/delete/delete_handler.cpp
b/be/src/storage/delete/delete_handler.cpp
index 74e29fe5641..c13c17700a5 100644
--- a/be/src/storage/delete/delete_handler.cpp
+++ b/be/src/storage/delete/delete_handler.cpp
@@ -27,15 +27,7 @@
#include "common/config.h"
#include "common/logging.h"
#include "common/status.h"
-#include "exprs/function/cast/cast_parameters.h"
-#include "exprs/function/cast/cast_to_boolean.h"
-#include "exprs/function/cast/cast_to_date_or_datetime_impl.hpp"
-#include "exprs/function/cast/cast_to_datetimev2_impl.hpp"
-#include "exprs/function/cast/cast_to_datev2_impl.hpp"
-#include "exprs/function/cast/cast_to_decimal.h"
-#include "exprs/function/cast/cast_to_float.h"
-#include "exprs/function/cast/cast_to_int.h"
-#include "exprs/function/cast/cast_to_ip.h"
+#include "core/data_type_serde/data_type_serde.h"
#include "storage/olap_common.h"
#include "storage/predicate/block_column_predicate.h"
#include "storage/predicate/predicate_creator.h"
@@ -51,185 +43,62 @@ using ::google::protobuf::RepeatedPtrField;
namespace doris {
+// Inserts an already-parsed Field into a HybridSetBase for IN/NOT_IN predicates; the
+// string-to-Field parsing itself is done by convert() below via the serde's from_fe_string.
+// The type-dispatch via switch/case is still needed because build_set<PType>() and
+// HybridSet::insert(const void*) require compile-time PrimitiveType, and Field::get<PType>()
+// must be invoked with the correct type to extract the underlying CppType value.
template <PrimitiveType PType>
-Status convert(const DataTypePtr& data_type, const std::string& str, Arena&
arena,
- typename PrimitiveTypeTraits<PType>::CppType& res) {
- if constexpr (PType == TYPE_TINYINT || PType == TYPE_SMALLINT || PType ==
TYPE_INT ||
- PType == TYPE_BIGINT || PType == TYPE_LARGEINT) {
- CastParameters parameters;
- if (!CastToInt::from_string<false>({str.data(), str.size()}, res,
parameters)) {
- return Status::Error<ErrorCode::INVALID_ARGUMENT>(
- "invalid {} string. str={}",
type_to_string(data_type->get_primitive_type()),
- str);
- }
- return Status::OK();
- }
- if constexpr (PType == TYPE_FLOAT || PType == TYPE_DOUBLE) {
- CastParameters parameters;
- if (!CastToFloat::from_string({str.data(), str.size()}, res,
parameters)) {
- return Status::Error<ErrorCode::INVALID_ARGUMENT>(
- "invalid {} string. str={}",
type_to_string(data_type->get_primitive_type()),
- str);
- }
- return Status::OK();
- }
- if constexpr (PType == TYPE_DATE) {
- CastParameters parameters;
- if (!CastToDateOrDatetime::from_string<false>({str.data(),
str.size()}, res, nullptr,
- parameters)) {
- return Status::Error<ErrorCode::INVALID_ARGUMENT>(
- "invalid {} string. str={}",
type_to_string(data_type->get_primitive_type()),
- str);
- }
- return Status::OK();
- }
- if constexpr (PType == TYPE_DATETIME) {
- CastParameters parameters;
- if (!CastToDateOrDatetime::from_string<true>({str.data(), str.size()},
res, nullptr,
- parameters)) {
- return Status::Error<ErrorCode::INVALID_ARGUMENT>(
- "invalid {} string. str={}",
type_to_string(data_type->get_primitive_type()),
- str);
- }
- return Status::OK();
- }
- if constexpr (PType == TYPE_DATEV2) {
- CastParameters parameters;
- if (!CastToDateV2::from_string({str.data(), str.size()}, res, nullptr,
parameters)) {
- return Status::Error<ErrorCode::INVALID_ARGUMENT>(
- "invalid {} string. str={}",
type_to_string(data_type->get_primitive_type()),
- str);
- }
- return Status::OK();
- }
- if constexpr (PType == TYPE_DATETIMEV2) {
- CastParameters parameters;
- if (!CastToDatetimeV2::from_string({str.data(), str.size()}, res,
nullptr,
- data_type->get_scale(),
parameters)) {
- return Status::Error<ErrorCode::INVALID_ARGUMENT>(
- "invalid {} string. str={}",
type_to_string(data_type->get_primitive_type()),
- str);
- }
- return Status::OK();
- }
- if constexpr (PType == TYPE_TIMESTAMPTZ) {
- CastParameters parameters;
- if (!CastToTimstampTz::from_string({str.data(), str.size()}, res,
parameters, nullptr,
- data_type->get_scale())) {
- return Status::Error<ErrorCode::INVALID_ARGUMENT>(
- "invalid {} string. str={}",
type_to_string(data_type->get_primitive_type()),
- str);
- }
- return Status::OK();
- }
- if constexpr (PType == TYPE_CHAR) {
- size_t target = assert_cast<const
DataTypeString*>(remove_nullable(data_type).get())->len();
- res = {str.data(), str.size()};
- if (target > str.size()) {
- char* buffer = arena.alloc(target);
- memset(buffer, 0, target);
- memcpy(buffer, str.data(), str.size());
- res = {buffer, target};
- }
- return Status::OK();
- }
- if constexpr (PType == TYPE_STRING || PType == TYPE_VARCHAR) {
- char* buffer = arena.alloc(str.size());
- memcpy(buffer, str.data(), str.size());
- res = {buffer, str.size()};
- return Status::OK();
- }
- if constexpr (PType == TYPE_BOOLEAN) {
- CastParameters parameters;
- UInt8 tmp;
- if (!CastToBool::from_string({str.data(), str.size()}, tmp,
parameters)) {
- return Status::Error<ErrorCode::INVALID_ARGUMENT>(
- "invalid {} string. str={}",
type_to_string(data_type->get_primitive_type()),
- str);
- }
- res = tmp != 0;
- return Status::OK();
- }
- if constexpr (PType == TYPE_IPV4) {
- CastParameters parameters;
- if (!CastToIPv4::from_string({str.data(), str.size()}, res,
parameters)) {
- return Status::Error<ErrorCode::INVALID_ARGUMENT>(
- "invalid {} string. str={}",
type_to_string(data_type->get_primitive_type()),
- str);
- }
- return Status::OK();
- }
- if constexpr (PType == TYPE_IPV6) {
- CastParameters parameters;
- if (!CastToIPv6::from_string({str.data(), str.size()}, res,
parameters)) {
- return Status::Error<ErrorCode::INVALID_ARGUMENT>(
- "invalid {} string. str={}",
type_to_string(data_type->get_primitive_type()),
- str);
- }
- return Status::OK();
- }
- if constexpr (PType == TYPE_DECIMALV2) {
- CastParameters parameters;
- Decimal128V2 tmp;
- if (!CastToDecimal::from_string({str.data(), str.size()}, tmp,
data_type->get_precision(),
- data_type->get_scale(), parameters)) {
- return Status::Error<ErrorCode::INVALID_ARGUMENT>(
- "invalid {} string. str={}",
type_to_string(data_type->get_primitive_type()),
- str);
- }
- res = DecimalV2Value(tmp.value);
- return Status::OK();
- } else if constexpr (is_decimal(PType)) {
- CastParameters parameters;
- if (!CastToDecimal::from_string({str.data(), str.size()}, res,
data_type->get_precision(),
- data_type->get_scale(), parameters)) {
- return Status::Error<ErrorCode::INVALID_ARGUMENT>(
- "invalid {} string. str={}",
type_to_string(data_type->get_primitive_type()),
- str);
- }
- return Status::OK();
+void insert_field_to_set(const Field& field, HybridSetBase* set) {
+ if constexpr (is_string_type(PType)) {
+ // StringSet::insert expects const StringRef*, so we must construct a
StringRef
+ // from the std::string returned by Field::get<>.
+ const auto& tmp = field.get<PType>();
+ StringRef ref(tmp.data(), tmp.size());
+ set->insert(reinterpret_cast<const void*>(&ref));
+ } else {
+ auto tmp = field.get<PType>();
+ set->insert(reinterpret_cast<const void*>(&tmp));
}
- return Status::Error<ErrorCode::INVALID_ARGUMENT>(
- "unsupported data type in delete handler. type={}",
- type_to_string(data_type->get_primitive_type()));
}
-#define CONVERT_CASE(PType) \
- case PType: { \
- set = build_set<PType>(); \
- for (const auto& s : str) { \
- typename PrimitiveTypeTraits<PType>::CppType tmp; \
- RETURN_IF_ERROR(convert<PType>(data_type, s, arena, tmp)); \
- set->insert(reinterpret_cast<const void*>(&tmp)); \
- } \
- return Status::OK(); \
- }
-Status convert(const DataTypePtr& data_type, const std::list<std::string>&
str, Arena& arena,
+#define FROM_FE_STRING_CASE(PType) \
+ case PType: { \
+ set = build_set<PType>(); \
+ for (const auto& s : str) { \
+ Field field; \
+ RETURN_IF_ERROR(serde->from_fe_string(s, field)); \
+ insert_field_to_set<PType>(field, set.get()); \
+ } \
+ return Status::OK(); \
+ }
+Status convert(const DataTypePtr& data_type, const std::list<std::string>& str,
std::shared_ptr<HybridSetBase>& set) {
+ auto serde = data_type->get_serde();
switch (data_type->get_primitive_type()) {
- CONVERT_CASE(TYPE_TINYINT);
- CONVERT_CASE(TYPE_SMALLINT);
- CONVERT_CASE(TYPE_INT);
- CONVERT_CASE(TYPE_BIGINT);
- CONVERT_CASE(TYPE_LARGEINT);
- CONVERT_CASE(TYPE_FLOAT);
- CONVERT_CASE(TYPE_DOUBLE);
- CONVERT_CASE(TYPE_DATE);
- CONVERT_CASE(TYPE_DATETIME);
- CONVERT_CASE(TYPE_DATEV2);
- CONVERT_CASE(TYPE_DATETIMEV2);
- CONVERT_CASE(TYPE_TIMESTAMPTZ);
- CONVERT_CASE(TYPE_BOOLEAN);
- CONVERT_CASE(TYPE_IPV4);
- CONVERT_CASE(TYPE_IPV6);
- CONVERT_CASE(TYPE_DECIMALV2);
- CONVERT_CASE(TYPE_DECIMAL32);
- CONVERT_CASE(TYPE_DECIMAL64);
- CONVERT_CASE(TYPE_DECIMAL128I);
- CONVERT_CASE(TYPE_DECIMAL256);
- CONVERT_CASE(TYPE_CHAR);
- CONVERT_CASE(TYPE_VARCHAR);
- CONVERT_CASE(TYPE_STRING);
+ FROM_FE_STRING_CASE(TYPE_TINYINT);
+ FROM_FE_STRING_CASE(TYPE_SMALLINT);
+ FROM_FE_STRING_CASE(TYPE_INT);
+ FROM_FE_STRING_CASE(TYPE_BIGINT);
+ FROM_FE_STRING_CASE(TYPE_LARGEINT);
+ FROM_FE_STRING_CASE(TYPE_FLOAT);
+ FROM_FE_STRING_CASE(TYPE_DOUBLE);
+ FROM_FE_STRING_CASE(TYPE_DATE);
+ FROM_FE_STRING_CASE(TYPE_DATETIME);
+ FROM_FE_STRING_CASE(TYPE_DATEV2);
+ FROM_FE_STRING_CASE(TYPE_DATETIMEV2);
+ FROM_FE_STRING_CASE(TYPE_TIMESTAMPTZ);
+ FROM_FE_STRING_CASE(TYPE_BOOLEAN);
+ FROM_FE_STRING_CASE(TYPE_IPV4);
+ FROM_FE_STRING_CASE(TYPE_IPV6);
+ FROM_FE_STRING_CASE(TYPE_DECIMALV2);
+ FROM_FE_STRING_CASE(TYPE_DECIMAL32);
+ FROM_FE_STRING_CASE(TYPE_DECIMAL64);
+ FROM_FE_STRING_CASE(TYPE_DECIMAL128I);
+ FROM_FE_STRING_CASE(TYPE_DECIMAL256);
+ FROM_FE_STRING_CASE(TYPE_CHAR);
+ FROM_FE_STRING_CASE(TYPE_VARCHAR);
+ FROM_FE_STRING_CASE(TYPE_STRING);
default:
return Status::Error<ErrorCode::INVALID_ARGUMENT>(
"unsupported data type in delete handler. type={}",
@@ -237,43 +106,14 @@ Status convert(const DataTypePtr& data_type, const
std::list<std::string>& str,
}
return Status::OK();
}
-#undef CONVERT_CASE
-
-#define CONVERT_CASE(PType)
\
- case PType: {
\
- typename PrimitiveTypeTraits<PType>::CppType tmp;
\
- RETURN_IF_ERROR(convert<PType>(type, res.value_str.front(), arena,
tmp)); \
- v = Field::create_field<PType>(tmp);
\
- switch (res.condition_op) {
\
- case PredicateType::EQ:
\
- predicate = create_comparison_predicate<PredicateType::EQ>(index,
col_name, type, v, \
- true);
\
- return Status::OK();
\
- case PredicateType::NE:
\
- predicate = create_comparison_predicate<PredicateType::NE>(index,
col_name, type, v, \
- true);
\
- return Status::OK();
\
- case PredicateType::GT:
\
- predicate = create_comparison_predicate<PredicateType::GT>(index,
col_name, type, v, \
- true);
\
- return Status::OK();
\
- case PredicateType::GE:
\
- predicate = create_comparison_predicate<PredicateType::GE>(index,
col_name, type, v, \
- true);
\
- return Status::OK();
\
- case PredicateType::LT:
\
- predicate = create_comparison_predicate<PredicateType::LT>(index,
col_name, type, v, \
- true);
\
- return Status::OK();
\
- case PredicateType::LE:
\
- predicate = create_comparison_predicate<PredicateType::LE>(index,
col_name, type, v, \
- true);
\
- return Status::OK();
\
- default:
\
- return Status::Error<ErrorCode::INVALID_ARGUMENT>(
\
- "invalid condition operator. operator={}",
type_to_op_str(res.condition_op)); \
- }
\
- }
+#undef FROM_FE_STRING_CASE
+
+// Parses a single condition value string into a Field and creates a comparison predicate.
+// Non-string types are parsed with serde->from_fe_string, which handles the
+// type-specific conversions (including decimal scale, etc.).
+// For CHAR type, the value is padded with '\0' to the declared column length, consistent
+// with the IN list path in convert() above.
+// For VARCHAR/STRING, the Field is created directly from the raw string.
Status parse_to_predicate(const uint32_t index, const std::string col_name,
const DataTypePtr& type,
DeleteHandler::ConditionParseResult& res, Arena&
arena,
std::shared_ptr<ColumnPredicate>& predicate) {
@@ -285,70 +125,53 @@ Status parse_to_predicate(const uint32_t index, const
std::string col_name, cons
type->get_primitive_type());
return Status::OK();
}
+
Field v;
- switch (type->get_primitive_type()) {
- CONVERT_CASE(TYPE_TINYINT);
- CONVERT_CASE(TYPE_SMALLINT);
- CONVERT_CASE(TYPE_INT);
- CONVERT_CASE(TYPE_BIGINT);
- CONVERT_CASE(TYPE_LARGEINT);
- CONVERT_CASE(TYPE_FLOAT);
- CONVERT_CASE(TYPE_DOUBLE);
- CONVERT_CASE(TYPE_DATE);
- CONVERT_CASE(TYPE_DATETIME);
- CONVERT_CASE(TYPE_DATEV2);
- CONVERT_CASE(TYPE_DATETIMEV2);
- CONVERT_CASE(TYPE_TIMESTAMPTZ);
- CONVERT_CASE(TYPE_BOOLEAN);
- CONVERT_CASE(TYPE_IPV4);
- CONVERT_CASE(TYPE_IPV6);
- CONVERT_CASE(TYPE_DECIMALV2);
- CONVERT_CASE(TYPE_DECIMAL32);
- CONVERT_CASE(TYPE_DECIMAL64);
- CONVERT_CASE(TYPE_DECIMAL128I);
- CONVERT_CASE(TYPE_DECIMAL256);
- case TYPE_CHAR:
- case TYPE_VARCHAR:
- case TYPE_STRING: {
- v = Field::create_field<TYPE_STRING>(res.value_str.front());
- switch (res.condition_op) {
- case PredicateType::EQ:
- predicate =
- create_comparison_predicate<PredicateType::EQ>(index,
col_name, type, v, true);
- return Status::OK();
- case PredicateType::NE:
- predicate =
- create_comparison_predicate<PredicateType::NE>(index,
col_name, type, v, true);
- return Status::OK();
- case PredicateType::GT:
- predicate =
- create_comparison_predicate<PredicateType::GT>(index,
col_name, type, v, true);
- return Status::OK();
- case PredicateType::GE:
- predicate =
- create_comparison_predicate<PredicateType::GE>(index,
col_name, type, v, true);
- return Status::OK();
- case PredicateType::LT:
- predicate =
- create_comparison_predicate<PredicateType::LT>(index,
col_name, type, v, true);
- return Status::OK();
- case PredicateType::LE:
- predicate =
- create_comparison_predicate<PredicateType::LE>(index,
col_name, type, v, true);
- return Status::OK();
- default:
- return Status::Error<ErrorCode::INVALID_ARGUMENT>(
- "invalid condition operator. operator={}",
type_to_op_str(res.condition_op));
+ if (type->get_primitive_type() == TYPE_CHAR) {
+ // CHAR type: create Field and pad with '\0' to the declared column
length,
+ // consistent with IN list path (convert() above) and
create_comparison_predicate.
+ const auto& str = res.value_str.front();
+ auto char_len = cast_set<size_t>(
+ assert_cast<const
DataTypeString*>(remove_nullable(type).get())->len());
+ auto target = std::max(char_len, str.size());
+ if (target > str.size()) {
+ std::string padded(target, '\0');
+ memcpy(padded.data(), str.data(), str.size());
+ v = Field::create_field<TYPE_CHAR>(std::move(padded));
+ } else {
+ v = Field::create_field<TYPE_CHAR>(str);
}
- break;
+ } else if (is_string_type(type->get_primitive_type())) {
+ // VARCHAR/STRING: create Field directly from the raw string, no
padding needed.
+ v = Field::create_field<TYPE_STRING>(res.value_str.front());
+ } else {
+ auto serde = type->get_serde();
+ RETURN_IF_ERROR(serde->from_fe_string(res.value_str.front(), v));
}
+
+ switch (res.condition_op) {
+ case PredicateType::EQ:
+ predicate = create_comparison_predicate<PredicateType::EQ>(index,
col_name, type, v, true);
+ return Status::OK();
+ case PredicateType::NE:
+ predicate = create_comparison_predicate<PredicateType::NE>(index,
col_name, type, v, true);
+ return Status::OK();
+ case PredicateType::GT:
+ predicate = create_comparison_predicate<PredicateType::GT>(index,
col_name, type, v, true);
+ return Status::OK();
+ case PredicateType::GE:
+ predicate = create_comparison_predicate<PredicateType::GE>(index,
col_name, type, v, true);
+ return Status::OK();
+ case PredicateType::LT:
+ predicate = create_comparison_predicate<PredicateType::LT>(index,
col_name, type, v, true);
+ return Status::OK();
+ case PredicateType::LE:
+ predicate = create_comparison_predicate<PredicateType::LE>(index,
col_name, type, v, true);
+ return Status::OK();
default:
- return Status::Error<ErrorCode::INVALID_ARGUMENT>(
- "unsupported data type in delete handler. type={}",
- type_to_string(type->get_primitive_type()));
+ return Status::Error<ErrorCode::INVALID_ARGUMENT>("invalid condition
operator. operator={}",
+
type_to_op_str(res.condition_op));
}
- return Status::OK();
-#undef CONVERT_CASE
}
Status parse_to_in_predicate(const uint32_t index, const std::string& col_name,
@@ -358,14 +181,14 @@ Status parse_to_in_predicate(const uint32_t index, const
std::string& col_name,
switch (res.condition_op) {
case PredicateType::IN_LIST: {
std::shared_ptr<HybridSetBase> set;
- RETURN_IF_ERROR(convert(type, res.value_str, arena, set));
+ RETURN_IF_ERROR(convert(type, res.value_str, set));
predicate =
create_in_list_predicate<PredicateType::IN_LIST>(index,
col_name, type, set, true);
break;
}
case PredicateType::NOT_IN_LIST: {
std::shared_ptr<HybridSetBase> set;
- RETURN_IF_ERROR(convert(type, res.value_str, arena, set));
+ RETURN_IF_ERROR(convert(type, res.value_str, set));
predicate =
create_in_list_predicate<PredicateType::NOT_IN_LIST>(index, col_name, type, set,
true);
break;
diff --git a/be/src/storage/index/zone_map/zone_map_index.cpp
b/be/src/storage/index/zone_map/zone_map_index.cpp
index 4dd2540a083..366f377a87f 100644
--- a/be/src/storage/index/zone_map/zone_map_index.cpp
+++ b/be/src/storage/index/zone_map/zone_map_index.cpp
@@ -70,10 +70,8 @@ Status ZoneMap::from_proto(const ZoneMapPB& zone_map, const
DataTypePtr& data_ty
}
} else {
if (!zone_map_info.pass_all) {
- DataTypeSerDe::FormatOptions opt;
- opt.ignore_scale = true;
- RETURN_IF_ERROR(data_type->get_serde()->from_olap_string(
- zone_map.min(), zone_map_info.min_value, opt));
+ RETURN_IF_ERROR(data_type->get_serde()->from_zonemap_string(
+ zone_map.min(), zone_map_info.min_value));
}
}
@@ -99,10 +97,8 @@ Status ZoneMap::from_proto(const ZoneMapPB& zone_map, const
DataTypePtr& data_ty
}
} else {
if (!zone_map_info.pass_all) {
- DataTypeSerDe::FormatOptions opt;
- opt.ignore_scale = true;
- RETURN_IF_ERROR(data_type->get_serde()->from_olap_string(
- zone_map.max(), zone_map_info.max_value, opt));
+ RETURN_IF_ERROR(data_type->get_serde()->from_zonemap_string(
+ zone_map.max(), zone_map_info.max_value));
}
}
}
diff --git a/be/src/storage/schema_change/schema_change.cpp
b/be/src/storage/schema_change/schema_change.cpp
index 530f04bc91d..e9cfcb83068 100644
--- a/be/src/storage/schema_change/schema_change.cpp
+++ b/be/src/storage/schema_change/schema_change.cpp
@@ -1543,9 +1543,8 @@ Status
SchemaChangeJob::_init_column_mapping(ColumnMapping* column_mapping,
}
if (!column_schema.is_nullable() || value.length() != 0) {
- DataTypeSerDe::FormatOptions options;
-
RETURN_IF_ERROR(column_schema.get_vec_type()->get_serde()->from_olap_string(
- value, column_mapping->default_value, options));
+
RETURN_IF_ERROR(column_schema.get_vec_type()->get_serde()->from_fe_string(
+ value, column_mapping->default_value));
}
return Status::OK();
diff --git a/be/src/storage/segment/column_reader.cpp
b/be/src/storage/segment/column_reader.cpp
index 5badb4a7008..9bbffcfaff8 100644
--- a/be/src/storage/segment/column_reader.cpp
+++ b/be/src/storage/segment/column_reader.cpp
@@ -2246,8 +2246,7 @@ Status DefaultValueColumnIterator::init(const
ColumnIteratorOptions& opts) {
const auto serde = DataTypeFactory::instance()
.create_data_type(t, _precision,
_scale, _len)
->get_serde();
- DataTypeSerDe::FormatOptions opt;
- RETURN_IF_ERROR(serde->from_olap_string(_default_value,
_default_value_field, opt));
+ RETURN_IF_ERROR(serde->from_fe_string(_default_value,
_default_value_field));
}
} else if (_is_nullable) {
_default_value_field = Field::create_field<TYPE_NULL>(Null {});
diff --git a/be/src/util/io_helper.h b/be/src/util/io_helper.h
index 64016e47f7a..5a1ba1775d6 100644
--- a/be/src/util/io_helper.h
+++ b/be/src/util/io_helper.h
@@ -165,11 +165,19 @@ StringParser::ParseResult read_decimal_text_impl(T& x,
const StringRef& buf, UIn
UInt32 scale) {
static_assert(IsDecimalNumber<T>);
if constexpr (!std::is_same_v<DecimalV2Value, T>) {
+ // DecimalV3: uses the caller-supplied precision and scale.
+ // When called from from_olap_string with ignore_scale=true, scale=0
means the
+ // string is treated as an unscaled integer (e.g. "12345" → internal
int 12345).
StringParser::ParseResult result = StringParser::PARSE_SUCCESS;
x.value = StringParser::string_to_decimal<P>(buf.data, (int)buf.size,
precision, scale,
&result);
return result;
} else {
+ // DecimalV2: IGNORES the caller-supplied precision/scale and hardcodes
+ // DecimalV2Value::PRECISION (27) and DecimalV2Value::SCALE (9).
+ // This means from_olap_string's ignore_scale flag has no actual effect on DecimalV2
+ // parsing today — the string "123.456000000" is always parsed with scale=9.
+ // Callers should still set ignore_scale=false for DecimalV2 for semantic correctness.
StringParser::ParseResult result = StringParser::PARSE_SUCCESS;
x = DecimalV2Value(StringParser::string_to_decimal<TYPE_DECIMALV2>(
buf.data, (int)buf.size, DecimalV2Value::PRECISION,
DecimalV2Value::SCALE,
diff --git a/be/test/storage/olap_type_test.cpp
b/be/test/storage/olap_type_test.cpp
index afa8af1f4a5..1b000a5e435 100644
--- a/be/test/storage/olap_type_test.cpp
+++ b/be/test/storage/olap_type_test.cpp
@@ -23,6 +23,12 @@
#include "core/data_type/data_type_factory.hpp"
#include "core/data_type/define_primitive_type.h"
+#include "core/data_type_serde/data_type_serde.h"
+#include "core/decimal12.h"
+#include "core/field.h"
+#include "core/types.h"
+#include "core/value/decimalv2_value.h"
+#include "core/value/vdatetime_value.h"
#include "exprs/function/cast/cast_to_string.h"
#include "gtest/gtest_pred_impl.h"
#include "storage/olap_common.h"
@@ -582,4 +588,686 @@ TEST_F(OlapTypeTest, ser_deser_double) {
<< ", diff_ratio: " << fmt::format("{:.17g}", diff_ratio);
}
}
+
+// =============================================================================
+// Tests for to_olap_string / from_zonemap_string on DataTypeSerDe
+//
+// Background:
+//   ZoneMap index serializes min/max values via to_olap_string()
+//   and deserializes via from_zonemap_string(). The from_zonemap_string()
+//   method internally sets ignore_scale=true for DecimalV3 types to avoid
+//   double-scaling the raw unscaled integer stored in ZoneMap.
+//
+// Key difference vs normal from_fe_string:
+//   - DecimalV2: to_olap_string uses decimal12_t::to_string() which outputs
+//     "integer.fraction" with 9 zero-padded fractional digits (e.g. "123.456000000").
+//     from_zonemap_string still works correctly because DecimalV2's parser
+//     hardcodes scale=9 regardless of the ignore_scale setting.
+//   - Decimal32/64/128I/256: to_olap_string outputs the RAW INTEGER string
+//     (the unscaled internal value). E.g., Decimal(9,2) value 123.45 has
+//     internal integer 12345, so to_olap_string outputs "12345".
+//     from_zonemap_string uses ignore_scale=true → scale=0, parsing as integer.
+//   - Float/Double: to_olap_string uses CastToString::from_number, which outputs
+//     "NaN", "Infinity", "-Infinity" for special values. But from_zonemap_string
+//     uses fast_float::from_chars which REJECTS these strings. In practice, ZoneMap
+//     tracks NaN/Inf via boolean flags (has_nan, has_positive_inf, has_negative_inf),
+//     so the min/max values never contain NaN/Inf.
+//   - DateV1 (TYPE_DATE): to_olap_string outputs "YYYY-MM-DD".
+//   - DateTimeV1 (TYPE_DATETIME): to_olap_string outputs "YYYY-MM-DD HH:MM:SS".
+//   - DateV2: to_olap_string outputs "YYYY-MM-DD".
+//   - DateTimeV2: to_olap_string outputs "YYYY-MM-DD HH:MM:SS[.ffffff]".
+//     Microsecond part only appears when microsecond > 0 (default scale=-1).
+//     Note: the old ZoneMap code used hardcoded scale=6 (always 6 fractional digits),
+//     but the new to_olap_string omits the fractional part entirely when microsecond == 0
+//     and still prints all 6 digits otherwise (e.g. ".100000").
+// =============================================================================
+
+// ---------------------------------------------------------------------------
+// Decimal32: to_olap_string outputs RAW integer (unscaled value).
+// Internal representation: value * 10^scale.
+// E.g., Decimal(9,2) value 123.45 → internal int32 = 12345 → "12345".
+// from_zonemap_string reads "12345" as integer 12345 (ignore_scale=true internally).
+// ---------------------------------------------------------------------------
+TEST_F(OlapTypeTest, ser_deser_decimal32) {
+    // Create Decimal(9,2) data type (precision=9, scale=2)
+    auto data_type_ptr = DataTypeFactory::instance().create_data_type(
+            FieldType::OLAP_FIELD_TYPE_DECIMAL32, /*precision=*/9, /*scale=*/2);
+    auto serde = data_type_ptr->get_serde();
+
+    // Test cases: {internal_int32_value, expected_olap_string}
+    // actual_decimal_value = internal / 10^scale
+    std::vector<std::pair<int32_t, std::string>> test_cases = {
+            // 123.45 → internal=12345 → "12345"
+            {12345, "12345"},
+            // -1.00 → internal=-100 → "-100"
+            {-100, "-100"},
+            // 0.00 → internal=0 → "0"
+            {0, "0"},
+            // 999999999 → max for Decimal(9,2): 9999999.99
+            {999999999, "999999999"},
+            // -999999999 → min for Decimal(9,2): -9999999.99
+            {-999999999, "-999999999"},
+            // 1 → 0.01
+            {1, "1"},
+            // -1 → -0.01
+            {-1, "-1"},
+    };
+
+    for (const auto& [int_val, expected_str] : test_cases) {
+        Decimal32 dec(int_val);
+        auto field = Field::create_field<TYPE_DECIMAL32>(dec);
+        // Verify to_olap_string output matches expected raw integer string
+        auto result_str = serde->to_olap_string(field);
+        EXPECT_EQ(result_str, expected_str)
+                << "Decimal32 to_olap_string failed for internal value " << int_val;
+
+        // Verify round-trip: from_zonemap_string should restore the same internal value
+        Field restored_field;
+        auto status = serde->from_zonemap_string(result_str, restored_field);
+        // ASSERT (not EXPECT): restored_field must not be read if parsing failed.
+        ASSERT_TRUE(status.ok()) << status.to_string();
+        auto restored_value = restored_field.get<TYPE_DECIMAL32>();
+        EXPECT_EQ(restored_value.value, int_val)
+                << "Decimal32 round-trip failed for string '" << result_str << "'";
+    }
+}
+
+// ---------------------------------------------------------------------------
+// Decimal64: same pattern as Decimal32, but 64-bit integer.
+// E.g., Decimal(18,4) value 12345.6789 → internal int64 = 123456789 → "123456789".
+// ---------------------------------------------------------------------------
+TEST_F(OlapTypeTest, ser_deser_decimal64) {
+    auto data_type_ptr = DataTypeFactory::instance().create_data_type(
+            FieldType::OLAP_FIELD_TYPE_DECIMAL64, /*precision=*/18, /*scale=*/4);
+    auto serde = data_type_ptr->get_serde();
+
+    // Test cases: {internal_int64_value, expected_olap_string}
+    std::vector<std::pair<int64_t, std::string>> test_cases = {
+            // 12345.6789 → internal=123456789
+            {123456789L, "123456789"},
+            // 0 → "0"
+            {0L, "0"},
+            // -1 → -0.0001
+            {-1L, "-1"},
+            // Large value near max
+            {999999999999999999L, "999999999999999999"},
+            {-999999999999999999L, "-999999999999999999"},
+            // Small fractional: 0.0001
+            {1L, "1"},
+    };
+
+    for (const auto& [int_val, expected_str] : test_cases) {
+        Decimal64 dec(int_val);
+        auto field = Field::create_field<TYPE_DECIMAL64>(dec);
+        auto result_str = serde->to_olap_string(field);
+        EXPECT_EQ(result_str, expected_str)
+                << "Decimal64 to_olap_string failed for internal value " << int_val;
+
+        // Round-trip: the serialized string must parse back to the same internal integer.
+        Field restored_field;
+        auto status = serde->from_zonemap_string(result_str, restored_field);
+        // ASSERT (not EXPECT): restored_field must not be read if parsing failed.
+        ASSERT_TRUE(status.ok()) << status.to_string();
+        auto restored_value = restored_field.get<TYPE_DECIMAL64>();
+        EXPECT_EQ(restored_value.value, int_val)
+                << "Decimal64 round-trip failed for string '" << result_str << "'";
+    }
+}
+
+// ---------------------------------------------------------------------------
+// Decimal128I: to_olap_string uses fmt::format("{}", int128_value).
+// E.g., Decimal(38,6) value 123456789.123456 → internal int128 = 123456789123456.
+// Output: "123456789123456".
+// ---------------------------------------------------------------------------
+TEST_F(OlapTypeTest, ser_deser_decimal128i) {
+    auto data_type_ptr = DataTypeFactory::instance().create_data_type(
+            FieldType::OLAP_FIELD_TYPE_DECIMAL128I, /*precision=*/38, /*scale=*/6);
+    auto serde = data_type_ptr->get_serde();
+
+    // int128_t values and expected strings
+    struct TestCase {
+        int128_t int_val;
+        std::string expected_str;
+    };
+    std::vector<TestCase> test_cases = {
+            // 123456789.123456 → internal=123456789123456
+            {(int128_t)123456789123456LL, "123456789123456"},
+            // 0
+            {(int128_t)0, "0"},
+            // -1
+            {(int128_t)-1, "-1"},
+            // Positive large value exceeding int64 range
+            // 10^18 * 100 = 10^20
+            {(int128_t)1000000000000000000LL * 100, "100000000000000000000"},
+    };
+
+    for (const auto& tc : test_cases) {
+        Decimal128V3 dec(tc.int_val);
+        auto field = Field::create_field<TYPE_DECIMAL128I>(dec);
+        auto result_str = serde->to_olap_string(field);
+        EXPECT_EQ(result_str, tc.expected_str)
+                << "Decimal128I to_olap_string failed for expected '" << tc.expected_str << "'";
+
+        // Round-trip: the serialized string must parse back to the same internal integer.
+        Field restored_field;
+        auto status = serde->from_zonemap_string(result_str, restored_field);
+        // ASSERT (not EXPECT): restored_field must not be read if parsing failed.
+        ASSERT_TRUE(status.ok()) << status.to_string();
+        auto restored_value = restored_field.get<TYPE_DECIMAL128I>();
+        EXPECT_EQ(restored_value.value, tc.int_val)
+                << "Decimal128I round-trip failed for string '" << result_str << "'";
+    }
+}
+
+// ---------------------------------------------------------------------------
+// Decimal256: to_olap_string uses wide::to_string(value).
+// Same pattern: raw integer string from internal representation.
+// ---------------------------------------------------------------------------
+TEST_F(OlapTypeTest, ser_deser_decimal256) {
+    auto data_type_ptr = DataTypeFactory::instance().create_data_type(
+            FieldType::OLAP_FIELD_TYPE_DECIMAL256, /*precision=*/76, /*scale=*/10);
+    auto serde = data_type_ptr->get_serde();
+
+    // Use int128_t-constructible values for simplicity
+    // (wide::Int256 can be constructed from int128_t)
+    struct TestCase {
+        wide::Int256 int_val;
+        std::string expected_str;
+    };
+    std::vector<TestCase> test_cases = {
+            // Simple positive
+            {wide::Int256(123456789LL), "123456789"},
+            // Zero
+            {wide::Int256(0), "0"},
+            // Negative
+            {wide::Int256(-99999LL), "-99999"},
+            // Large value: 10^20
+            {wide::Int256((int128_t)1000000000000000000LL * 100), "100000000000000000000"},
+    };
+
+    for (const auto& tc : test_cases) {
+        Decimal256 dec(tc.int_val);
+        auto field = Field::create_field<TYPE_DECIMAL256>(dec);
+        auto result_str = serde->to_olap_string(field);
+        EXPECT_EQ(result_str, tc.expected_str)
+                << "Decimal256 to_olap_string failed for expected '" << tc.expected_str << "'";
+
+        // Round-trip: the serialized string must parse back to the same internal integer.
+        Field restored_field;
+        auto status = serde->from_zonemap_string(result_str, restored_field);
+        // ASSERT (not EXPECT): restored_field must not be read if parsing failed.
+        ASSERT_TRUE(status.ok()) << status.to_string();
+        auto restored_value = restored_field.get<TYPE_DECIMAL256>();
+        EXPECT_EQ(restored_value.value, tc.int_val)
+                << "Decimal256 round-trip failed for string '" << result_str << "'";
+    }
+}
+
+// ---------------------------------------------------------------------------
+// DecimalV2: to_olap_string uses decimal12_t(int_value, frac_value).to_string().
+// decimal12_t::to_string() outputs "integer.fraction" with 9 zero-padded fractional
+// digits. E.g., DecimalV2(123.456) → int_value=123, frac_value=456000000 →
+// decimal12_t(123, 456000000).to_string() → "123.456000000".
+//
+// from_zonemap_string passes ignore_scale=TRUE internally. Taken literally that would
+// mean scale=0 and a truncated fractional part — that would be WRONG for DecimalV2.
+// The round-trip still works because DecimalV2's parser (read_decimal_text_impl)
+// hardcodes DecimalV2Value::SCALE=9 regardless of the passed-in scale, making
+// ignore_scale irrelevant for DecimalV2.
+//
+// Note: this is different from DecimalV3 where storage is raw integer.
+// DecimalV2 storage string always contains a decimal point.
+// ---------------------------------------------------------------------------
+TEST_F(OlapTypeTest, ser_deser_decimalv2) {
+    auto data_type_ptr =
+            DataTypeFactory::instance().create_data_type(TYPE_DECIMALV2, /*is_nullable=*/false,
+                                                         /*precision=*/27, /*scale=*/9);
+    auto serde = data_type_ptr->get_serde();
+    // DecimalV2 storage string has decimal point. from_zonemap_string sets ignore_scale=true,
+    // but DecimalV2's parser hardcodes scale=9 regardless, so round-trip works correctly.
+
+    // Test cases: {DecimalV2Value, expected_to_olap_string}
+    // DecimalV2Value internally stores value * 10^9.
+    // decimal12_t::to_string format: "integer.fraction" with %09u for fraction.
+    struct TestCase {
+        DecimalV2Value value;
+        std::string expected_str;
+    };
+
+    std::vector<TestCase> test_cases = {
+            // 123.456 → int=123, frac=456000000 → "123.456000000"
+            {DecimalV2Value(123, 456000000), "123.456000000"},
+            // 0.0 → int=0, frac=0 → "0.000000000"
+            {DecimalV2Value(0, 0), "0.000000000"},
+            // -1.5 → int=-1, frac=-500000000 → "-1.500000000"
+            {DecimalV2Value(-1, -500000000), "-1.500000000"},
+            // Pure integer: 42.0 → "42.000000000"
+            {DecimalV2Value(42, 0), "42.000000000"},
+            // Tiny fraction: 0.000000001 → int=0, frac=1 → "0.000000001"
+            {DecimalV2Value(0, 1), "0.000000001"},
+            // Max fraction: 0.999999999 → int=0, frac=999999999 → "0.999999999"
+            {DecimalV2Value(0, 999999999), "0.999999999"},
+            // Large integer: 999999999999999999.0
+            {DecimalV2Value(999999999999999999LL, 0), "999999999999999999.000000000"},
+            // Negative with fraction
+            {DecimalV2Value(-123, -456000000), "-123.456000000"},
+    };
+
+    for (const auto& tc : test_cases) {
+        auto field = Field::create_field<TYPE_DECIMALV2>(tc.value);
+        auto result_str = serde->to_olap_string(field);
+        EXPECT_EQ(result_str, tc.expected_str) << "DecimalV2 to_olap_string failed";
+
+        // Round-trip: from_zonemap_string should restore the same value
+        Field restored_field;
+        auto status = serde->from_zonemap_string(result_str, restored_field);
+        // ASSERT (not EXPECT): restored_field must not be read if parsing failed.
+        ASSERT_TRUE(status.ok()) << status.to_string();
+        auto restored_value = restored_field.get<TYPE_DECIMALV2>();
+        EXPECT_EQ(restored_value, tc.value)
+                << "DecimalV2 round-trip failed for string '" << result_str << "'"
+                << ", expected int_value=" << tc.value.int_value()
+                << ", frac_value=" << tc.value.frac_value()
+                << ", got int_value=" << restored_value.int_value()
+                << ", frac_value=" << restored_value.frac_value();
+    }
+}
+
+// ---------------------------------------------------------------------------
+// Float: to_olap_string / from_zonemap_string for normal values.
+// to_olap_string uses CastToString::from_number which calls _fast_to_buffer.
+// Format: fmt "{:.7g}" (digits10+1=7 significant digits).
+// NaN/Inf are serialized as "NaN", "Infinity", "-Infinity" but from_zonemap_string
+// (which uses fast_float::from_chars) CANNOT parse them back → returns error.
+// In ZoneMap, NaN/Inf are tracked via boolean flags, not stored in min/max values.
+// ---------------------------------------------------------------------------
+TEST_F(OlapTypeTest, ser_deser_float_olap_string) {
+    auto data_type_ptr = DataTypeFactory::instance().create_data_type(TYPE_FLOAT, false);
+    auto serde = data_type_ptr->get_serde();
+
+    // Normal float values: to_olap_string → from_zonemap_string round-trip
+    std::vector<std::pair<float, std::string>> normal_cases = {
+            {0.0f, "0"},       {1.0f, "1"},
+            {-1.0f, "-1"},     {123.456f, "123.456"},
+            {0.001f, "0.001"}, {1234567.0f, "1234567"},
+            {1e-10f, "1e-10"}, {3.402823e+38f, "3.402823e+38"},
+    };
+
+    for (const auto& [val, expected_str] : normal_cases) {
+        auto field = Field::create_field<TYPE_FLOAT>(val);
+        auto result_str = serde->to_olap_string(field);
+        EXPECT_EQ(result_str, expected_str)
+                << "Float to_olap_string failed for " << fmt::format("{:.9g}", val);
+
+        // Round-trip
+        Field restored_field;
+        auto status = serde->from_zonemap_string(result_str, restored_field);
+        // ASSERT (not EXPECT): restored_field must not be read if parsing failed.
+        ASSERT_TRUE(status.ok()) << status.to_string();
+        float restored_val = restored_field.get<TYPE_FLOAT>();
+        // The text carries 7 significant digits, so compare by relative error, not equality.
+        float diff = std::abs(restored_val - val);
+        EXPECT_TRUE(val == 0 ? restored_val == 0 : diff / std::abs(val) < 1e-6)
+                << "Float round-trip: expected " << val << ", got " << restored_val;
+    }
+
+    // Special values: to_olap_string produces strings, but from_zonemap_string FAILS
+    // This documents the intentional behavior: ZoneMap uses boolean flags for these.
+    {
+        // NaN → "NaN", but from_zonemap_string cannot parse "NaN"
+        auto field = Field::create_field<TYPE_FLOAT>(std::numeric_limits<float>::quiet_NaN());
+        EXPECT_EQ(serde->to_olap_string(field), "NaN");
+        Field restored_field;
+        auto status = serde->from_zonemap_string("NaN", restored_field);
+        EXPECT_FALSE(status.ok()) << "from_zonemap_string should reject 'NaN'";
+    }
+    {
+        // +Infinity → "Infinity"
+        auto field = Field::create_field<TYPE_FLOAT>(std::numeric_limits<float>::infinity());
+        EXPECT_EQ(serde->to_olap_string(field), "Infinity");
+        Field restored_field;
+        auto status = serde->from_zonemap_string("Infinity", restored_field);
+        EXPECT_FALSE(status.ok()) << "from_zonemap_string should reject 'Infinity'";
+    }
+    {
+        // -Infinity → "-Infinity"
+        auto field = Field::create_field<TYPE_FLOAT>(-std::numeric_limits<float>::infinity());
+        EXPECT_EQ(serde->to_olap_string(field), "-Infinity");
+        Field restored_field;
+        auto status = serde->from_zonemap_string("-Infinity", restored_field);
+        EXPECT_FALSE(status.ok()) << "from_zonemap_string should reject '-Infinity'";
+    }
+}
+
+// ---------------------------------------------------------------------------
+// Double: same pattern as Float.
+// The expected strings in this case follow current serializer behavior.
+// Note: for DBL_MAX/lowest, current formatting rounds to a boundary string that
+// is rejected by from_zonemap_string (parsed as Infinity), so for these two values
+// the test checks the to_olap_string output and that from_zonemap_string fails with
+// the "NaN/Infinity not allowed in olap string" error.
+// NaN/Inf same behavior: to_olap_string works, from_zonemap_string rejects.
+// ---------------------------------------------------------------------------
+TEST_F(OlapTypeTest, ser_deser_double_olap_string) {
+    auto data_type_ptr = DataTypeFactory::instance().create_data_type(TYPE_DOUBLE, false);
+    auto serde = data_type_ptr->get_serde();
+
+    std::vector<std::pair<double, std::string>> normal_cases = {
+            {0.0, "0"},
+            {1.0, "1"},
+            {-1.0, "-1"},
+            {123.456789, "123.456789"},
+            {0.001, "0.001"},
+            {1234567890123456.0, "1234567890123456"},
+            {1e-100, "1e-100"},
+            {std::numeric_limits<double>::lowest(), "-1.797693134862316e+308"},
+            {std::numeric_limits<double>::max(), "1.797693134862316e+308"},
+    };
+
+    for (const auto& [val, expected_str] : normal_cases) {
+        auto field = Field::create_field<TYPE_DOUBLE>(val);
+        auto result_str = serde->to_olap_string(field);
+        EXPECT_EQ(result_str, expected_str)
+                << "Double to_olap_string failed for " << fmt::format("{:.17g}", val);
+
+        // Round-trip
+        Field restored_field;
+        auto status = serde->from_zonemap_string(result_str, restored_field);
+        if (val == std::numeric_limits<double>::lowest() ||
+            val == std::numeric_limits<double>::max()) {
+            // The rounded boundary string overflows on parse, so an error is expected here.
+            EXPECT_FALSE(status.ok());
+            EXPECT_NE(status.to_string().find("NaN/Infinity not allowed in olap string"),
+                      std::string::npos)
+                    << status.to_string();
+            continue;
+        }
+
+        // ASSERT (not EXPECT): restored_field must not be read if parsing failed.
+        ASSERT_TRUE(status.ok()) << status.to_string();
+        double restored_val = restored_field.get<TYPE_DOUBLE>();
+        double diff = std::abs(restored_val - val);
+        EXPECT_TRUE(val == 0 ? restored_val == 0 : diff / std::abs(val) < 1e-15)
+                << "Double round-trip: expected " << val << ", got " << restored_val;
+    }
+
+    // Special values
+    {
+        auto field = Field::create_field<TYPE_DOUBLE>(std::numeric_limits<double>::quiet_NaN());
+        EXPECT_EQ(serde->to_olap_string(field), "NaN");
+        Field restored_field;
+        EXPECT_FALSE(serde->from_zonemap_string("NaN", restored_field).ok());
+    }
+    {
+        auto field = Field::create_field<TYPE_DOUBLE>(std::numeric_limits<double>::infinity());
+        EXPECT_EQ(serde->to_olap_string(field), "Infinity");
+        Field restored_field;
+        EXPECT_FALSE(serde->from_zonemap_string("Infinity", restored_field).ok());
+    }
+    {
+        auto field = Field::create_field<TYPE_DOUBLE>(-std::numeric_limits<double>::infinity());
+        EXPECT_EQ(serde->to_olap_string(field), "-Infinity");
+        Field restored_field;
+        EXPECT_FALSE(serde->from_zonemap_string("-Infinity", restored_field).ok());
+    }
+    {
+        // -0.0 → "-0"
+        auto field = Field::create_field<TYPE_DOUBLE>(-0.0);
+        EXPECT_EQ(serde->to_olap_string(field), "-0");
+    }
+}
+
+// ---------------------------------------------------------------------------
+// DateV1 (TYPE_DATE): to_olap_string outputs "YYYY-MM-DD".
+// Internal representation: VecDateTimeValue, stored as uint24_t in OLAP.
+// The old ZoneMap used VecDateTimeValue::to_string(buf) → "YYYY-MM-DD\0".
+// from_zonemap_string uses CastToDateOrDatetime::from_string_non_strict_mode.
+// ---------------------------------------------------------------------------
+TEST_F(OlapTypeTest, ser_deser_datev1) {
+    auto data_type_ptr = DataTypeFactory::instance().create_data_type(TYPE_DATE, false);
+    auto serde = data_type_ptr->get_serde();
+
+    struct TestCase {
+        int year, month, day;
+        std::string expected_str;
+    };
+    std::vector<TestCase> test_cases = {
+            {2023, 1, 1, "2023-01-01"},   {2000, 12, 31, "2000-12-31"}, {1970, 1, 1, "1970-01-01"},
+            {9999, 12, 31, "9999-12-31"}, {1, 1, 1, "0001-01-01"},
+    };
+
+    for (const auto& tc : test_cases) {
+        VecDateTimeValue date_val;
+        date_val.unchecked_set_time(tc.year, tc.month, tc.day, 0, 0, 0);
+        date_val.set_type(TIME_DATE);
+
+        auto field = Field::create_field<TYPE_DATE>(date_val);
+        auto result_str = serde->to_olap_string(field);
+        EXPECT_EQ(result_str, tc.expected_str) << "DateV1 to_olap_string failed for " << tc.year
+                                               << "-" << tc.month << "-" << tc.day;
+
+        // Round-trip
+        Field restored_field;
+        auto status = serde->from_zonemap_string(result_str, restored_field);
+        // ASSERT (not EXPECT): restored_field must not be read if parsing failed.
+        ASSERT_TRUE(status.ok()) << status.to_string();
+        auto restored_val = restored_field.get<TYPE_DATE>();
+        EXPECT_EQ(restored_val.year(), tc.year);
+        EXPECT_EQ(restored_val.month(), tc.month);
+        EXPECT_EQ(restored_val.day(), tc.day);
+    }
+}
+
+// ---------------------------------------------------------------------------
+// DateTimeV1 (TYPE_DATETIME): to_olap_string outputs "YYYY-MM-DD HH:MM:SS".
+// Internal representation: VecDateTimeValue, stored as uint64_t in OLAP.
+// The old ZoneMap used the format:
+//   YYYYMMDDHHMMSSxxxxxx → "YYYY-MM-DD HH:MM:SS".
+// from_zonemap_string uses CastToDateOrDatetime::from_string_non_strict_mode<true>.
+// ---------------------------------------------------------------------------
+TEST_F(OlapTypeTest, ser_deser_datetimev1) {
+    auto data_type_ptr = DataTypeFactory::instance().create_data_type(TYPE_DATETIME, false);
+    auto serde = data_type_ptr->get_serde();
+
+    struct TestCase {
+        int year, month, day, hour, minute, second;
+        std::string expected_str;
+    };
+    std::vector<TestCase> test_cases = {
+            {2023, 6, 15, 14, 30, 59, "2023-06-15 14:30:59"},
+            {2000, 1, 1, 0, 0, 0, "2000-01-01 00:00:00"},
+            {1970, 1, 1, 0, 0, 0, "1970-01-01 00:00:00"},
+            {9999, 12, 31, 23, 59, 59, "9999-12-31 23:59:59"},
+    };
+
+    for (const auto& tc : test_cases) {
+        VecDateTimeValue dt_val;
+        dt_val.unchecked_set_time(tc.year, tc.month, tc.day, tc.hour, tc.minute, tc.second);
+        dt_val.set_type(TIME_DATETIME);
+
+        auto field = Field::create_field<TYPE_DATETIME>(dt_val);
+        auto result_str = serde->to_olap_string(field);
+        EXPECT_EQ(result_str, tc.expected_str)
+                << "DateTimeV1 to_olap_string failed for " << tc.expected_str;
+
+        // Round-trip
+        Field restored_field;
+        auto status = serde->from_zonemap_string(result_str, restored_field);
+        // ASSERT (not EXPECT): restored_field must not be read if parsing failed.
+        ASSERT_TRUE(status.ok()) << status.to_string();
+        auto restored_val = restored_field.get<TYPE_DATETIME>();
+        EXPECT_EQ(restored_val.year(), tc.year);
+        EXPECT_EQ(restored_val.month(), tc.month);
+        EXPECT_EQ(restored_val.day(), tc.day);
+        EXPECT_EQ(restored_val.hour(), tc.hour);
+        EXPECT_EQ(restored_val.minute(), tc.minute);
+        EXPECT_EQ(restored_val.second(), tc.second);
+    }
+}
+
+// ---------------------------------------------------------------------------
+// DateV2 (TYPE_DATEV2): to_olap_string outputs "YYYY-MM-DD".
+// Internal: DateV2Value<DateV2ValueType>, stored as uint32_t (bit-packed).
+// Bit layout: year(16bits) << 9 | month(4bits) << 5 | day(5bits).
+// from_zonemap_string uses strptime "%Y-%m-%d", then bit-packs the parsed date.
+// ---------------------------------------------------------------------------
+TEST_F(OlapTypeTest, ser_deser_datev2) {
+    auto data_type_ptr = DataTypeFactory::instance().create_data_type(TYPE_DATEV2, false);
+    auto serde = data_type_ptr->get_serde();
+
+    struct TestCase {
+        int year, month, day;
+        std::string expected_str;
+    };
+    std::vector<TestCase> test_cases = {
+            {2023, 1, 15, "2023-01-15"},  {2000, 12, 31, "2000-12-31"}, {1970, 1, 1, "1970-01-01"},
+            {9999, 12, 31, "9999-12-31"}, {1, 1, 1, "0001-01-01"},
+    };
+
+    for (const auto& tc : test_cases) {
+        DateV2Value<DateV2ValueType> date_val;
+        date_val.unchecked_set_time(tc.year, tc.month, tc.day, 0, 0, 0, 0);
+
+        auto field = Field::create_field<TYPE_DATEV2>(date_val);
+        auto result_str = serde->to_olap_string(field);
+        EXPECT_EQ(result_str, tc.expected_str)
+                << "DateV2 to_olap_string failed for " << tc.expected_str;
+
+        // Round-trip
+        Field restored_field;
+        auto status = serde->from_zonemap_string(result_str, restored_field);
+        // ASSERT (not EXPECT): restored_field must not be read if parsing failed.
+        ASSERT_TRUE(status.ok()) << status.to_string();
+        auto restored_val = restored_field.get<TYPE_DATEV2>();
+        EXPECT_EQ(restored_val.year(), tc.year);
+        EXPECT_EQ(restored_val.month(), tc.month);
+        EXPECT_EQ(restored_val.day(), tc.day);
+    }
+}
+
+// ---------------------------------------------------------------------------
+// DateTimeV2 (TYPE_DATETIMEV2): to_olap_string outputs "YYYY-MM-DD HH:MM:SS[.ffffff]".
+// Internal: DateV2Value<DateTimeV2ValueType>, stored as uint64_t (bit-packed).
+// to_olap_string calls CastToString::from_datetimev2(value) with DEFAULT scale=-1.
+// With scale=-1, the microsecond part is appended ONLY if microsecond > 0,
+// and always with 6 digits when present.
+//
+// Note: the old ZoneMap code in types.h used value.to_string(6) which ALWAYS
+// outputs 6 fractional digits even when microsecond=0.
+//
+// Multiple scale values are tested:
+//   scale=0: no fractional seconds (input microseconds are stored but to_olap_string
+//            still uses default scale=-1, so microseconds appear if non-zero)
+//   scale=3: millisecond precision
+//   scale=6: microsecond precision (full precision)
+//
+// from_zonemap_string uses from_date_format_str("%Y-%m-%d %H:%i:%s.%f").
+// ---------------------------------------------------------------------------
+TEST_F(OlapTypeTest, ser_deser_datetimev2_no_microsecond) {
+    // Test with scale=0: no fractional seconds expected
+    auto data_type_ptr = DataTypeFactory::instance().create_data_type(
+            TYPE_DATETIMEV2, /*is_nullable=*/false, /*precision=*/0, /*scale=*/0);
+    auto serde = data_type_ptr->get_serde();
+
+    struct TestCase {
+        int year, month, day, hour, minute, second;
+        uint32_t microsecond;
+        std::string expected_str;
+    };
+    std::vector<TestCase> test_cases = {
+            // No microseconds → no fractional part in output
+            {2023, 6, 15, 14, 30, 59, 0, "2023-06-15 14:30:59"},
+            {2000, 1, 1, 0, 0, 0, 0, "2000-01-01 00:00:00"},
+            {9999, 12, 31, 23, 59, 59, 0, "9999-12-31 23:59:59"},
+    };
+
+    for (const auto& tc : test_cases) {
+        DateV2Value<DateTimeV2ValueType> dt_val;
+        dt_val.unchecked_set_time(tc.year, tc.month, tc.day, tc.hour, tc.minute, tc.second,
+                                  tc.microsecond);
+        auto field = Field::create_field<TYPE_DATETIMEV2>(dt_val);
+        auto result_str = serde->to_olap_string(field);
+        EXPECT_EQ(result_str, tc.expected_str)
+                << "DateTimeV2(scale=0) to_olap_string failed for " << tc.expected_str;
+
+        // Round-trip
+        Field restored_field;
+        auto status = serde->from_zonemap_string(result_str, restored_field);
+        // ASSERT (not EXPECT): restored_field must not be read if parsing failed.
+        ASSERT_TRUE(status.ok()) << status.to_string();
+        auto restored_val = restored_field.get<TYPE_DATETIMEV2>();
+        EXPECT_EQ(restored_val.year(), tc.year);
+        EXPECT_EQ(restored_val.month(), tc.month);
+        EXPECT_EQ(restored_val.day(), tc.day);
+        EXPECT_EQ(restored_val.hour(), tc.hour);
+        EXPECT_EQ(restored_val.minute(), tc.minute);
+        EXPECT_EQ(restored_val.second(), tc.second);
+        // A string without a fractional part must restore microsecond == 0.
+        EXPECT_EQ(restored_val.microsecond(), tc.microsecond);
+    }
+}
+
+// DateTimeV2 with scale=6: checks that non-zero microseconds are written as a 6-digit
+// fraction and restored exactly, and that microsecond=0 produces no fractional part.
+TEST_F(OlapTypeTest, ser_deser_datetimev2_with_microsecond) {
+    // Test with scale=6 (full microsecond precision)
+    // to_olap_string default scale=-1 will output microseconds when > 0
+    auto data_type_ptr = DataTypeFactory::instance().create_data_type(
+            TYPE_DATETIMEV2, /*is_nullable=*/false, /*precision=*/0, /*scale=*/6);
+    auto serde = data_type_ptr->get_serde();
+
+    struct TestCase {
+        int year, month, day, hour, minute, second;
+        uint32_t microsecond;
+        std::string expected_str;
+    };
+    std::vector<TestCase> test_cases = {
+            // microsecond=123456 → ".123456"
+            {2023, 6, 15, 14, 30, 59, 123456, "2023-06-15 14:30:59.123456"},
+            // microsecond=1 → ".000001"
+            {2023, 1, 1, 0, 0, 0, 1, "2023-01-01 00:00:00.000001"},
+            // microsecond=999999 → ".999999"
+            {9999, 12, 31, 23, 59, 59, 999999, "9999-12-31 23:59:59.999999"},
+            // microsecond=100000 → ".100000"
+            {2023, 3, 15, 12, 0, 0, 100000, "2023-03-15 12:00:00.100000"},
+            // microsecond=0 → no fractional part (scale=-1 omits when microsecond=0)
+            {2023, 3, 15, 12, 0, 0, 0, "2023-03-15 12:00:00"},
+    };
+
+    for (const auto& tc : test_cases) {
+        DateV2Value<DateTimeV2ValueType> dt_val;
+        dt_val.unchecked_set_time(tc.year, tc.month, tc.day, tc.hour, tc.minute, tc.second,
+                                  tc.microsecond);
+        auto field = Field::create_field<TYPE_DATETIMEV2>(dt_val);
+        auto result_str = serde->to_olap_string(field);
+        EXPECT_EQ(result_str, tc.expected_str)
+                << "DateTimeV2(scale=6) to_olap_string failed for " << tc.expected_str;
+
+        // Round-trip
+        Field restored_field;
+        auto status = serde->from_zonemap_string(result_str, restored_field);
+        // ASSERT (not EXPECT): restored_field must not be read if parsing failed.
+        ASSERT_TRUE(status.ok()) << status.to_string();
+        auto restored_val = restored_field.get<TYPE_DATETIMEV2>();
+        EXPECT_EQ(restored_val.year(), tc.year);
+        EXPECT_EQ(restored_val.month(), tc.month);
+        EXPECT_EQ(restored_val.day(), tc.day);
+        EXPECT_EQ(restored_val.hour(), tc.hour);
+        EXPECT_EQ(restored_val.minute(), tc.minute);
+        EXPECT_EQ(restored_val.second(), tc.second);
+        EXPECT_EQ(restored_val.microsecond(), tc.microsecond);
+    }
+}
+
+// DateTimeV2 with scale=3: checks that a millisecond-aligned value is still written
+// with a 6-digit fraction and restored with the full microsecond value.
+TEST_F(OlapTypeTest, ser_deser_datetimev2_scale3) {
+    // Test with scale=3 (millisecond precision)
+    // to_olap_string uses default scale=-1, so behavior is the same as scale=6
+    // for the output: microsecond part appears ONLY if > 0, always 6 digits.
+    // However, the data type has scale=3, meaning from_zonemap_string should still
+    // be able to parse back the full microsecond value stored in the field.
+    auto data_type_ptr = DataTypeFactory::instance().create_data_type(
+            TYPE_DATETIMEV2, /*is_nullable=*/false, /*precision=*/0, /*scale=*/3);
+    auto serde = data_type_ptr->get_serde();
+
+    {
+        // 123000 microseconds (= 123 milliseconds)
+        // to_olap_string outputs the full 6-digit microsecond: ".123000"
+        DateV2Value<DateTimeV2ValueType> dt_val;
+        dt_val.unchecked_set_time(2023, 6, 15, 14, 30, 59, 123000);
+        auto field = Field::create_field<TYPE_DATETIMEV2>(dt_val);
+        auto result_str = serde->to_olap_string(field);
+        EXPECT_EQ(result_str, "2023-06-15 14:30:59.123000")
+                << "DateTimeV2(scale=3) to_olap_string failed";
+
+        // Round-trip
+        Field restored_field;
+        auto status = serde->from_zonemap_string(result_str, restored_field);
+        // ASSERT (not EXPECT): restored_field must not be read if parsing failed.
+        ASSERT_TRUE(status.ok()) << status.to_string();
+        auto restored_val = restored_field.get<TYPE_DATETIMEV2>();
+        EXPECT_EQ(restored_val.year(), 2023);
+        EXPECT_EQ(restored_val.month(), 6);
+        EXPECT_EQ(restored_val.day(), 15);
+        EXPECT_EQ(restored_val.hour(), 14);
+        EXPECT_EQ(restored_val.minute(), 30);
+        EXPECT_EQ(restored_val.second(), 59);
+        EXPECT_EQ(restored_val.microsecond(), 123000);
+    }
+}
} // namespace doris
\ No newline at end of file
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]