This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch branch-4.1
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-4.1 by this push:
new 05c33161b23 pick memcpy alignement and datetime related pr to 4.1
(#61620)
05c33161b23 is described below
commit 05c33161b23f1c95a8e79dcd395273e2884d8a9f
Author: yiguolei <[email protected]>
AuthorDate: Mon Mar 23 21:45:30 2026 +0800
pick memcpy alignement and datetime related pr to 4.1 (#61620)
---
.../core/data_type_serde/data_type_bitmap_serde.h | 6 +-
.../data_type_date_or_datetime_serde.cpp | 20 +
.../data_type_date_or_datetime_serde.h | 6 +-
.../data_type_serde/data_type_datetimev2_serde.cpp | 13 +
.../data_type_serde/data_type_datetimev2_serde.h | 7 +-
.../data_type_serde/data_type_datev2_serde.cpp | 13 +
.../core/data_type_serde/data_type_datev2_serde.h | 7 +-
.../data_type_serde/data_type_decimal_serde.cpp | 26 +-
.../core/data_type_serde/data_type_decimal_serde.h | 7 +-
be/src/core/data_type_serde/data_type_hll_serde.h | 7 +-
.../core/data_type_serde/data_type_ipv4_serde.cpp | 9 +
be/src/core/data_type_serde/data_type_ipv4_serde.h | 6 +-
.../core/data_type_serde/data_type_ipv6_serde.cpp | 9 +
be/src/core/data_type_serde/data_type_ipv6_serde.h | 6 +-
.../data_type_serde/data_type_nullable_serde.cpp | 14 +-
.../data_type_serde/data_type_nullable_serde.h | 4 +-
.../data_type_serde/data_type_number_serde.cpp | 31 +
.../core/data_type_serde/data_type_number_serde.h | 7 +-
.../data_type_quantilestate_serde.h | 7 +-
be/src/core/data_type_serde/data_type_serde.h | 51 +-
.../data_type_serde/data_type_string_serde.cpp | 12 +
.../core/data_type_serde/data_type_string_serde.h | 6 +-
be/src/core/data_type_serde/data_type_time_serde.h | 7 +-
.../data_type_serde/data_type_timestamptz_serde.h | 7 +-
be/src/core/memcpy_small.h | 8 +-
be/src/exec/common/hash_table/hash_map_context.h | 41 +-
be/src/storage/delete/delete_handler.cpp | 383 ++----
be/src/storage/index/zone_map/zone_map_index.cpp | 12 +-
be/src/storage/schema_change/schema_change.cpp | 5 +-
be/src/storage/segment/column_reader.cpp | 3 +-
be/src/util/io_helper.h | 8 +
be/test/storage/olap_type_test.cpp | 1424 ++++++++++++++++++++
32 files changed, 1823 insertions(+), 349 deletions(-)
diff --git a/be/src/core/data_type_serde/data_type_bitmap_serde.h
b/be/src/core/data_type_serde/data_type_bitmap_serde.h
index 49ed3cd875e..023813b8f7f 100644
--- a/be/src/core/data_type_serde/data_type_bitmap_serde.h
+++ b/be/src/core/data_type_serde/data_type_bitmap_serde.h
@@ -37,8 +37,6 @@ public:
Status from_string(StringRef& str, IColumn& column,
const FormatOptions& options) const override;
- Status from_olap_string(const std::string& str, Field& field,
- const FormatOptions& options) const override;
Status serialize_one_cell_to_json(const IColumn& column, int64_t row_num,
BufferWritable& bw,
FormatOptions& options) const override;
@@ -87,5 +85,9 @@ public:
void to_string(const IColumn& column, size_t row_num, BufferWritable& bw,
const FormatOptions& options) const override;
+
+protected:
+ Status from_olap_string(const std::string& str, Field& field,
+ const FormatOptions& options) const override;
};
} // namespace doris
diff --git a/be/src/core/data_type_serde/data_type_date_or_datetime_serde.cpp
b/be/src/core/data_type_serde/data_type_date_or_datetime_serde.cpp
index 8c055c22626..757fd15fa93 100644
--- a/be/src/core/data_type_serde/data_type_date_or_datetime_serde.cpp
+++ b/be/src/core/data_type_serde/data_type_date_or_datetime_serde.cpp
@@ -395,6 +395,18 @@ Status DataTypeDateSerDe<T>::from_string(StringRef& str,
IColumn& column,
return Status::OK();
}
+// Deserializes a DateV1 or DateTimeV1 value from its OLAP string
representation
+// (e.g. from ZoneMap protobuf). This is the inverse of to_olap_string().
+//
+// Uses CastToDateOrDatetime::from_string_non_strict_mode which accepts
flexible date/time formats.
+//
+// Note: DateTimeV1 (VecDateTimeValue) does NOT support microsecond precision.
+// VecDateTimeValue::microsecond() always returns 0 — the _microsecond field
was removed
+// to reduce memory footprint. So the round-trip format is always
second-level precision.
+//
+// Expected input formats:
+// DateV1: "YYYY-MM-DD" e.g. "2023-10-15"
+// DateTimeV1: "YYYY-MM-DD HH:MM:SS" e.g. "2023-10-15 14:30:00"
template <PrimitiveType T>
Status DataTypeDateSerDe<T>::from_olap_string(const std::string& str, Field&
field,
const FormatOptions& options)
const {
@@ -576,6 +588,14 @@ Status
DataTypeDateSerDe<T>::from_decimal_strict_mode_batch(
return Status::OK();
}
+// Serializes a DateV1 or DateTimeV1 value to its OLAP string representation
for ZoneMap storage.
+// This is the inverse of from_olap_string().
+//
+// Internally calls VecDateTimeValue::to_string(buf) which produces:
+// DateV1: "YYYY-MM-DD" e.g. "2023-10-15"
+// DateTimeV1: "YYYY-MM-DD HH:MM:SS" e.g. "2023-10-15 14:30:00"
+//
+// Note: DateTimeV1 never includes microseconds
(VecDateTimeValue::microsecond() always returns 0).
template <PrimitiveType T>
std::string DataTypeDateSerDe<T>::to_olap_string(const Field& field) const {
char buf[64];
diff --git a/be/src/core/data_type_serde/data_type_date_or_datetime_serde.h
b/be/src/core/data_type_serde/data_type_date_or_datetime_serde.h
index c28677b117f..693bfca7385 100644
--- a/be/src/core/data_type_serde/data_type_date_or_datetime_serde.h
+++ b/be/src/core/data_type_serde/data_type_date_or_datetime_serde.h
@@ -51,9 +51,6 @@ public:
Status from_string(StringRef& str, IColumn& column,
const FormatOptions& options) const override;
- Status from_olap_string(const std::string& str, Field& field,
- const FormatOptions& options) const override;
-
Status from_string_strict_mode(StringRef& str, IColumn& column,
const FormatOptions& options) const
override;
@@ -119,6 +116,9 @@ public:
std::string to_olap_string(const Field& field) const override;
protected:
+ Status from_olap_string(const std::string& str, Field& field,
+ const FormatOptions& options) const override;
+
template <bool is_date>
Status _read_column_from_arrow(IColumn& column, const arrow::Array*
arrow_array, int64_t start,
int64_t end, const cctz::time_zone& ctz)
const;
diff --git a/be/src/core/data_type_serde/data_type_datetimev2_serde.cpp
b/be/src/core/data_type_serde/data_type_datetimev2_serde.cpp
index d5e64b0b52c..12f65e86bd7 100644
--- a/be/src/core/data_type_serde/data_type_datetimev2_serde.cpp
+++ b/be/src/core/data_type_serde/data_type_datetimev2_serde.cpp
@@ -122,6 +122,19 @@ Status DataTypeDateTimeV2SerDe::from_string(StringRef&
str, IColumn& column,
return Status::OK();
}
+// Deserializes a DateTimeV2 value from its OLAP string representation (e.g.
from ZoneMap protobuf).
+// This is the inverse of to_olap_string().
+//
+// Uses from_date_format_str("%Y-%m-%d %H:%i:%s.%f") to parse.
+// DateTimeV2 supports microsecond precision (scale 0-6) via a 20-bit
microsecond_ field.
+//
+// Expected input format: "YYYY-MM-DD HH:MM:SS[.ffffff]"
+// Examples:
+// "2023-10-15 14:30:00" => scale 0, microsecond = 0
+// "2023-10-15 14:30:00.123000" => scale 6, microsecond = 123000
+// "2023-10-15 14:30:00.123" => scale 3, microsecond = 123000
+//
+// On parse failure, falls back to MIN_DATETIME_V2.
Status DataTypeDateTimeV2SerDe::from_olap_string(const std::string& str,
Field& field,
const FormatOptions& options)
const {
CastParameters params {.status = Status::OK(), .is_strict = false};
diff --git a/be/src/core/data_type_serde/data_type_datetimev2_serde.h
b/be/src/core/data_type_serde/data_type_datetimev2_serde.h
index 22f6d072428..0389432a621 100644
--- a/be/src/core/data_type_serde/data_type_datetimev2_serde.h
+++ b/be/src/core/data_type_serde/data_type_datetimev2_serde.h
@@ -41,9 +41,6 @@ public:
Status from_string_batch(const ColumnString& str, ColumnNullable& column,
const FormatOptions& options) const final;
- Status from_olap_string(const std::string& str, Field& field,
- const FormatOptions& options) const override;
-
Status from_string_strict_mode_batch(const ColumnString& str, IColumn&
column,
const FormatOptions& options,
const NullMap::value_type* null_map =
nullptr) const final;
@@ -112,6 +109,10 @@ public:
std::string to_olap_string(const Field& field) const override;
+protected:
+ Status from_olap_string(const std::string& str, Field& field,
+ const FormatOptions& options) const override;
+
private:
int _scale;
};
diff --git a/be/src/core/data_type_serde/data_type_datev2_serde.cpp
b/be/src/core/data_type_serde/data_type_datev2_serde.cpp
index 87c577d076b..dc6712acaa8 100644
--- a/be/src/core/data_type_serde/data_type_datev2_serde.cpp
+++ b/be/src/core/data_type_serde/data_type_datev2_serde.cpp
@@ -228,6 +228,14 @@ Status DataTypeDateV2SerDe::from_string_batch(const
ColumnString& col_str, Colum
return Status::OK();
}
+// Deserializes a DateV2 value from its OLAP string representation (e.g. from
ZoneMap protobuf).
+// This is the inverse of to_olap_string().
+//
+// Uses strptime("%Y-%m-%d") to parse, then bit-packs into DateV2 internal
format:
+// uint32_t value = (year << 9) | (month << 5) | day
+//
+// Expected input format: "YYYY-MM-DD", e.g. "2023-10-15"
+// On parse failure, falls back to MIN_DATE_V2.
Status DataTypeDateV2SerDe::from_olap_string(const std::string& str, Field&
field,
const FormatOptions& options)
const {
CastParameters params {.status = Status::OK(), .is_strict = false};
@@ -441,6 +449,11 @@ Status DataTypeDateV2SerDe::from_decimal_strict_mode_batch(
return Status::OK();
}
+// Serializes a DateV2 value to its OLAP string representation for ZoneMap
storage.
+// This is the inverse of from_olap_string().
+//
+// Delegates to CastToString::from_datev2() which calls
DateV2Value::to_string(buf).
+// Output format: "YYYY-MM-DD", e.g. "2023-10-15"
std::string DataTypeDateV2SerDe::to_olap_string(const Field& field) const {
return CastToString::from_datev2(field.get<TYPE_DATEV2>());
}
diff --git a/be/src/core/data_type_serde/data_type_datev2_serde.h
b/be/src/core/data_type_serde/data_type_datev2_serde.h
index 0a33c51c806..0375f9be4b4 100644
--- a/be/src/core/data_type_serde/data_type_datev2_serde.h
+++ b/be/src/core/data_type_serde/data_type_datev2_serde.h
@@ -40,9 +40,6 @@ public:
Status from_string_batch(const ColumnString& str, ColumnNullable& column,
const FormatOptions& options) const final;
- Status from_olap_string(const std::string& str, Field& field,
- const FormatOptions& options) const override;
-
Status from_string_strict_mode_batch(const ColumnString& str, IColumn&
column,
const FormatOptions& options,
const NullMap::value_type* null_map =
nullptr) const final;
@@ -108,5 +105,9 @@ public:
int64_t row_num) const override;
std::string to_olap_string(const Field& field) const override;
+
+protected:
+ Status from_olap_string(const std::string& str, Field& field,
+ const FormatOptions& options) const override;
};
} // namespace doris
diff --git a/be/src/core/data_type_serde/data_type_decimal_serde.cpp
b/be/src/core/data_type_serde/data_type_decimal_serde.cpp
index 10babcd9a54..1e277bda86b 100644
--- a/be/src/core/data_type_serde/data_type_decimal_serde.cpp
+++ b/be/src/core/data_type_serde/data_type_decimal_serde.cpp
@@ -133,8 +133,16 @@ Status DataTypeDecimalSerDe<T>::from_olap_string(const
std::string& str, Field&
CastParameters params;
params.is_strict = false;
- // Decimal string in storage is saved as an integer. The scale is
maintained by data type, so we
- // can just parse the string as an integer here.
+ // DecimalV3 (Decimal32/64/128I/256): zonemap stores the raw unscaled
integer string.
+ // E.g., Decimal(9,2) value 123.45 → to_olap_string() → "12345".
+ // Caller sets ignore_scale=true → parse with scale=0 → internal int
12345. Correct.
+ //
+ // DecimalV2: zonemap stores "integer.fraction" with 9 zero-padded
fractional digits.
+ // E.g., DecimalV2 value 123.456 → to_olap_string() → "123.456000000".
+ // from_zonemap_string() passes ignore_scale=true for all types; DecimalV2
+ // still round-trips because read_decimal_text_impl() currently hardcodes
+ // DecimalV2Value::SCALE=9 and so ignores the passed-in scale. Only the
+ // from_fe_string() path leaves ignore_scale=false.
if (!CastToDecimal::from_string(StringRef(str), to,
static_cast<UInt32>(precision),
options.ignore_scale ? 0 :
static_cast<UInt32>(scale),
params)) {
@@ -504,15 +512,29 @@ template <PrimitiveType T>
std::string DataTypeDecimalSerDe<T>::to_olap_string(const Field& field) const {
auto value = field.get<T>();
if constexpr (T == TYPE_DECIMALV2) {
+ // DecimalV2 outputs "integer.fraction" with 9 zero-padded fractional
digits.
+ // E.g., DecimalV2 value 123.456 → int_value=123, frac_value=456000000
+ // → decimal12_t(123, 456000000).to_string() → "123.456000000".
+ // from_zonemap_string() sets ignore_scale=true internally, but
DecimalV2's parser
+ // hardcodes scale=9 regardless, so the round-trip is correct either
way.
decimal12_t decimal_val(value.int_value(), value.frac_value());
return decimal_val.to_string();
} else if constexpr (T == TYPE_DECIMAL256) {
+ // DecimalV3: outputs the raw unscaled integer string.
+ // E.g., Decimal256(76,10) value 123.456 → internal int = 1234560000000
+ // → "1234560000000".
+ // from_zonemap_string() sets ignore_scale=true to parse this as a raw
integer.
return wide::to_string(value.value);
} else if constexpr (T == TYPE_DECIMAL128I) {
+ // Same as Decimal256: raw unscaled integer.
+ // E.g., Decimal(38,6) value 123.456 → internal int128 = 123456000
+ // → "123456000".
fmt::memory_buffer buffer;
fmt::format_to(buffer, "{}", value.value);
return std::string(buffer.data(), buffer.size());
} else {
+ // Decimal32/64: raw unscaled integer.
+ // E.g., Decimal(9,2) value 123.45 → internal int32 = 12345 → "12345".
return std::to_string(value.value);
}
}
diff --git a/be/src/core/data_type_serde/data_type_decimal_serde.h
b/be/src/core/data_type_serde/data_type_decimal_serde.h
index 61349fa6eef..140c8e3a292 100644
--- a/be/src/core/data_type_serde/data_type_decimal_serde.h
+++ b/be/src/core/data_type_serde/data_type_decimal_serde.h
@@ -57,9 +57,6 @@ public:
Status from_string_batch(const ColumnString& str, ColumnNullable& column,
const FormatOptions& options) const override;
- Status from_olap_string(const std::string& str, Field& field,
- const FormatOptions& options) const override;
-
Status from_string_strict_mode_batch(
const ColumnString& str, IColumn& column, const FormatOptions&
options,
const NullMap::value_type* null_map = nullptr) const override;
@@ -142,6 +139,10 @@ public:
static const uint8_t* deserialize_binary_to_field(const uint8_t* data,
Field& field,
FieldInfo& info);
+protected:
+ Status from_olap_string(const std::string& str, Field& field,
+ const FormatOptions& options) const override;
+
private:
int precision;
int scale;
diff --git a/be/src/core/data_type_serde/data_type_hll_serde.h
b/be/src/core/data_type_serde/data_type_hll_serde.h
index 96b5b083da7..5df89f7f4d7 100644
--- a/be/src/core/data_type_serde/data_type_hll_serde.h
+++ b/be/src/core/data_type_serde/data_type_hll_serde.h
@@ -38,9 +38,6 @@ public:
Status from_string(StringRef& str, IColumn& column,
const FormatOptions& options) const override;
- Status from_olap_string(const std::string& str, Field& field,
- const FormatOptions& options) const override;
-
Status serialize_one_cell_to_json(const IColumn& column, int64_t row_num,
BufferWritable& bw,
FormatOptions& options) const override;
Status serialize_column_to_json(const IColumn& column, int64_t start_idx,
int64_t end_idx,
@@ -82,5 +79,9 @@ public:
void to_string(const IColumn& column, size_t row_num, BufferWritable& bw,
const FormatOptions& options) const override;
+
+protected:
+ Status from_olap_string(const std::string& str, Field& field,
+ const FormatOptions& options) const override;
};
} // namespace doris
diff --git a/be/src/core/data_type_serde/data_type_ipv4_serde.cpp
b/be/src/core/data_type_serde/data_type_ipv4_serde.cpp
index d5b4813c1f3..4eb969c138f 100644
--- a/be/src/core/data_type_serde/data_type_ipv4_serde.cpp
+++ b/be/src/core/data_type_serde/data_type_ipv4_serde.cpp
@@ -178,6 +178,11 @@ Status DataTypeIPv4SerDe::from_string(StringRef& str,
IColumn& column,
return Status::OK();
}
+// Deserializes an IPv4 value from its OLAP string representation (e.g. from
ZoneMap protobuf).
+// This is the inverse of to_olap_string().
+//
+// Uses CastToIPv4::from_string to parse standard dotted-decimal notation.
+// Expected input format: "A.B.C.D", e.g. "192.168.1.1"
Status DataTypeIPv4SerDe::from_olap_string(const std::string& str, Field&
field,
const FormatOptions& options) const
{
CastParameters params;
@@ -222,6 +227,10 @@ void DataTypeIPv4SerDe::write_one_cell_to_binary(const
IColumn& src_column,
memcpy(chars.data() + old_size + sizeof(uint8_t), data_ref.data,
data_ref.size);
}
+// Serializes an IPv4 value to its OLAP string representation for ZoneMap
storage.
+// This is the inverse of from_olap_string().
+// Uses CastToString::from_ip() to produce standard dotted-decimal notation.
+// Output format: "A.B.C.D", e.g. "192.168.1.1"
std::string DataTypeIPv4SerDe::to_olap_string(const Field& field) const {
return CastToString::from_ip(field.get<TYPE_IPV4>());
}
diff --git a/be/src/core/data_type_serde/data_type_ipv4_serde.h
b/be/src/core/data_type_serde/data_type_ipv4_serde.h
index a3ff4bf9036..e23695dbca5 100644
--- a/be/src/core/data_type_serde/data_type_ipv4_serde.h
+++ b/be/src/core/data_type_serde/data_type_ipv4_serde.h
@@ -65,8 +65,6 @@ public:
Status from_string(StringRef& str, IColumn& column,
const FormatOptions& options) const override;
- Status from_olap_string(const std::string& str, Field& field,
- const FormatOptions& options) const override;
Status from_string_strict_mode(StringRef& str, IColumn& column,
const FormatOptions& options) const
override;
@@ -75,5 +73,9 @@ public:
int64_t row_num) const override;
std::string to_olap_string(const Field& field) const override;
+
+protected:
+ Status from_olap_string(const std::string& str, Field& field,
+ const FormatOptions& options) const override;
};
} // namespace doris
diff --git a/be/src/core/data_type_serde/data_type_ipv6_serde.cpp
b/be/src/core/data_type_serde/data_type_ipv6_serde.cpp
index f60dc892fbf..ccd5a236807 100644
--- a/be/src/core/data_type_serde/data_type_ipv6_serde.cpp
+++ b/be/src/core/data_type_serde/data_type_ipv6_serde.cpp
@@ -276,6 +276,11 @@ Status DataTypeIPv6SerDe::from_string(StringRef& str,
IColumn& column,
return Status::OK();
}
+// Deserializes an IPv6 value from its OLAP string representation (e.g. from
ZoneMap protobuf).
+// This is the inverse of to_olap_string().
+//
+// Uses CastToIPv6::from_string to parse standard IPv6 notation.
+// Expected input format: standard IPv6, e.g. "::1", "2001:db8::1",
"fe80::1%25eth0"
Status DataTypeIPv6SerDe::from_olap_string(const std::string& str, Field&
field,
const FormatOptions& options) const
{
CastParameters params;
@@ -320,6 +325,10 @@ void DataTypeIPv6SerDe::write_one_cell_to_binary(const
IColumn& src_column,
memcpy(chars.data() + old_size + sizeof(uint8_t), data_ref.data,
data_ref.size);
}
+// Serializes an IPv6 value to its OLAP string representation for ZoneMap
storage.
+// This is the inverse of from_olap_string().
+// Uses CastToString::from_ip() to produce standard IPv6 notation.
+// Output format: standard IPv6, e.g. "::1", "2001:db8::1"
std::string DataTypeIPv6SerDe::to_olap_string(const Field& field) const {
return CastToString::from_ip(field.get<TYPE_IPV6>());
}
diff --git a/be/src/core/data_type_serde/data_type_ipv6_serde.h
b/be/src/core/data_type_serde/data_type_ipv6_serde.h
index 20b6960e61d..226e65663f0 100644
--- a/be/src/core/data_type_serde/data_type_ipv6_serde.h
+++ b/be/src/core/data_type_serde/data_type_ipv6_serde.h
@@ -75,8 +75,6 @@ public:
Status from_string(StringRef& str, IColumn& column,
const FormatOptions& options) const override;
- Status from_olap_string(const std::string& str, Field& field,
- const FormatOptions& options) const override;
Status from_string_strict_mode(StringRef& str, IColumn& column,
const FormatOptions& options) const
override;
@@ -85,5 +83,9 @@ public:
int64_t row_num) const override;
std::string to_olap_string(const Field& field) const override;
+
+protected:
+ Status from_olap_string(const std::string& str, Field& field,
+ const FormatOptions& options) const override;
};
} // namespace doris
diff --git a/be/src/core/data_type_serde/data_type_nullable_serde.cpp
b/be/src/core/data_type_serde/data_type_nullable_serde.cpp
index 175da193187..6ca2e07b7b0 100644
--- a/be/src/core/data_type_serde/data_type_nullable_serde.cpp
+++ b/be/src/core/data_type_serde/data_type_nullable_serde.cpp
@@ -494,9 +494,17 @@ Status DataTypeNullableSerDe::from_string(StringRef& str,
IColumn& column,
return Status::OK();
}
-Status DataTypeNullableSerDe::from_olap_string(const std::string& str, Field&
field,
- const FormatOptions& options)
const {
- if (!nested_serde->from_olap_string(str, field, options).ok()) {
+Status DataTypeNullableSerDe::from_zonemap_string(const std::string& str,
Field& field) const {
+ if (!nested_serde->from_zonemap_string(str, field).ok()) {
+ // On parse failure, store a null Field instead of propagating the error.
+ field = Field();
+ return Status::OK();
+ }
+ return Status::OK();
+}
+
+Status DataTypeNullableSerDe::from_fe_string(const std::string& str, Field&
field) const {
+ if (!nested_serde->from_fe_string(str, field).ok()) {
// fill null if fail
field = Field();
return Status::OK();
diff --git a/be/src/core/data_type_serde/data_type_nullable_serde.h
b/be/src/core/data_type_serde/data_type_nullable_serde.h
index 49d9d55f9f2..4363e4a573b 100644
--- a/be/src/core/data_type_serde/data_type_nullable_serde.h
+++ b/be/src/core/data_type_serde/data_type_nullable_serde.h
@@ -39,8 +39,8 @@ public:
Status from_string(StringRef& str, IColumn& column,
const FormatOptions& options) const override;
- Status from_olap_string(const std::string& str, Field& field,
- const FormatOptions& options) const override;
+ Status from_zonemap_string(const std::string& str, Field& field) const
override;
+ Status from_fe_string(const std::string& str, Field& field) const override;
Status from_string_strict_mode(StringRef& str, IColumn& column,
const FormatOptions& options) const
override;
diff --git a/be/src/core/data_type_serde/data_type_number_serde.cpp
b/be/src/core/data_type_serde/data_type_number_serde.cpp
index ac488d1a6ae..30b7ad47911 100644
--- a/be/src/core/data_type_serde/data_type_number_serde.cpp
+++ b/be/src/core/data_type_serde/data_type_number_serde.cpp
@@ -742,6 +742,20 @@ Status DataTypeNumberSerDe<T>::from_string(StringRef& str,
IColumn& column,
return Status::OK();
}
+// Serializes a numeric value to its OLAP string representation for ZoneMap
index storage.
+// This is the inverse of from_olap_string().
+//
+// Format by type:
+// - BOOLEAN: "0" or "1" (via snprintf "%d")
+// - TINYINT/SMALLINT/INT/BIGINT: standard integer string, e.g. "42", "-100"
+// - FLOAT: fmt::format("{:.7g}", value), e.g. "3.14", "NaN", "Infinity"
+// - DOUBLE: fmt::format("{:.16g}", value), e.g. "3.141592653589793"
+// - LARGEINT: fmt::format("{}", value), e.g.
"170141183460469231731687303715884105727"
+//
+// Examples:
+// to_olap_string(Field(Int32(12345))) => "12345"
+// to_olap_string(Field(Float32(3.14f))) => "3.14"
+// to_olap_string(Field(Float64(1e300))) => "1e+300"
template <PrimitiveType T>
std::string DataTypeNumberSerDe<T>::to_olap_string(const Field& field) const {
if constexpr (T == TYPE_BOOLEAN) {
@@ -762,6 +776,15 @@ std::string DataTypeNumberSerDe<T>::to_olap_string(const
Field& field) const {
}
}
+// Deserializes a numeric value from its OLAP string representation (e.g. from
ZoneMap protobuf).
+// This is the inverse of to_olap_string(). Uses try_parse_impl with
non-strict mode.
+//
+// FormatOptions is unused for numeric types — the string format is always a
standard number literal.
+//
+// Examples:
+// from_olap_string("12345", field, ...) => field = Int32(12345)
+// from_olap_string("3.14", field, ...) => field = Float32(3.14)
+// from_olap_string("NaN", field, ...) => returns InvalidArgument
(NaN/Inf are rejected)
template <PrimitiveType T>
Status DataTypeNumberSerDe<T>::from_olap_string(const std::string& str, Field&
field,
const FormatOptions& options)
const {
@@ -771,6 +794,14 @@ Status DataTypeNumberSerDe<T>::from_olap_string(const
std::string& str, Field& f
if (!try_parse_impl<T, false>(val, StringRef(str), params)) {
return Status::InvalidArgument("parse number fail, string: '{}'", str);
}
+ // NaN and Infinity are not allowed in zonemap values or in float values passed
+ // from FE (e.g. a column's default value or schema-change-like operations).
+ if constexpr (is_float_or_double(T)) {
+ if (std::isnan(val) || std::isinf(val)) {
+ return Status::InvalidArgument(
+ "parse number fail: NaN/Infinity not allowed in olap
string: '{}'", str);
+ }
+ }
field = Field::create_field<T>(std::move(val));
return Status::OK();
}
diff --git a/be/src/core/data_type_serde/data_type_number_serde.h
b/be/src/core/data_type_serde/data_type_number_serde.h
index 2158919d112..cc33fe2b684 100644
--- a/be/src/core/data_type_serde/data_type_number_serde.h
+++ b/be/src/core/data_type_serde/data_type_number_serde.h
@@ -62,9 +62,6 @@ public:
Status from_string(StringRef& str, IColumn& column,
const FormatOptions& options) const override;
- Status from_olap_string(const std::string& str, Field& field,
- const FormatOptions& options) const override;
-
Status from_string_strict_mode(StringRef& str, IColumn& column,
const FormatOptions& options) const
override;
@@ -152,6 +149,10 @@ public:
static const uint8_t* deserialize_binary_to_field(const uint8_t* data,
Field& field,
FieldInfo& info);
+
+protected:
+ Status from_olap_string(const std::string& str, Field& field,
+ const FormatOptions& options) const override;
};
template <PrimitiveType T>
diff --git a/be/src/core/data_type_serde/data_type_quantilestate_serde.h
b/be/src/core/data_type_serde/data_type_quantilestate_serde.h
index 858ef2adc72..730a7a64a78 100644
--- a/be/src/core/data_type_serde/data_type_quantilestate_serde.h
+++ b/be/src/core/data_type_serde/data_type_quantilestate_serde.h
@@ -58,9 +58,6 @@ public:
return Status::OK();
}
- Status from_olap_string(const std::string& str, Field& field,
- const FormatOptions& options) const override;
-
Status serialize_column_to_json(const IColumn& column, int64_t start_idx,
int64_t end_idx,
BufferWritable& bw, FormatOptions&
options) const override {
SERIALIZE_COLUMN_TO_JSON();
@@ -189,6 +186,10 @@ public:
data.serialize((uint8_t*)result.data());
bw.write(result.data(), result.size());
}
+
+protected:
+ Status from_olap_string(const std::string& str, Field& field,
+ const FormatOptions& options) const override;
};
#include "common/compile_check_end.h"
} // namespace doris
diff --git a/be/src/core/data_type_serde/data_type_serde.h
b/be/src/core/data_type_serde/data_type_serde.h
index 1f9ac312cce..1643b0cf151 100644
--- a/be/src/core/data_type_serde/data_type_serde.h
+++ b/be/src/core/data_type_serde/data_type_serde.h
@@ -209,8 +209,23 @@ public:
const cctz::time_zone* timezone = nullptr;
/**
- * Ignore scale when converting decimal to string, because decimal in
zone map is stored in
- * unscaled value.
+ * Controls how the `scale` parameter is passed to decimal parsing in
from_olap_string().
+ *
+ * - true: parse with scale=0 (treat the string as a raw unscaled
integer).
+ * Used for DecimalV3 (Decimal32/64/128I/256) whose zonemap
stores the raw
+ * internal integer. E.g., Decimal(9,2) value 123.45 is
stored as "12345";
+ * parsing with scale=0 yields internal int 12345, which is
correct.
+ *
+ * - false: parse with the data type's actual scale.
+ * Used for DecimalV2 whose zonemap stores a human-readable
string with
+ * decimal point via decimal12_t::to_string().
+ * E.g., DecimalV2 value 123.456 is stored as "123.456000000";
+ * parsing with scale=9 correctly restores the original value.
+ *
+ * Note: for DecimalV2, read_decimal_text_impl() currently hardcodes
+ * DecimalV2Value::SCALE=9 regardless of the passed-in scale, so the
flag
+ * does not actually affect DecimalV2 parsing today. However, callers
should
+ * still set it correctly for semantic clarity and future-proofing.
*/
bool ignore_scale = false;
@@ -322,11 +337,24 @@ public:
const FormatOptions& options) const {
return Status::NotSupported("from_string is not supported");
}
- // Convert string which is read from OLAP table to corresponding type.
- // Only used for basic data types, such as Ip, Date, Number, etc.
- virtual Status from_olap_string(const std::string& str, Field& field,
- const FormatOptions& options) const {
- return Status::NotSupported("from_olap_string is not supported");
+ /// Parse a string stored in ZoneMap index back into a Field.
+ /// This is the inverse of to_olap_string(). For DecimalV3,
to_olap_string() stores the
+ /// raw unscaled integer, so from_zonemap_string() internally sets
ignore_scale=true to
+ /// avoid double-scaling. For DecimalV2 and other types, ignore_scale has
no effect.
+ /// Callers: zone_map_index.cpp (min/max deserialization).
+ virtual Status from_zonemap_string(const std::string& str, Field& field)
const {
+ FormatOptions options;
+ options.ignore_scale = true;
+ return from_olap_string(str, field, options);
+ }
+
+ /// Parse a human-readable string from FE (delete conditions, default
values,
+ /// schema change defaults) into a Field. Uses standard decimal parsing
with full
+ /// scale. Callers: delete_handler.cpp, column_reader.cpp
(DefaultValueColumnIterator),
+ /// schema_change.cpp.
+ virtual Status from_fe_string(const std::string& str, Field& field) const {
+ FormatOptions options;
+ return from_olap_string(str, field, options);
}
// For strict mode, we should not have nullable columns, as we will
directly report errors when string conversion fails instead of handling them
@@ -497,6 +525,15 @@ public:
FieldInfo& info);
protected:
+ /// Internal implementation for parsing OLAP storage strings into Fields.
+ /// Not called directly by external code — use from_zonemap_string() or
from_fe_string()
+ /// instead. Subclasses override this to provide type-specific
deserialization.
+ /// For decimals, options.ignore_scale controls whether scale is applied
during parsing.
+ virtual Status from_olap_string(const std::string& str, Field& field,
+ const FormatOptions& options) const {
+ return Status::NotSupported("from_olap_string is not supported");
+ }
+
bool _return_object_as_string = false;
// This parameter indicates what level the serde belongs to and is mainly
used for complex types
// The default level is 1, and each time you nest, the level increases by
1,
diff --git a/be/src/core/data_type_serde/data_type_string_serde.cpp
b/be/src/core/data_type_serde/data_type_string_serde.cpp
index f7466721883..e766e4bb563 100644
--- a/be/src/core/data_type_serde/data_type_string_serde.cpp
+++ b/be/src/core/data_type_serde/data_type_string_serde.cpp
@@ -435,6 +435,8 @@ void DataTypeStringSerDeBase<ColumnType>::to_string(const
IColumn& column, size_
}
}
+// Serializes a STRING/VARCHAR/CHAR value to its OLAP string representation
for ZoneMap storage.
+// This is the inverse of from_olap_string(). Returns the raw string content
directly.
template <typename ColumnType>
std::string DataTypeStringSerDeBase<ColumnType>::to_olap_string(const Field&
field) const {
return field.get<TYPE_STRING>();
@@ -458,6 +460,16 @@ Status
DataTypeStringSerDeBase<ColumnType>::from_string(StringRef& str, IColumn&
return deserialize_one_cell_from_json(column, slice, options);
}
+// Deserializes a STRING/VARCHAR/CHAR value from its OLAP string representation
+// (e.g. from ZoneMap protobuf). This is the inverse of to_olap_string().
+//
+// For CHAR type: if the string is shorter than the declared column length
(_len),
+// pads with '\0' bytes to reach _len. This preserves CHAR's fixed-length
semantics.
+// For STRING/VARCHAR: stores the string as-is.
+//
+// Examples:
+// CHAR(10), str="hello" => field = "hello\0\0\0\0\0" (10 bytes)
+// VARCHAR, str="hello" => field = "hello" (5 bytes)
template <typename ColumnType>
Status DataTypeStringSerDeBase<ColumnType>::from_olap_string(const
std::string& str, Field& field,
const
FormatOptions& options) const {
diff --git a/be/src/core/data_type_serde/data_type_string_serde.h
b/be/src/core/data_type_serde/data_type_string_serde.h
index c6c0438369c..1b07739b8f6 100644
--- a/be/src/core/data_type_serde/data_type_string_serde.h
+++ b/be/src/core/data_type_serde/data_type_string_serde.h
@@ -101,8 +101,6 @@ public:
Status from_string(StringRef& str, IColumn& column,
const FormatOptions& options) const override;
- Status from_olap_string(const std::string& str, Field& field,
- const FormatOptions& options) const override;
Status serialize_one_cell_to_json(const IColumn& column, int64_t row_num,
BufferWritable& bw,
FormatOptions& options) const override;
@@ -265,6 +263,10 @@ public:
std::string to_olap_string(const Field& field) const override;
+protected:
+ Status from_olap_string(const std::string& str, Field& field,
+ const FormatOptions& options) const override;
+
private:
const PrimitiveType _type;
const int _len = -1;
diff --git a/be/src/core/data_type_serde/data_type_time_serde.h
b/be/src/core/data_type_serde/data_type_time_serde.h
index 924d4b97190..dafaa600eb4 100644
--- a/be/src/core/data_type_serde/data_type_time_serde.h
+++ b/be/src/core/data_type_serde/data_type_time_serde.h
@@ -38,9 +38,6 @@ public:
Status from_string(StringRef& str, IColumn& column,
const FormatOptions& options) const override;
- Status from_olap_string(const std::string& str, Field& field,
- const FormatOptions& options) const override;
-
Status from_string_strict_mode(StringRef& str, IColumn& column,
const FormatOptions& options) const
override;
@@ -73,6 +70,10 @@ public:
IColumn& target_col) const;
int get_scale() const override { return _scale; }
+protected:
+ Status from_olap_string(const std::string& str, Field& field,
+ const FormatOptions& options) const override;
+
private:
int _scale;
};
diff --git a/be/src/core/data_type_serde/data_type_timestamptz_serde.h
b/be/src/core/data_type_serde/data_type_timestamptz_serde.h
index 8048731460f..459003e040f 100644
--- a/be/src/core/data_type_serde/data_type_timestamptz_serde.h
+++ b/be/src/core/data_type_serde/data_type_timestamptz_serde.h
@@ -36,9 +36,6 @@ public:
Status from_string(StringRef& str, IColumn& column,
const FormatOptions& options) const override;
- Status from_olap_string(const std::string& str, Field& field,
- const FormatOptions& options) const override;
-
Status from_string_batch(const ColumnString& str, ColumnNullable& column,
const FormatOptions& options) const override;
@@ -77,6 +74,10 @@ public:
std::string to_olap_string(const Field& field) const override;
+protected:
+ Status from_olap_string(const std::string& str, Field& field,
+ const FormatOptions& options) const override;
+
private:
const UInt32 _scale = 6;
};
diff --git a/be/src/core/memcpy_small.h b/be/src/core/memcpy_small.h
index 32d195c1d75..bcdcaa84549 100644
--- a/be/src/core/memcpy_small.h
+++ b/be/src/core/memcpy_small.h
@@ -24,6 +24,7 @@
#include <string.h>
#include <cstdint>
+#include <memory>
#if defined(__SSE2__) || defined(__aarch64__)
#include "util/sse_util.hpp"
@@ -86,10 +87,13 @@ inline void memcpy_small_allow_read_write_overflow15(void*
__restrict dst,
#endif
// assume input address not aligned by default
+// hint to compiler that we are copying fixed size data, so it can optimize the copy using SIMD instructions if possible.
template <typename T, bool aligned = false>
void memcpy_fixed(char* lhs, const char* rhs) {
- if constexpr (aligned || sizeof(T) <= 8) {
- *(T*)lhs = *(T*)rhs;
+ if constexpr (aligned) {
+ // hint aligned address to compiler
+ memcpy(std::assume_aligned<alignof(T)>(lhs),
std::assume_aligned<alignof(T)>(rhs),
+ sizeof(T));
} else {
memcpy(lhs, rhs, sizeof(T));
}
diff --git a/be/src/exec/common/hash_table/hash_map_context.h
b/be/src/exec/common/hash_table/hash_map_context.h
index df7f4aacf04..82dde918d5c 100644
--- a/be/src/exec/common/hash_table/hash_map_context.h
+++ b/be/src/exec/common/hash_table/hash_map_context.h
@@ -945,7 +945,7 @@ struct MethodKeysFixed : public MethodBase<TData> {
for (size_t j = 0; j < key_columns.size(); ++j) {
const char* __restrict data = key_columns[j]->get_raw_data().data;
- auto foo = [&]<typename Fixed>(Fixed zero) {
+ auto goo = [&]<typename Fixed, bool aligned>(Fixed zero) {
CHECK_EQ(sizeof(Fixed), key_sizes[j]);
if (has_null_column.size() && has_null_column[j]) {
const auto* nullmap =
@@ -955,11 +955,24 @@ struct MethodKeysFixed : public MethodBase<TData> {
}
auto* __restrict current = result_data + offset;
for (size_t i = 0; i < row_numbers; ++i) {
- memcpy_fixed<Fixed, true>(current, data);
+ memcpy_fixed<Fixed, aligned>(current, data);
current += sizeof(T);
data += sizeof(Fixed);
}
};
+ auto foo = [&]<typename Fixed>(Fixed zero) {
+ // Check alignment of both destination and source pointers.
+ // Also verify that the stride sizeof(T) is a multiple of alignof(Fixed),
+ // otherwise alignment will be lost on subsequent loop iterations
+ // (e.g. UInt96 has sizeof=12, stride 12 is not a multiple of alignof(uint64_t)=8).
+ if (sizeof(T) % alignof(Fixed) == 0 &&
+ reinterpret_cast<uintptr_t>(result_data + offset) %
alignof(Fixed) == 0 &&
+ reinterpret_cast<uintptr_t>(data) % alignof(Fixed) == 0) {
+ goo.template operator()<Fixed, true>(zero);
+ } else {
+ goo.template operator()<Fixed, false>(zero);
+ }
+ };
if (key_sizes[j] == sizeof(uint8_t)) {
foo(uint8_t());
@@ -1037,6 +1050,9 @@ struct MethodKeysFixed : public MethodBase<TData> {
void insert_keys_into_columns(std::vector<typename Base::Key>& input_keys,
MutableColumns& key_columns, const uint32_t
num_rows) override {
+ if (num_rows == 0) {
+ return;
+ }
size_t pos = std::ranges::any_of(key_columns,
[](const auto& col) { return
col->is_nullable(); });
@@ -1062,11 +1078,26 @@ struct MethodKeysFixed : public MethodBase<TData> {
data = const_cast<char*>(key_columns[i]->get_raw_data().data);
}
- auto foo = [&]<typename Fixed>(Fixed zero) {
+ auto goo = [&]<typename Fixed, bool aligned>(Fixed zero) {
CHECK_EQ(sizeof(Fixed), size);
for (size_t j = 0; j < num_rows; j++) {
- memcpy_fixed<Fixed, true>(data + j * sizeof(Fixed),
- (char*)(&input_keys[j]) + pos);
+ memcpy_fixed<Fixed, aligned>(data + j * sizeof(Fixed),
+ (char*)(&input_keys[j]) +
pos);
+ }
+ };
+ auto foo = [&]<typename Fixed>(Fixed zero) {
+ // Check alignment of both source and destination pointers.
+ // The source steps by sizeof(Key) between iterations, so sizeof(Key)
+ // must be a multiple of alignof(Fixed) to maintain alignment across
+ // all iterations (e.g. UInt96 has sizeof=12, not a multiple of 8).
+ if (sizeof(typename Base::Key) % alignof(Fixed) == 0 &&
+ reinterpret_cast<uintptr_t>((char*)(input_keys.data()) +
pos) %
+ alignof(Fixed) ==
+ 0 &&
+ reinterpret_cast<uintptr_t>(data) % alignof(Fixed) == 0) {
+ goo.template operator()<Fixed, true>(zero);
+ } else {
+ goo.template operator()<Fixed, false>(zero);
}
};
diff --git a/be/src/storage/delete/delete_handler.cpp
b/be/src/storage/delete/delete_handler.cpp
index 74e29fe5641..c13c17700a5 100644
--- a/be/src/storage/delete/delete_handler.cpp
+++ b/be/src/storage/delete/delete_handler.cpp
@@ -27,15 +27,7 @@
#include "common/config.h"
#include "common/logging.h"
#include "common/status.h"
-#include "exprs/function/cast/cast_parameters.h"
-#include "exprs/function/cast/cast_to_boolean.h"
-#include "exprs/function/cast/cast_to_date_or_datetime_impl.hpp"
-#include "exprs/function/cast/cast_to_datetimev2_impl.hpp"
-#include "exprs/function/cast/cast_to_datev2_impl.hpp"
-#include "exprs/function/cast/cast_to_decimal.h"
-#include "exprs/function/cast/cast_to_float.h"
-#include "exprs/function/cast/cast_to_int.h"
-#include "exprs/function/cast/cast_to_ip.h"
+#include "core/data_type_serde/data_type_serde.h"
#include "storage/olap_common.h"
#include "storage/predicate/block_column_predicate.h"
#include "storage/predicate/predicate_creator.h"
@@ -51,185 +43,62 @@ using ::google::protobuf::RepeatedPtrField;
namespace doris {
+// Parses a string value into a Field using the serde's from_fe_string, then builds
+// a HybridSetBase for IN/NOT_IN predicates.
+// The type-dispatch via switch/case is still needed because build_set<PType>() and
+// HybridSet::insert(const void*) require compile-time PrimitiveType, and Field::get<PType>()
+// must be invoked with the correct type to extract the underlying CppType value.
template <PrimitiveType PType>
-Status convert(const DataTypePtr& data_type, const std::string& str, Arena&
arena,
- typename PrimitiveTypeTraits<PType>::CppType& res) {
- if constexpr (PType == TYPE_TINYINT || PType == TYPE_SMALLINT || PType ==
TYPE_INT ||
- PType == TYPE_BIGINT || PType == TYPE_LARGEINT) {
- CastParameters parameters;
- if (!CastToInt::from_string<false>({str.data(), str.size()}, res,
parameters)) {
- return Status::Error<ErrorCode::INVALID_ARGUMENT>(
- "invalid {} string. str={}",
type_to_string(data_type->get_primitive_type()),
- str);
- }
- return Status::OK();
- }
- if constexpr (PType == TYPE_FLOAT || PType == TYPE_DOUBLE) {
- CastParameters parameters;
- if (!CastToFloat::from_string({str.data(), str.size()}, res,
parameters)) {
- return Status::Error<ErrorCode::INVALID_ARGUMENT>(
- "invalid {} string. str={}",
type_to_string(data_type->get_primitive_type()),
- str);
- }
- return Status::OK();
- }
- if constexpr (PType == TYPE_DATE) {
- CastParameters parameters;
- if (!CastToDateOrDatetime::from_string<false>({str.data(),
str.size()}, res, nullptr,
- parameters)) {
- return Status::Error<ErrorCode::INVALID_ARGUMENT>(
- "invalid {} string. str={}",
type_to_string(data_type->get_primitive_type()),
- str);
- }
- return Status::OK();
- }
- if constexpr (PType == TYPE_DATETIME) {
- CastParameters parameters;
- if (!CastToDateOrDatetime::from_string<true>({str.data(), str.size()},
res, nullptr,
- parameters)) {
- return Status::Error<ErrorCode::INVALID_ARGUMENT>(
- "invalid {} string. str={}",
type_to_string(data_type->get_primitive_type()),
- str);
- }
- return Status::OK();
- }
- if constexpr (PType == TYPE_DATEV2) {
- CastParameters parameters;
- if (!CastToDateV2::from_string({str.data(), str.size()}, res, nullptr,
parameters)) {
- return Status::Error<ErrorCode::INVALID_ARGUMENT>(
- "invalid {} string. str={}",
type_to_string(data_type->get_primitive_type()),
- str);
- }
- return Status::OK();
- }
- if constexpr (PType == TYPE_DATETIMEV2) {
- CastParameters parameters;
- if (!CastToDatetimeV2::from_string({str.data(), str.size()}, res,
nullptr,
- data_type->get_scale(),
parameters)) {
- return Status::Error<ErrorCode::INVALID_ARGUMENT>(
- "invalid {} string. str={}",
type_to_string(data_type->get_primitive_type()),
- str);
- }
- return Status::OK();
- }
- if constexpr (PType == TYPE_TIMESTAMPTZ) {
- CastParameters parameters;
- if (!CastToTimstampTz::from_string({str.data(), str.size()}, res,
parameters, nullptr,
- data_type->get_scale())) {
- return Status::Error<ErrorCode::INVALID_ARGUMENT>(
- "invalid {} string. str={}",
type_to_string(data_type->get_primitive_type()),
- str);
- }
- return Status::OK();
- }
- if constexpr (PType == TYPE_CHAR) {
- size_t target = assert_cast<const
DataTypeString*>(remove_nullable(data_type).get())->len();
- res = {str.data(), str.size()};
- if (target > str.size()) {
- char* buffer = arena.alloc(target);
- memset(buffer, 0, target);
- memcpy(buffer, str.data(), str.size());
- res = {buffer, target};
- }
- return Status::OK();
- }
- if constexpr (PType == TYPE_STRING || PType == TYPE_VARCHAR) {
- char* buffer = arena.alloc(str.size());
- memcpy(buffer, str.data(), str.size());
- res = {buffer, str.size()};
- return Status::OK();
- }
- if constexpr (PType == TYPE_BOOLEAN) {
- CastParameters parameters;
- UInt8 tmp;
- if (!CastToBool::from_string({str.data(), str.size()}, tmp,
parameters)) {
- return Status::Error<ErrorCode::INVALID_ARGUMENT>(
- "invalid {} string. str={}",
type_to_string(data_type->get_primitive_type()),
- str);
- }
- res = tmp != 0;
- return Status::OK();
- }
- if constexpr (PType == TYPE_IPV4) {
- CastParameters parameters;
- if (!CastToIPv4::from_string({str.data(), str.size()}, res,
parameters)) {
- return Status::Error<ErrorCode::INVALID_ARGUMENT>(
- "invalid {} string. str={}",
type_to_string(data_type->get_primitive_type()),
- str);
- }
- return Status::OK();
- }
- if constexpr (PType == TYPE_IPV6) {
- CastParameters parameters;
- if (!CastToIPv6::from_string({str.data(), str.size()}, res,
parameters)) {
- return Status::Error<ErrorCode::INVALID_ARGUMENT>(
- "invalid {} string. str={}",
type_to_string(data_type->get_primitive_type()),
- str);
- }
- return Status::OK();
- }
- if constexpr (PType == TYPE_DECIMALV2) {
- CastParameters parameters;
- Decimal128V2 tmp;
- if (!CastToDecimal::from_string({str.data(), str.size()}, tmp,
data_type->get_precision(),
- data_type->get_scale(), parameters)) {
- return Status::Error<ErrorCode::INVALID_ARGUMENT>(
- "invalid {} string. str={}",
type_to_string(data_type->get_primitive_type()),
- str);
- }
- res = DecimalV2Value(tmp.value);
- return Status::OK();
- } else if constexpr (is_decimal(PType)) {
- CastParameters parameters;
- if (!CastToDecimal::from_string({str.data(), str.size()}, res,
data_type->get_precision(),
- data_type->get_scale(), parameters)) {
- return Status::Error<ErrorCode::INVALID_ARGUMENT>(
- "invalid {} string. str={}",
type_to_string(data_type->get_primitive_type()),
- str);
- }
- return Status::OK();
+void insert_field_to_set(const Field& field, HybridSetBase* set) {
+ if constexpr (is_string_type(PType)) {
+ // StringSet::insert expects const StringRef*, so we must construct a StringRef
+ // from the std::string returned by Field::get<>.
+ const auto& tmp = field.get<PType>();
+ StringRef ref(tmp.data(), tmp.size());
+ set->insert(reinterpret_cast<const void*>(&ref));
+ } else {
+ auto tmp = field.get<PType>();
+ set->insert(reinterpret_cast<const void*>(&tmp));
}
- return Status::Error<ErrorCode::INVALID_ARGUMENT>(
- "unsupported data type in delete handler. type={}",
- type_to_string(data_type->get_primitive_type()));
}
-#define CONVERT_CASE(PType) \
- case PType: { \
- set = build_set<PType>(); \
- for (const auto& s : str) { \
- typename PrimitiveTypeTraits<PType>::CppType tmp; \
- RETURN_IF_ERROR(convert<PType>(data_type, s, arena, tmp)); \
- set->insert(reinterpret_cast<const void*>(&tmp)); \
- } \
- return Status::OK(); \
- }
-Status convert(const DataTypePtr& data_type, const std::list<std::string>&
str, Arena& arena,
+#define FROM_FE_STRING_CASE(PType) \
+ case PType: { \
+ set = build_set<PType>(); \
+ for (const auto& s : str) { \
+ Field field; \
+ RETURN_IF_ERROR(serde->from_fe_string(s, field)); \
+ insert_field_to_set<PType>(field, set.get()); \
+ } \
+ return Status::OK(); \
+ }
+Status convert(const DataTypePtr& data_type, const std::list<std::string>& str,
std::shared_ptr<HybridSetBase>& set) {
+ auto serde = data_type->get_serde();
switch (data_type->get_primitive_type()) {
- CONVERT_CASE(TYPE_TINYINT);
- CONVERT_CASE(TYPE_SMALLINT);
- CONVERT_CASE(TYPE_INT);
- CONVERT_CASE(TYPE_BIGINT);
- CONVERT_CASE(TYPE_LARGEINT);
- CONVERT_CASE(TYPE_FLOAT);
- CONVERT_CASE(TYPE_DOUBLE);
- CONVERT_CASE(TYPE_DATE);
- CONVERT_CASE(TYPE_DATETIME);
- CONVERT_CASE(TYPE_DATEV2);
- CONVERT_CASE(TYPE_DATETIMEV2);
- CONVERT_CASE(TYPE_TIMESTAMPTZ);
- CONVERT_CASE(TYPE_BOOLEAN);
- CONVERT_CASE(TYPE_IPV4);
- CONVERT_CASE(TYPE_IPV6);
- CONVERT_CASE(TYPE_DECIMALV2);
- CONVERT_CASE(TYPE_DECIMAL32);
- CONVERT_CASE(TYPE_DECIMAL64);
- CONVERT_CASE(TYPE_DECIMAL128I);
- CONVERT_CASE(TYPE_DECIMAL256);
- CONVERT_CASE(TYPE_CHAR);
- CONVERT_CASE(TYPE_VARCHAR);
- CONVERT_CASE(TYPE_STRING);
+ FROM_FE_STRING_CASE(TYPE_TINYINT);
+ FROM_FE_STRING_CASE(TYPE_SMALLINT);
+ FROM_FE_STRING_CASE(TYPE_INT);
+ FROM_FE_STRING_CASE(TYPE_BIGINT);
+ FROM_FE_STRING_CASE(TYPE_LARGEINT);
+ FROM_FE_STRING_CASE(TYPE_FLOAT);
+ FROM_FE_STRING_CASE(TYPE_DOUBLE);
+ FROM_FE_STRING_CASE(TYPE_DATE);
+ FROM_FE_STRING_CASE(TYPE_DATETIME);
+ FROM_FE_STRING_CASE(TYPE_DATEV2);
+ FROM_FE_STRING_CASE(TYPE_DATETIMEV2);
+ FROM_FE_STRING_CASE(TYPE_TIMESTAMPTZ);
+ FROM_FE_STRING_CASE(TYPE_BOOLEAN);
+ FROM_FE_STRING_CASE(TYPE_IPV4);
+ FROM_FE_STRING_CASE(TYPE_IPV6);
+ FROM_FE_STRING_CASE(TYPE_DECIMALV2);
+ FROM_FE_STRING_CASE(TYPE_DECIMAL32);
+ FROM_FE_STRING_CASE(TYPE_DECIMAL64);
+ FROM_FE_STRING_CASE(TYPE_DECIMAL128I);
+ FROM_FE_STRING_CASE(TYPE_DECIMAL256);
+ FROM_FE_STRING_CASE(TYPE_CHAR);
+ FROM_FE_STRING_CASE(TYPE_VARCHAR);
+ FROM_FE_STRING_CASE(TYPE_STRING);
default:
return Status::Error<ErrorCode::INVALID_ARGUMENT>(
"unsupported data type in delete handler. type={}",
@@ -237,43 +106,14 @@ Status convert(const DataTypePtr& data_type, const
std::list<std::string>& str,
}
return Status::OK();
}
-#undef CONVERT_CASE
-
-#define CONVERT_CASE(PType)
\
- case PType: {
\
- typename PrimitiveTypeTraits<PType>::CppType tmp;
\
- RETURN_IF_ERROR(convert<PType>(type, res.value_str.front(), arena,
tmp)); \
- v = Field::create_field<PType>(tmp);
\
- switch (res.condition_op) {
\
- case PredicateType::EQ:
\
- predicate = create_comparison_predicate<PredicateType::EQ>(index,
col_name, type, v, \
- true);
\
- return Status::OK();
\
- case PredicateType::NE:
\
- predicate = create_comparison_predicate<PredicateType::NE>(index,
col_name, type, v, \
- true);
\
- return Status::OK();
\
- case PredicateType::GT:
\
- predicate = create_comparison_predicate<PredicateType::GT>(index,
col_name, type, v, \
- true);
\
- return Status::OK();
\
- case PredicateType::GE:
\
- predicate = create_comparison_predicate<PredicateType::GE>(index,
col_name, type, v, \
- true);
\
- return Status::OK();
\
- case PredicateType::LT:
\
- predicate = create_comparison_predicate<PredicateType::LT>(index,
col_name, type, v, \
- true);
\
- return Status::OK();
\
- case PredicateType::LE:
\
- predicate = create_comparison_predicate<PredicateType::LE>(index,
col_name, type, v, \
- true);
\
- return Status::OK();
\
- default:
\
- return Status::Error<ErrorCode::INVALID_ARGUMENT>(
\
- "invalid condition operator. operator={}",
type_to_op_str(res.condition_op)); \
- }
\
- }
+#undef FROM_FE_STRING_CASE
+
+// Parses a single condition value string into a Field and creates a comparison predicate.
+// Uses serde->from_fe_string to do the parsing, which handles all type-specific
+// conversions (including decimal scale, etc.).
+// For CHAR type, the value is padded with '\0' to the declared column length, consistent
+// with the IN list path in convert() above.
+// For VARCHAR/STRING, the Field is created directly from the raw string.
Status parse_to_predicate(const uint32_t index, const std::string col_name,
const DataTypePtr& type,
DeleteHandler::ConditionParseResult& res, Arena&
arena,
std::shared_ptr<ColumnPredicate>& predicate) {
@@ -285,70 +125,53 @@ Status parse_to_predicate(const uint32_t index, const
std::string col_name, cons
type->get_primitive_type());
return Status::OK();
}
+
Field v;
- switch (type->get_primitive_type()) {
- CONVERT_CASE(TYPE_TINYINT);
- CONVERT_CASE(TYPE_SMALLINT);
- CONVERT_CASE(TYPE_INT);
- CONVERT_CASE(TYPE_BIGINT);
- CONVERT_CASE(TYPE_LARGEINT);
- CONVERT_CASE(TYPE_FLOAT);
- CONVERT_CASE(TYPE_DOUBLE);
- CONVERT_CASE(TYPE_DATE);
- CONVERT_CASE(TYPE_DATETIME);
- CONVERT_CASE(TYPE_DATEV2);
- CONVERT_CASE(TYPE_DATETIMEV2);
- CONVERT_CASE(TYPE_TIMESTAMPTZ);
- CONVERT_CASE(TYPE_BOOLEAN);
- CONVERT_CASE(TYPE_IPV4);
- CONVERT_CASE(TYPE_IPV6);
- CONVERT_CASE(TYPE_DECIMALV2);
- CONVERT_CASE(TYPE_DECIMAL32);
- CONVERT_CASE(TYPE_DECIMAL64);
- CONVERT_CASE(TYPE_DECIMAL128I);
- CONVERT_CASE(TYPE_DECIMAL256);
- case TYPE_CHAR:
- case TYPE_VARCHAR:
- case TYPE_STRING: {
- v = Field::create_field<TYPE_STRING>(res.value_str.front());
- switch (res.condition_op) {
- case PredicateType::EQ:
- predicate =
- create_comparison_predicate<PredicateType::EQ>(index,
col_name, type, v, true);
- return Status::OK();
- case PredicateType::NE:
- predicate =
- create_comparison_predicate<PredicateType::NE>(index,
col_name, type, v, true);
- return Status::OK();
- case PredicateType::GT:
- predicate =
- create_comparison_predicate<PredicateType::GT>(index,
col_name, type, v, true);
- return Status::OK();
- case PredicateType::GE:
- predicate =
- create_comparison_predicate<PredicateType::GE>(index,
col_name, type, v, true);
- return Status::OK();
- case PredicateType::LT:
- predicate =
- create_comparison_predicate<PredicateType::LT>(index,
col_name, type, v, true);
- return Status::OK();
- case PredicateType::LE:
- predicate =
- create_comparison_predicate<PredicateType::LE>(index,
col_name, type, v, true);
- return Status::OK();
- default:
- return Status::Error<ErrorCode::INVALID_ARGUMENT>(
- "invalid condition operator. operator={}",
type_to_op_str(res.condition_op));
+ if (type->get_primitive_type() == TYPE_CHAR) {
+ // CHAR type: create Field and pad with '\0' to the declared column length,
+ // consistent with IN list path (convert() above) and create_comparison_predicate.
+ const auto& str = res.value_str.front();
+ auto char_len = cast_set<size_t>(
+ assert_cast<const
DataTypeString*>(remove_nullable(type).get())->len());
+ auto target = std::max(char_len, str.size());
+ if (target > str.size()) {
+ std::string padded(target, '\0');
+ memcpy(padded.data(), str.data(), str.size());
+ v = Field::create_field<TYPE_CHAR>(std::move(padded));
+ } else {
+ v = Field::create_field<TYPE_CHAR>(str);
}
- break;
+ } else if (is_string_type(type->get_primitive_type())) {
+ // VARCHAR/STRING: create Field directly from the raw string, no padding needed.
+ v = Field::create_field<TYPE_STRING>(res.value_str.front());
+ } else {
+ auto serde = type->get_serde();
+ RETURN_IF_ERROR(serde->from_fe_string(res.value_str.front(), v));
}
+
+ switch (res.condition_op) {
+ case PredicateType::EQ:
+ predicate = create_comparison_predicate<PredicateType::EQ>(index,
col_name, type, v, true);
+ return Status::OK();
+ case PredicateType::NE:
+ predicate = create_comparison_predicate<PredicateType::NE>(index,
col_name, type, v, true);
+ return Status::OK();
+ case PredicateType::GT:
+ predicate = create_comparison_predicate<PredicateType::GT>(index,
col_name, type, v, true);
+ return Status::OK();
+ case PredicateType::GE:
+ predicate = create_comparison_predicate<PredicateType::GE>(index,
col_name, type, v, true);
+ return Status::OK();
+ case PredicateType::LT:
+ predicate = create_comparison_predicate<PredicateType::LT>(index,
col_name, type, v, true);
+ return Status::OK();
+ case PredicateType::LE:
+ predicate = create_comparison_predicate<PredicateType::LE>(index,
col_name, type, v, true);
+ return Status::OK();
default:
- return Status::Error<ErrorCode::INVALID_ARGUMENT>(
- "unsupported data type in delete handler. type={}",
- type_to_string(type->get_primitive_type()));
+ return Status::Error<ErrorCode::INVALID_ARGUMENT>("invalid condition
operator. operator={}",
+
type_to_op_str(res.condition_op));
}
- return Status::OK();
-#undef CONVERT_CASE
}
Status parse_to_in_predicate(const uint32_t index, const std::string& col_name,
@@ -358,14 +181,14 @@ Status parse_to_in_predicate(const uint32_t index, const
std::string& col_name,
switch (res.condition_op) {
case PredicateType::IN_LIST: {
std::shared_ptr<HybridSetBase> set;
- RETURN_IF_ERROR(convert(type, res.value_str, arena, set));
+ RETURN_IF_ERROR(convert(type, res.value_str, set));
predicate =
create_in_list_predicate<PredicateType::IN_LIST>(index,
col_name, type, set, true);
break;
}
case PredicateType::NOT_IN_LIST: {
std::shared_ptr<HybridSetBase> set;
- RETURN_IF_ERROR(convert(type, res.value_str, arena, set));
+ RETURN_IF_ERROR(convert(type, res.value_str, set));
predicate =
create_in_list_predicate<PredicateType::NOT_IN_LIST>(index, col_name, type, set,
true);
break;
diff --git a/be/src/storage/index/zone_map/zone_map_index.cpp
b/be/src/storage/index/zone_map/zone_map_index.cpp
index 3ab02af15ed..47414375db2 100644
--- a/be/src/storage/index/zone_map/zone_map_index.cpp
+++ b/be/src/storage/index/zone_map/zone_map_index.cpp
@@ -70,10 +70,8 @@ Status ZoneMap::from_proto(const ZoneMapPB& zone_map, const
DataTypePtr& data_ty
}
} else {
if (!zone_map_info.pass_all) {
- DataTypeSerDe::FormatOptions opt;
- opt.ignore_scale = true;
- RETURN_IF_ERROR(data_type->get_serde()->from_olap_string(
- zone_map.min(), zone_map_info.min_value, opt));
+ RETURN_IF_ERROR(data_type->get_serde()->from_zonemap_string(
+ zone_map.min(), zone_map_info.min_value));
}
}
@@ -99,10 +97,8 @@ Status ZoneMap::from_proto(const ZoneMapPB& zone_map, const
DataTypePtr& data_ty
}
} else {
if (!zone_map_info.pass_all) {
- DataTypeSerDe::FormatOptions opt;
- opt.ignore_scale = true;
- RETURN_IF_ERROR(data_type->get_serde()->from_olap_string(
- zone_map.max(), zone_map_info.max_value, opt));
+ RETURN_IF_ERROR(data_type->get_serde()->from_zonemap_string(
+ zone_map.max(), zone_map_info.max_value));
}
}
}
diff --git a/be/src/storage/schema_change/schema_change.cpp
b/be/src/storage/schema_change/schema_change.cpp
index 6fc12c90f92..64db6ced4c2 100644
--- a/be/src/storage/schema_change/schema_change.cpp
+++ b/be/src/storage/schema_change/schema_change.cpp
@@ -1523,9 +1523,8 @@ Status
SchemaChangeJob::_init_column_mapping(ColumnMapping* column_mapping,
}
if (!column_schema.is_nullable() || value.length() != 0) {
- DataTypeSerDe::FormatOptions options;
-
RETURN_IF_ERROR(column_schema.get_vec_type()->get_serde()->from_olap_string(
- value, column_mapping->default_value, options));
+
RETURN_IF_ERROR(column_schema.get_vec_type()->get_serde()->from_fe_string(
+ value, column_mapping->default_value));
}
return Status::OK();
diff --git a/be/src/storage/segment/column_reader.cpp
b/be/src/storage/segment/column_reader.cpp
index a37bf424d48..3908b8019f3 100644
--- a/be/src/storage/segment/column_reader.cpp
+++ b/be/src/storage/segment/column_reader.cpp
@@ -2012,8 +2012,7 @@ Status DefaultValueColumnIterator::init(const
ColumnIteratorOptions& opts) {
const auto serde = DataTypeFactory::instance()
.create_data_type(t, _precision,
_scale, _len)
->get_serde();
- DataTypeSerDe::FormatOptions opt;
- RETURN_IF_ERROR(serde->from_olap_string(_default_value,
_default_value_field, opt));
+ RETURN_IF_ERROR(serde->from_fe_string(_default_value,
_default_value_field));
}
} else if (_is_nullable) {
_default_value_field = Field::create_field<TYPE_NULL>(Null {});
diff --git a/be/src/util/io_helper.h b/be/src/util/io_helper.h
index 64016e47f7a..5a1ba1775d6 100644
--- a/be/src/util/io_helper.h
+++ b/be/src/util/io_helper.h
@@ -165,11 +165,19 @@ StringParser::ParseResult read_decimal_text_impl(T& x,
const StringRef& buf, UIn
UInt32 scale) {
static_assert(IsDecimalNumber<T>);
if constexpr (!std::is_same_v<DecimalV2Value, T>) {
+ // DecimalV3: uses the caller-supplied precision and scale.
+ // When called from from_olap_string with ignore_scale=true, scale=0 means the
+ // string is treated as an unscaled integer (e.g. "12345" → internal int 12345).
StringParser::ParseResult result = StringParser::PARSE_SUCCESS;
x.value = StringParser::string_to_decimal<P>(buf.data, (int)buf.size,
precision, scale,
&result);
return result;
} else {
+ // DecimalV2: IGNORES the caller-supplied precision/scale and hardcodes
+ // DecimalV2Value::PRECISION (27) and DecimalV2Value::SCALE (9).
+ // This means from_olap_string's ignore_scale flag has no actual effect on DecimalV2
+ // parsing today — the string "123.456000000" is always parsed with scale=9.
+ // Callers should still set ignore_scale=false for DecimalV2 for semantic correctness.
StringParser::ParseResult result = StringParser::PARSE_SUCCESS;
x = DecimalV2Value(StringParser::string_to_decimal<TYPE_DECIMALV2>(
buf.data, (int)buf.size, DecimalV2Value::PRECISION,
DecimalV2Value::SCALE,
diff --git a/be/test/storage/olap_type_test.cpp
b/be/test/storage/olap_type_test.cpp
index afa8af1f4a5..2511b5158a0 100644
--- a/be/test/storage/olap_type_test.cpp
+++ b/be/test/storage/olap_type_test.cpp
@@ -18,11 +18,18 @@
#include <gtest/gtest-message.h>
#include <gtest/gtest-test-part.h>
+#include <cstring>
#include <fstream>
#include <iostream>
#include "core/data_type/data_type_factory.hpp"
#include "core/data_type/define_primitive_type.h"
+#include "core/data_type_serde/data_type_serde.h"
+#include "core/decimal12.h"
+#include "core/field.h"
+#include "core/types.h"
+#include "core/value/decimalv2_value.h"
+#include "core/value/vdatetime_value.h"
#include "exprs/function/cast/cast_to_string.h"
#include "gtest/gtest_pred_impl.h"
#include "storage/olap_common.h"
@@ -582,4 +589,1421 @@ TEST_F(OlapTypeTest, ser_deser_double) {
<< ", diff_ratio: " << fmt::format("{:.17g}", diff_ratio);
}
}
+
+//
=============================================================================
+// Tests for to_olap_string / from_zonemap_string on DataTypeSerDe
+//
+// Background:
+// ZoneMap index serializes min/max values via to_olap_string()
+// and deserializes via from_zonemap_string(). The from_zonemap_string()
+// method internally sets ignore_scale=true for DecimalV3 types to avoid
+// double-scaling the raw unscaled integer stored in ZoneMap.
+//
+// Key difference vs normal from_fe_string:
+// - DecimalV2: to_olap_string uses decimal12_t::to_string() which outputs
+// "integer.fraction" with 9 zero-padded fractional digits (e.g.
"123.456000000").
+// from_zonemap_string still works correctly because DecimalV2's parser
+// hardcodes scale=9 regardless of the ignore_scale setting.
+// - Decimal32/64/128I/256: to_olap_string outputs the RAW INTEGER string
+// (the unscaled internal value). E.g., Decimal(9,2) value 123.45 has
+// internal integer 12345, so to_olap_string outputs "12345".
+// from_zonemap_string uses ignore_scale=true → scale=0, parsing as
integer.
+// - Float/Double: to_olap_string uses CastToString::from_number, which
outputs
+// "NaN", "Infinity", "-Infinity" for special values. But
from_zonemap_string
+// uses fast_float::from_chars which REJECTS these strings. In practice,
ZoneMap
+// tracks NaN/Inf via boolean flags (has_nan, has_positive_inf,
has_negative_inf),
+// so the min/max values never contain NaN/Inf.
+// - DateV1 (TYPE_DATE): to_olap_string outputs "YYYY-MM-DD".
+// - DateTimeV1 (TYPE_DATETIME): to_olap_string outputs "YYYY-MM-DD
HH:MM:SS".
+// - DateV2: to_olap_string outputs "YYYY-MM-DD".
+// - DateTimeV2: to_olap_string outputs "YYYY-MM-DD HH:MM:SS.ffffff".
+// The fractional part is always written, even when microsecond == 0.
+// Note: like the old ZoneMap code, to_olap_string uses a hardcoded scale=6
+// (always 6 fractional digits) regardless of the serde's own scale, so
+// trailing fractional zeros are kept (e.g. "2023-06-15 14:30:59.000000").
+//
=============================================================================
+
+// ---------------------------------------------------------------------------
+// Decimal32: to_olap_string outputs RAW integer (unscaled value).
+// Internal representation: value * 10^scale.
+// E.g., Decimal(9,2) value 123.45 → internal int32 = 12345 → "12345".
+// from_zonemap_string reads "12345" as integer 12345 (ignore_scale=true
internally).
+// ---------------------------------------------------------------------------
+TEST_F(OlapTypeTest, ser_deser_decimal32) {
+ // Create Decimal(9,2) data type (precision=9, scale=2)
+ auto data_type_ptr = DataTypeFactory::instance().create_data_type(
+ FieldType::OLAP_FIELD_TYPE_DECIMAL32, /*precision=*/9,
/*scale=*/2);
+ auto serde = data_type_ptr->get_serde();
+
+ // Test cases: {internal_int32_value, expected_olap_string}
+ // actual_decimal_value = internal / 10^scale
+ std::vector<std::pair<int32_t, std::string>> test_cases = {
+ // 123.45 → internal=12345 → "12345"
+ {12345, "12345"},
+ // -1.00 → internal=-100 → "-100"
+ {-100, "-100"},
+ // 0.00 → internal=0 → "0"
+ {0, "0"},
+ // 999999999 → max for Decimal(9,2): 9999999.99
+ {999999999, "999999999"},
+ // -999999999 → min for Decimal(9,2): -9999999.99
+ {-999999999, "-999999999"},
+ // 1 → 0.01
+ {1, "1"},
+ // -1 → -0.01
+ {-1, "-1"},
+ };
+
+ for (const auto& [int_val, expected_str] : test_cases) {
+ Decimal32 dec(int_val);
+ auto field = Field::create_field<TYPE_DECIMAL32>(dec);
+ // Verify to_olap_string output matches expected raw integer string
+ auto result_str = serde->to_olap_string(field);
+ EXPECT_EQ(result_str, expected_str)
+ << "Decimal32 to_olap_string failed for internal value " <<
int_val;
+
+ // Verify round-trip: from_zonemap_string should restore the same
internal value
+ Field restored_field;
+ auto status = serde->from_zonemap_string(result_str, restored_field);
+ EXPECT_TRUE(status.ok()) << status.to_string();
+ auto restored_value = restored_field.get<TYPE_DECIMAL32>();
+ EXPECT_EQ(restored_value.value, int_val)
+ << "Decimal32 round-trip failed for string '" << result_str <<
"'";
+ }
+}
+
+// ---------------------------------------------------------------------------
+// Decimal64: same pattern as Decimal32, but 64-bit integer.
+// E.g., Decimal(18,4) value 12345.6789 → internal int64 = 123456789 →
"123456789".
+// ---------------------------------------------------------------------------
+TEST_F(OlapTypeTest, ser_deser_decimal64) {
+ auto data_type_ptr = DataTypeFactory::instance().create_data_type(
+ FieldType::OLAP_FIELD_TYPE_DECIMAL64, /*precision=*/18,
/*scale=*/4);
+ auto serde = data_type_ptr->get_serde();
+
+ std::vector<std::pair<int64_t, std::string>> test_cases = {
+ // 12345.6789 → internal=123456789
+ {123456789L, "123456789"},
+ // 0 → "0"
+ {0L, "0"},
+ // -1 → -0.0001
+ {-1L, "-1"},
+ // Large value near max
+ {999999999999999999L, "999999999999999999"},
+ {-999999999999999999L, "-999999999999999999"},
+ // Small fractional: 0.0001
+ {1L, "1"},
+ };
+
+ for (const auto& [int_val, expected_str] : test_cases) {
+ Decimal64 dec(int_val);
+ auto field = Field::create_field<TYPE_DECIMAL64>(dec);
+ auto result_str = serde->to_olap_string(field);
+ EXPECT_EQ(result_str, expected_str)
+ << "Decimal64 to_olap_string failed for internal value " <<
int_val;
+
+ Field restored_field;
+ auto status = serde->from_zonemap_string(result_str, restored_field);
+ EXPECT_TRUE(status.ok()) << status.to_string();
+ auto restored_value = restored_field.get<TYPE_DECIMAL64>();
+ EXPECT_EQ(restored_value.value, int_val)
+ << "Decimal64 round-trip failed for string '" << result_str <<
"'";
+ }
+}
+
+// ---------------------------------------------------------------------------
+// Decimal128I: to_olap_string uses fmt::format("{}", int128_value).
+// E.g., Decimal(38,6) value 123456789.123456 → internal int128 =
123456789123456.
+// Output: "123456789123456".
+// ---------------------------------------------------------------------------
+TEST_F(OlapTypeTest, ser_deser_decimal128i) {
+ auto data_type_ptr = DataTypeFactory::instance().create_data_type(
+ FieldType::OLAP_FIELD_TYPE_DECIMAL128I, /*precision=*/38,
/*scale=*/6);
+ auto serde = data_type_ptr->get_serde();
+
+ // int128_t values and expected strings
+ struct TestCase {
+ int128_t int_val;
+ std::string expected_str;
+ };
+ std::vector<TestCase> test_cases = {
+ // 123456789.123456 → internal=123456789123456
+ {(int128_t)123456789123456LL, "123456789123456"},
+ // 0
+ {(int128_t)0, "0"},
+ // -1
+ {(int128_t)-1, "-1"},
+ // Positive large value exceeding int64 range
+ // 10^18 * 100 = 10^20
+ {(int128_t)1000000000000000000LL * 100, "100000000000000000000"},
+ };
+
+ for (const auto& tc : test_cases) {
+ Decimal128V3 dec(tc.int_val);
+ auto field = Field::create_field<TYPE_DECIMAL128I>(dec);
+ auto result_str = serde->to_olap_string(field);
+ EXPECT_EQ(result_str, tc.expected_str)
+ << "Decimal128I to_olap_string failed for expected '" <<
tc.expected_str << "'";
+
+ Field restored_field;
+ auto status = serde->from_zonemap_string(result_str, restored_field);
+ EXPECT_TRUE(status.ok()) << status.to_string();
+ auto restored_value = restored_field.get<TYPE_DECIMAL128I>();
+ EXPECT_EQ(restored_value.value, tc.int_val)
+ << "Decimal128I round-trip failed for string '" << result_str
<< "'";
+ }
+}
+
+// ---------------------------------------------------------------------------
+// Decimal256: to_olap_string uses wide::to_string(value).
+// Same pattern: raw integer string from internal representation.
+// ---------------------------------------------------------------------------
+TEST_F(OlapTypeTest, ser_deser_decimal256) {
+ auto data_type_ptr = DataTypeFactory::instance().create_data_type(
+ FieldType::OLAP_FIELD_TYPE_DECIMAL256, /*precision=*/76,
/*scale=*/10);
+ auto serde = data_type_ptr->get_serde();
+
+ // Use int128_t-constructible values for simplicity
+ // (wide::Int256 can be constructed from int128_t)
+ struct TestCase {
+ wide::Int256 int_val;
+ std::string expected_str;
+ };
+ std::vector<TestCase> test_cases = {
+ // Simple positive
+ {wide::Int256(123456789LL), "123456789"},
+ // Zero
+ {wide::Int256(0), "0"},
+ // Negative
+ {wide::Int256(-99999LL), "-99999"},
+ // Large value: 10^20
+ {wide::Int256((int128_t)1000000000000000000LL * 100),
"100000000000000000000"},
+ };
+
+ for (const auto& tc : test_cases) {
+ Decimal256 dec(tc.int_val);
+ auto field = Field::create_field<TYPE_DECIMAL256>(dec);
+ auto result_str = serde->to_olap_string(field);
+ EXPECT_EQ(result_str, tc.expected_str)
+ << "Decimal256 to_olap_string failed for expected '" <<
tc.expected_str << "'";
+
+ Field restored_field;
+ auto status = serde->from_zonemap_string(result_str, restored_field);
+ EXPECT_TRUE(status.ok()) << status.to_string();
+ auto restored_value = restored_field.get<TYPE_DECIMAL256>();
+ EXPECT_EQ(restored_value.value, tc.int_val)
+ << "Decimal256 round-trip failed for string '" << result_str
<< "'";
+ }
+}
+
+// ---------------------------------------------------------------------------
+// DecimalV2: to_olap_string uses decimal12_t(int_value,
frac_value).to_string().
+// decimal12_t::to_string() outputs "integer.fraction" with 9 zero-padded
fractional
+// digits. E.g., DecimalV2(123.456) → int_value=123, frac_value=456000000 →
+// decimal12_t(123, 456000000).to_string() → "123.456000000".
+//
+// Parsing with ignore_scale=FALSE would treat this as a normal decimal
+// string using the data type's scale (9), which is the semantically
+// correct mode for DecimalV2.
+// With ignore_scale=TRUE the scale would nominally be 0 and the fractional
+// part would be truncated, which would be WRONG for DecimalV2.
+// from_zonemap_string does pass ignore_scale=TRUE, yet the round-trip
+// still works because DecimalV2's parser (read_decimal_text_impl)
+// hardcodes DecimalV2Value::SCALE=9 regardless of the passed-in scale.
+// In other words, ignore_scale currently has no effect on DecimalV2
+// parsing, which is why a string such as "123.456000000" is restored
+// exactly.
+//
+// Note: this is different from DecimalV3 where storage is raw integer.
+// DecimalV2 storage string always contains a decimal point.
+// ---------------------------------------------------------------------------
+TEST_F(OlapTypeTest, ser_deser_decimalv2) {
+ auto data_type_ptr =
+ DataTypeFactory::instance().create_data_type(TYPE_DECIMALV2,
/*is_nullable=*/false,
+ /*precision=*/27,
/*scale=*/9);
+ auto serde = data_type_ptr->get_serde();
+ // DecimalV2 storage string has decimal point. from_zonemap_string sets
ignore_scale=true,
+ // but DecimalV2's parser hardcodes scale=9 regardless, so round-trip
works correctly.
+
+ // Test cases: {DecimalV2Value, expected_to_olap_string}
+ // DecimalV2Value internally stores value * 10^9.
+ // decimal12_t::to_string format: "integer.fraction" with %09u for
fraction.
+ struct TestCase {
+ DecimalV2Value value;
+ std::string expected_str;
+ };
+
+ std::vector<TestCase> test_cases = {
+ // 123.456 → int=123, frac=456000000 → "123.456000000"
+ {DecimalV2Value(123, 456000000), "123.456000000"},
+ // 0.0 → int=0, frac=0 → "0.000000000"
+ {DecimalV2Value(0, 0), "0.000000000"},
+ // -1.5 → int=-1, frac=-500000000 → "-1.500000000"
+ {DecimalV2Value(-1, -500000000), "-1.500000000"},
+ // Pure integer: 42.0 → "42.000000000"
+ {DecimalV2Value(42, 0), "42.000000000"},
+ // Tiny fraction: 0.000000001 → int=0, frac=1 → "0.000000001"
+ {DecimalV2Value(0, 1), "0.000000001"},
+ // Max fraction: 0.999999999 → int=0, frac=999999999 →
"0.999999999"
+ {DecimalV2Value(0, 999999999), "0.999999999"},
+ // Large integer: 999999999999999999.0
+ {DecimalV2Value(999999999999999999LL, 0),
"999999999999999999.000000000"},
+ // Negative with fraction
+ {DecimalV2Value(-123, -456000000), "-123.456000000"},
+ };
+
+ for (const auto& tc : test_cases) {
+ auto field = Field::create_field<TYPE_DECIMALV2>(tc.value);
+ auto result_str = serde->to_olap_string(field);
+ EXPECT_EQ(result_str, tc.expected_str) << "DecimalV2 to_olap_string
failed";
+
+ // Round-trip: from_zonemap_string should restore the same value
+ Field restored_field;
+ auto status = serde->from_zonemap_string(result_str, restored_field);
+ EXPECT_TRUE(status.ok()) << status.to_string();
+ auto restored_value = restored_field.get<TYPE_DECIMALV2>();
+ EXPECT_EQ(restored_value, tc.value)
+ << "DecimalV2 round-trip failed for string '" << result_str <<
"'"
+ << ", expected int_value=" << tc.value.int_value()
+ << ", frac_value=" << tc.value.frac_value()
+ << ", got int_value=" << restored_value.int_value()
+ << ", frac_value=" << restored_value.frac_value();
+ }
+}
+
+// ---------------------------------------------------------------------------
+// Float: to_olap_string / from_zonemap_string for normal values.
+// to_olap_string uses CastToString::from_number which calls _fast_to_buffer.
+// Format: fmt "{:.7g}" (digits10+1=7 significant digits).
+// NaN/Inf are serialized as "NaN", "Infinity", "-Infinity" but
from_zonemap_string
+// (which uses fast_float::from_chars) CANNOT parse them back → returns
error.
+// In ZoneMap, NaN/Inf are tracked via boolean flags, not stored in min/max
values.
+// ---------------------------------------------------------------------------
+TEST_F(OlapTypeTest, ser_deser_float_olap_string) {
+ auto data_type_ptr =
DataTypeFactory::instance().create_data_type(TYPE_FLOAT, false);
+ auto serde = data_type_ptr->get_serde();
+
+ // Normal float values: to_olap_string → from_zonemap_string round-trip
+ std::vector<std::pair<float, std::string>> normal_cases = {
+ {0.0f, "0"}, {1.0f, "1"},
+ {-1.0f, "-1"}, {123.456f, "123.456"},
+ {0.001f, "0.001"}, {1234567.0f, "1234567"},
+ {1e-10f, "1e-10"}, {3.402823e+38f, "3.402823e+38"},
+ };
+
+ for (const auto& [val, expected_str] : normal_cases) {
+ auto field = Field::create_field<TYPE_FLOAT>(val);
+ auto result_str = serde->to_olap_string(field);
+ EXPECT_EQ(result_str, expected_str)
+ << "Float to_olap_string failed for " << fmt::format("{:.9g}",
val);
+
+ // Round-trip
+ Field restored_field;
+ auto status = serde->from_zonemap_string(result_str, restored_field);
+ EXPECT_TRUE(status.ok()) << status.to_string();
+ float restored_val = restored_field.get<TYPE_FLOAT>();
+ float diff = std::abs(restored_val - val);
+ EXPECT_TRUE(val == 0 ? restored_val == 0 : diff / std::abs(val) < 1e-6)
+ << "Float round-trip: expected " << val << ", got " <<
restored_val;
+ }
+
+ // Special values: to_olap_string produces strings, but
from_zonemap_string FAILS
+ // This documents the intentional behavior: ZoneMap uses boolean flags for
these.
+ {
+ // NaN → "NaN", but from_zonemap_string cannot parse "NaN"
+ auto field =
Field::create_field<TYPE_FLOAT>(std::numeric_limits<float>::quiet_NaN());
+ EXPECT_EQ(serde->to_olap_string(field), "NaN");
+ Field restored_field;
+ auto status = serde->from_zonemap_string("NaN", restored_field);
+ EXPECT_FALSE(status.ok()) << "from_zonemap_string should reject 'NaN'";
+ }
+ {
+ // +Infinity → "Infinity"
+ auto field =
Field::create_field<TYPE_FLOAT>(std::numeric_limits<float>::infinity());
+ EXPECT_EQ(serde->to_olap_string(field), "Infinity");
+ Field restored_field;
+ auto status = serde->from_zonemap_string("Infinity", restored_field);
+ EXPECT_FALSE(status.ok()) << "from_zonemap_string should reject
'Infinity'";
+ }
+ {
+ // -Infinity → "-Infinity"
+ auto field =
Field::create_field<TYPE_FLOAT>(-std::numeric_limits<float>::infinity());
+ EXPECT_EQ(serde->to_olap_string(field), "-Infinity");
+ Field restored_field;
+ auto status = serde->from_zonemap_string("-Infinity", restored_field);
+ EXPECT_FALSE(status.ok()) << "from_zonemap_string should reject
'-Infinity'";
+ }
+}
+
+// ---------------------------------------------------------------------------
+// Double: same pattern as Float.
+// The expected strings in this case follow current serializer behavior.
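+// Note: for DBL_MAX/lowest, the formatted output rounds up to
+// "1.797693134862316e+308", which is beyond the double range and parses as
+// Infinity. from_zonemap_string therefore rejects it, so for these two values
+// the test checks the to_olap_string output and asserts that
+// from_zonemap_string fails with "NaN/Infinity not allowed in olap string".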
+// NaN/Inf same behavior: to_olap_string works, from_zonemap_string rejects.
+// ---------------------------------------------------------------------------
+TEST_F(OlapTypeTest, ser_deser_double_olap_string) {
+ auto data_type_ptr =
DataTypeFactory::instance().create_data_type(TYPE_DOUBLE, false);
+ auto serde = data_type_ptr->get_serde();
+
+ std::vector<std::pair<double, std::string>> normal_cases = {
+ {0.0, "0"},
+ {1.0, "1"},
+ {-1.0, "-1"},
+ {123.456789, "123.456789"},
+ {0.001, "0.001"},
+ {1234567890123456.0, "1234567890123456"},
+ {1e-100, "1e-100"},
+ {std::numeric_limits<double>::lowest(), "-1.797693134862316e+308"},
+ {std::numeric_limits<double>::max(), "1.797693134862316e+308"},
+ };
+
+ for (const auto& [val, expected_str] : normal_cases) {
+ auto field = Field::create_field<TYPE_DOUBLE>(val);
+ auto result_str = serde->to_olap_string(field);
+ EXPECT_EQ(result_str, expected_str)
+ << "Double to_olap_string failed for " <<
fmt::format("{:.17g}", val);
+
+ // Round-trip
+ Field restored_field;
+ auto status = serde->from_zonemap_string(result_str, restored_field);
+ if (val == std::numeric_limits<double>::lowest() ||
+ val == std::numeric_limits<double>::max()) {
+ EXPECT_FALSE(status.ok());
+ EXPECT_NE(status.to_string().find("NaN/Infinity not allowed in
olap string"),
+ std::string::npos)
+ << status.to_string();
+ continue;
+ }
+
+ EXPECT_TRUE(status.ok()) << status.to_string();
+ double restored_val = restored_field.get<TYPE_DOUBLE>();
+ double diff = std::abs(restored_val - val);
+ EXPECT_TRUE(val == 0 ? restored_val == 0 : diff / std::abs(val) <
1e-15)
+ << "Double round-trip: expected " << val << ", got " <<
restored_val;
+ }
+
+ // Special values
+ {
+ auto field =
Field::create_field<TYPE_DOUBLE>(std::numeric_limits<double>::quiet_NaN());
+ EXPECT_EQ(serde->to_olap_string(field), "NaN");
+ Field restored_field;
+ EXPECT_FALSE(serde->from_zonemap_string("NaN", restored_field).ok());
+ }
+ {
+ auto field =
Field::create_field<TYPE_DOUBLE>(std::numeric_limits<double>::infinity());
+ EXPECT_EQ(serde->to_olap_string(field), "Infinity");
+ Field restored_field;
+ EXPECT_FALSE(serde->from_zonemap_string("Infinity",
restored_field).ok());
+ }
+ {
+ auto field =
Field::create_field<TYPE_DOUBLE>(-std::numeric_limits<double>::infinity());
+ EXPECT_EQ(serde->to_olap_string(field), "-Infinity");
+ Field restored_field;
+ EXPECT_FALSE(serde->from_zonemap_string("-Infinity",
restored_field).ok());
+ }
+ {
+ // -0.0 → "-0"
+ auto field = Field::create_field<TYPE_DOUBLE>(-0.0);
+ EXPECT_EQ(serde->to_olap_string(field), "-0");
+ }
+}
+
+// ---------------------------------------------------------------------------
+// DateV1 (TYPE_DATE): to_olap_string outputs "YYYY-MM-DD".
+// Internal representation: VecDateTimeValue, stored as uint24_t in OLAP.
+// The old ZoneMap used VecDateTimeValue::to_string(buf) → "YYYY-MM-DD\0".
+// from_zonemap_string uses
CastToDateOrDatetime::from_string_non_strict_mode.
+// ---------------------------------------------------------------------------
+TEST_F(OlapTypeTest, ser_deser_datev1) {
+ auto data_type_ptr =
DataTypeFactory::instance().create_data_type(TYPE_DATE, false);
+ auto serde = data_type_ptr->get_serde();
+
+ struct TestCase {
+ int year, month, day;
+ std::string expected_str;
+ };
+ std::vector<TestCase> test_cases = {
+ {2023, 1, 1, "2023-01-01"}, {2000, 12, 31, "2000-12-31"}, {1970,
1, 1, "1970-01-01"},
+ {9999, 12, 31, "9999-12-31"}, {1, 1, 1, "0001-01-01"},
+ };
+
+ for (const auto& tc : test_cases) {
+ VecDateTimeValue date_val;
+ date_val.unchecked_set_time(tc.year, tc.month, tc.day, 0, 0, 0);
+ date_val.set_type(TIME_DATE);
+
+ auto field = Field::create_field<TYPE_DATE>(date_val);
+ auto result_str = serde->to_olap_string(field);
+ EXPECT_EQ(result_str, tc.expected_str) << "DateV1 to_olap_string
failed for " << tc.year
+ << "-" << tc.month << "-" <<
tc.day;
+
+ // Round-trip
+ Field restored_field;
+ auto status = serde->from_zonemap_string(result_str, restored_field);
+ EXPECT_TRUE(status.ok()) << status.to_string();
+ auto restored_val = restored_field.get<TYPE_DATE>();
+ EXPECT_EQ(restored_val.year(), tc.year);
+ EXPECT_EQ(restored_val.month(), tc.month);
+ EXPECT_EQ(restored_val.day(), tc.day);
+ }
+}
+
+// ---------------------------------------------------------------------------
+// DateTimeV1 (TYPE_DATETIME): to_olap_string outputs "YYYY-MM-DD HH:MM:SS".
+// Internal representation: VecDateTimeValue, stored as uint64_t in OLAP.
+// The old ZoneMap used the format:
+// YYYYMMDDHHMMSSxxxxxx → "YYYY-MM-DD HH:MM:SS".
+// from_zonemap_string uses
CastToDateOrDatetime::from_string_non_strict_mode<true>.
+// ---------------------------------------------------------------------------
+TEST_F(OlapTypeTest, ser_deser_datetimev1) {
+ auto data_type_ptr =
DataTypeFactory::instance().create_data_type(TYPE_DATETIME, false);
+ auto serde = data_type_ptr->get_serde();
+
+ struct TestCase {
+ int year, month, day, hour, minute, second;
+ std::string expected_str;
+ };
+ std::vector<TestCase> test_cases = {
+ {2023, 6, 15, 14, 30, 59, "2023-06-15 14:30:59"},
+ {2000, 1, 1, 0, 0, 0, "2000-01-01 00:00:00"},
+ {1970, 1, 1, 0, 0, 0, "1970-01-01 00:00:00"},
+ {9999, 12, 31, 23, 59, 59, "9999-12-31 23:59:59"},
+ };
+
+ for (const auto& tc : test_cases) {
+ VecDateTimeValue dt_val;
+ dt_val.unchecked_set_time(tc.year, tc.month, tc.day, tc.hour,
tc.minute, tc.second);
+ dt_val.set_type(TIME_DATETIME);
+
+ auto field = Field::create_field<TYPE_DATETIME>(dt_val);
+ auto result_str = serde->to_olap_string(field);
+ EXPECT_EQ(result_str, tc.expected_str)
+ << "DateTimeV1 to_olap_string failed for " << tc.expected_str;
+
+ // Round-trip
+ Field restored_field;
+ auto status = serde->from_zonemap_string(result_str, restored_field);
+ EXPECT_TRUE(status.ok()) << status.to_string();
+ auto restored_val = restored_field.get<TYPE_DATETIME>();
+ EXPECT_EQ(restored_val.year(), tc.year);
+ EXPECT_EQ(restored_val.month(), tc.month);
+ EXPECT_EQ(restored_val.day(), tc.day);
+ EXPECT_EQ(restored_val.hour(), tc.hour);
+ EXPECT_EQ(restored_val.minute(), tc.minute);
+ EXPECT_EQ(restored_val.second(), tc.second);
+ }
+}
+
+// ---------------------------------------------------------------------------
+// DateV2 (TYPE_DATEV2): to_olap_string outputs "YYYY-MM-DD".
+// Internal: DateV2Value<DateV2ValueType>, stored as uint32_t (bit-packed).
+// Bit layout: year(16bits) << 9 | month(4bits) << 5 | day(5bits).
+// from_zonemap_string uses strptime "%Y-%m-%d", then bit-packs the parsed
date.
+// ---------------------------------------------------------------------------
+TEST_F(OlapTypeTest, ser_deser_datev2) {
+ auto data_type_ptr =
DataTypeFactory::instance().create_data_type(TYPE_DATEV2, false);
+ auto serde = data_type_ptr->get_serde();
+
+ struct TestCase {
+ int year, month, day;
+ std::string expected_str;
+ };
+ std::vector<TestCase> test_cases = {
+ {2023, 1, 15, "2023-01-15"}, {2000, 12, 31, "2000-12-31"}, {1970,
1, 1, "1970-01-01"},
+ {9999, 12, 31, "9999-12-31"}, {1, 1, 1, "0001-01-01"},
+ };
+
+ for (const auto& tc : test_cases) {
+ DateV2Value<DateV2ValueType> date_val;
+ date_val.unchecked_set_time(tc.year, tc.month, tc.day, 0, 0, 0, 0);
+
+ auto field = Field::create_field<TYPE_DATEV2>(date_val);
+ auto result_str = serde->to_olap_string(field);
+ EXPECT_EQ(result_str, tc.expected_str)
+ << "DateV2 to_olap_string failed for " << tc.expected_str;
+
+ // Round-trip
+ Field restored_field;
+ auto status = serde->from_zonemap_string(result_str, restored_field);
+ EXPECT_TRUE(status.ok()) << status.to_string();
+ auto restored_val = restored_field.get<TYPE_DATEV2>();
+ EXPECT_EQ(restored_val.year(), tc.year);
+ EXPECT_EQ(restored_val.month(), tc.month);
+ EXPECT_EQ(restored_val.day(), tc.day);
+ }
+}
+
+// ---------------------------------------------------------------------------
+// DateTimeV2 (TYPE_DATETIMEV2): to_olap_string outputs "YYYY-MM-DD
HH:MM:SS.ffffff".
+// Internal: DateV2Value<DateTimeV2ValueType>, stored as uint64_t
(bit-packed).
+// to_olap_string always calls CastToString::from_datetimev2(value, 6)
because
+// historically the Field type for DateTimeV2 always stores 6-digit
(microsecond) precision.
+// With scale=6, the fractional part is ALWAYS written with 6 digits, even
when microsecond=0.
+//
+// Multiple serde scale values are tested, but since to_olap_string always
uses scale=6,
+// the output format is the same regardless of the serde's own scale:
+// scale=0: output is still "YYYY-MM-DD HH:MM:SS.000000" (fractional part
always present)
+// scale=3: fractional part always present, 6 digits
+// scale=6: fractional part always present, 6 digits
+//
+// from_zonemap_string uses from_date_format_str("%Y-%m-%d %H:%i:%s.%f").
+// ---------------------------------------------------------------------------
+TEST_F(OlapTypeTest, ser_deser_datetimev2_no_microsecond) {
+ // Test with scale=0 serde, but to_olap_string always uses scale=6:
+ // fractional part is always written even when microsecond=0.
+ auto data_type_ptr = DataTypeFactory::instance().create_data_type(
+ TYPE_DATETIMEV2, /*is_nullable=*/false, /*precision=*/0,
/*scale=*/0);
+ auto serde = data_type_ptr->get_serde();
+
+ struct TestCase {
+ int year, month, day, hour, minute, second;
+ uint32_t microsecond;
+ std::string expected_str;
+ };
+ std::vector<TestCase> test_cases = {
+ // No microseconds → fractional part is still written as .000000
(scale=6 always)
+ {2023, 6, 15, 14, 30, 59, 0, "2023-06-15 14:30:59.000000"},
+ {2000, 1, 1, 0, 0, 0, 0, "2000-01-01 00:00:00.000000"},
+ {9999, 12, 31, 23, 59, 59, 0, "9999-12-31 23:59:59.000000"},
+ };
+
+ for (const auto& tc : test_cases) {
+ DateV2Value<DateTimeV2ValueType> dt_val;
+ dt_val.unchecked_set_time(tc.year, tc.month, tc.day, tc.hour,
tc.minute, tc.second,
+ tc.microsecond);
+ auto field = Field::create_field<TYPE_DATETIMEV2>(dt_val);
+ auto result_str = serde->to_olap_string(field);
+ EXPECT_EQ(result_str, tc.expected_str)
+ << "DateTimeV2(scale=0) to_olap_string failed for " <<
tc.expected_str;
+
+ // Round-trip
+ Field restored_field;
+ auto status = serde->from_zonemap_string(result_str, restored_field);
+ EXPECT_TRUE(status.ok()) << status.to_string();
+ auto restored_val = restored_field.get<TYPE_DATETIMEV2>();
+ EXPECT_EQ(restored_val.year(), tc.year);
+ EXPECT_EQ(restored_val.month(), tc.month);
+ EXPECT_EQ(restored_val.day(), tc.day);
+ EXPECT_EQ(restored_val.hour(), tc.hour);
+ EXPECT_EQ(restored_val.minute(), tc.minute);
+ EXPECT_EQ(restored_val.second(), tc.second);
+ }
+}
+
+TEST_F(OlapTypeTest, ser_deser_datetimev2_with_microsecond) {
+ // Test with scale=6 (full microsecond precision)
+ // to_olap_string always uses scale=6: fractional part is always written
with 6 digits.
+ auto data_type_ptr = DataTypeFactory::instance().create_data_type(
+ TYPE_DATETIMEV2, /*is_nullable=*/false, /*precision=*/0,
/*scale=*/6);
+ auto serde = data_type_ptr->get_serde();
+
+ struct TestCase {
+ int year, month, day, hour, minute, second;
+ uint32_t microsecond;
+ std::string expected_str;
+ };
+ std::vector<TestCase> test_cases = {
+ // microsecond=123456 → ".123456"
+ {2023, 6, 15, 14, 30, 59, 123456, "2023-06-15 14:30:59.123456"},
+ // microsecond=1 → ".000001"
+ {2023, 1, 1, 0, 0, 0, 1, "2023-01-01 00:00:00.000001"},
+ // microsecond=999999 → ".999999"
+ {9999, 12, 31, 23, 59, 59, 999999, "9999-12-31 23:59:59.999999"},
+ // microsecond=100000 → ".100000"
+ {2023, 3, 15, 12, 0, 0, 100000, "2023-03-15 12:00:00.100000"},
+ // microsecond=0 → fractional part is still written as .000000
(scale=6 always)
+ {2023, 3, 15, 12, 0, 0, 0, "2023-03-15 12:00:00.000000"},
+ };
+
+ for (const auto& tc : test_cases) {
+ DateV2Value<DateTimeV2ValueType> dt_val;
+ dt_val.unchecked_set_time(tc.year, tc.month, tc.day, tc.hour,
tc.minute, tc.second,
+ tc.microsecond);
+ auto field = Field::create_field<TYPE_DATETIMEV2>(dt_val);
+ auto result_str = serde->to_olap_string(field);
+ EXPECT_EQ(result_str, tc.expected_str)
+ << "DateTimeV2(scale=6) to_olap_string failed for " <<
tc.expected_str;
+
+ // Round-trip
+ Field restored_field;
+ auto status = serde->from_zonemap_string(result_str, restored_field);
+ EXPECT_TRUE(status.ok()) << status.to_string();
+ auto restored_val = restored_field.get<TYPE_DATETIMEV2>();
+ EXPECT_EQ(restored_val.year(), tc.year);
+ EXPECT_EQ(restored_val.month(), tc.month);
+ EXPECT_EQ(restored_val.day(), tc.day);
+ EXPECT_EQ(restored_val.hour(), tc.hour);
+ EXPECT_EQ(restored_val.minute(), tc.minute);
+ EXPECT_EQ(restored_val.second(), tc.second);
+ EXPECT_EQ(restored_val.microsecond(), tc.microsecond);
+ }
+}
+
+TEST_F(OlapTypeTest, ser_deser_datetimev2_scale3) {
+ // Test with scale=3 (millisecond precision)
+ // to_olap_string always uses scale=6: fractional part is always written
with 6 digits.
+ // The data type has scale=3, but to_olap_string ignores this and always
uses scale=6
+ // because historically Field type for DateTimeV2 always stores 6-digit
precision.
+ // from_zonemap_string should still parse back the full microsecond value
stored in the field.
+ auto data_type_ptr = DataTypeFactory::instance().create_data_type(
+ TYPE_DATETIMEV2, /*is_nullable=*/false, /*precision=*/0,
/*scale=*/3);
+ auto serde = data_type_ptr->get_serde();
+
+ {
+ // 123000 microseconds (= 123 milliseconds)
+ // to_olap_string outputs the full 6-digit microsecond part: ".123000"
+ DateV2Value<DateTimeV2ValueType> dt_val;
+ dt_val.unchecked_set_time(2023, 6, 15, 14, 30, 59, 123000);
+ auto field = Field::create_field<TYPE_DATETIMEV2>(dt_val);
+ auto result_str = serde->to_olap_string(field);
+ EXPECT_EQ(result_str, "2023-06-15 14:30:59.123000")
+ << "DateTimeV2(scale=3) to_olap_string failed";
+
+ // Round-trip
+ Field restored_field;
+ auto status = serde->from_zonemap_string(result_str, restored_field);
+ EXPECT_TRUE(status.ok()) << status.to_string();
+ auto restored_val = restored_field.get<TYPE_DATETIMEV2>();
+ EXPECT_EQ(restored_val.year(), 2023);
+ EXPECT_EQ(restored_val.month(), 6);
+ EXPECT_EQ(restored_val.day(), 15);
+ EXPECT_EQ(restored_val.hour(), 14);
+ EXPECT_EQ(restored_val.minute(), 30);
+ EXPECT_EQ(restored_val.second(), 59);
+ EXPECT_EQ(restored_val.microsecond(), 123000);
+ }
+}
+
+TEST_F(OlapTypeTest, char_type_with_padding) {
+ auto data_type =
+
DataTypeFactory::instance().create_data_type(FieldType::OLAP_FIELD_TYPE_CHAR,
0, 0, 20);
+ auto serde = data_type->get_serde();
+
+ {
+ char buf[20];
+ memset(buf, 0, sizeof(buf));
+ memcpy(buf, "hello", 5);
+ Slice olap_value(buf, 20);
+
+ std::string expected("hello", 5);
+ expected.append(15, '\0');
+ std::string expected_serde = expected;
+
+ auto field = Field::create_field_from_olap_value<TYPE_CHAR>(
+ StringRef(olap_value.data, olap_value.size));
+ std::string serde_str = serde->to_olap_string(field);
+
+ EXPECT_EQ(expected_serde, serde_str) << "serde mismatch for CHAR(20)
'hello'"
+ << "\n expected len=" <<
expected_serde.size()
+ << "\n actual len=" <<
serde_str.size();
+ }
+
+ {
+ char buf[20];
+ memset(buf, 'x', 20);
+ Slice olap_value(buf, 20);
+
+ std::string expected(20, 'x');
+ std::string expected_serde = expected;
+
+ auto field = Field::create_field_from_olap_value<TYPE_CHAR>(
+ StringRef(olap_value.data, olap_value.size));
+ std::string serde_str = serde->to_olap_string(field);
+
+ EXPECT_EQ(expected_serde, serde_str) << "serde mismatch for CHAR(20)
filled 'x'";
+ }
+
+ {
+ char buf[20];
+ memset(buf, 0, 20);
+ Slice olap_value(buf, 20);
+
+ std::string expected(20, '\0');
+ std::string expected_serde = expected;
+
+ auto field = Field::create_field_from_olap_value<TYPE_CHAR>(
+ StringRef(olap_value.data, olap_value.size));
+ std::string serde_str = serde->to_olap_string(field);
+
+ EXPECT_EQ(expected_serde, serde_str) << "serde mismatch for CHAR(20)
empty"
+ << "\n expected len=" <<
expected_serde.size()
+ << "\n actual len=" <<
serde_str.size();
+ }
+}
+
+// Feeds VARCHAR payloads (one of them empty) through
+// Field::create_field_from_olap_value<TYPE_VARCHAR> and checks that to_olap_string()
+// returns the same bytes.
+TEST_F(OlapTypeTest, varchar_type) {
+    auto varchar_type = DataTypeFactory::instance().create_data_type(
+            FieldType::OLAP_FIELD_TYPE_VARCHAR, 0, 0, 100);
+    auto serde = varchar_type->get_serde();
+
+    struct TestCase {
+        std::string input;
+        std::string expected;
+        std::string expected_serde;
+    };
+    std::vector<TestCase> cases = {
+            {"hello world", "hello world", "hello world"},
+            {"", "", ""},
+    };
+
+    for (auto& tc : cases) {
+        // Wrap the input bytes in a Slice, then hand them over as a StringRef.
+        Slice slice(tc.input.data(), tc.input.size());
+        auto varchar_field = Field::create_field_from_olap_value<TYPE_VARCHAR>(
+                StringRef(slice.data, slice.size));
+
+        std::string serde_str = serde->to_olap_string(varchar_field);
+
+        EXPECT_EQ(tc.expected_serde, serde_str)
+                << "serde mismatch for VARCHAR '" << tc.input << "'";
+    }
+}
+
+// Builds DATE (v1) values as packed uint24_t integers and checks that to_olap_string()
+// renders them as zero-padded "YYYY-MM-DD".
+TEST_F(OlapTypeTest, date_v1_type) {
+    auto date_type = DataTypeFactory::instance().create_data_type(TYPE_DATE, false);
+    auto serde = date_type->get_serde();
+
+    // Packs a date as ((year * 16) + month) * 32 + day.
+    auto make_olap_date = [](int year, int mon, int day) -> uint24_t {
+        return uint24_t((year * 16 + mon) * 32 + day);
+    };
+
+    struct TestCase {
+        int year, month, day;
+        std::string expected;
+        std::string expected_serde;
+    };
+    std::vector<TestCase> cases = {
+            {2023, 6, 15, "2023-06-15", "2023-06-15"},
+            {2000, 1, 1, "2000-01-01", "2000-01-01"},
+            {9999, 12, 31, "9999-12-31", "9999-12-31"},
+            {1, 1, 1, "0001-01-01", "0001-01-01"},
+    };
+
+    for (auto& tc : cases) {
+        uint24_t packed = make_olap_date(tc.year, tc.month, tc.day);
+        auto date_field = Field::create_field_from_olap_value<TYPE_DATE>(packed);
+
+        std::string serde_str = serde->to_olap_string(date_field);
+
+        EXPECT_EQ(tc.expected_serde, serde_str) << "serde mismatch for DATE " << tc.expected;
+    }
+}
+
+// DATETIME (v1) inputs here are decimal integers of the form YYYYMMDDhhmmss; the test
+// expects to_olap_string() to render them as "YYYY-MM-DD hh:mm:ss".
+TEST_F(OlapTypeTest, datetime_v1_type) {
+    auto datetime_type = DataTypeFactory::instance().create_data_type(TYPE_DATETIME, false);
+    auto serde = datetime_type->get_serde();
+
+    struct TestCase {
+        int64_t olap_value;
+        std::string expected;
+        std::string expected_serde;
+    };
+    // Digit separators split each literal into its year/month/day/hour/minute/second parts.
+    std::vector<TestCase> cases = {
+            {2023'06'15'12'00'00L, "2023-06-15 12:00:00", "2023-06-15 12:00:00"},
+            {2000'01'01'00'00'00L, "2000-01-01 00:00:00", "2000-01-01 00:00:00"},
+            {9999'12'31'23'59'59L, "9999-12-31 23:59:59", "9999-12-31 23:59:59"},
+            {2023'06'15'12'34'56L, "2023-06-15 12:34:56", "2023-06-15 12:34:56"},
+    };
+
+    for (auto& tc : cases) {
+        auto datetime_field = Field::create_field_from_olap_value<TYPE_DATETIME>(
+                static_cast<uint64_t>(tc.olap_value));
+        std::string serde_str = serde->to_olap_string(datetime_field);
+
+        EXPECT_EQ(tc.expected_serde, serde_str) << "serde mismatch for DATETIME " << tc.expected;
+    }
+}
+
+// Builds DATEV2 values as packed 32-bit integers and checks that to_olap_string()
+// renders them as zero-padded "YYYY-MM-DD".
+TEST_F(OlapTypeTest, datev2_type) {
+    auto datev2_type = DataTypeFactory::instance().create_data_type(TYPE_DATEV2, false);
+    auto serde = datev2_type->get_serde();
+
+    // Bit layout used by this test: day in the low 5 bits, month shifted by 5, year by 9.
+    auto make_datev2 = [](int year, int month, int day) -> uint32_t {
+        const int packed = (year << 9) | (month << 5) | day;
+        return packed;
+    };
+
+    struct TestCase {
+        int year, month, day;
+        std::string expected;
+        std::string expected_serde;
+    };
+    std::vector<TestCase> cases = {
+            {2023, 6, 15, "2023-06-15", "2023-06-15"},
+            {2000, 1, 1, "2000-01-01", "2000-01-01"},
+            {9999, 12, 31, "9999-12-31", "9999-12-31"},
+            {1, 1, 1, "0001-01-01", "0001-01-01"},
+    };
+
+    for (auto& tc : cases) {
+        uint32_t packed = make_datev2(tc.year, tc.month, tc.day);
+        auto datev2_field = Field::create_field_from_olap_value<TYPE_DATEV2>(packed);
+
+        std::string serde_str = serde->to_olap_string(datev2_field);
+
+        EXPECT_EQ(tc.expected_serde, serde_str) << "serde mismatch for DATEV2 " << tc.expected;
+    }
+}
+
+// For every DATETIMEV2 scale from 0 to 6, converts packed 64-bit values to a Field and
+// compares to_olap_string() against `expected`, which always carries six fractional
+// digits.
+TEST_F(OlapTypeTest, datetimev2_type) {
+    // Bit layout used by this test: microsecond in the low bits, then second << 20,
+    // minute << 26, hour << 32, day << 37, month << 42 and year << 46.
+    auto make_datetimev2 = [](int year, int month, int day, int hour, int minute, int second,
+                              int microsecond) -> uint64_t {
+        uint64_t packed = static_cast<uint64_t>(microsecond);
+        packed |= static_cast<uint64_t>(second) << 20;
+        packed |= static_cast<uint64_t>(minute) << 26;
+        packed |= static_cast<uint64_t>(hour) << 32;
+        packed |= static_cast<uint64_t>(day) << 37;
+        packed |= static_cast<uint64_t>(month) << 42;
+        packed |= static_cast<uint64_t>(year) << 46;
+        return packed;
+    };
+
+    // NOTE(review): `expected_serde` is never read by the loop below; the assertion
+    // compares against `expected`. The two differ for the zero-microsecond rows.
+    struct TestCase {
+        uint64_t olap_value;
+        std::string expected;
+        std::string expected_serde;
+        std::string desc;
+    };
+    std::vector<TestCase> cases = {
+            {make_datetimev2(2023, 6, 15, 12, 34, 56, 123456), "2023-06-15 12:34:56.123456",
+             "2023-06-15 12:34:56.123456", "non-zero microseconds"},
+            {make_datetimev2(2023, 6, 15, 12, 34, 56, 0), "2023-06-15 12:34:56.000000",
+             "2023-06-15 12:34:56", "zero microseconds"},
+            {make_datetimev2(2023, 1, 1, 0, 0, 0, 123000), "2023-01-01 00:00:00.123000",
+             "2023-01-01 00:00:00.123000", "trailing zeros in microseconds"},
+            {make_datetimev2(2000, 1, 1, 0, 0, 0, 0), "2000-01-01 00:00:00.000000",
+             "2000-01-01 00:00:00", "epoch zero microseconds"},
+            {make_datetimev2(2023, 6, 15, 12, 34, 56, 1), "2023-06-15 12:34:56.000001",
+             "2023-06-15 12:34:56.000001", "1 microsecond"},
+            {make_datetimev2(9999, 12, 31, 23, 59, 59, 999999), "9999-12-31 23:59:59.999999",
+             "9999-12-31 23:59:59.999999", "max datetime"},
+    };
+
+    for (int scale = 0; scale <= 6; ++scale) {
+        // A fresh data type (and serde) is created for each scale under test.
+        auto scaled_type =
+                DataTypeFactory::instance().create_data_type(TYPE_DATETIMEV2, false, 0, scale);
+        auto serde = scaled_type->get_serde();
+
+        for (auto& tc : cases) {
+            auto datetime_field =
+                    Field::create_field_from_olap_value<TYPE_DATETIMEV2>(tc.olap_value);
+            std::string serde_str = serde->to_olap_string(datetime_field);
+
+            EXPECT_EQ(tc.expected, serde_str)
+                    << "serde mismatch for DATETIMEV2 scale=" << scale << ": " << tc.desc
+                    << "\n expected: " << tc.expected << "\n serde: " << serde_str;
+        }
+    }
+}
+
+// Contrasts the two datetime flavours: DATETIME (v1) must serialize without a fractional
+// part, while DATETIMEV2 with scale 6 must include one.
+//
+// The '.' checks inspect the string produced by to_olap_string() (`serde_str`). Checking
+// the hard-coded expectation instead would make those assertions pass no matter what the
+// serde returned.
+TEST_F(OlapTypeTest, datetime_v1_vs_v2_precision_difference) {
+    // DATETIME v1: the input is a decimal integer of the form YYYYMMDDhhmmss.
+    {
+        auto data_type = DataTypeFactory::instance().create_data_type(TYPE_DATETIME, false);
+        auto serde = data_type->get_serde();
+
+        int64_t olap_value = 20230615123456L;
+        std::string expected = "2023-06-15 12:34:56";
+        std::string expected_serde = expected;
+        auto field = Field::create_field_from_olap_value<TYPE_DATETIME>(
+                static_cast<uint64_t>(olap_value));
+        std::string serde_str = serde->to_olap_string(field);
+
+        EXPECT_EQ(expected_serde, serde_str) << "serde mismatch for DATETIME V1";
+        EXPECT_EQ(serde_str.find('.'), std::string::npos)
+                << "DATETIME V1 should NOT have fractional seconds";
+    }
+
+    // DATETIMEV2 with scale 6.
+    {
+        // Bit layout used by this test: microsecond in the low bits, then second << 20,
+        // minute << 26, hour << 32, day << 37, month << 42 and year << 46.
+        auto make_datetimev2 = [](int year, int month, int day, int hour, int minute, int second,
+                                  int microsecond) -> uint64_t {
+            return (static_cast<uint64_t>(year) << 46) | (static_cast<uint64_t>(month) << 42) |
+                   (static_cast<uint64_t>(day) << 37) | (static_cast<uint64_t>(hour) << 32) |
+                   (static_cast<uint64_t>(minute) << 26) | (static_cast<uint64_t>(second) << 20) |
+                   static_cast<uint64_t>(microsecond);
+        };
+
+        auto data_type =
+                DataTypeFactory::instance().create_data_type(TYPE_DATETIMEV2, false, 0, 6);
+        auto serde = data_type->get_serde();
+
+        uint64_t olap_value = make_datetimev2(2023, 6, 15, 12, 34, 56, 123456);
+        std::string expected = "2023-06-15 12:34:56.123456";
+        std::string expected_serde = expected;
+        auto field = Field::create_field_from_olap_value<TYPE_DATETIMEV2>(olap_value);
+        std::string serde_str = serde->to_olap_string(field);
+
+        EXPECT_EQ(expected_serde, serde_str) << "serde mismatch for DATETIMEV2";
+        EXPECT_NE(serde_str.find('.'), std::string::npos)
+                << "DATETIMEV2 should have fractional seconds";
+    }
+}
+
+// DECIMALV2 values are supplied as decimal12_t {integer, fraction} pairs; the test
+// expects to_olap_string() to print the integer part, a dot and nine fractional digits.
+TEST_F(OlapTypeTest, decimalv2_type) {
+    auto decimalv2_type =
+            DataTypeFactory::instance().create_data_type(TYPE_DECIMALV2, false, 27, 9);
+    auto serde = decimalv2_type->get_serde();
+
+    struct TestCase {
+        int64_t integer;
+        int32_t fraction;
+        std::string expected;
+        std::string expected_serde;
+    };
+    std::vector<TestCase> cases = {
+            // Basic values and sign handling.
+            {0, 0, "0.000000000", "0.000000000"},
+            {1, 0, "1.000000000", "1.000000000"},
+            {0, 100000000, "0.100000000", "0.100000000"},
+            {123, 456000000, "123.456000000", "123.456000000"},
+            {-123, -456000000, "-123.456000000", "-123.456000000"},
+            // Largest magnitudes exercised by this test.
+            {999999999999999999L, 999999999, "999999999999999999.999999999",
+             "999999999999999999.999999999"},
+            {-999999999999999999L, -999999999, "-999999999999999999.999999999",
+             "-999999999999999999.999999999"},
+            // A single non-zero digit in each fractional position.
+            {1, 1, "1.000000001", "1.000000001"},
+            {1, 10, "1.000000010", "1.000000010"},
+            {1, 100, "1.000000100", "1.000000100"},
+            {1, 1000, "1.000001000", "1.000001000"},
+            {1, 10000, "1.000010000", "1.000010000"},
+            {1, 100000, "1.000100000", "1.000100000"},
+            {1, 1000000, "1.001000000", "1.001000000"},
+            {1, 10000000, "1.010000000", "1.010000000"},
+            {1, 100000000, "1.100000000", "1.100000000"},
+            // Mixed values.
+            {0, 123456789, "0.123456789", "0.123456789"},
+            {42, 500000000, "42.500000000", "42.500000000"},
+    };
+
+    for (auto& tc : cases) {
+        decimal12_t stored;
+        stored.integer = tc.integer;
+        stored.fraction = tc.fraction;
+
+        auto decimal_field = Field::create_field_from_olap_value<TYPE_DECIMALV2>(stored);
+        std::string serde_str = serde->to_olap_string(decimal_field);
+
+        EXPECT_EQ(tc.expected_serde, serde_str)
+                << "serde mismatch for DECIMALV2 (" << tc.integer << ", " << tc.fraction << ")";
+    }
+}
+
+// DECIMAL32: per the table below, the expected string is the stored int32 printed as a
+// plain integer, whatever precision/scale the data type was created with.
+TEST_F(OlapTypeTest, decimal32_type) {
+    struct TestCase {
+        int32_t value;
+        int precision;
+        int scale;
+        std::string expected;
+        std::string expected_serde;
+    };
+    std::vector<TestCase> cases = {
+            {0, 9, 0, "0", "0"},
+            {12345, 9, 0, "12345", "12345"},
+            {12345, 9, 2, "12345", "12345"},
+            {12345, 9, 4, "12345", "12345"},
+            {-12345, 9, 2, "-12345", "-12345"},
+            {1, 9, 9, "1", "1"},
+            {999999999, 9, 0, "999999999", "999999999"},
+            {-999999999, 9, 0, "-999999999", "-999999999"},
+            {100000000, 9, 9, "100000000", "100000000"},
+    };
+
+    for (auto& tc : cases) {
+        // The data type depends on the row's precision and scale, so build it per case.
+        auto decimal_type = DataTypeFactory::instance().create_data_type(
+                FieldType::OLAP_FIELD_TYPE_DECIMAL32, tc.precision, tc.scale);
+        auto serde = decimal_type->get_serde();
+
+        int32_t stored = tc.value;
+        auto decimal_field = Field::create_field_from_olap_value<TYPE_DECIMAL32>(stored);
+        std::string serde_str = serde->to_olap_string(decimal_field);
+
+        EXPECT_EQ(tc.expected_serde, serde_str)
+                << "serde mismatch for DECIMAL32 value=" << tc.value;
+    }
+}
+
+// DECIMAL64: per the table below, the expected string is the stored int64 printed as a
+// plain integer, whatever precision/scale the data type was created with.
+TEST_F(OlapTypeTest, decimal64_type) {
+    struct TestCase {
+        int64_t value;
+        int precision;
+        int scale;
+        std::string expected;
+        std::string expected_serde;
+    };
+    std::vector<TestCase> cases = {
+            {0, 18, 0, "0", "0"},
+            {123456789012345678L, 18, 0, "123456789012345678", "123456789012345678"},
+            {123456789012345678L, 18, 6, "123456789012345678", "123456789012345678"},
+            {-123456789012345678L, 18, 6, "-123456789012345678", "-123456789012345678"},
+            {1, 18, 18, "1", "1"},
+            {100000, 18, 5, "100000", "100000"},
+            {1000000000000L, 18, 6, "1000000000000", "1000000000000"},
+    };
+
+    for (auto& tc : cases) {
+        // The data type depends on the row's precision and scale, so build it per case.
+        auto decimal_type = DataTypeFactory::instance().create_data_type(
+                FieldType::OLAP_FIELD_TYPE_DECIMAL64, tc.precision, tc.scale);
+        auto serde = decimal_type->get_serde();
+
+        int64_t stored = tc.value;
+        auto decimal_field = Field::create_field_from_olap_value<TYPE_DECIMAL64>(stored);
+        std::string serde_str = serde->to_olap_string(decimal_field);
+
+        EXPECT_EQ(tc.expected_serde, serde_str)
+                << "serde mismatch for DECIMAL64 value=" << tc.value;
+    }
+}
+
+// DECIMAL128I: per the table below, the expected string is the stored 128-bit integer
+// printed as a plain integer, whatever precision/scale the data type was created with.
+TEST_F(OlapTypeTest, decimal128i_type) {
+    struct TestCase {
+        int128_t value;
+        int precision;
+        int scale;
+        std::string expected;
+        std::string expected_serde;
+    };
+    std::vector<TestCase> cases = {
+            {0, 38, 0, "0", "0"},
+            {123456789, 38, 0, "123456789", "123456789"},
+            {123456789, 38, 6, "123456789", "123456789"},
+            {-123456789, 38, 6, "-123456789", "-123456789"},
+            {1, 38, 38, "1", "1"},
+            // 27 nines: too wide for int64, so the value is assembled in 128-bit arithmetic.
+            {(int128_t)999999999999999999L * 1000000000L + 999999999, 38, 9,
+             "999999999999999999999999999", "999999999999999999999999999"},
+    };
+
+    for (auto& tc : cases) {
+        // The data type depends on the row's precision and scale, so build it per case.
+        auto decimal_type = DataTypeFactory::instance().create_data_type(
+                FieldType::OLAP_FIELD_TYPE_DECIMAL128I, tc.precision, tc.scale);
+        auto serde = decimal_type->get_serde();
+
+        int128_t stored = tc.value;
+        auto decimal_field = Field::create_field_from_olap_value<TYPE_DECIMAL128I>(stored);
+        std::string serde_str = serde->to_olap_string(decimal_field);
+
+        EXPECT_EQ(tc.expected_serde, serde_str)
+                << "serde mismatch for DECIMAL128I expected=" << tc.expected;
+    }
+}
+
+// DECIMAL256: per the table below, the expected string is the stored wide::Int256
+// printed as a plain integer, whatever precision/scale the data type was created with.
+TEST_F(OlapTypeTest, decimal256_type) {
+    struct TestCase {
+        wide::Int256 value;
+        int precision;
+        int scale;
+        std::string expected;
+        std::string expected_serde;
+    };
+    std::vector<TestCase> cases = {
+            {wide::Int256(0), 76, 0, "0", "0"},
+            {wide::Int256(123456789), 76, 0, "123456789", "123456789"},
+            {wide::Int256(123456789), 76, 6, "123456789", "123456789"},
+            {wide::Int256(-123456789), 76, 6, "-123456789", "-123456789"},
+    };
+
+    for (auto& tc : cases) {
+        // The data type depends on the row's precision and scale, so build it per case.
+        auto decimal_type = DataTypeFactory::instance().create_data_type(
+                FieldType::OLAP_FIELD_TYPE_DECIMAL256, tc.precision, tc.scale);
+        auto serde = decimal_type->get_serde();
+
+        wide::Int256 stored = tc.value;
+        auto decimal_field = Field::create_field_from_olap_value<TYPE_DECIMAL256>(stored);
+        std::string serde_str = serde->to_olap_string(decimal_field);
+
+        EXPECT_EQ(tc.expected_serde, serde_str)
+                << "serde mismatch for DECIMAL256 expected=" << tc.expected;
+    }
+}
+
+// FLOAT: checks the text form of ordinary values, signed zero, NaN, the infinities and
+// the numeric_limits extremes (the extremes are expected with seven significant digits).
+TEST_F(OlapTypeTest, float_type) {
+    using limits = std::numeric_limits<float>;
+
+    auto float_type = DataTypeFactory::instance().create_data_type(TYPE_FLOAT, false);
+    auto serde = float_type->get_serde();
+
+    struct TestCase {
+        float value;
+        std::string expected;
+        std::string expected_serde;
+    };
+    std::vector<TestCase> cases = {
+            // Ordinary values and signed zero.
+            {0.0f, "0", "0"},
+            {-0.0f, "-0", "-0"},
+            {1.0f, "1", "1"},
+            {-1.0f, "-1", "-1"},
+            {0.5f, "0.5", "0.5"},
+            {1.5f, "1.5", "1.5"},
+            {0.25f, "0.25", "0.25"},
+            {100.0f, "100", "100"},
+            {0.001f, "0.001", "0.001"},
+            // Non-finite values.
+            {limits::quiet_NaN(), "NaN", "NaN"},
+            {limits::infinity(), "Infinity", "Infinity"},
+            {-limits::infinity(), "-Infinity", "-Infinity"},
+            // Extremes of the representable range.
+            {limits::max(), "3.402823e+38", "3.402823e+38"},
+            {limits::lowest(), "-3.402823e+38", "-3.402823e+38"},
+            {limits::min(), "1.175494e-38", "1.175494e-38"},
+            {limits::denorm_min(), "1.401298e-45", "1.401298e-45"},
+    };
+
+    for (auto& tc : cases) {
+        auto float_field = Field::create_field<TYPE_FLOAT>(tc.value);
+        std::string serde_str = serde->to_olap_string(float_field);
+
+        EXPECT_EQ(tc.expected_serde, serde_str)
+                << "serde mismatch for FLOAT expected='" << tc.expected << "'";
+    }
+}
+
+// DOUBLE: checks the text form of ordinary values, signed zero, NaN, the infinities and
+// the numeric_limits extremes (the extremes are expected with sixteen significant digits).
+TEST_F(OlapTypeTest, double_type) {
+    using limits = std::numeric_limits<double>;
+
+    auto double_type = DataTypeFactory::instance().create_data_type(TYPE_DOUBLE, false);
+    auto serde = double_type->get_serde();
+
+    struct TestCase {
+        double value;
+        std::string expected;
+        std::string expected_serde;
+    };
+    std::vector<TestCase> cases = {
+            // Ordinary values and signed zero.
+            {0.0, "0", "0"},
+            {-0.0, "-0", "-0"},
+            {1.0, "1", "1"},
+            {-1.0, "-1", "-1"},
+            {0.5, "0.5", "0.5"},
+            {1.5, "1.5", "1.5"},
+            {0.25, "0.25", "0.25"},
+            {100.0, "100", "100"},
+            {3.141592653589793, "3.141592653589793", "3.141592653589793"},
+            {0.001, "0.001", "0.001"},
+            // Non-finite values.
+            {limits::quiet_NaN(), "NaN", "NaN"},
+            {limits::infinity(), "Infinity", "Infinity"},
+            {-limits::infinity(), "-Infinity", "-Infinity"},
+            // Extremes of the representable range.
+            {limits::max(), "1.797693134862316e+308", "1.797693134862316e+308"},
+            {limits::lowest(), "-1.797693134862316e+308", "-1.797693134862316e+308"},
+            {limits::min(), "2.225073858507201e-308", "2.225073858507201e-308"},
+    };
+
+    for (auto& tc : cases) {
+        auto double_field = Field::create_field<TYPE_DOUBLE>(tc.value);
+        std::string serde_str = serde->to_olap_string(double_field);
+
+        EXPECT_EQ(tc.expected_serde, serde_str)
+                << "serde mismatch for DOUBLE expected='" << tc.expected << "'";
+    }
+}
+
+// BOOLEAN: false and true are expected to serialize as "0" and "1".
+TEST_F(OlapTypeTest, bool_type) {
+    auto bool_type = DataTypeFactory::instance().create_data_type(TYPE_BOOLEAN, false);
+    auto serde = bool_type->get_serde();
+
+    struct TestCase {
+        uint8_t value;
+        std::string expected;
+        std::string expected_serde;
+    };
+    std::vector<TestCase> cases = {
+            {0, "0", "0"},
+            {1, "1", "1"},
+    };
+
+    for (auto& tc : cases) {
+        auto bool_field = Field::create_field<TYPE_BOOLEAN>(static_cast<bool>(tc.value));
+        std::string serde_str = serde->to_olap_string(bool_field);
+
+        // Cast to int so the failure message shows a number rather than a raw character.
+        EXPECT_EQ(tc.expected_serde, serde_str)
+                << "serde mismatch for BOOL=" << static_cast<int>(tc.value);
+    }
+}
+
+// TINYINT: zero, +/-1 and both int8 limits must serialize as plain decimal text.
+TEST_F(OlapTypeTest, tinyint_type) {
+    auto tinyint_type = DataTypeFactory::instance().create_data_type(TYPE_TINYINT, false);
+    auto serde = tinyint_type->get_serde();
+
+    struct TestCase {
+        int8_t value;
+        std::string expected;
+        std::string expected_serde;
+    };
+    std::vector<TestCase> cases = {
+            {0, "0", "0"},
+            {1, "1", "1"},
+            {-1, "-1", "-1"},
+            {127, "127", "127"},
+            {-128, "-128", "-128"},
+    };
+
+    for (auto& tc : cases) {
+        auto tinyint_field = Field::create_field<TYPE_TINYINT>(tc.value);
+        std::string serde_str = serde->to_olap_string(tinyint_field);
+
+        // Cast to int so the failure message shows a number rather than a raw character.
+        EXPECT_EQ(tc.expected_serde, serde_str)
+                << "serde mismatch for TINYINT=" << static_cast<int>(tc.value);
+    }
+}
+
+// SMALLINT: zero, +/-1 and both int16 limits must serialize as plain decimal text.
+TEST_F(OlapTypeTest, smallint_type) {
+    auto smallint_type = DataTypeFactory::instance().create_data_type(TYPE_SMALLINT, false);
+    auto serde = smallint_type->get_serde();
+
+    struct TestCase {
+        int16_t value;
+        std::string expected;
+        std::string expected_serde;
+    };
+    std::vector<TestCase> cases = {
+            {0, "0", "0"},
+            {1, "1", "1"},
+            {-1, "-1", "-1"},
+            {32767, "32767", "32767"},
+            {-32768, "-32768", "-32768"},
+    };
+
+    for (auto& tc : cases) {
+        auto smallint_field = Field::create_field<TYPE_SMALLINT>(tc.value);
+        std::string serde_str = serde->to_olap_string(smallint_field);
+
+        EXPECT_EQ(tc.expected_serde, serde_str) << "serde mismatch for SMALLINT=" << tc.value;
+    }
+}
+
+// INT: zero, +/-1 and both int32 limits must serialize as plain decimal text.
+TEST_F(OlapTypeTest, int_type) {
+    auto int_type = DataTypeFactory::instance().create_data_type(TYPE_INT, false);
+    auto serde = int_type->get_serde();
+
+    struct TestCase {
+        int32_t value;
+        std::string expected;
+        std::string expected_serde;
+    };
+    std::vector<TestCase> cases = {
+            {0, "0", "0"},
+            {1, "1", "1"},
+            {-1, "-1", "-1"},
+            {2147483647, "2147483647", "2147483647"},
+            {-2147483648, "-2147483648", "-2147483648"},
+    };
+
+    for (auto& tc : cases) {
+        auto int_field = Field::create_field<TYPE_INT>(tc.value);
+        std::string serde_str = serde->to_olap_string(int_field);
+
+        EXPECT_EQ(tc.expected_serde, serde_str) << "serde mismatch for INT=" << tc.value;
+    }
+}
+
+// BIGINT: zero, +/-1 and both int64 limits must serialize as plain decimal text.
+TEST_F(OlapTypeTest, bigint_type) {
+    auto bigint_type = DataTypeFactory::instance().create_data_type(TYPE_BIGINT, false);
+    auto serde = bigint_type->get_serde();
+
+    struct TestCase {
+        int64_t value;
+        std::string expected;
+        std::string expected_serde;
+    };
+    std::vector<TestCase> cases = {
+            {0, "0", "0"},
+            {1, "1", "1"},
+            {-1, "-1", "-1"},
+            {9223372036854775807L, "9223372036854775807", "9223372036854775807"},
+            // The minimum is spelled as (-max - 1) because 9223372036854775808 is not a
+            // valid signed 64-bit literal.
+            {-9223372036854775807L - 1, "-9223372036854775808", "-9223372036854775808"},
+    };
+
+    for (auto& tc : cases) {
+        auto bigint_field = Field::create_field<TYPE_BIGINT>(tc.value);
+        std::string serde_str = serde->to_olap_string(bigint_field);
+
+        EXPECT_EQ(tc.expected_serde, serde_str)
+                << "serde mismatch for BIGINT expected=" << tc.expected;
+    }
+}
+
+// LARGEINT: small values, the int64 limits and both 128-bit limits must serialize as
+// plain decimal text.
+TEST_F(OlapTypeTest, largeint_type) {
+    auto largeint_type = DataTypeFactory::instance().create_data_type(TYPE_LARGEINT, false);
+    auto serde = largeint_type->get_serde();
+
+    struct TestCase {
+        int128_t value;
+        std::string expected;
+        std::string expected_serde;
+    };
+    std::vector<TestCase> cases = {
+            {(int128_t)0, "0", "0"},
+            {(int128_t)1, "1", "1"},
+            {(int128_t)-1, "-1", "-1"},
+            // The int64 limits, widened to 128 bits.
+            {(int128_t)9223372036854775807L, "9223372036854775807", "9223372036854775807"},
+            {(int128_t)(-9223372036854775807L - 1), "-9223372036854775808",
+             "-9223372036854775808"},
+            // The 128-bit limits: only the top bit set is the minimum, its complement the maximum.
+            {~((int128_t)(1) << 127), "170141183460469231731687303715884105727",
+             "170141183460469231731687303715884105727"},
+            {(int128_t)(1) << 127, "-170141183460469231731687303715884105728",
+             "-170141183460469231731687303715884105728"},
+    };
+
+    for (auto& tc : cases) {
+        auto largeint_field = Field::create_field<TYPE_LARGEINT>(tc.value);
+        std::string serde_str = serde->to_olap_string(largeint_field);
+
+        EXPECT_EQ(tc.expected_serde, serde_str)
+                << "serde mismatch for LARGEINT expected=" << tc.expected;
+    }
+}
+
+// IPV4: a 32-bit value must serialize as dotted-quad text, with the most significant
+// byte printed first (0x7F000001 -> "127.0.0.1").
+TEST_F(OlapTypeTest, ipv4_type) {
+    auto ipv4_type = DataTypeFactory::instance().create_data_type(TYPE_IPV4, false);
+    auto serde = ipv4_type->get_serde();
+
+    struct TestCase {
+        uint32_t value;
+        std::string expected;
+        std::string expected_serde;
+    };
+    std::vector<TestCase> cases = {
+            {0, "0.0.0.0", "0.0.0.0"},
+            {0xFFFFFFFF, "255.255.255.255", "255.255.255.255"},
+            {0x7F000001, "127.0.0.1", "127.0.0.1"},
+            {0xC0A80001, "192.168.0.1", "192.168.0.1"},
+    };
+
+    for (auto& tc : cases) {
+        auto ipv4_field = Field::create_field<TYPE_IPV4>(tc.value);
+        std::string serde_str = serde->to_olap_string(ipv4_field);
+
+        EXPECT_EQ(tc.expected_serde, serde_str)
+                << "serde mismatch for IPV4 expected=" << tc.expected;
+    }
+}
+
+// IPV6: a 128-bit value must serialize as compressed IPv6 text ("::", "::1") or, with
+// every bit set, as eight "ffff" groups.
+TEST_F(OlapTypeTest, ipv6_type) {
+    auto ipv6_type = DataTypeFactory::instance().create_data_type(TYPE_IPV6, false);
+    auto serde = ipv6_type->get_serde();
+
+    struct TestCase {
+        uint128_t value;
+        std::string expected;
+        std::string expected_serde;
+    };
+    std::vector<TestCase> cases = {
+            {(uint128_t)0, "::", "::"},
+            {(uint128_t)1, "::1", "::1"},
+            // -1 converted to an unsigned 128-bit value sets every bit.
+            {(uint128_t)(-1), "ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff",
+             "ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff"},
+    };
+
+    for (auto& tc : cases) {
+        uint128_t address = tc.value;
+        auto ipv6_field = Field::create_field<TYPE_IPV6>(address);
+        std::string serde_str = serde->to_olap_string(ipv6_field);
+
+        EXPECT_EQ(tc.expected_serde, serde_str)
+                << "serde mismatch for IPV6 expected=" << tc.expected;
+    }
+}
+
+// TIMESTAMPTZ (scale 6): packed 64-bit values are expected to serialize as
+// "YYYY-MM-DD hh:mm:ss.ffffff+00:00", with six fractional digits even when the
+// microsecond part is zero.
+TEST_F(OlapTypeTest, timestamptz_type) {
+    auto timestamptz_type =
+            DataTypeFactory::instance().create_data_type(TYPE_TIMESTAMPTZ, false, 0, 6);
+    auto serde = timestamptz_type->get_serde();
+
+    // Bit layout used by this test: microsecond in the low bits, then second << 20,
+    // minute << 26, hour << 32, day << 37, month << 42 and year << 46.
+    auto make_datetimev2 = [](int year, int month, int day, int hour, int minute, int second,
+                              int microsecond) -> uint64_t {
+        uint64_t packed = static_cast<uint64_t>(microsecond);
+        packed |= static_cast<uint64_t>(second) << 20;
+        packed |= static_cast<uint64_t>(minute) << 26;
+        packed |= static_cast<uint64_t>(hour) << 32;
+        packed |= static_cast<uint64_t>(day) << 37;
+        packed |= static_cast<uint64_t>(month) << 42;
+        packed |= static_cast<uint64_t>(year) << 46;
+        return packed;
+    };
+
+    struct TestCase {
+        uint64_t olap_value;
+        std::string expected;
+        std::string expected_serde;
+    };
+    std::vector<TestCase> cases = {
+            {make_datetimev2(2023, 6, 15, 12, 34, 56, 123456), "2023-06-15 12:34:56.123456+00:00",
+             "2023-06-15 12:34:56.123456+00:00"},
+            {make_datetimev2(2023, 6, 15, 12, 34, 56, 0), "2023-06-15 12:34:56.000000+00:00",
+             "2023-06-15 12:34:56.000000+00:00"},
+            {make_datetimev2(2000, 1, 1, 0, 0, 0, 0), "2000-01-01 00:00:00.000000+00:00",
+             "2000-01-01 00:00:00.000000+00:00"},
+    };
+
+    for (auto& tc : cases) {
+        auto timestamptz_field =
+                Field::create_field_from_olap_value<TYPE_TIMESTAMPTZ>(tc.olap_value);
+        std::string serde_str = serde->to_olap_string(timestamptz_field);
+
+        EXPECT_EQ(tc.expected_serde, serde_str)
+                << "serde mismatch for TIMESTAMPTZ expected=" << tc.expected;
+    }
+}
} // namespace doris
\ No newline at end of file
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]