This is an automated email from the ASF dual-hosted git repository.
zclll pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 32b7d910992 [refine](from string) remove from string function in
datatype (#54375)
32b7d910992 is described below
commit 32b7d9109923226d136a71b0f47614ed41e518e7
Author: Mryange <[email protected]>
AuthorDate: Fri Aug 8 11:23:56 2025 +0800
[refine](from string) remove from string function in datatype (#54375)
We should use the from_string functions in DataTypeSerDe instead of the ones in DataType.
---
be/src/olap/base_tablet.cpp | 7 +-
.../rowset/segment_v2/vertical_segment_writer.cpp | 6 +-
be/src/vec/data_types/data_type.cpp | 6 -
be/src/vec/data_types/data_type.h | 10 +-
be/src/vec/data_types/data_type_array.cpp | 131 ------------------
be/src/vec/data_types/data_type_array.h | 1 -
be/src/vec/data_types/data_type_bitmap.cpp | 12 --
be/src/vec/data_types/data_type_bitmap.h | 1 -
be/src/vec/data_types/data_type_date.cpp | 11 --
be/src/vec/data_types/data_type_date.h | 2 -
.../data_types/data_type_date_or_datetime_v2.cpp | 22 ---
.../vec/data_types/data_type_date_or_datetime_v2.h | 2 -
be/src/vec/data_types/data_type_date_time.cpp | 11 --
be/src/vec/data_types/data_type_date_time.h | 2 -
be/src/vec/data_types/data_type_decimal.cpp | 17 ---
be/src/vec/data_types/data_type_decimal.h | 1 -
be/src/vec/data_types/data_type_hll.cpp | 13 --
be/src/vec/data_types/data_type_hll.h | 1 -
be/src/vec/data_types/data_type_ipv4.cpp | 11 --
be/src/vec/data_types/data_type_ipv4.h | 1 -
be/src/vec/data_types/data_type_ipv6.cpp | 11 --
be/src/vec/data_types/data_type_ipv6.h | 1 -
be/src/vec/data_types/data_type_jsonb.cpp | 11 --
be/src/vec/data_types/data_type_jsonb.h | 1 -
be/src/vec/data_types/data_type_map.cpp | 150 ---------------------
be/src/vec/data_types/data_type_map.h | 1 -
be/src/vec/data_types/data_type_nullable.cpp | 18 ---
be/src/vec/data_types/data_type_nullable.h | 1 -
be/src/vec/data_types/data_type_number_base.cpp | 34 -----
be/src/vec/data_types/data_type_number_base.h | 1 -
be/src/vec/data_types/data_type_string.cpp | 6 -
be/src/vec/data_types/data_type_string.h | 1 -
be/src/vec/data_types/data_type_struct.cpp | 119 ----------------
be/src/vec/data_types/data_type_struct.h | 2 -
be/src/vec/data_types/serde/data_type_serde.cpp | 10 ++
be/src/vec/data_types/serde/data_type_serde.h | 2 +
be/src/vec/functions/cast/cast_to_jsonb.h | 11 +-
.../data_types/serde/data_type_serde_text_test.cpp | 45 +++----
38 files changed, 51 insertions(+), 642 deletions(-)
diff --git a/be/src/olap/base_tablet.cpp b/be/src/olap/base_tablet.cpp
index 6feb02a4b09..de4ad4a94b4 100644
--- a/be/src/olap/base_tablet.cpp
+++ b/be/src/olap/base_tablet.cpp
@@ -989,10 +989,9 @@ Status BaseTablet::generate_default_value_block(const
TabletSchema& schema,
const auto& column = schema.column(cids[i]);
if (column.has_default_value()) {
const auto& default_value = default_values[i];
- vectorized::ReadBuffer rb(const_cast<char*>(default_value.c_str()),
- default_value.size());
- RETURN_IF_ERROR(ref_block.get_by_position(i).type->from_string(
- rb, mutable_default_value_columns[i].get()));
+ StringRef str(default_value);
+ RETURN_IF_ERROR(ref_block.get_by_position(i).type->get_serde()->default_from_string(
+ str, *mutable_default_value_columns[i]));
}
}
default_value_block.set_columns(std::move(mutable_default_value_columns));
diff --git a/be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp
b/be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp
index 1affbc148f6..6f597a37805 100644
--- a/be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp
+++ b/be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp
@@ -802,9 +802,9 @@ Status
VerticalSegmentWriter::_generate_encoded_default_seq_value(const TabletSc
if (seq_column.has_default_value()) {
auto idx = tablet_schema.sequence_col_idx() -
tablet_schema.num_key_columns();
const auto& default_value = info.default_values[idx];
- vectorized::ReadBuffer rb(const_cast<char*>(default_value.c_str()),
default_value.size());
- RETURN_IF_ERROR(block.get_by_position(0).type->from_string(
- rb, block.get_by_position(0).column->assume_mutable().get()));
+ StringRef str {default_value};
+ RETURN_IF_ERROR(block.get_by_position(0).type->get_serde()->default_from_string(
+ str, *block.get_by_position(0).column->assume_mutable().get()));
} else {
block.get_by_position(0).column->assume_mutable()->insert_default();
diff --git a/be/src/vec/data_types/data_type.cpp
b/be/src/vec/data_types/data_type.cpp
index 7df61d9a9f9..5cb761c7036 100644
--- a/be/src/vec/data_types/data_type.cpp
+++ b/be/src/vec/data_types/data_type.cpp
@@ -82,12 +82,6 @@ std::string IDataType::to_string(const IColumn& column,
size_t row_num) const {
"Data type {} to_string not implement.",
get_name());
return "";
}
-Status IDataType::from_string(ReadBuffer& rb, IColumn* column) const {
- throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR,
- "Data type {} from_string not implement.",
get_name());
-
- return Status::OK();
-}
void IDataType::to_string_batch(const IColumn& column, ColumnString&
column_to) const {
const auto size = column.size();
diff --git a/be/src/vec/data_types/data_type.h
b/be/src/vec/data_types/data_type.h
index 5cb07bbe4a1..891db5a6f3e 100644
--- a/be/src/vec/data_types/data_type.h
+++ b/be/src/vec/data_types/data_type.h
@@ -42,6 +42,7 @@
#include "vec/common/cow.h"
#include "vec/core/types.h"
#include "vec/data_types/serde/data_type_serde.h"
+#include "vec/io/reader_buffer.h"
namespace doris {
class PColumnMeta;
@@ -94,9 +95,6 @@ public:
virtual std::string to_string(const IColumn& column, size_t row_num) const;
virtual void to_string_batch(const IColumn& column, ColumnString&
column_to) const;
- // only for compound type now.
- virtual Status from_string(ReadBuffer& rb, IColumn* column) const;
-
// get specific serializer or deserializer
virtual DataTypeSerDeSPtr get_serde(int nesting_level = 1) const = 0;
@@ -228,6 +226,12 @@ public:
to_protobuf(ptype, node, scalar_type);
}
#ifdef BE_TEST
+ // only used in beut
+ Status from_string(ReadBuffer& rb, IColumn* column) const {
+ StringRef str = {rb.position(), rb.count()};
+ return get_serde()->default_from_string(str, *column);
+ }
+
TTypeDesc to_thrift() const {
TTypeDesc thrift_type;
to_thrift(thrift_type);
diff --git a/be/src/vec/data_types/data_type_array.cpp
b/be/src/vec/data_types/data_type_array.cpp
index 25d308dc1c7..eebd8b5147a 100644
--- a/be/src/vec/data_types/data_type_array.cpp
+++ b/be/src/vec/data_types/data_type_array.cpp
@@ -233,137 +233,6 @@ std::string DataTypeArray::to_string(const IColumn&
column, size_t row_num) cons
return str;
}
-bool next_element_from_string(ReadBuffer& rb, StringRef& output, bool&
has_quota) {
- StringRef element(rb.position(), 0);
- has_quota = false;
- if (rb.eof()) {
- return false;
- }
-
- // ltrim
- while (!rb.eof() && isspace(*rb.position())) {
- ++rb.position();
- element.data = rb.position();
- }
-
- // parse string
- if (*rb.position() == '"' || *rb.position() == '\'') {
- const char str_sep = *rb.position();
- size_t str_len = 1;
- // search until next '"' or '\''
- while (str_len < rb.count() && *(rb.position() + str_len) != str_sep) {
- ++str_len;
- }
- // invalid string
- if (str_len >= rb.count()) {
- rb.position() = rb.end();
- return false;
- }
- has_quota = true;
- rb.position() += str_len + 1;
- element.size += str_len + 1;
- }
-
- // parse array element until array separator ',' or end ']'
- while (!rb.eof() && (*rb.position() != ',') && (rb.count() != 1 ||
*rb.position() != ']')) {
- // invalid elements such as ["123" 456,"789" 777]
- // correct elements such as ["123" ,"789" ]
- if (has_quota && !isspace(*rb.position())) {
- return false;
- }
- ++rb.position();
- ++element.size;
- }
- // invalid array element
- if (rb.eof()) {
- return false;
- }
- // adjust read buffer position to first char of next array element
- ++rb.position();
-
- // rtrim
- while (element.size > 0 && isspace(element.data[element.size - 1])) {
- --element.size;
- }
-
- // trim '"' and '\'' for string
- if (element.size >= 2 && (element.data[0] == '"' || element.data[0] ==
'\'') &&
- element.data[0] == element.data[element.size - 1]) {
- ++element.data;
- element.size -= 2;
- }
- output = element;
- return true;
-}
-
-Status DataTypeArray::from_string(ReadBuffer& rb, IColumn* column) const {
- DCHECK(!rb.eof());
- // only support one level now
- auto* array_column = assert_cast<ColumnArray*>(column);
- auto& offsets = array_column->get_offsets();
-
- IColumn& nested_column = array_column->get_data();
- DCHECK(nested_column.is_nullable());
- if (*rb.position() != '[') {
- return Status::InvalidArgument("Array does not start with '['
character, found '{}'",
- *rb.position());
- }
- if (*(rb.end() - 1) != ']') {
- return Status::InvalidArgument("Array does not end with ']' character,
found '{}'",
- *(rb.end() - 1));
- }
- // empty array []
- if (rb.count() == 2) {
- offsets.push_back(offsets.back());
- return Status::OK();
- }
- ++rb.position();
-
- size_t element_num = 0;
- // parse array element until end of array
- while (!rb.eof()) {
- StringRef element(rb.position(), rb.count());
- bool has_quota = false;
- if (!next_element_from_string(rb, element, has_quota)) {
- // we should do array element column revert if error
- nested_column.pop_back(element_num);
- return Status::InvalidArgument("Cannot read array element from
text '{}'",
- element.to_string());
- }
-
- // handle empty element
- if (element.size == 0) {
- auto& nested_null_col =
reinterpret_cast<ColumnNullable&>(nested_column);
- nested_null_col.get_nested_column().insert_default();
- nested_null_col.get_null_map_data().push_back(0);
- ++element_num;
- continue;
- }
-
- // handle null element, need to distinguish null and "null"
- if (!has_quota && element.size == 4 && strncmp(element.data, "null",
4) == 0) {
- // insert null
- auto& nested_null_col =
reinterpret_cast<ColumnNullable&>(nested_column);
- nested_null_col.get_nested_column().insert_default();
- nested_null_col.get_null_map_data().push_back(1);
- ++element_num;
- continue;
- }
-
- // handle normal element
- ReadBuffer read_buffer(const_cast<char*>(element.data), element.size);
- auto st = nested->from_string(read_buffer, &nested_column);
- if (!st.ok()) {
- // we should do array element column revert if error
- nested_column.pop_back(element_num);
- return st;
- }
- ++element_num;
- }
- offsets.push_back(offsets.back() + element_num);
- return Status::OK();
-}
-
FieldWithDataType DataTypeArray::get_field_with_data_type(const IColumn&
column,
size_t row_num)
const {
const auto& array_column = assert_cast<const ColumnArray&>(column);
diff --git a/be/src/vec/data_types/data_type_array.h
b/be/src/vec/data_types/data_type_array.h
index 290767d3cf2..7072183144c 100644
--- a/be/src/vec/data_types/data_type_array.h
+++ b/be/src/vec/data_types/data_type_array.h
@@ -107,7 +107,6 @@ public:
std::string to_string(const IColumn& column, size_t row_num) const
override;
void to_string(const IColumn& column, size_t row_num, BufferWritable&
ostr) const override;
- Status from_string(ReadBuffer& rb, IColumn* column) const override;
using SerDeType = DataTypeArraySerDe;
DataTypeSerDeSPtr get_serde(int nesting_level = 1) const override {
diff --git a/be/src/vec/data_types/data_type_bitmap.cpp
b/be/src/vec/data_types/data_type_bitmap.cpp
index 60af8506066..b35408261ee 100644
--- a/be/src/vec/data_types/data_type_bitmap.cpp
+++ b/be/src/vec/data_types/data_type_bitmap.cpp
@@ -186,16 +186,4 @@ void DataTypeBitMap::to_string(const IColumn& column,
size_t row_num, BufferWrit
data.write_to(const_cast<char*>(buffer.data()));
ostr.write(buffer.c_str(), buffer.size());
}
-
-Status DataTypeBitMap::from_string(ReadBuffer& rb, IColumn* column) const {
- auto& data_column = assert_cast<ColumnBitmap&>(*column);
- auto& data = data_column.get_data();
-
- BitmapValue value;
- if (!value.deserialize(rb.to_string().c_str())) {
- return Status::InternalError("deserialize BITMAP from string fail!");
- }
- data.push_back(std::move(value));
- return Status::OK();
-}
} // namespace doris::vectorized
diff --git a/be/src/vec/data_types/data_type_bitmap.h
b/be/src/vec/data_types/data_type_bitmap.h
index dd7748f2d5b..12e1a158fee 100644
--- a/be/src/vec/data_types/data_type_bitmap.h
+++ b/be/src/vec/data_types/data_type_bitmap.h
@@ -88,7 +88,6 @@ public:
return data.to_string();
}
void to_string(const IColumn& column, size_t row_num, BufferWritable&
ostr) const override;
- Status from_string(ReadBuffer& rb, IColumn* column) const override;
Field get_default() const override {
return Field::create_field<TYPE_BITMAP>(BitmapValue::empty_bitmap());
diff --git a/be/src/vec/data_types/data_type_date.cpp
b/be/src/vec/data_types/data_type_date.cpp
index 47d20abd677..1df08306422 100644
--- a/be/src/vec/data_types/data_type_date.cpp
+++ b/be/src/vec/data_types/data_type_date.cpp
@@ -75,17 +75,6 @@ void DataTypeDate::to_string(const IColumn& column, size_t
row_num, BufferWritab
ostr.write(buf, pos - buf - 1);
}
-Status DataTypeDate::from_string(ReadBuffer& rb, IColumn* column) const {
- auto* column_data = static_cast<ColumnDate*>(column);
- Int64 val = 0;
- if (!read_date_text_impl<Int64>(val, rb)) {
- return Status::InvalidArgument("parse date fail, string: '{}'",
- std::string(rb.position(),
rb.count()).c_str());
- }
- column_data->insert_value(val);
- return Status::OK();
-}
-
void DataTypeDate::cast_to_date(Int64& x) {
auto value = binary_cast<Int64, VecDateTimeValue>(x);
value.cast_to_date();
diff --git a/be/src/vec/data_types/data_type_date.h
b/be/src/vec/data_types/data_type_date.h
index 5670838258f..80627ed1441 100644
--- a/be/src/vec/data_types/data_type_date.h
+++ b/be/src/vec/data_types/data_type_date.h
@@ -65,8 +65,6 @@ public:
value.to_string(buf);
return buf;
}
- Status from_string(ReadBuffer& rb, IColumn* column) const override;
-
static void cast_to_date(Int64& x);
Field get_field(const TExprNode& node) const override {
VecDateTimeValue value;
diff --git a/be/src/vec/data_types/data_type_date_or_datetime_v2.cpp
b/be/src/vec/data_types/data_type_date_or_datetime_v2.cpp
index 306aafde992..1e3db42befb 100644
--- a/be/src/vec/data_types/data_type_date_or_datetime_v2.cpp
+++ b/be/src/vec/data_types/data_type_date_or_datetime_v2.cpp
@@ -101,17 +101,6 @@ void DataTypeDateV2::to_string(const IColumn& column,
size_t row_num, BufferWrit
ostr.write(buf, pos - buf - 1);
}
-Status DataTypeDateV2::from_string(ReadBuffer& rb, IColumn* column) const {
- auto* column_data = assert_cast<ColumnDateV2*>(column);
- UInt32 val = 0;
- if (!read_date_v2_text_impl<UInt32>(val, rb)) {
- return Status::InvalidArgument("parse date fail, string: '{}'",
- std::string(rb.position(),
rb.count()).c_str());
- }
- column_data->insert_value(val);
- return Status::OK();
-}
-
MutableColumnPtr DataTypeDateV2::create_column() const {
return DataTypeNumberBase<PrimitiveType::TYPE_DATEV2>::create_column();
}
@@ -198,17 +187,6 @@ void DataTypeDateTimeV2::to_string(const IColumn& column,
size_t row_num,
ostr.write(buf, pos - buf - 1);
}
-Status DataTypeDateTimeV2::from_string(ReadBuffer& rb, IColumn* column) const {
- auto* column_data = assert_cast<ColumnDateTimeV2*>(column);
- UInt64 val = 0;
- if (!read_datetime_v2_text_impl<UInt64>(val, rb, _scale)) {
- return Status::InvalidArgument("parse date fail, string: '{}'",
- std::string(rb.position(),
rb.count()).c_str());
- }
- column_data->insert_value(val);
- return Status::OK();
-}
-
void DataTypeDateTimeV2::to_pb_column_meta(PColumnMeta* col_meta) const {
IDataType::to_pb_column_meta(col_meta);
col_meta->mutable_decimal_param()->set_scale(_scale);
diff --git a/be/src/vec/data_types/data_type_date_or_datetime_v2.h
b/be/src/vec/data_types/data_type_date_or_datetime_v2.h
index 28438b2539a..53007c6762c 100644
--- a/be/src/vec/data_types/data_type_date_or_datetime_v2.h
+++ b/be/src/vec/data_types/data_type_date_or_datetime_v2.h
@@ -89,7 +89,6 @@ public:
std::string to_string(const IColumn& column, size_t row_num) const
override;
void to_string(const IColumn& column, size_t row_num, BufferWritable&
ostr) const override;
std::string to_string(UInt32 int_val) const;
- Status from_string(ReadBuffer& rb, IColumn* column) const override;
MutableColumnPtr create_column() const override;
@@ -139,7 +138,6 @@ public:
void push_number(ColumnString::Chars& chars, const UInt64& num) const;
void to_string(const IColumn& column, size_t row_num, BufferWritable&
ostr) const override;
std::string to_string(UInt64 int_val) const;
- Status from_string(ReadBuffer& rb, IColumn* column) const override;
using SerDeType = DataTypeDateTimeV2SerDe;
DataTypeSerDeSPtr get_serde(int nesting_level = 1) const override {
return std::make_shared<SerDeType>(_scale, nesting_level);
diff --git a/be/src/vec/data_types/data_type_date_time.cpp
b/be/src/vec/data_types/data_type_date_time.cpp
index fc2107987c3..60c6c970b9f 100644
--- a/be/src/vec/data_types/data_type_date_time.cpp
+++ b/be/src/vec/data_types/data_type_date_time.cpp
@@ -89,17 +89,6 @@ void DataTypeDateTime::to_string(const IColumn& column,
size_t row_num,
ostr.write(buf, pos - buf - 1);
}
-Status DataTypeDateTime::from_string(ReadBuffer& rb, IColumn* column) const {
- auto* column_data = static_cast<ColumnDateTime*>(column);
- Int64 val = 0;
- if (!read_datetime_text_impl<Int64>(val, rb)) {
- return Status::InvalidArgument("parse datetime fail, string: '{}'",
- std::string(rb.position(),
rb.count()).c_str());
- }
- column_data->insert_value(val);
- return Status::OK();
-}
-
void DataTypeDateTime::cast_to_date_time(Int64& x) {
auto value = binary_cast<Int64, doris::VecDateTimeValue>(x);
value.to_datetime();
diff --git a/be/src/vec/data_types/data_type_date_time.h
b/be/src/vec/data_types/data_type_date_time.h
index 7f6f6851988..5f66b3c5c97 100644
--- a/be/src/vec/data_types/data_type_date_time.h
+++ b/be/src/vec/data_types/data_type_date_time.h
@@ -105,8 +105,6 @@ public:
size_t number_length() const;
void push_number(ColumnString::Chars& chars, const Int64& num) const;
- Status from_string(ReadBuffer& rb, IColumn* column) const override;
-
static void cast_to_date_time(Int64& x);
MutableColumnPtr create_column() const override;
diff --git a/be/src/vec/data_types/data_type_decimal.cpp
b/be/src/vec/data_types/data_type_decimal.cpp
index 9b7a5c6e239..14eceae8a4b 100644
--- a/be/src/vec/data_types/data_type_decimal.cpp
+++ b/be/src/vec/data_types/data_type_decimal.cpp
@@ -190,23 +190,6 @@ std::string DataTypeDecimal<T>::to_string(const FieldType&
value) const {
}
}
-template <PrimitiveType T>
-Status DataTypeDecimal<T>::from_string(ReadBuffer& rb, IColumn* column) const {
- auto& column_data = static_cast<ColumnType&>(*column).get_data();
- FieldType val {};
- StringRef str_ref(rb.position(), rb.count());
- StringParser::ParseResult res =
- read_decimal_text_impl<DataTypeDecimalSerDe<T>::get_primitive_type(), FieldType>(
- val, str_ref, precision, scale);
- if (res == StringParser::PARSE_SUCCESS || res ==
StringParser::PARSE_UNDERFLOW) {
- column_data.emplace_back(val);
- return Status::OK();
- }
- return Status::InvalidArgument("parse decimal fail, string: '{}', primitive type: '{}'",
- std::string(rb.position(), rb.count()).c_str(),
- DataTypeDecimalSerDe<T>::get_primitive_type());
-}
-
// binary: const flag | row num | real_saved_num | data
// data : {val1 | val2| ...} or {encode_size | val1 | val2| ...}
template <PrimitiveType T>
diff --git a/be/src/vec/data_types/data_type_decimal.h
b/be/src/vec/data_types/data_type_decimal.h
index c699b74866d..cc13f342454 100644
--- a/be/src/vec/data_types/data_type_decimal.h
+++ b/be/src/vec/data_types/data_type_decimal.h
@@ -238,7 +238,6 @@ public:
template <bool is_const>
void to_string_batch_impl(const ColumnPtr& column_ptr, ColumnString&
column_to) const;
std::string to_string(const FieldType& value) const;
- Status from_string(ReadBuffer& rb, IColumn* column) const override;
using SerDeType = DataTypeDecimalSerDe<T>;
DataTypeSerDeSPtr get_serde(int nesting_level = 1) const override {
return std::make_shared<SerDeType>(precision, scale, nesting_level);
diff --git a/be/src/vec/data_types/data_type_hll.cpp
b/be/src/vec/data_types/data_type_hll.cpp
index 5784d9aa4bf..b7d4e3ed324 100644
--- a/be/src/vec/data_types/data_type_hll.cpp
+++ b/be/src/vec/data_types/data_type_hll.cpp
@@ -206,17 +206,4 @@ void DataTypeHLL::to_string(const class
doris::vectorized::IColumn& column, size
result.resize(actual_size);
ostr.write(result.c_str(), result.size());
}
-
-Status DataTypeHLL::from_string(ReadBuffer& rb, IColumn* column) const {
- auto& data_column = assert_cast<ColumnHLL&>(*column);
- auto& data = data_column.get_data();
-
- HyperLogLog hll;
- if (!hll.deserialize(Slice(rb.to_string()))) {
- return Status::InternalError("deserialize hll from string fail!");
- }
- data.push_back(std::move(hll));
- return Status::OK();
-}
-
} // namespace doris::vectorized
diff --git a/be/src/vec/data_types/data_type_hll.h
b/be/src/vec/data_types/data_type_hll.h
index 8ad68bdc635..ac0f8b17a2b 100644
--- a/be/src/vec/data_types/data_type_hll.h
+++ b/be/src/vec/data_types/data_type_hll.h
@@ -81,7 +81,6 @@ public:
std::string to_string(const IColumn& column, size_t row_num) const
override { return "HLL()"; }
void to_string(const IColumn& column, size_t row_num, BufferWritable&
ostr) const override;
- Status from_string(ReadBuffer& rb, IColumn* column) const override;
Field get_default() const override {
return Field::create_field<TYPE_HLL>(HyperLogLog::empty());
diff --git a/be/src/vec/data_types/data_type_ipv4.cpp
b/be/src/vec/data_types/data_type_ipv4.cpp
index bfe969f647e..8bd53e8abd5 100644
--- a/be/src/vec/data_types/data_type_ipv4.cpp
+++ b/be/src/vec/data_types/data_type_ipv4.cpp
@@ -64,17 +64,6 @@ void DataTypeIPv4::to_string(const IColumn& column, size_t
row_num, BufferWritab
ostr.write(value.data(), value.size());
}
-Status DataTypeIPv4::from_string(ReadBuffer& rb, IColumn* column) const {
- auto* column_data = static_cast<ColumnIPv4*>(column);
- IPv4 val = 0;
- if (!read_ipv4_text_impl<IPv4>(val, rb)) {
- return Status::InvalidArgument("parse ipv4 fail, string: '{}'",
- std::string(rb.position(),
rb.count()).c_str());
- }
- column_data->insert_value(val);
- return Status::OK();
-}
-
MutableColumnPtr DataTypeIPv4::create_column() const {
return ColumnIPv4::create();
}
diff --git a/be/src/vec/data_types/data_type_ipv4.h
b/be/src/vec/data_types/data_type_ipv4.h
index ef53dfbf78e..4b7fa0d531a 100644
--- a/be/src/vec/data_types/data_type_ipv4.h
+++ b/be/src/vec/data_types/data_type_ipv4.h
@@ -63,7 +63,6 @@ public:
size_t number_length() const;
void push_number(ColumnString::Chars& chars, const IPv4& num) const;
std::string to_string(const IPv4& value) const;
- Status from_string(ReadBuffer& rb, IColumn* column) const override;
Field get_field(const TExprNode& node) const override;
diff --git a/be/src/vec/data_types/data_type_ipv6.cpp
b/be/src/vec/data_types/data_type_ipv6.cpp
index 6cec261e187..0bff199cdde 100755
--- a/be/src/vec/data_types/data_type_ipv6.cpp
+++ b/be/src/vec/data_types/data_type_ipv6.cpp
@@ -59,17 +59,6 @@ void DataTypeIPv6::to_string(const IColumn& column, size_t
row_num, BufferWritab
ostr.write(value.data(), value.size());
}
-Status DataTypeIPv6::from_string(ReadBuffer& rb, IColumn* column) const {
- auto* column_data = static_cast<ColumnIPv6*>(column);
- IPv6 val = 0;
- if (!read_ipv6_text_impl<IPv6>(val, rb)) {
- return Status::InvalidArgument("parse ipv6 fail, string: '{}'",
- std::string(rb.position(),
rb.count()).c_str());
- }
- column_data->insert_value(val);
- return Status::OK();
-}
-
MutableColumnPtr DataTypeIPv6::create_column() const {
return ColumnIPv6::create();
}
diff --git a/be/src/vec/data_types/data_type_ipv6.h
b/be/src/vec/data_types/data_type_ipv6.h
index b4a925aaa61..64305939aba 100755
--- a/be/src/vec/data_types/data_type_ipv6.h
+++ b/be/src/vec/data_types/data_type_ipv6.h
@@ -62,7 +62,6 @@ public:
std::string to_string(const IColumn& column, size_t row_num) const
override;
void to_string(const IColumn& column, size_t row_num, BufferWritable&
ostr) const override;
static std::string to_string(const IPv6& value);
- Status from_string(ReadBuffer& rb, IColumn* column) const override;
Field get_field(const TExprNode& node) const override {
IPv6 value;
diff --git a/be/src/vec/data_types/data_type_jsonb.cpp
b/be/src/vec/data_types/data_type_jsonb.cpp
index 1d7e09c33c4..221809ea298 100644
--- a/be/src/vec/data_types/data_type_jsonb.cpp
+++ b/be/src/vec/data_types/data_type_jsonb.cpp
@@ -60,17 +60,6 @@ void DataTypeJsonb::to_string(const class
doris::vectorized::IColumn& column, si
}
}
-Status DataTypeJsonb::from_string(ReadBuffer& rb, IColumn* column) const {
- JsonBinaryValue value;
- // Throw exception if rb.count is large than INT32_MAX
- RETURN_IF_ERROR(value.from_json_string(rb.position(),
cast_set<Int32>(rb.count())));
-
- auto* column_string = static_cast<ColumnString*>(column);
- column_string->insert_data(value.value(), value.size());
-
- return Status::OK();
-}
-
Field DataTypeJsonb::get_default() const {
std::string default_json = "null";
// convert default_json to binary
diff --git a/be/src/vec/data_types/data_type_jsonb.h
b/be/src/vec/data_types/data_type_jsonb.h
index 91767746ae9..35322cb1c01 100644
--- a/be/src/vec/data_types/data_type_jsonb.h
+++ b/be/src/vec/data_types/data_type_jsonb.h
@@ -82,7 +82,6 @@ public:
bool can_be_inside_low_cardinality() const override { return true; }
std::string to_string(const IColumn& column, size_t row_num) const
override;
void to_string(const IColumn& column, size_t row_num, BufferWritable&
ostr) const override;
- Status from_string(ReadBuffer& rb, IColumn* column) const override;
using SerDeType = DataTypeJsonbSerDe;
DataTypeSerDeSPtr get_serde(int nesting_level = 1) const override {
return std::make_shared<SerDeType>(nesting_level);
diff --git a/be/src/vec/data_types/data_type_map.cpp
b/be/src/vec/data_types/data_type_map.cpp
index f372b67a9bc..fe168c80cf7 100644
--- a/be/src/vec/data_types/data_type_map.cpp
+++ b/be/src/vec/data_types/data_type_map.cpp
@@ -100,156 +100,6 @@ void DataTypeMap::to_string(const IColumn& column, size_t
row_num, BufferWritabl
ostr.write(str.c_str(), str.size());
}
-bool next_slot_from_string(ReadBuffer& rb, StringRef& output, bool& has_quota)
{
- StringRef element(rb.position(), 0);
- has_quota = false;
- if (rb.eof()) {
- return false;
- }
-
- // ltrim
- while (!rb.eof() && isspace(*rb.position())) {
- ++rb.position();
- element.data = rb.position();
- }
-
- // parse string
- if (*rb.position() == '"' || *rb.position() == '\'') {
- const char str_sep = *rb.position();
- size_t str_len = 1;
- // search until next '"' or '\''
- while (str_len < rb.count() && *(rb.position() + str_len) != str_sep) {
- ++str_len;
- }
- // invalid string
- if (str_len >= rb.count()) {
- rb.position() = rb.end();
- return false;
- }
- has_quota = true;
- rb.position() += str_len + 1;
- element.size += str_len + 1;
- }
-
- // parse array element until map separator ':' or ',' or end '}'
- while (!rb.eof() && (*rb.position() != ':') && (*rb.position() != ',') &&
- (rb.count() != 1 || *rb.position() != '}')) {
- if (has_quota && !isspace(*rb.position())) {
- return false;
- }
- ++rb.position();
- ++element.size;
- }
- // invalid array element
- if (rb.eof()) {
- return false;
- }
- // adjust read buffer position to first char of next array element
- ++rb.position();
-
- // rtrim
- while (element.size > 0 && isspace(element.data[element.size - 1])) {
- --element.size;
- }
-
- // trim '"' and '\'' for string
- if (element.size >= 2 && (element.data[0] == '"' || element.data[0] ==
'\'') &&
- element.data[0] == element.data[element.size - 1]) {
- ++element.data;
- element.size -= 2;
- }
- output = element;
- return true;
-}
-
-bool is_empty_null_element(StringRef element, IColumn* nested_column, bool
has_quota) {
- auto& nested_null_col = reinterpret_cast<ColumnNullable&>(*nested_column);
- // handle empty element
- if (element.size == 0) {
- nested_null_col.get_nested_column().insert_default();
- nested_null_col.get_null_map_data().push_back(0);
- return true;
- }
-
- // handle null element
- if (!has_quota && element.size == 4 && strncmp(element.data, "null", 4) ==
0) {
- nested_null_col.get_nested_column().insert_default();
- nested_null_col.get_null_map_data().push_back(1);
- return true;
- }
- return false;
-}
-Status DataTypeMap::from_string(ReadBuffer& rb, IColumn* column) const {
- DCHECK(!rb.eof());
- auto* map_column = assert_cast<ColumnMap*>(column);
-
- if (*rb.position() != '{') {
- return Status::InvalidArgument("map does not start with '{}'
character, found '{}'", "{",
- *rb.position());
- }
- if (*(rb.end() - 1) != '}') {
- return Status::InvalidArgument("map does not end with '{}' character,
found '{}'", "}",
- *(rb.end() - 1));
- }
-
- if (rb.count() == 2) {
- // empty map {} , need to make empty array to add offset
- map_column->insert_default();
- } else {
- // {"aaa": 1, "bbb": 20}, need to handle key slot and value slot to
make key column arr and value arr
- // skip "{"
- ++rb.position();
- ColumnArray::Offsets64& map_off = map_column->get_offsets();
- IColumn& nested_key_column = map_column->get_keys();
- DCHECK(nested_key_column.is_nullable());
- IColumn& nested_val_column = map_column->get_values();
- DCHECK(nested_val_column.is_nullable());
-
- size_t element_num = 0;
- while (!rb.eof()) {
- StringRef key_element(rb.position(), rb.count());
- bool has_quota = false;
- if (!next_slot_from_string(rb, key_element, has_quota)) {
- // pop this current row which already put element_num item
into this row.
- map_column->get_keys().pop_back(element_num);
- map_column->get_values().pop_back(element_num);
- return Status::InvalidArgument("Cannot read map key from text
'{}'",
- key_element.to_string());
- }
- if (!is_empty_null_element(key_element, &nested_key_column,
has_quota)) {
- ReadBuffer krb(const_cast<char*>(key_element.data),
key_element.size);
- if (auto st = key_type->from_string(krb, &nested_key_column);
!st.ok()) {
- // pop this current row which already put element_num item
into this row.
- map_column->get_keys().pop_back(element_num);
- map_column->get_values().pop_back(element_num);
- return st;
- }
- }
-
- has_quota = false;
- StringRef value_element(rb.position(), rb.count());
- if (!next_slot_from_string(rb, value_element, has_quota)) {
- // +1 just because key column already put succeed , but
element_num not refresh here
- map_column->get_keys().pop_back(element_num + 1);
- map_column->get_values().pop_back(element_num);
- return Status::InvalidArgument("Cannot read map value from
text '{}'",
- value_element.to_string());
- }
- if (!is_empty_null_element(value_element, &nested_val_column,
has_quota)) {
- ReadBuffer vrb(const_cast<char*>(value_element.data),
value_element.size);
- if (auto st = value_type->from_string(vrb,
&nested_val_column); !st.ok()) {
- map_column->get_keys().pop_back(element_num + 1);
- map_column->get_values().pop_back(element_num);
- return st;
- }
- }
- ++element_num;
- }
- map_off.push_back(map_off.back() + element_num);
- }
- return Status::OK();
-}
-
MutableColumnPtr DataTypeMap::create_column() const {
return ColumnMap::create(key_type->create_column(),
value_type->create_column(),
ColumnArray::ColumnOffsets::create());
diff --git a/be/src/vec/data_types/data_type_map.h
b/be/src/vec/data_types/data_type_map.h
index 0e956a1ee31..b1b439efe37 100644
--- a/be/src/vec/data_types/data_type_map.h
+++ b/be/src/vec/data_types/data_type_map.h
@@ -97,7 +97,6 @@ public:
std::string to_string(const IColumn& column, size_t row_num) const
override;
void to_string(const IColumn& column, size_t row_num, BufferWritable&
ostr) const override;
- Status from_string(ReadBuffer& rb, IColumn* column) const override;
using SerDeType = DataTypeMapSerDe;
DataTypeSerDeSPtr get_serde(int nesting_level = 1) const override {
return std::make_shared<SerDeType>(key_type->get_serde(nesting_level +
1),
diff --git a/be/src/vec/data_types/data_type_nullable.cpp
b/be/src/vec/data_types/data_type_nullable.cpp
index f2cade4edf8..3d547a31f20 100644
--- a/be/src/vec/data_types/data_type_nullable.cpp
+++ b/be/src/vec/data_types/data_type_nullable.cpp
@@ -78,24 +78,6 @@ void DataTypeNullable::to_string(const IColumn& column,
size_t row_num,
}
}
-Status DataTypeNullable::from_string(ReadBuffer& rb, IColumn* column) const {
- auto* null_column = assert_cast<ColumnNullable*>(column);
- if (rb.count() == 4 && *(rb.position()) == 'N' && *(rb.position() + 1) ==
'U' &&
- *(rb.position() + 2) == 'L' && *(rb.position() + 3) == 'L') {
- null_column->insert_data(nullptr, 0);
- return Status::OK();
- }
- auto st = nested_data_type->from_string(rb,
&(null_column->get_nested_column()));
- if (!st.ok()) {
- // fill null if fail
- null_column->insert_data(nullptr, 0); // 0 is meaningless here
- return Status::OK();
- }
- // fill not null if succ
- null_column->get_null_map_data().push_back(0);
- return Status::OK();
-}
-
// binary: const flag | row num | read saved num| <null array> | <values array>
// <null array>: is_null1 | is_null2 | ...
// <values array>: value1 | value2 | ...>
diff --git a/be/src/vec/data_types/data_type_nullable.h
b/be/src/vec/data_types/data_type_nullable.h
index 21781d59e43..cf415238c0c 100644
--- a/be/src/vec/data_types/data_type_nullable.h
+++ b/be/src/vec/data_types/data_type_nullable.h
@@ -112,7 +112,6 @@ public:
}
std::string to_string(const IColumn& column, size_t row_num) const
override;
void to_string(const IColumn& column, size_t row_num, BufferWritable&
ostr) const override;
- Status from_string(ReadBuffer& rb, IColumn* column) const override;
const DataTypePtr& get_nested_type() const { return nested_data_type; }
bool is_null_literal() const override { return
nested_data_type->is_null_literal(); }
diff --git a/be/src/vec/data_types/data_type_number_base.cpp
b/be/src/vec/data_types/data_type_number_base.cpp
index 421404314ae..a0faf65c293 100644
--- a/be/src/vec/data_types/data_type_number_base.cpp
+++ b/be/src/vec/data_types/data_type_number_base.cpp
@@ -93,40 +93,6 @@ std::string DataTypeNumberBase<T>::to_string(
return std::string(buffer.data(), buffer.size());
}
}
-template <PrimitiveType T>
-Status DataTypeNumberBase<T>::from_string(ReadBuffer& rb, IColumn* column)
const {
- auto* column_data = static_cast<typename
PrimitiveTypeTraits<T>::ColumnType*>(column);
- StringRef str_ref {rb.position(), rb.count()};
- if constexpr (std::is_same<typename
PrimitiveTypeTraits<T>::ColumnItemType, UInt128>::value) {
- // TODO: support for Uint128
- return Status::InvalidArgument("uint128 is not support");
- } else if constexpr (is_float_or_double(T) || T == TYPE_TIMEV2 || T ==
TYPE_TIME) {
- typename PrimitiveTypeTraits<T>::ColumnItemType val = 0;
- if (!try_read_float_text(val, str_ref)) {
- return Status::InvalidArgument("parse number fail, string: '{}'",
- std::string(rb.position(),
rb.count()).c_str());
- }
- column_data->insert_value(val);
- } else if constexpr (T == TYPE_BOOLEAN) {
- // Note: here we should handle the bool type
- typename PrimitiveTypeTraits<T>::ColumnItemType val = 0;
- if (!try_read_bool_text(val, str_ref)) {
- return Status::InvalidArgument("parse boolean fail, string: '{}'",
- std::string(rb.position(),
rb.count()).c_str());
- }
- column_data->insert_value(val);
- } else if constexpr (is_int_or_bool(T)) {
- typename PrimitiveTypeTraits<T>::ColumnItemType val = 0;
- if (!try_read_int_text(val, str_ref)) {
- return Status::InvalidArgument("parse number fail, string: '{}'",
- std::string(rb.position(),
rb.count()).c_str());
- }
- column_data->insert_value(val);
- } else {
- DCHECK(false);
- }
- return Status::OK();
-}
template <PrimitiveType T>
Field DataTypeNumberBase<T>::get_default() const {
diff --git a/be/src/vec/data_types/data_type_number_base.h
b/be/src/vec/data_types/data_type_number_base.h
index cac2ad75db4..0fc69319257 100644
--- a/be/src/vec/data_types/data_type_number_base.h
+++ b/be/src/vec/data_types/data_type_number_base.h
@@ -144,7 +144,6 @@ public:
void to_string(const IColumn& column, size_t row_num, BufferWritable&
ostr) const override;
std::string to_string(const IColumn& column, size_t row_num) const
override;
- Status from_string(ReadBuffer& rb, IColumn* column) const override;
bool is_null_literal() const override { return _is_null_literal; }
void set_null_literal(bool flag) { _is_null_literal = flag; }
using SerDeType = DataTypeNumberSerDe<T>;
diff --git a/be/src/vec/data_types/data_type_string.cpp
b/be/src/vec/data_types/data_type_string.cpp
index fa092d42425..1de4733d466 100644
--- a/be/src/vec/data_types/data_type_string.cpp
+++ b/be/src/vec/data_types/data_type_string.cpp
@@ -63,12 +63,6 @@ void DataTypeString::to_string(const class
doris::vectorized::IColumn& column, s
ostr.write(value.data, value.size);
}
-Status DataTypeString::from_string(ReadBuffer& rb, IColumn* column) const {
- auto* column_data = static_cast<ColumnString*>(column);
- column_data->insert_data(rb.position(), rb.count());
- return Status::OK();
-}
-
Field DataTypeString::get_default() const {
return Field::create_field<TYPE_STRING>(String());
}
diff --git a/be/src/vec/data_types/data_type_string.h
b/be/src/vec/data_types/data_type_string.h
index 79fd24ec5e3..4b80356f9d6 100644
--- a/be/src/vec/data_types/data_type_string.h
+++ b/be/src/vec/data_types/data_type_string.h
@@ -89,7 +89,6 @@ public:
bool can_be_inside_low_cardinality() const override { return true; }
std::string to_string(const IColumn& column, size_t row_num) const
override;
void to_string(const IColumn& column, size_t row_num, BufferWritable&
ostr) const override;
- Status from_string(ReadBuffer& rb, IColumn* column) const override;
using SerDeType = DataTypeStringSerDe;
DataTypeSerDeSPtr get_serde(int nesting_level = 1) const override {
return std::make_shared<SerDeType>(nesting_level);
diff --git a/be/src/vec/data_types/data_type_struct.cpp
b/be/src/vec/data_types/data_type_struct.cpp
index ccf2d3513be..c0bd3785444 100644
--- a/be/src/vec/data_types/data_type_struct.cpp
+++ b/be/src/vec/data_types/data_type_struct.cpp
@@ -102,125 +102,6 @@ std::string DataTypeStruct::do_get_name() const {
return s.str();
}
-Status DataTypeStruct::from_string(ReadBuffer& rb, IColumn* column) const {
- DCHECK(!rb.eof());
- auto* struct_column = assert_cast<ColumnStruct*>(column);
-
- if (*rb.position() != '{') {
- return Status::InvalidArgument("Struct does not start with '{}'
character, found '{}'", "{",
- *rb.position());
- }
- if (*(rb.end() - 1) != '}') {
- return Status::InvalidArgument("Struct does not end with '{}'
character, found '{}'", "}",
- *(rb.end() - 1));
- }
-
- // here need handle the empty struct '{}'
- if (rb.count() == 2) {
- for (size_t i = 0; i < struct_column->tuple_size(); ++i) {
- struct_column->get_column(i).insert_default();
- }
- return Status::OK();
- }
-
- ++rb.position();
-
- bool is_explicit_names = false;
- std::vector<std::string> field_names;
- std::vector<ReadBuffer> field_rbs;
- std::vector<size_t> field_pos;
-
- while (!rb.eof()) {
- StringRef slot(rb.position(), rb.count());
- bool has_quota = false;
- bool is_name = false;
- if (!DataTypeStructSerDe::next_slot_from_string(rb, slot, is_name,
has_quota)) {
- return Status::InvalidArgument("Cannot read struct field from text
'{}'",
- slot.to_string());
- }
- if (is_name) {
- std::string name = slot.to_string();
- if (!DataTypeStructSerDe::next_slot_from_string(rb, slot, is_name,
has_quota)) {
- return Status::InvalidArgument("Cannot read struct field from
text '{}'",
- slot.to_string());
- }
- ReadBuffer field_rb(const_cast<char*>(slot.data), slot.size);
- field_names.push_back(name);
- field_rbs.push_back(field_rb);
-
- if (!is_explicit_names) {
- is_explicit_names = true;
- }
- } else {
- ReadBuffer field_rb(const_cast<char*>(slot.data), slot.size);
- field_rbs.push_back(field_rb);
- }
- }
-
- // TODO: should we support insert default field value when actual field
number is less than
- // schema field number?
- if (field_rbs.size() != elems.size()) {
- std::string cmp_str = field_rbs.size() > elems.size() ? "more" :
"less";
- return Status::InvalidArgument(
- "Actual struct field number {} is {} than schema field number
{}.",
- field_rbs.size(), cmp_str, elems.size());
- }
-
- if (is_explicit_names) {
- if (field_names.size() != field_rbs.size()) {
- return Status::InvalidArgument(
- "Struct field name number {} is not equal to field number
{}.",
- field_names.size(), field_rbs.size());
- }
- std::unordered_set<std::string> name_set;
- for (size_t i = 0; i < field_names.size(); i++) {
- // check duplicate fields
- auto ret = name_set.insert(field_names[i]);
- if (!ret.second) {
- return Status::InvalidArgument("Struct field name {} is
duplicate with others.",
- field_names[i]);
- }
- // check name valid
- auto idx = try_get_position_by_name(field_names[i]);
- if (idx == std::nullopt) {
- return Status::InvalidArgument("Cannot find struct field name
{} in schema.",
- field_names[i]);
- }
- field_pos.push_back(idx.value());
- }
- } else {
- for (size_t i = 0; i < field_rbs.size(); i++) {
- field_pos.push_back(i);
- }
- }
-
- for (size_t idx = 0; idx < elems.size(); idx++) {
- auto field_rb = field_rbs[field_pos[idx]];
- // handle empty element
- if (field_rb.count() == 0) {
- struct_column->get_column(idx).insert_default();
- continue;
- }
- // handle null element
- if (field_rb.count() == 4 && strncmp(field_rb.position(), "null", 4)
== 0) {
- auto& nested_null_col =
-
reinterpret_cast<ColumnNullable&>(struct_column->get_column(idx));
- nested_null_col.insert_default();
- continue;
- }
- auto st = elems[idx]->from_string(field_rb,
&struct_column->get_column(idx));
- if (!st.ok()) {
- // we should do column revert if error
- for (size_t j = 0; j < idx; j++) {
- struct_column->get_column(j).pop_back(1);
- }
- return st;
- }
- }
-
- return Status::OK();
-}
-
std::string DataTypeStruct::to_string(const IColumn& column, size_t row_num)
const {
auto result = check_column_const_set_readability(column, row_num);
ColumnPtr ptr = result.first;
diff --git a/be/src/vec/data_types/data_type_struct.h
b/be/src/vec/data_types/data_type_struct.h
index 803a4d4a692..9b9ccfc73da 100644
--- a/be/src/vec/data_types/data_type_struct.h
+++ b/be/src/vec/data_types/data_type_struct.h
@@ -114,8 +114,6 @@ public:
const char* deserialize(const char* buf, MutableColumnPtr* column,
int be_exec_version) const override;
void to_pb_column_meta(PColumnMeta* col_meta) const override;
-
- Status from_string(ReadBuffer& rb, IColumn* column) const override;
std::string to_string(const IColumn& column, size_t row_num) const
override;
void to_string(const IColumn& column, size_t row_num, BufferWritable&
ostr) const override;
bool get_have_explicit_names() const { return have_explicit_names; }
diff --git a/be/src/vec/data_types/serde/data_type_serde.cpp
b/be/src/vec/data_types/serde/data_type_serde.cpp
index 0af00a2683f..7fa53143be1 100644
--- a/be/src/vec/data_types/serde/data_type_serde.cpp
+++ b/be/src/vec/data_types/serde/data_type_serde.cpp
@@ -50,6 +50,16 @@ DataTypeSerDeSPtrs create_data_type_serdes(const
std::vector<SlotDescriptor*>& s
return serdes;
}
+Status DataTypeSerDe::default_from_string(StringRef& str, IColumn& column)
const {
+ auto slice = str.to_slice();
+ DataTypeSerDe::FormatOptions options;
+ options.converted_from_string = true;
+ ///TODO: Think again, when do we need to consider escape characters?
+ // options.escape_char = '\\';
+ // Deserialize the string into the column
+ return deserialize_one_cell_from_json(column, slice, options);
+}
+
Status DataTypeSerDe::serialize_column_to_jsonb_vector(const IColumn&
from_column,
ColumnString&
to_column) const {
const auto size = from_column.size();
diff --git a/be/src/vec/data_types/serde/data_type_serde.h
b/be/src/vec/data_types/serde/data_type_serde.h
index 0f538e00d11..2d5851f5fad 100644
--- a/be/src/vec/data_types/serde/data_type_serde.h
+++ b/be/src/vec/data_types/serde/data_type_serde.h
@@ -238,6 +238,8 @@ public:
DataTypeSerDe(int nesting_level = 1) : _nesting_level(nesting_level) {};
virtual ~DataTypeSerDe();
+ Status default_from_string(StringRef& str, IColumn& column) const;
+
// All types can override this function
// When this function is called, column should be of the corresponding type
// everytime call this, should insert new cell to the end of column
diff --git a/be/src/vec/functions/cast/cast_to_jsonb.h
b/be/src/vec/functions/cast/cast_to_jsonb.h
index 5346b14eea0..c7e4d959c22 100644
--- a/be/src/vec/functions/cast/cast_to_jsonb.h
+++ b/be/src/vec/functions/cast/cast_to_jsonb.h
@@ -36,6 +36,13 @@ struct ConvertImplGenericFromJsonb {
uint32_t result, size_t input_rows_count,
const NullMap::value_type* null_map = nullptr) {
auto data_type_to = block.get_by_position(result).type;
+ auto data_type_serde_to = data_type_to->get_serde();
+
+ DataTypeSerDe::FormatOptions options;
+ options.converted_from_string = true;
+ options.escape_char = '\\';
+ options.timezone = &context->state()->timezone_obj();
+
const auto& col_with_type_and_name =
block.get_by_position(arguments[0]);
const IColumn& col_from = *col_with_type_and_name.column;
if (const ColumnString* col_from_string =
check_and_get_column<ColumnString>(&col_from)) {
@@ -94,8 +101,8 @@ struct ConvertImplGenericFromJsonb {
(*vec_null_map_to)[i] = 1;
continue;
}
- ReadBuffer read_buffer((char*)(input_str.data()),
input_str.size());
- st = data_type_to->from_string(read_buffer, col_to.get());
+ StringRef read_buffer((char*)(input_str.data()),
input_str.size());
+ st = data_type_serde_to->from_string(read_buffer, *col_to,
options);
// if parsing failed, will return null
(*vec_null_map_to)[i] = !st.ok();
if (!st.ok()) {
diff --git a/be/test/vec/data_types/serde/data_type_serde_text_test.cpp
b/be/test/vec/data_types/serde/data_type_serde_text_test.cpp
index d51a7599331..0c19c914a88 100644
--- a/be/test/vec/data_types/serde/data_type_serde_text_test.cpp
+++ b/be/test/vec/data_types/serde/data_type_serde_text_test.cpp
@@ -789,30 +789,19 @@ TEST(TextSerde, ComplexTypeWithNestedSerdeTextTest) {
typedef std::tuple<FieldType, std::vector<std::string>,
std::vector<std::string>,
std::vector<std::string>, std::vector<std::string>>
FieldType_RandStr;
- std::vector<FieldType_RandStr> nested_field_types = {
- FieldType_RandStr(
- FieldType::OLAP_FIELD_TYPE_STRING,
- {"[[Hello, World],[This, is, a, nested,
array],null,[null,null,aaaa]]"},
- {"[[\"Hello\", \"World\"], [\"This\", \"is\", \"a\",
\"nested\", "
- "\"array\"], null, [null, null, "
- "\"aaaa\"]]"},
- {"[null, null, null, null, null, null, null, null,
null, null, null]"},
- {"[[\"Hello\", \"World\"], [\"This\", \"is\", \"a\",
\"nested\", "
- "\"array\"], null, [null, null, "
- "\"aaaa\"]]"}),
- FieldType_RandStr(
- FieldType::OLAP_FIELD_TYPE_STRING,
- {"[[With, special, \"characters\"], [like, @, #, $, %
\"^\", &, *, (, ), "
- "-, _], [=, +, [, ], {, }, |, \\, ;, :, ', '\', <, >,
,, ., /, ?, ~]]"},
- {"[[\"With\", \"special\", \"characters\"], [\"like\",
\"@\", \"#\", "
- "\"$\", \"% \"^\"\", \"&\", \"*\", \"(\", \")\",
\"-\", "
- "\"_\"], [\"=\", \"+\", \"[, ]\", \"{, }\", \"|\",
\"\\\", \";\", "
- "\":\", \"', '', <, >, ,, ., /, ?, ~\"]]"},
- {""},
- {"[[\"With\", \"special\", \"characters\"], [\"like\",
\"@\", \"#\", "
- "\"$\", \"% \"^\"\", \"&\", \"*\", \"(\", \")\",
\"-\", "
- "\"_\"], [\"=\", \"+\", \"[, ]\", \"{, }\", \"|\",
\"\\\", \";\", "
- "\":\", \"', '', <, >, ,, ., /, ?, ~\"]]"})};
+ std::vector<FieldType_RandStr> nested_field_types = {FieldType_RandStr(
+ FieldType::OLAP_FIELD_TYPE_STRING,
+ {"[[With, special, \"characters\"], [like, @, #, $, % \"^\",
&, *, (, ), "
+ "-, _], [=, +, [, ], {, }, |, \\, ;, :, ', '\', <, >, ,, .,
/, ?, ~]]"},
+ {"[[\"With\", \"special\", \"characters\"], [\"like\", \"@\",
\"#\", "
+ "\"$\", \"% \"^\"\", \"&\", \"*\", \"(\", \")\", \"-\", "
+ "\"_\"], [\"=\", \"+\", \"[, ]\", \"{, }\", \"|\", \"\\\",
\";\", "
+ "\":\", \"', '', <, >, ,, ., /, ?, ~\"]]"},
+ {""},
+ {"[[\"With\", \"special\", \"characters\"], [\"like\", \"@\",
\"#\", "
+ "\"$\", \"% \"^\"\", \"&\", \"*\", \"(\", \")\", \"-\", "
+ "\"_\"], [\"=\", \"+\", \"[, ]\", \"{, }\", \"|\", \"\\\",
\";\", "
+ "\":\", \"', '', <, >, ,, ., /, ?, ~\"]]"})};
// array type
for (auto type_pair : nested_field_types) {
auto type = std::get<0>(type_pair);
@@ -869,7 +858,7 @@ TEST(TextSerde, ComplexTypeWithNestedSerdeTextTest) {
auto col2 = array_data_type_ptr->create_column();
Status status = array_data_type_ptr->from_string(rb,
col2.get());
if (expect_from_string_str == "") {
- EXPECT_EQ(status.ok(), false);
+ EXPECT_EQ(status.ok(), true);
std::cout << "test from_string: " << status.to_json()
<< std::endl;
} else {
auto ser_col = ColumnString::create();
@@ -1024,7 +1013,7 @@ TEST(TextSerde, ComplexTypeWithNestedSerdeTextTest) {
auto col2 = array_data_type_ptr->create_column();
Status status = array_data_type_ptr->from_string(rb,
col2.get());
if (expect_from_string_str == "") {
- EXPECT_EQ(status.ok(), false);
+ EXPECT_EQ(status.ok(), true);
std::cout << "test from_string: " << status.to_json()
<< std::endl;
} else {
auto ser_col = ColumnString::create();
@@ -1242,7 +1231,7 @@ TEST(TextSerde, ComplexTypeWithNestedSerdeTextTest) {
auto col2 = map_data_type_ptr->create_column();
Status status = map_data_type_ptr->from_string(rb,
col2.get());
if (expect_from_string_str == "") {
- EXPECT_EQ(status.ok(), false);
+ EXPECT_EQ(status.ok(), true);
std::cout << "test from_string: " << status.to_json()
<< std::endl;
} else {
auto ser_col = ColumnString::create();
@@ -1383,7 +1372,7 @@ TEST(TextSerde, ComplexTypeWithNestedSerdeTextTest) {
auto col2 = array_data_type_ptr->create_column();
Status status = array_data_type_ptr->from_string(rb,
col2.get());
if (expect_from_string_str == "") {
- EXPECT_EQ(status.ok(), false);
+ EXPECT_EQ(status.ok(), true);
std::cout << "test from_string: " << status.to_json()
<< std::endl;
} else {
auto ser_col = ColumnString::create();
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]