This is an automated email from the ASF dual-hosted git repository.
morningman pushed a commit to branch branch-2.0
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.0 by this push:
new ca0323d50eb [opt](status) add INVALID_DATA_FORMAT status code for
serde (#24795)
ca0323d50eb is described below
commit ca0323d50eb4523ff23688513ecbbff0a860b735
Author: Mingyu Chen <[email protected]>
AuthorDate: Fri Sep 22 19:32:09 2023 +0800
[opt](status) add INVALID_DATA_FORMAT status code for serde (#24795)
Add a new status code on BE: `INVALID_DATA_FORMAT`,
mainly used to indicate that parsing a data format failed, such
as parsing `xxx` to an integer.
Previously, we used `INVALID_ARGUMENT` in these cases,
but it prints a lot of error stack logs in be.INFO when an error happens.
One option is to add `INVALID_ARGUMENT` to the codes excluded in `capture_stacktrace` to
suppress its stack log,
but that may affect other error cases.
So I decided to add a new status code for data format parsing errors.
---
be/src/common/status.h | 6 +++++-
be/src/vec/data_types/serde/data_type_array_serde.cpp | 8 ++++----
be/src/vec/data_types/serde/data_type_date64_serde.cpp | 8 ++++----
be/src/vec/data_types/serde/data_type_datetimev2_serde.cpp | 4 ++--
be/src/vec/data_types/serde/data_type_datev2_serde.cpp | 4 ++--
be/src/vec/data_types/serde/data_type_decimal_serde.cpp | 6 +++---
be/src/vec/data_types/serde/data_type_map_serde.cpp | 5 +++--
be/src/vec/data_types/serde/data_type_number_serde.cpp | 14 +++++++-------
be/src/vec/data_types/serde/data_type_struct_serde.cpp | 8 ++++----
gensrc/thrift/Status.thrift | 1 +
10 files changed, 35 insertions(+), 29 deletions(-)
diff --git a/be/src/common/status.h b/be/src/common/status.h
index 1d73bee2d1d..f4f552036d4 100644
--- a/be/src/common/status.h
+++ b/be/src/common/status.h
@@ -37,6 +37,7 @@ TStatusError(PUBLISH_TIMEOUT);
TStatusError(MEM_ALLOC_FAILED);
TStatusError(BUFFER_ALLOCATION_FAILED);
TStatusError(INVALID_ARGUMENT);
+TStatusError(INVALID_DATA_FORMAT);
TStatusError(MINIMUM_RESERVATION_UNAVAILABLE);
TStatusError(CORRUPTION);
TStatusError(IO_ERROR);
@@ -318,7 +319,8 @@ constexpr bool capture_stacktrace(int code) {
&& code != ErrorCode::CANCELLED
&& code != ErrorCode::UNINITIALIZED
&& code != ErrorCode::PIP_WAIT_FOR_RF
- && code != ErrorCode::PIP_WAIT_FOR_SC;
+ && code != ErrorCode::PIP_WAIT_FOR_SC
+ && code != ErrorCode::INVALID_DATA_FORMAT;
}
// clang-format on
@@ -396,6 +398,7 @@ public:
ERROR_CTOR(MemoryAllocFailed, MEM_ALLOC_FAILED)
ERROR_CTOR(BufferAllocFailed, BUFFER_ALLOCATION_FAILED)
ERROR_CTOR(InvalidArgument, INVALID_ARGUMENT)
+ ERROR_CTOR(InvalidDataFormat, INVALID_DATA_FORMAT)
ERROR_CTOR(MinimumReservationUnavailable, MINIMUM_RESERVATION_UNAVAILABLE)
ERROR_CTOR(Corruption, CORRUPTION)
ERROR_CTOR(IOError, IO_ERROR)
@@ -435,6 +438,7 @@ public:
}
bool is_invalid_argument() const { return ErrorCode::INVALID_ARGUMENT ==
_code; }
+ bool is_invalid_data_format() const { return
ErrorCode::INVALID_DATA_FORMAT == _code; }
bool is_not_found() const { return _code == ErrorCode::NOT_FOUND; }
bool is_not_authorized() const { return code() ==
TStatusCode::NOT_AUTHORIZED; }
diff --git a/be/src/vec/data_types/serde/data_type_array_serde.cpp
b/be/src/vec/data_types/serde/data_type_array_serde.cpp
index ac7992cc068..727d8234ac1 100644
--- a/be/src/vec/data_types/serde/data_type_array_serde.cpp
+++ b/be/src/vec/data_types/serde/data_type_array_serde.cpp
@@ -83,12 +83,12 @@ Status
DataTypeArraySerDe::deserialize_one_cell_from_json(IColumn& column, Slice
IColumn& nested_column = array_column.get_data();
DCHECK(nested_column.is_nullable());
if (slice[0] != '[') {
- return Status::InvalidArgument("Array does not start with '['
character, found '{}'",
- slice[0]);
+ return Status::InvalidDataFormat("Array does not start with '['
character, found '{}'",
+ slice[0]);
}
if (slice[slice.size - 1] != ']') {
- return Status::InvalidArgument("Array does not end with ']' character,
found '{}'",
- slice[slice.size - 1]);
+ return Status::InvalidDataFormat("Array does not end with ']'
character, found '{}'",
+ slice[slice.size - 1]);
}
// empty array []
if (slice.size == 2) {
diff --git a/be/src/vec/data_types/serde/data_type_date64_serde.cpp
b/be/src/vec/data_types/serde/data_type_date64_serde.cpp
index e4978410f0f..f7aa8e11d13 100644
--- a/be/src/vec/data_types/serde/data_type_date64_serde.cpp
+++ b/be/src/vec/data_types/serde/data_type_date64_serde.cpp
@@ -86,8 +86,8 @@ Status
DataTypeDate64SerDe::deserialize_one_cell_from_json(IColumn& column, Slic
val = 716833;
}
} else if (ReadBuffer rb(slice.data, slice.size);
!read_date_text_impl<Int64>(val, rb)) {
- return Status::InvalidArgument("parse date fail, string: '{}'",
- std::string(rb.position(),
rb.count()).c_str());
+ return Status::InvalidDataFormat("parse date fail, string: '{}'",
+ std::string(rb.position(),
rb.count()).c_str());
}
column_data.insert_value(val);
return Status::OK();
@@ -159,8 +159,8 @@ Status
DataTypeDateTimeSerDe::deserialize_one_cell_from_json(IColumn& column, Sl
val = 14000101000000L;
}
} else if (ReadBuffer rb(slice.data, slice.size);
!read_datetime_text_impl<Int64>(val, rb)) {
- return Status::InvalidArgument("parse datetime fail, string: '{}'",
- std::string(rb.position(),
rb.count()).c_str());
+ return Status::InvalidDataFormat("parse datetime fail, string: '{}'",
+ std::string(rb.position(),
rb.count()).c_str());
}
column_data.insert_value(val);
return Status::OK();
diff --git a/be/src/vec/data_types/serde/data_type_datetimev2_serde.cpp
b/be/src/vec/data_types/serde/data_type_datetimev2_serde.cpp
index 9b931de1bfc..f0563a4a340 100644
--- a/be/src/vec/data_types/serde/data_type_datetimev2_serde.cpp
+++ b/be/src/vec/data_types/serde/data_type_datetimev2_serde.cpp
@@ -83,8 +83,8 @@ Status
DataTypeDateTimeV2SerDe::deserialize_one_cell_from_json(IColumn& column,
} else if (ReadBuffer rb(slice.data, slice.size);
!read_datetime_v2_text_impl<UInt64>(val, rb)) {
- return Status::InvalidArgument("parse date fail, string: '{}'",
- std::string(rb.position(),
rb.count()).c_str());
+ return Status::InvalidDataFormat("parse date fail, string: '{}'",
+ std::string(rb.position(),
rb.count()).c_str());
}
column_data.insert_value(val);
return Status::OK();
diff --git a/be/src/vec/data_types/serde/data_type_datev2_serde.cpp
b/be/src/vec/data_types/serde/data_type_datev2_serde.cpp
index b56560220a5..bc758511f0d 100644
--- a/be/src/vec/data_types/serde/data_type_datev2_serde.cpp
+++ b/be/src/vec/data_types/serde/data_type_datev2_serde.cpp
@@ -73,8 +73,8 @@ Status
DataTypeDateV2SerDe::deserialize_one_cell_from_json(IColumn& column, Slic
val = doris::vectorized::MIN_DATE_V2;
}
} else if (ReadBuffer rb(slice.data, slice.size);
!read_date_v2_text_impl<UInt32>(val, rb)) {
- return Status::InvalidArgument("parse date fail, string: '{}'",
- std::string(rb.position(),
rb.count()).c_str());
+ return Status::InvalidDataFormat("parse date fail, string: '{}'",
+ std::string(rb.position(),
rb.count()).c_str());
}
column_data.insert_value(val);
return Status::OK();
diff --git a/be/src/vec/data_types/serde/data_type_decimal_serde.cpp
b/be/src/vec/data_types/serde/data_type_decimal_serde.cpp
index 7907311c7da..e00c1139cb1 100644
--- a/be/src/vec/data_types/serde/data_type_decimal_serde.cpp
+++ b/be/src/vec/data_types/serde/data_type_decimal_serde.cpp
@@ -81,9 +81,9 @@ Status
DataTypeDecimalSerDe<T>::deserialize_one_cell_from_json(IColumn& column,
column_data.emplace_back(val);
return Status::OK();
}
- return Status::InvalidArgument("parse decimal fail, string: '{}',
primitive type: '{}'",
- std::string(rb.position(),
rb.count()).c_str(),
- get_primitive_type());
+ return Status::InvalidDataFormat("parse decimal fail, string: '{}',
primitive type: '{}'",
+ std::string(rb.position(),
rb.count()).c_str(),
+ get_primitive_type());
}
template <typename T>
diff --git a/be/src/vec/data_types/serde/data_type_map_serde.cpp
b/be/src/vec/data_types/serde/data_type_map_serde.cpp
index 84c4830a04a..68c41a8d429 100644
--- a/be/src/vec/data_types/serde/data_type_map_serde.cpp
+++ b/be/src/vec/data_types/serde/data_type_map_serde.cpp
@@ -186,12 +186,13 @@ Status
DataTypeMapSerDe::deserialize_one_cell_from_json(IColumn& column, Slice&
if (slice[0] != '{') {
std::stringstream ss;
ss << slice[0] << '\'';
- return Status::InvalidArgument("Map does not start with '{' character,
found '" + ss.str());
+ return Status::InvalidDataFormat("Map does not start with '{'
character, found '" +
+ ss.str());
}
if (slice[slice.size - 1] != '}') {
std::stringstream ss;
ss << slice[slice.size - 1] << '\'';
- return Status::InvalidArgument("Map does not end with '}' character,
found '" + ss.str());
+ return Status::InvalidDataFormat("Map does not end with '}' character,
found '" + ss.str());
}
// empty map
if (slice.size == 2) {
diff --git a/be/src/vec/data_types/serde/data_type_number_serde.cpp
b/be/src/vec/data_types/serde/data_type_number_serde.cpp
index ffa3239a67b..9c08a33c0cb 100644
--- a/be/src/vec/data_types/serde/data_type_number_serde.cpp
+++ b/be/src/vec/data_types/serde/data_type_number_serde.cpp
@@ -108,27 +108,27 @@ Status
DataTypeNumberSerDe<T>::deserialize_one_cell_from_json(IColumn& column, S
ReadBuffer rb(slice.data, slice.size);
if constexpr (std::is_same<T, UInt128>::value) {
// TODO: support for Uint128
- return Status::InvalidArgument("uint128 is not support");
+ return Status::InvalidDataFormat("uint128 is not support");
} else if constexpr (std::is_same_v<T, float> || std::is_same_v<T,
double>) {
T val = 0;
if (!read_float_text_fast_impl(val, rb)) {
- return Status::InvalidArgument("parse number fail, string: '{}'",
- std::string(rb.position(),
rb.count()).c_str());
+ return Status::InvalidDataFormat("parse number fail, string: '{}'",
+ std::string(rb.position(),
rb.count()).c_str());
}
column_data.insert_value(val);
} else if constexpr (std::is_same_v<T, uint8_t>) {
// Note: here we should handle the bool type
T val = 0;
if (!try_read_bool_text(val, rb)) {
- return Status::InvalidArgument("parse boolean fail, string: '{}'",
- std::string(rb.position(),
rb.count()).c_str());
+ return Status::InvalidDataFormat("parse boolean fail, string:
'{}'",
+ std::string(rb.position(),
rb.count()).c_str());
}
column_data.insert_value(val);
} else if constexpr (std::is_integral<T>::value) {
T val = 0;
if (!read_int_text_impl(val, rb)) {
- return Status::InvalidArgument("parse number fail, string: '{}'",
- std::string(rb.position(),
rb.count()).c_str());
+ return Status::InvalidDataFormat("parse number fail, string: '{}'",
+ std::string(rb.position(),
rb.count()).c_str());
}
column_data.insert_value(val);
} else {
diff --git a/be/src/vec/data_types/serde/data_type_struct_serde.cpp
b/be/src/vec/data_types/serde/data_type_struct_serde.cpp
index 37afea846f4..f2d243efc6c 100644
--- a/be/src/vec/data_types/serde/data_type_struct_serde.cpp
+++ b/be/src/vec/data_types/serde/data_type_struct_serde.cpp
@@ -50,14 +50,14 @@ Status
DataTypeStructSerDe::deserialize_one_cell_from_json(IColumn& column, Slic
if (slice[0] != '{') {
std::stringstream ss;
ss << slice[0] << '\'';
- return Status::InvalidArgument("Struct does not start with '{'
character, found '" +
- ss.str());
+ return Status::InvalidDataFormat("Struct does not start with '{'
character, found '" +
+ ss.str());
}
if (slice[slice.size - 1] != '}') {
std::stringstream ss;
ss << slice[slice.size - 1] << '\'';
- return Status::InvalidArgument("Struct does not end with '}'
character, found '" +
- ss.str());
+ return Status::InvalidDataFormat("Struct does not end with '}'
character, found '" +
+ ss.str());
}
// here need handle the empty struct '{}'
diff --git a/gensrc/thrift/Status.thrift b/gensrc/thrift/Status.thrift
index 06083b9a93c..044c708b6eb 100644
--- a/gensrc/thrift/Status.thrift
+++ b/gensrc/thrift/Status.thrift
@@ -75,6 +75,7 @@ enum TStatusCode {
INCOMPLETE = 44,
OLAP_ERR_VERSION_ALREADY_MERGED = 45,
DATA_QUALITY_ERROR = 46,
+ INVALID_DATA_FORMAT = 47,
VEC_EXCEPTION = 50,
VEC_LOGIC_ERROR = 51,
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]