This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch branch-2.0
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-2.0 by this push:
     new ca0323d50eb [opt](status) add INVALID_DATA_FORMAT status code for 
serde (#24795)
ca0323d50eb is described below

commit ca0323d50eb4523ff23688513ecbbff0a860b735
Author: Mingyu Chen <[email protected]>
AuthorDate: Fri Sep 22 19:32:09 2023 +0800

    [opt](status) add INVALID_DATA_FORMAT status code for serde (#24795)
    
    Add a new status code on BE: `INVALID_DATA_FORMAT`,
    mainly used to indicate that parsing input data failed because of its
    format, such as parsing `xxx` as an integer.
    
    Previously, we used `INVALID_ARGUMENT` in these cases,
    but it prints a lot of error stack logs in be.INFO when an error happens.
    
    One option is to add `INVALID_ARGUMENT` to `capture_stacktrace` to
    suppress its stack log,
    but that may affect other error cases.
    So I decided to add a new status code for data format parsing errors.
---
 be/src/common/status.h                                     |  6 +++++-
 be/src/vec/data_types/serde/data_type_array_serde.cpp      |  8 ++++----
 be/src/vec/data_types/serde/data_type_date64_serde.cpp     |  8 ++++----
 be/src/vec/data_types/serde/data_type_datetimev2_serde.cpp |  4 ++--
 be/src/vec/data_types/serde/data_type_datev2_serde.cpp     |  4 ++--
 be/src/vec/data_types/serde/data_type_decimal_serde.cpp    |  6 +++---
 be/src/vec/data_types/serde/data_type_map_serde.cpp        |  5 +++--
 be/src/vec/data_types/serde/data_type_number_serde.cpp     | 14 +++++++-------
 be/src/vec/data_types/serde/data_type_struct_serde.cpp     |  8 ++++----
 gensrc/thrift/Status.thrift                                |  1 +
 10 files changed, 35 insertions(+), 29 deletions(-)

diff --git a/be/src/common/status.h b/be/src/common/status.h
index 1d73bee2d1d..f4f552036d4 100644
--- a/be/src/common/status.h
+++ b/be/src/common/status.h
@@ -37,6 +37,7 @@ TStatusError(PUBLISH_TIMEOUT);
 TStatusError(MEM_ALLOC_FAILED);
 TStatusError(BUFFER_ALLOCATION_FAILED);
 TStatusError(INVALID_ARGUMENT);
+TStatusError(INVALID_DATA_FORMAT);
 TStatusError(MINIMUM_RESERVATION_UNAVAILABLE);
 TStatusError(CORRUPTION);
 TStatusError(IO_ERROR);
@@ -318,7 +319,8 @@ constexpr bool capture_stacktrace(int code) {
         && code != ErrorCode::CANCELLED
         && code != ErrorCode::UNINITIALIZED
         && code != ErrorCode::PIP_WAIT_FOR_RF
-        && code != ErrorCode::PIP_WAIT_FOR_SC;
+        && code != ErrorCode::PIP_WAIT_FOR_SC
+        && code != ErrorCode::INVALID_DATA_FORMAT;
 }
 // clang-format on
 
@@ -396,6 +398,7 @@ public:
     ERROR_CTOR(MemoryAllocFailed, MEM_ALLOC_FAILED)
     ERROR_CTOR(BufferAllocFailed, BUFFER_ALLOCATION_FAILED)
     ERROR_CTOR(InvalidArgument, INVALID_ARGUMENT)
+    ERROR_CTOR(InvalidDataFormat, INVALID_DATA_FORMAT)
     ERROR_CTOR(MinimumReservationUnavailable, MINIMUM_RESERVATION_UNAVAILABLE)
     ERROR_CTOR(Corruption, CORRUPTION)
     ERROR_CTOR(IOError, IO_ERROR)
@@ -435,6 +438,7 @@ public:
     }
 
     bool is_invalid_argument() const { return ErrorCode::INVALID_ARGUMENT == 
_code; }
+    bool is_invalid_data_format() const { return 
ErrorCode::INVALID_DATA_FORMAT == _code; }
 
     bool is_not_found() const { return _code == ErrorCode::NOT_FOUND; }
     bool is_not_authorized() const { return code() == 
TStatusCode::NOT_AUTHORIZED; }
diff --git a/be/src/vec/data_types/serde/data_type_array_serde.cpp 
b/be/src/vec/data_types/serde/data_type_array_serde.cpp
index ac7992cc068..727d8234ac1 100644
--- a/be/src/vec/data_types/serde/data_type_array_serde.cpp
+++ b/be/src/vec/data_types/serde/data_type_array_serde.cpp
@@ -83,12 +83,12 @@ Status 
DataTypeArraySerDe::deserialize_one_cell_from_json(IColumn& column, Slice
     IColumn& nested_column = array_column.get_data();
     DCHECK(nested_column.is_nullable());
     if (slice[0] != '[') {
-        return Status::InvalidArgument("Array does not start with '[' 
character, found '{}'",
-                                       slice[0]);
+        return Status::InvalidDataFormat("Array does not start with '[' 
character, found '{}'",
+                                         slice[0]);
     }
     if (slice[slice.size - 1] != ']') {
-        return Status::InvalidArgument("Array does not end with ']' character, 
found '{}'",
-                                       slice[slice.size - 1]);
+        return Status::InvalidDataFormat("Array does not end with ']' 
character, found '{}'",
+                                         slice[slice.size - 1]);
     }
     // empty array []
     if (slice.size == 2) {
diff --git a/be/src/vec/data_types/serde/data_type_date64_serde.cpp 
b/be/src/vec/data_types/serde/data_type_date64_serde.cpp
index e4978410f0f..f7aa8e11d13 100644
--- a/be/src/vec/data_types/serde/data_type_date64_serde.cpp
+++ b/be/src/vec/data_types/serde/data_type_date64_serde.cpp
@@ -86,8 +86,8 @@ Status 
DataTypeDate64SerDe::deserialize_one_cell_from_json(IColumn& column, Slic
             val = 716833;
         }
     } else if (ReadBuffer rb(slice.data, slice.size); 
!read_date_text_impl<Int64>(val, rb)) {
-        return Status::InvalidArgument("parse date fail, string: '{}'",
-                                       std::string(rb.position(), 
rb.count()).c_str());
+        return Status::InvalidDataFormat("parse date fail, string: '{}'",
+                                         std::string(rb.position(), 
rb.count()).c_str());
     }
     column_data.insert_value(val);
     return Status::OK();
@@ -159,8 +159,8 @@ Status 
DataTypeDateTimeSerDe::deserialize_one_cell_from_json(IColumn& column, Sl
             val = 14000101000000L;
         }
     } else if (ReadBuffer rb(slice.data, slice.size); 
!read_datetime_text_impl<Int64>(val, rb)) {
-        return Status::InvalidArgument("parse datetime fail, string: '{}'",
-                                       std::string(rb.position(), 
rb.count()).c_str());
+        return Status::InvalidDataFormat("parse datetime fail, string: '{}'",
+                                         std::string(rb.position(), 
rb.count()).c_str());
     }
     column_data.insert_value(val);
     return Status::OK();
diff --git a/be/src/vec/data_types/serde/data_type_datetimev2_serde.cpp 
b/be/src/vec/data_types/serde/data_type_datetimev2_serde.cpp
index 9b931de1bfc..f0563a4a340 100644
--- a/be/src/vec/data_types/serde/data_type_datetimev2_serde.cpp
+++ b/be/src/vec/data_types/serde/data_type_datetimev2_serde.cpp
@@ -83,8 +83,8 @@ Status 
DataTypeDateTimeV2SerDe::deserialize_one_cell_from_json(IColumn& column,
 
     } else if (ReadBuffer rb(slice.data, slice.size);
                !read_datetime_v2_text_impl<UInt64>(val, rb)) {
-        return Status::InvalidArgument("parse date fail, string: '{}'",
-                                       std::string(rb.position(), 
rb.count()).c_str());
+        return Status::InvalidDataFormat("parse date fail, string: '{}'",
+                                         std::string(rb.position(), 
rb.count()).c_str());
     }
     column_data.insert_value(val);
     return Status::OK();
diff --git a/be/src/vec/data_types/serde/data_type_datev2_serde.cpp 
b/be/src/vec/data_types/serde/data_type_datev2_serde.cpp
index b56560220a5..bc758511f0d 100644
--- a/be/src/vec/data_types/serde/data_type_datev2_serde.cpp
+++ b/be/src/vec/data_types/serde/data_type_datev2_serde.cpp
@@ -73,8 +73,8 @@ Status 
DataTypeDateV2SerDe::deserialize_one_cell_from_json(IColumn& column, Slic
             val = doris::vectorized::MIN_DATE_V2;
         }
     } else if (ReadBuffer rb(slice.data, slice.size); 
!read_date_v2_text_impl<UInt32>(val, rb)) {
-        return Status::InvalidArgument("parse date fail, string: '{}'",
-                                       std::string(rb.position(), 
rb.count()).c_str());
+        return Status::InvalidDataFormat("parse date fail, string: '{}'",
+                                         std::string(rb.position(), 
rb.count()).c_str());
     }
     column_data.insert_value(val);
     return Status::OK();
diff --git a/be/src/vec/data_types/serde/data_type_decimal_serde.cpp 
b/be/src/vec/data_types/serde/data_type_decimal_serde.cpp
index 7907311c7da..e00c1139cb1 100644
--- a/be/src/vec/data_types/serde/data_type_decimal_serde.cpp
+++ b/be/src/vec/data_types/serde/data_type_decimal_serde.cpp
@@ -81,9 +81,9 @@ Status 
DataTypeDecimalSerDe<T>::deserialize_one_cell_from_json(IColumn& column,
         column_data.emplace_back(val);
         return Status::OK();
     }
-    return Status::InvalidArgument("parse decimal fail, string: '{}', 
primitive type: '{}'",
-                                   std::string(rb.position(), 
rb.count()).c_str(),
-                                   get_primitive_type());
+    return Status::InvalidDataFormat("parse decimal fail, string: '{}', 
primitive type: '{}'",
+                                     std::string(rb.position(), 
rb.count()).c_str(),
+                                     get_primitive_type());
 }
 
 template <typename T>
diff --git a/be/src/vec/data_types/serde/data_type_map_serde.cpp 
b/be/src/vec/data_types/serde/data_type_map_serde.cpp
index 84c4830a04a..68c41a8d429 100644
--- a/be/src/vec/data_types/serde/data_type_map_serde.cpp
+++ b/be/src/vec/data_types/serde/data_type_map_serde.cpp
@@ -186,12 +186,13 @@ Status 
DataTypeMapSerDe::deserialize_one_cell_from_json(IColumn& column, Slice&
     if (slice[0] != '{') {
         std::stringstream ss;
         ss << slice[0] << '\'';
-        return Status::InvalidArgument("Map does not start with '{' character, 
found '" + ss.str());
+        return Status::InvalidDataFormat("Map does not start with '{' 
character, found '" +
+                                         ss.str());
     }
     if (slice[slice.size - 1] != '}') {
         std::stringstream ss;
         ss << slice[slice.size - 1] << '\'';
-        return Status::InvalidArgument("Map does not end with '}' character, 
found '" + ss.str());
+        return Status::InvalidDataFormat("Map does not end with '}' character, 
found '" + ss.str());
     }
     // empty map
     if (slice.size == 2) {
diff --git a/be/src/vec/data_types/serde/data_type_number_serde.cpp 
b/be/src/vec/data_types/serde/data_type_number_serde.cpp
index ffa3239a67b..9c08a33c0cb 100644
--- a/be/src/vec/data_types/serde/data_type_number_serde.cpp
+++ b/be/src/vec/data_types/serde/data_type_number_serde.cpp
@@ -108,27 +108,27 @@ Status 
DataTypeNumberSerDe<T>::deserialize_one_cell_from_json(IColumn& column, S
     ReadBuffer rb(slice.data, slice.size);
     if constexpr (std::is_same<T, UInt128>::value) {
         // TODO: support for Uint128
-        return Status::InvalidArgument("uint128 is not support");
+        return Status::InvalidDataFormat("uint128 is not support");
     } else if constexpr (std::is_same_v<T, float> || std::is_same_v<T, 
double>) {
         T val = 0;
         if (!read_float_text_fast_impl(val, rb)) {
-            return Status::InvalidArgument("parse number fail, string: '{}'",
-                                           std::string(rb.position(), 
rb.count()).c_str());
+            return Status::InvalidDataFormat("parse number fail, string: '{}'",
+                                             std::string(rb.position(), 
rb.count()).c_str());
         }
         column_data.insert_value(val);
     } else if constexpr (std::is_same_v<T, uint8_t>) {
         // Note: here we should handle the bool type
         T val = 0;
         if (!try_read_bool_text(val, rb)) {
-            return Status::InvalidArgument("parse boolean fail, string: '{}'",
-                                           std::string(rb.position(), 
rb.count()).c_str());
+            return Status::InvalidDataFormat("parse boolean fail, string: 
'{}'",
+                                             std::string(rb.position(), 
rb.count()).c_str());
         }
         column_data.insert_value(val);
     } else if constexpr (std::is_integral<T>::value) {
         T val = 0;
         if (!read_int_text_impl(val, rb)) {
-            return Status::InvalidArgument("parse number fail, string: '{}'",
-                                           std::string(rb.position(), 
rb.count()).c_str());
+            return Status::InvalidDataFormat("parse number fail, string: '{}'",
+                                             std::string(rb.position(), 
rb.count()).c_str());
         }
         column_data.insert_value(val);
     } else {
diff --git a/be/src/vec/data_types/serde/data_type_struct_serde.cpp 
b/be/src/vec/data_types/serde/data_type_struct_serde.cpp
index 37afea846f4..f2d243efc6c 100644
--- a/be/src/vec/data_types/serde/data_type_struct_serde.cpp
+++ b/be/src/vec/data_types/serde/data_type_struct_serde.cpp
@@ -50,14 +50,14 @@ Status 
DataTypeStructSerDe::deserialize_one_cell_from_json(IColumn& column, Slic
     if (slice[0] != '{') {
         std::stringstream ss;
         ss << slice[0] << '\'';
-        return Status::InvalidArgument("Struct does not start with '{' 
character, found '" +
-                                       ss.str());
+        return Status::InvalidDataFormat("Struct does not start with '{' 
character, found '" +
+                                         ss.str());
     }
     if (slice[slice.size - 1] != '}') {
         std::stringstream ss;
         ss << slice[slice.size - 1] << '\'';
-        return Status::InvalidArgument("Struct does not end with '}' 
character, found '" +
-                                       ss.str());
+        return Status::InvalidDataFormat("Struct does not end with '}' 
character, found '" +
+                                         ss.str());
     }
 
     // here need handle the empty struct '{}'
diff --git a/gensrc/thrift/Status.thrift b/gensrc/thrift/Status.thrift
index 06083b9a93c..044c708b6eb 100644
--- a/gensrc/thrift/Status.thrift
+++ b/gensrc/thrift/Status.thrift
@@ -75,6 +75,7 @@ enum TStatusCode {
     INCOMPLETE          = 44,
     OLAP_ERR_VERSION_ALREADY_MERGED = 45,
     DATA_QUALITY_ERROR  = 46,
+    INVALID_DATA_FORMAT = 47,
 
     VEC_EXCEPTION = 50,
     VEC_LOGIC_ERROR = 51,


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to