This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.1 by this push:
new 5a810122a22 [debug](load) check the column type when string column is invalid (#39337)
5a810122a22 is described below
commit 5a810122a22bbd4e600630ac3b66a84570e10123
Author: Jerry Hu <[email protected]>
AuthorDate: Sat Aug 24 18:14:21 2024 +0800
[debug](load) check the column type when string column is invalid (#39337)
## Proposed changes
Issue Number: close #xxx
<!--Describe your changes.-->
---
be/src/vec/sink/vtablet_block_convertor.cpp | 28 ++++++++++++++++++++++------
1 file changed, 22 insertions(+), 6 deletions(-)
diff --git a/be/src/vec/sink/vtablet_block_convertor.cpp b/be/src/vec/sink/vtablet_block_convertor.cpp
index 4446e44f431..96de68f5976 100644
--- a/be/src/vec/sink/vtablet_block_convertor.cpp
+++ b/be/src/vec/sink/vtablet_block_convertor.cpp
@@ -202,10 +202,11 @@ Status
OlapTableBlockConvertor::_validate_column(RuntimeState* state, const Type
return ret;
};
- auto column_ptr =
vectorized::check_and_get_column<vectorized::ColumnNullable>(*column);
- auto& real_column_ptr = column_ptr == nullptr ? column :
(column_ptr->get_nested_column_ptr());
- auto null_map = column_ptr == nullptr ? nullptr :
column_ptr->get_null_map_data().data();
- auto need_to_validate = [&null_map, this](size_t j, size_t row) {
+ const auto* column_ptr =
vectorized::check_and_get_column<vectorized::ColumnNullable>(*column);
+ const auto& real_column_ptr =
+ column_ptr == nullptr ? column :
(column_ptr->get_nested_column_ptr());
+ const auto* null_map = column_ptr == nullptr ? nullptr :
column_ptr->get_null_map_data().data();
+ const auto need_to_validate = [&null_map, this](size_t j, size_t row) {
return !_filter_map[row] && (null_map == nullptr || null_map[j] == 0);
};
@@ -213,7 +214,7 @@ Status
OlapTableBlockConvertor::_validate_column(RuntimeState* state, const Type
case TYPE_CHAR:
case TYPE_VARCHAR:
case TYPE_STRING: {
- const auto column_string =
+ const auto* column_string =
assert_cast<const
vectorized::ColumnString*>(real_column_ptr.get());
size_t limit = config::string_type_length_soft_limit_bytes;
@@ -222,12 +223,22 @@ Status
OlapTableBlockConvertor::_validate_column(RuntimeState* state, const Type
limit = std::min(config::string_type_length_soft_limit_bytes,
type.len);
}
- auto* __restrict offsets = column_string->get_offsets().data();
+ const auto* __restrict offsets = column_string->get_offsets().data();
int invalid_count = 0;
for (int j = 0; j < row_count; ++j) {
invalid_count += (offsets[j] - offsets[j - 1]) > limit;
}
+ auto check_column_type = [&]() {
+ const auto& real_column = *real_column_ptr;
+ if (nullptr == dynamic_cast<const
vectorized::ColumnString*>(&real_column)) {
+ return Status::InternalError(
+ "invalid column(#{}) type: {}, expect type: ColumnString, is nereids: {}",
+ slot_index, demangle(typeid(real_column).name()), state->is_nereids());
+ }
+ return Status::OK();
+ };
+
if (invalid_count) {
for (size_t j = 0; j < row_count; ++j) {
auto row = rows ? (*rows)[j] : j;
@@ -235,6 +246,11 @@ Status
OlapTableBlockConvertor::_validate_column(RuntimeState* state, const Type
auto str_val = column_string->get_data_at(j);
bool invalid = str_val.size > limit;
if (invalid) {
+ auto st = check_column_type();
+ if (!st.ok()) {
+ LOG(WARNING) << "check column type failed: " <<
st.to_string();
+ return st;
+ }
if (str_val.size > type.len) {
fmt::format_to(error_msg, "{}",
"the length of input is too long than schema. ");
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]