github-actions[bot] commented on code in PR #64651:
URL: https://github.com/apache/doris/pull/64651#discussion_r3448557010
##########
be/src/exec/sink/vtablet_block_convertor.cpp:
##########
@@ -298,33 +299,81 @@ Status
OlapTableBlockConvertor::_internal_validate_column(RuntimeState* state, B
null_map = tmp_column_ptr == nullptr ? nullptr
:
tmp_column_ptr->get_null_map_data().data();
}
+ bool need_manual_truncation = false;
for (size_t j = 0; j < row_count; ++j) {
auto row = rows ? (*rows)[j] : j;
if (need_to_validate(j, row, filter_map, null_map)) {
auto str_val = column_string->get_data_at(j);
bool invalid = str_val.size > limit;
if (invalid) {
- if (str_val.size > len) {
- fmt::format_to(error_msg, "{}",
- "the length of input is too long
than schema. ");
- fmt::format_to(error_msg, "first 32 bytes of input
str: [{}] ",
- str_val.to_prefix(32));
- fmt::format_to(error_msg, "schema length: {}; ",
len);
- fmt::format_to(error_msg, "actual length: {}; ",
str_val.size);
- } else if (str_val.size > limit) {
- fmt::format_to(
- error_msg, "{}",
- "the length of input string is too long
than vec schema. ");
- fmt::format_to(error_msg, "first 32 bytes of input
str: [{}] ",
- str_val.to_prefix(32));
- fmt::format_to(error_msg, "schema length: {}; ",
len);
- fmt::format_to(error_msg, "limit length: {}; ",
limit);
- fmt::format_to(error_msg, "actual length: {}; ",
str_val.size);
+ if (!state->enable_insert_strict() && type_str) {
+ // In non-strict mode, the string will be
truncated at a valid
+ // UTF-8 character boundary within the byte limit
(see below).
+ need_manual_truncation = true;
+ } else {
+ if (str_val.size > len) {
+ fmt::format_to(error_msg, "{}",
+ "the length of input is too
long than schema. ");
+ fmt::format_to(error_msg, "first 32 bytes of
input str: [{}] ",
+ str_val.to_prefix(32));
+ fmt::format_to(error_msg, "schema length: {};
", len);
+ fmt::format_to(error_msg, "actual length: {};
", str_val.size);
+ } else if (str_val.size > limit) {
+ fmt::format_to(
+ error_msg, "{}",
+ "the length of input string is too
long than vec schema. ");
+ fmt::format_to(error_msg, "first 32 bytes of
input str: [{}] ",
+ str_val.to_prefix(32));
+ fmt::format_to(error_msg, "schema length: {};
", len);
+ fmt::format_to(error_msg, "limit length: {};
", limit);
+ fmt::format_to(error_msg, "actual length: {};
", str_val.size);
+ }
+
RETURN_IF_ERROR(set_invalid_and_append_error_msg(row));
}
- RETURN_IF_ERROR(set_invalid_and_append_error_msg(row));
}
}
}
Review Comment:
This changes user-visible load semantics, but the PR does not add any
regression or unit test. The reported failure is specific and easy to regress:
non-strict load/insert into `VARCHAR(32)` with
`${jnd${upper:ı}:ldap://test.comxxxxxx}` or `中123456789012345678901234567890`
should succeed and store a value whose byte length does not exceed 32, while
strict mode should still reject the row. Please add a regression test, with a
generated `.out` file, for this converter path before merging.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]