github-actions[bot] commented on code in PR #63001:
URL: https://github.com/apache/doris/pull/63001#discussion_r3199492287
##########
be/src/format/orc/vorc_reader.cpp:
##########
@@ -2172,45 +2209,61 @@ Status OrcReader::_orc_column_to_doris_column(
resolved_column = converter->get_column(src_type, doris_column,
data_type);
resolved_type = converter->get_type();
- if (resolved_column->is_nullable()) {
+ MutableColumnPtr mutable_resolved_column;
+ if (converter->is_consistent()) {
+ resolved_column.reset();
+ mutable_resolved_column = IColumn::mutate(std::move(doris_column));
+ } else {
+ mutable_resolved_column =
IColumn::mutate(std::move(resolved_column));
+ }
+
+ if (mutable_resolved_column->is_nullable()) {
SCOPED_RAW_TIMER(&_statistics.decode_null_map_time);
auto* nullable_column =
-
reinterpret_cast<ColumnNullable*>(resolved_column->assume_mutable().get());
+
reinterpret_cast<ColumnNullable*>(mutable_resolved_column.get());
data_column = nullable_column->get_nested_column_ptr();
-
- NullMap& map_data_column = nullable_column->get_null_map_data();
- auto origin_size = map_data_column.size();
- map_data_column.resize(origin_size + num_values);
- if (cvb->hasNulls) {
- const auto* cvb_nulls = cvb->notNull.data();
- for (int i = 0; i < num_values; ++i) {
- map_data_column[origin_size + i] = !cvb_nulls[i];
- }
- } else {
- memset(map_data_column.data() + origin_size, 0, num_values);
- }
+ fill_orc_null_map(nullable_column, cvb, num_values);
} else {
if (cvb->hasNulls) {
return Status::InternalError("Not nullable column {} has null
values in orc file",
col_name);
}
- data_column = resolved_column->assume_mutable();
+ data_column = std::move(mutable_resolved_column);
}
RETURN_IF_ERROR(_fill_doris_data_column<is_filter>(
col_name, data_column, remove_nullable(resolved_type),
root_node, orc_column_type,
cvb, num_values));
- // resolve schema change
+
+ if (mutable_resolved_column) {
+ data_column.reset();
+ resolved_column = std::move(mutable_resolved_column);
+ } else {
+ resolved_column = std::move(data_column);
+ }
+
+ if (converter->is_consistent()) {
+ doris_column = std::move(resolved_column);
+ return Status::OK();
+ }
+
+ doris_column = IColumn::mutate(std::move(doris_column));
auto converted_column = doris_column->assume_mutable();
+ if (converted_column->is_nullable()) {
+ align_orc_null_map(resolved_column,
+
reinterpret_cast<ColumnNullable*>(converted_column.get()),
Review Comment:
This copies the source null map from offset `0`, but in the non-consistent
nullable converter path the source nullable column was built with the
destination null map and then deep-mutated. If `doris_column` already has rows,
`fill_orc_null_map()` appends the newly-read ORC nulls after those old rows in
`resolved_column`; passing only `resolved_column->size()` here then makes
`align_orc_null_map()` insert the first `new_rows` entries, which are stale old
null flags rather than the newly appended slice. This can corrupt null flags
when reading multiple batches through an ORC schema-change conversion. Please
preserve the destination old row count and copy from that offset, or avoid
cloning a source nullable wrapper that contains the destination's existing null
map.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]