github-actions[bot] commented on code in PR #63001:
URL: https://github.com/apache/doris/pull/63001#discussion_r3199492287


##########
be/src/format/orc/vorc_reader.cpp:
##########
@@ -2172,45 +2209,61 @@ Status OrcReader::_orc_column_to_doris_column(
         resolved_column = converter->get_column(src_type, doris_column, 
data_type);
         resolved_type = converter->get_type();
 
-        if (resolved_column->is_nullable()) {
+        MutableColumnPtr mutable_resolved_column;
+        if (converter->is_consistent()) {
+            resolved_column.reset();
+            mutable_resolved_column = IColumn::mutate(std::move(doris_column));
+        } else {
+            mutable_resolved_column = 
IColumn::mutate(std::move(resolved_column));
+        }
+
+        if (mutable_resolved_column->is_nullable()) {
             SCOPED_RAW_TIMER(&_statistics.decode_null_map_time);
             auto* nullable_column =
-                    
reinterpret_cast<ColumnNullable*>(resolved_column->assume_mutable().get());
+                    
reinterpret_cast<ColumnNullable*>(mutable_resolved_column.get());
             data_column = nullable_column->get_nested_column_ptr();
-
-            NullMap& map_data_column = nullable_column->get_null_map_data();
-            auto origin_size = map_data_column.size();
-            map_data_column.resize(origin_size + num_values);
-            if (cvb->hasNulls) {
-                const auto* cvb_nulls = cvb->notNull.data();
-                for (int i = 0; i < num_values; ++i) {
-                    map_data_column[origin_size + i] = !cvb_nulls[i];
-                }
-            } else {
-                memset(map_data_column.data() + origin_size, 0, num_values);
-            }
+            fill_orc_null_map(nullable_column, cvb, num_values);
         } else {
             if (cvb->hasNulls) {
                 return Status::InternalError("Not nullable column {} has null 
values in orc file",
                                              col_name);
             }
-            data_column = resolved_column->assume_mutable();
+            data_column = std::move(mutable_resolved_column);
         }
 
         RETURN_IF_ERROR(_fill_doris_data_column<is_filter>(
                 col_name, data_column, remove_nullable(resolved_type), 
root_node, orc_column_type,
                 cvb, num_values));
-        // resolve schema change
+
+        if (mutable_resolved_column) {
+            data_column.reset();
+            resolved_column = std::move(mutable_resolved_column);
+        } else {
+            resolved_column = std::move(data_column);
+        }
+
+        if (converter->is_consistent()) {
+            doris_column = std::move(resolved_column);
+            return Status::OK();
+        }
+
+        doris_column = IColumn::mutate(std::move(doris_column));
         auto converted_column = doris_column->assume_mutable();
+        if (converted_column->is_nullable()) {
+            align_orc_null_map(resolved_column,
+                               
reinterpret_cast<ColumnNullable*>(converted_column.get()),

Review Comment:
   This copies the source null map from offset `0`, but in the non-consistent 
nullable converter path the source nullable column was built with the 
destination null map and then deep-mutated. If `doris_column` already has rows, 
`fill_orc_null_map()` appends the newly-read ORC nulls after those old rows in 
`resolved_column`; passing only `resolved_column->size()` here then makes 
`align_orc_null_map()` insert the first `new_rows` entries, which are stale old 
null flags rather than the newly appended slice. This can corrupt null flags 
when reading multiple batches through an ORC schema-change conversion. Please 
preserve the destination old row count and copy from that offset, or avoid 
cloning a source nullable wrapper that contains the destination's existing null 
map.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to