github-actions[bot] commented on code in PR #63001:
URL: https://github.com/apache/doris/pull/63001#discussion_r3210117214


##########
be/src/core/column/column_array.cpp:
##########
@@ -98,6 +100,21 @@ ColumnArray::ColumnArray(MutableColumnPtr&& nested_column) 
: data(std::move(nest
     offsets = ColumnOffsets::create();
 }
 

Review Comment:
   This shared-column constructor skips the invariants enforced by the mutable 
constructor above: `offsets_column` must actually be `ColumnOffset64`, and when 
offsets are non-empty the nested data size must match `offsets.back()`. Public 
callers such as `ColumnArray::create(const ColumnPtr&, const ColumnPtr&)` now 
route here, so an invalid array can be constructed and later 
`offset_at`/`size_at`, filtering, or serialization can read inconsistent 
offsets. Please keep this path shared, but still perform the same const-only, 
offsets-type, and data-size validation using const access.



##########
be/src/core/column/column_map.cpp:
##########
@@ -518,59 +518,82 @@ void ColumnMap::insert_range_from_ignore_overflow(const 
IColumn& src, size_t sta
 }
 
 ColumnPtr ColumnMap::filter(const Filter& filt, ssize_t result_size_hint) 
const {
-    auto k_arr =
-            ColumnArray::create(keys_column->assume_mutable(), 
offsets_column->assume_mutable())
-                    ->filter(filt, result_size_hint);
-    auto v_arr =
-            ColumnArray::create(values_column->assume_mutable(), 
offsets_column->assume_mutable())
-                    ->filter(filt, result_size_hint);
+    // For const filter we must clone subcolumns so the original ColumnMap 
remains intact.
+    // IColumn::mutate(copy) clones if use_count>1, returns self if exclusive.
+    auto offsets_mut = 
IColumn::mutate(static_cast<IColumn::Ptr>(offsets_column));
+    MutableColumnPtr offsets_copy = offsets_mut->clone_empty();
+    offsets_copy->insert_range_from(*offsets_mut, 0, offsets_mut->size());
+    auto k_arr = 
ColumnArray::create(IColumn::mutate(static_cast<IColumn::Ptr>(keys_column)),
+                                     std::move(offsets_mut))
+                         ->filter(filt, result_size_hint);
+    auto v_arr = 
ColumnArray::create(IColumn::mutate(static_cast<IColumn::Ptr>(values_column)),
+                                     std::move(offsets_copy))
+                         ->filter(filt, result_size_hint);
     return ColumnMap::create(assert_cast<const 
ColumnArray&>(*k_arr).get_data_ptr(),
                              assert_cast<const 
ColumnArray&>(*v_arr).get_data_ptr(),
                              assert_cast<const 
ColumnArray&>(*k_arr).get_offsets_ptr());
 }
 
 size_t ColumnMap::filter(const Filter& filter) {
-    MutableColumnPtr copied_off = offsets_column->clone_empty();
-    copied_off->insert_range_from(*offsets_column, 0, offsets_column->size());
-    ColumnArray::create(keys_column->assume_mutable(), 
offsets_column->assume_mutable())
-            ->filter(filter);
-    ColumnArray::create(values_column->assume_mutable(), 
copied_off->assume_mutable())
-            ->filter(filter);
-    return get_offsets().size();
+    // Move subcolumns out of this ColumnMap to get exclusive ownership, then 
write back.
+    auto keys_mut = 
IColumn::mutate(std::move(static_cast<IColumn::Ptr&>(keys_column)));
+    auto offsets_mut = 
IColumn::mutate(std::move(static_cast<IColumn::Ptr&>(offsets_column)));
+    auto values_mut = 
IColumn::mutate(std::move(static_cast<IColumn::Ptr&>(values_column)));
+    // Clone offsets for values (both keys and values share the same offsets 
structure)
+    MutableColumnPtr copied_off = offsets_mut->clone_empty();
+    copied_off->insert_range_from(*offsets_mut, 0, offsets_mut->size());
+    auto k_arr = ColumnArray::create(std::move(keys_mut), 
std::move(offsets_mut));
+    k_arr->filter(filter);
+    auto v_arr = ColumnArray::create(std::move(values_mut), 
std::move(copied_off));
+    v_arr->filter(filter);
+    // Put filtered subcolumns back
+    static_cast<IColumn::Ptr&>(keys_column) = k_arr->get_data_ptr();
+    static_cast<IColumn::Ptr&>(offsets_column) = k_arr->get_offsets_ptr();
+    static_cast<IColumn::Ptr&>(values_column) = v_arr->get_data_ptr();
+    // Use const access to avoid assume_mutable_ref() on the just-written-back 
offsets_column
+    // (k_arr still holds a ref, so use_count > 1 until k_arr goes out of 
scope)
+    return static_cast<const IColumn::Ptr&>(offsets_column)->size();
 }
 
 MutableColumnPtr ColumnMap::permute(const Permutation& perm, size_t limit) 
const {
-    // Make a temp column array
-    auto k_arr =
-            ColumnArray::create(keys_column->assume_mutable(), 
offsets_column->assume_mutable())
-                    ->permute(perm, limit);
-    auto v_arr =
-            ColumnArray::create(values_column->assume_mutable(), 
offsets_column->assume_mutable())
-                    ->permute(perm, limit);
+    // Const permute: clone subcolumns so the original ColumnMap remains 
intact.
+    auto offsets_mut = 
IColumn::mutate(static_cast<IColumn::Ptr>(offsets_column));
+    MutableColumnPtr offsets_copy = offsets_mut->clone_empty();
+    offsets_copy->insert_range_from(*offsets_mut, 0, offsets_mut->size());
+    auto k_arr = 
ColumnArray::create(IColumn::mutate(static_cast<IColumn::Ptr>(keys_column)),
+                                     std::move(offsets_mut))
+                         ->permute(perm, limit);
+    auto v_arr = 
ColumnArray::create(IColumn::mutate(static_cast<IColumn::Ptr>(values_column)),
+                                     std::move(offsets_copy))
+                         ->permute(perm, limit);
 
     return ColumnMap::create(assert_cast<const 
ColumnArray&>(*k_arr).get_data_ptr(),
                              assert_cast<const 
ColumnArray&>(*v_arr).get_data_ptr(),
                              assert_cast<const 
ColumnArray&>(*k_arr).get_offsets_ptr());
 }
 
 Status ColumnMap::deduplicate_keys(bool recursive) {
-    const auto inner_rows = keys_column->size();
-    const auto rows = offsets_column->size();
+    const IColumn& ck = *static_cast<const IColumn::Ptr&>(keys_column);
+    const IColumn& co = *static_cast<const IColumn::Ptr&>(offsets_column);
+    const auto inner_rows = ck.size();
+    const auto rows = co.size();
 
     if (recursive) {
-        auto values_column_ = values_column;
+        const IColumn::Ptr& values_ptr = static_cast<const 
IColumn::Ptr&>(values_column);
+        IColumn::Ptr values_column_ = values_ptr;
         if (values_column_->is_nullable()) {
-            values_column_ = 
(assert_cast<ColumnNullable&>(*values_column)).get_nested_column_ptr();
+            values_column_ =
+                    assert_cast<const 
ColumnNullable&>(*values_column_).get_nested_column_ptr();
         }
 
         if (auto* values_map = 
check_and_get_column<ColumnMap>(values_column_.get())) {
-            RETURN_IF_ERROR(values_map->deduplicate_keys(recursive));
+            
RETURN_IF_ERROR(const_cast<ColumnMap*>(values_map)->deduplicate_keys(recursive));
         }
     }
 

Review Comment:
   This recursive mutation bypasses COW by const-casting the nested `ColumnMap` 
reached through `values_column`. If the map value subcolumn is shared, 
`deduplicate_keys(true)` can move/filter the nested map's subcolumns in place 
and mutate all aliases of that nested map. This is distinct from the top-level 
filtering below, which correctly detaches with 
`IColumn::mutate(std::move(...))`; the recursive value path should similarly 
detach the nullable/value subcolumn and write the deduplicated nested map back 
into `values_column` before continuing.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to