AdamGS commented on code in PR #9643:
URL: https://github.com/apache/arrow-rs/pull/9643#discussion_r3093295595


##########
arrow-select/src/take.rs:
##########
@@ -624,23 +623,65 @@ where
     OffsetType::Native: OffsetSizeTrait,
     PrimitiveArray<OffsetType>: From<Vec<OffsetType::Native>>,
 {
-    // TODO: Some optimizations can be done here such as if it is
-    // taking the whole list or a contiguous sublist
-    let (list_indices, offsets, null_buf) =
-        take_value_indices_from_list::<IndexType, OffsetType>(values, 
indices)?;
-
-    let taken = take_impl::<OffsetType>(values.values().as_ref(), 
&list_indices)?;
-    let value_offsets = Buffer::from_vec(offsets);
-    // create a new list with taken data and computed null information
+    let list_offsets = values.value_offsets();
+    let child_data = values.values().to_data();
+    let nulls = take_nulls(values.nulls(), indices);
+
+    let mut new_offsets = Vec::with_capacity(indices.len() + 1);
+    new_offsets.push(OffsetType::Native::zero());
+
+    let use_nulls = child_data.null_count() > 0;
+
+    let capacity = child_data
+        .len()
+        .checked_div(values.len())
+        .map(|v| v * indices.len())
+        .unwrap_or_default();
+
+    let mut array_data = MutableArrayData::new(vec![&child_data], use_nulls, 
capacity);
+
+    match nulls.as_ref().filter(|n| n.null_count() > 0) {
+        None => {
+            for index in indices.values() {
+                let ix = index.as_usize();
+                let start = list_offsets[ix].as_usize();
+                let end = list_offsets[ix + 1].as_usize();
+                array_data.extend(0, start, end);
+                
new_offsets.push(OffsetType::Native::from_usize(array_data.len()).unwrap());
+            }
+        }
+        Some(output_nulls) => {
+            new_offsets.resize(indices.len() + 1, OffsetType::Native::zero());
+            let mut last_filled = 0;
+            for i in output_nulls.valid_indices() {
+                let current = 
OffsetType::Native::from_usize(array_data.len()).unwrap();
+                if last_filled < i {
+                    new_offsets[last_filled + 1..=i].fill(current);
+                }
+                // SAFETY: `i` comes from validity bitmap over `indices`, so 
in-bounds.

Review Comment:
   I think the right assertion here would be `assert_eq!(output_nulls.len(), 
indices.len());` right? I can also add an assertion at the end that offsets 
have the expected length.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to