korowa commented on code in PR #8020:
URL: https://github.com/apache/arrow-datafusion/pull/8020#discussion_r1385356681


##########
datafusion/physical-plan/src/joins/utils.rs:
##########
@@ -920,71 +920,99 @@ pub(crate) fn append_right_indices(
     }
 }
 
-/// Get unmatched and deduplicated indices
+/// Get unmatched and deduplicated indices for specified range of indices
 pub(crate) fn get_anti_indices(
-    row_count: usize,
+    range: Range<usize>,
     input_indices: &UInt32Array,
 ) -> UInt32Array {
-    let mut bitmap = BooleanBufferBuilder::new(row_count);
-    bitmap.append_n(row_count, false);
-    input_indices.iter().flatten().for_each(|v| {
-        bitmap.set_bit(v as usize, true);
-    });
+    let mut bitmap = BooleanBufferBuilder::new(range.len());
+    bitmap.append_n(range.len(), false);
+    input_indices
+        .iter()
+        .flatten()
+        .map(|v| v as usize)
+        .filter(|v| range.contains(v))
+        .for_each(|v| {
+            bitmap.set_bit(v - range.start, true);
+        });
+
+    let offset = range.start;
 
     // get the anti index
-    (0..row_count)
-        .filter_map(|idx| (!bitmap.get_bit(idx)).then_some(idx as u32))
+    (range)
+        .filter_map(|idx| (!bitmap.get_bit(idx - offset)).then_some(idx as u32))
         .collect::<UInt32Array>()
 }
 
 /// Get unmatched and deduplicated indices
 pub(crate) fn get_anti_u64_indices(
-    row_count: usize,
+    range: Range<usize>,
     input_indices: &UInt64Array,
 ) -> UInt64Array {
-    let mut bitmap = BooleanBufferBuilder::new(row_count);
-    bitmap.append_n(row_count, false);
-    input_indices.iter().flatten().for_each(|v| {
-        bitmap.set_bit(v as usize, true);
-    });
+    let mut bitmap = BooleanBufferBuilder::new(range.len());
+    bitmap.append_n(range.len(), false);
+    input_indices
+        .iter()
+        .flatten()
+        .map(|v| v as usize)
+        .filter(|v| range.contains(v))
+        .for_each(|v| {
+            bitmap.set_bit(v - range.start, true);
+        });
+
+    let offset = range.start;
 
     // get the anti index
-    (0..row_count)
-        .filter_map(|idx| (!bitmap.get_bit(idx)).then_some(idx as u64))
+    (range)
+        .filter_map(|idx| (!bitmap.get_bit(idx - offset)).then_some(idx as u64))
         .collect::<UInt64Array>()
 }
 
-/// Get matched and deduplicated indices
+/// Get matched and deduplicated indices for specified range of indices
 pub(crate) fn get_semi_indices(
-    row_count: usize,
+    range: Range<usize>,
     input_indices: &UInt32Array,
 ) -> UInt32Array {
-    let mut bitmap = BooleanBufferBuilder::new(row_count);
-    bitmap.append_n(row_count, false);
-    input_indices.iter().flatten().for_each(|v| {
-        bitmap.set_bit(v as usize, true);
-    });
+    let mut bitmap = BooleanBufferBuilder::new(range.len());
+    bitmap.append_n(range.len(), false);
+    input_indices
+        .iter()
+        .flatten()
+        .map(|v| v as usize)
+        .filter(|v| range.contains(v))
+        .for_each(|v| {
+            bitmap.set_bit(v - range.start, true);
+        });
+
+    let offset = range.start;
 
     // get the semi index
-    (0..row_count)
-        .filter_map(|idx| (bitmap.get_bit(idx)).then_some(idx as u32))
+    (range)
+        .filter_map(|idx| (bitmap.get_bit(idx - offset)).then_some(idx as u32))
         .collect::<UInt32Array>()
 }
 
 /// Get matched and deduplicated indices
 pub(crate) fn get_semi_u64_indices(

Review Comment:
   Same as above



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to