Dandandan commented on code in PR #12531:
URL: https://github.com/apache/datafusion/pull/12531#discussion_r1766819636


##########
datafusion/physical-plan/src/joins/nested_loop_join.rs:
##########
@@ -458,19 +457,47 @@ struct NestedLoopJoinStream {
     join_metrics: BuildProbeJoinMetrics,
 }
 
+/// Creates a Cartesian product of two input batches, preserving the order of the right batch,
+/// and applying a join filter if provided.
+///
+/// # Example
+/// Input:
+/// left = [0, 1], right = [0, 1, 2]
+///
+/// Output:
+/// left_indices = [0, 1, 0, 1, 0, 1], right_indices = [0, 0, 1, 1, 2, 2]
+///
+/// Input:
+/// left = [0, 1, 2], right = [0, 1, 2, 3], filter = left.a != right.a
+///
+/// Output:
+/// left_indices = [1, 2, 0, 2, 0, 1, 0, 1, 2], right_indices = [0, 0, 1, 1, 2, 2, 3, 3, 3]
 fn build_join_indices(
-    right_row_index: usize,
     left_batch: &RecordBatch,
     right_batch: &RecordBatch,
     filter: Option<&JoinFilter>,
 ) -> Result<(UInt64Array, UInt32Array)> {
-    // left indices: [0, 1, 2, 3, 4, ..., left_row_count]
-    // right indices: [right_index, right_index, ..., right_index]
-
     let left_row_count = left_batch.num_rows();
-    let left_indices = UInt64Array::from_iter_values(0..(left_row_count as u64));
-    let right_indices = UInt32Array::from(vec![right_row_index as u32; left_row_count]);
-    // in the nested loop join, the filter can contain non-equal and equal condition.
+    let right_row_count = right_batch.num_rows();
+
+    // Calculate the capacity of the output array to avoid reallocations
+    let capacity = left_row_count * right_row_count;
+
+    // Left indices are 0..left_row_count repeated right_row_count times
+    let mut left_indices_builder = UInt64Array::builder(capacity);
+    for _ in 0..right_row_count {
+        left_indices_builder.extend((0..(left_row_count as u64)).map(Some));
+    }
+
+    // Right indices are each right row index repeated left_row_count times
+    let mut right_indices_builder = UInt32Array::builder(capacity);
+    for right_index in 0..right_row_count {
+        right_indices_builder.extend(vec![Some(right_index as u32); left_row_count])

Review Comment:
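   We should avoid this intermediate `Vec`: it allocates a new vector on every loop iteration only to copy it straight into the builder. Extending the builder from an iterator instead (e.g. `std::iter::repeat(Some(right_index as u32)).take(left_row_count)`) would avoid the extra allocation.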



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to