Dandandan commented on a change in pull request #9070:
URL: https://github.com/apache/arrow/pull/9070#discussion_r550959459



##########
File path: rust/datafusion/src/physical_plan/hash_join.rs
##########
@@ -479,37 +490,40 @@ fn build_batch(
 fn build_join_indexes(
     left: &JoinHashMap,
     right: &RecordBatch,
-    join_type: &JoinType,
+    join_type: JoinType,
     right_on: &HashSet<String>,
-) -> Result<Vec<(JoinIndex, RightIndex)>> {
+) -> Result<(UInt64Array, UInt32Array)> {
     let keys_values = right_on
         .iter()
         .map(|name| Ok(col(name).evaluate(right)?.into_array(right.num_rows())))
         .collect::<Result<Vec<_>>>()?;
 
     let mut key = Vec::with_capacity(keys_values.len());
 
+    let mut left_indices = UInt64Builder::new(0);
+    let mut right_indices = UInt32Builder::new(0);
+
     match join_type {
         JoinType::Inner => {
-            let mut indexes = Vec::new(); // unknown a prior size
-
             // Visit all of the right rows
             for row in 0..right.num_rows() {
                 // Get the key and find it in the build index
                 create_key(&keys_values, row, &mut key)?;
                 let left_indexes = left.get(&key);
-
                 // for every item on the left and right with this key, add the respective pair
-                left_indexes.unwrap_or(&vec![]).iter().for_each(|x| {
-                    // on an inner join, left and right indices are present
-                    indexes.push((Some(*x), Some(row as u32)));
-                })
+
+                if let Some(indices) = left_indexes {
+                    left_indices.append_slice(&indices)?;
+
+                    for _ in 0..indices.len() {
+                        // on an inner join, left and right indices are present
+                        right_indices.append_value(row as u32)?;

Review comment:
       This loop could use something like a `fill` operation instead of appending the same value one element at a time.

##########
File path: rust/datafusion/src/physical_plan/hash_join.rs
##########
@@ -479,37 +490,40 @@ fn build_batch(
 fn build_join_indexes(
     left: &JoinHashMap,
     right: &RecordBatch,
-    join_type: &JoinType,
+    join_type: JoinType,
     right_on: &HashSet<String>,
-) -> Result<Vec<(JoinIndex, RightIndex)>> {
+) -> Result<(UInt64Array, UInt32Array)> {
     let keys_values = right_on
         .iter()
         .map(|name| Ok(col(name).evaluate(right)?.into_array(right.num_rows())))
         .collect::<Result<Vec<_>>>()?;
 
     let mut key = Vec::with_capacity(keys_values.len());
 
+    let mut left_indices = UInt64Builder::new(0);
+    let mut right_indices = UInt32Builder::new(0);
+
     match join_type {
         JoinType::Inner => {
-            let mut indexes = Vec::new(); // unknown a prior size
-
             // Visit all of the right rows
             for row in 0..right.num_rows() {
                 // Get the key and find it in the build index
                 create_key(&keys_values, row, &mut key)?;
                 let left_indexes = left.get(&key);
-
                 // for every item on the left and right with this key, add the respective pair
-                left_indexes.unwrap_or(&vec![]).iter().for_each(|x| {
-                    // on an inner join, left and right indices are present
-                    indexes.push((Some(*x), Some(row as u32)));
-                })
+
+                if let Some(indices) = left_indexes {
+                    left_indices.append_slice(&indices)?;
+
+                    for _ in 0..indices.len() {
+                        // on an inner join, left and right indices are present
+                        right_indices.append_value(row as u32)?;

Review comment:
       This loop could use something like `append_n` instead of appending the same value one element at a time.




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


Reply via email to