notashes commented on code in PR #20182:
URL: https://github.com/apache/datafusion/pull/20182#discussion_r2778165967


##########
datafusion/common/benches/with_hashes.rs:
##########
@@ -205,5 +222,123 @@ where
     Arc::new(array)
 }
 
+/// Create a StructArray with multiple columns
+///
+/// The struct has four non-nullable fields (`bool_col`, `int32_col`,
+/// `int64_col`, `string_col`), each holding `array_len` random values drawn
+/// from the generator returned by `make_rng()`. The struct itself carries no
+/// null buffer.
+fn create_struct_array(array_len: usize) -> ArrayRef {
+    let mut rng = make_rng();
+
+    // Create 4 columns of different types for our struct array
+    let bool_array: ArrayRef = Arc::new(
+        (0..array_len)
+            .map(|_| Some(rng.random::<bool>()))
+            .collect::<arrow::array::BooleanArray>(),
+    );
+
+    let int32_array: ArrayRef = Arc::new(
+        (0..array_len)
+            .map(|_| Some(rng.random::<i32>()))
+            .collect::<PrimitiveArray<Int32Type>>(),
+    );
+
+    let int64_array: ArrayRef = Arc::new(
+        (0..array_len)
+            .map(|_| Some(rng.random::<i64>()))
+            .collect::<PrimitiveArray<Int64Type>>(),
+    );
+
+    let string_array: ArrayRef = {
+        let strings: Vec<String> = (0..array_len)
+            .map(|_| {
+                // Each string is 5 to 19 alphanumeric characters long
+                let len = rng.random_range(5..20);
+                // Sample through `&mut rng` rather than a clone so the
+                // generator state advances with every character; sampling
+                // from a clone would start each string from almost the same
+                // state and produce heavily overlapping values.
+                let value: Vec<u8> =
+                    (&mut rng).sample_iter(&Alphanumeric).take(len).collect();
+                String::from_utf8(value).expect("Alphanumeric samples are ASCII")
+            })
+            .collect();
+        Arc::new(StringArray::from(strings))
+    };
+
+    let fields = Fields::from(vec![
+        Field::new("bool_col", DataType::Boolean, false),
+        Field::new("int32_col", DataType::Int32, false),
+        Field::new("int64_col", DataType::Int64, false),
+        Field::new("string_col", DataType::Utf8, false),
+    ]);
+
+    // Child arrays must be listed in the same order as `fields`
+    Arc::new(StructArray::new(
+        fields,
+        vec![bool_array, int32_array, int64_array, string_array],
+        None,
+    ))
+}
+
+/// Create a RunArray to test run array hashing.
+///
+/// Runs are generated until they cover `array_len` logical rows. Each run has
+/// a random length between 1 and 50 (the last run is clipped to the rows that
+/// remain) and a single random, non-null value of type `T::Native`.
+///
+/// # Panics
+///
+/// Panics if a run end does not fit in an `i32`, or if `RunArray::try_new`
+/// rejects the generated run ends / values.
+fn create_run_array<T>(array_len: usize) -> ArrayRef
+where
+    T: ArrowPrimitiveType,
+    StandardUniform: Distribution<T::Native>,
+{
+    let mut rng = make_rng();
+
+    // Create runs of varying lengths
+    let mut run_ends: Vec<i32> = Vec::new();
+    let mut values: Vec<Option<T::Native>> = Vec::new();
+    let mut current_end = 0;
+
+    while current_end < array_len {
+        // Random run length between 1 and 50
+        let run_length = rng.random_range(1..=50).min(array_len - current_end);
+        current_end += run_length;
+        // Run ends are cumulative logical offsets stored as Int32; convert
+        // with a check instead of `as` so an oversized `array_len` fails
+        // loudly rather than wrapping into a bogus run end.
+        let run_end = i32::try_from(current_end)
+            .expect("run end must fit in the Int32 run-ends array");
+        run_ends.push(run_end);
+        values.push(Some(rng.random::<T::Native>()));
+    }
+
+    let run_ends_array = Arc::new(PrimitiveArray::<Int32Type>::from(run_ends));
+    let values_array: Arc<dyn Array> =
+        Arc::new(values.into_iter().collect::<PrimitiveArray<T>>());
+
+    Arc::new(
+        RunArray::try_new(&run_ends_array, values_array.as_ref())
+            .expect("Failed to create RunArray"),
+    )
+}
+
+/// Create a RunArray with null values
+fn create_run_array_with_null_values<T>(array_len: usize) -> ArrayRef

Review Comment:
   Gotcha! I tried pattern matching within `add_nulls()` to extract the
values array and insert the nulls there. It seems better!



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to