This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/master by this push:
     new 4fc9ad126ff Benchmark for casting view to dict arrays (and the reverse) (#5874)
4fc9ad126ff is described below

commit 4fc9ad126ff26d2ad81d3b35f0c11d262cb104f4
Author: Xiangpeng Hao <[email protected]>
AuthorDate: Wed Jun 12 18:31:06 2024 -0400

    Benchmark for casting view to dict arrays (and the reverse) (#5874)
    
    * add benchmark
    
    * make clippy happy
    
    * move to arrow workspace
---
 arrow/benches/cast_kernels.rs | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)

diff --git a/arrow/benches/cast_kernels.rs b/arrow/benches/cast_kernels.rs
index 228408e5711..8803e8eea87 100644
--- a/arrow/benches/cast_kernels.rs
+++ b/arrow/benches/cast_kernels.rs
@@ -114,6 +114,17 @@ fn build_decimal256_array(size: usize, precision: u8, scale: i8) -> ArrayRef {
     )
 }
 
+fn build_dict_array(size: usize) -> ArrayRef {
+    let values = StringArray::from_iter([
+        Some("small"),
+        Some("larger string more than 12 bytes"),
+        None,
+    ]);
+    let keys = UInt64Array::from_iter((0..size as u64).map(|v| v % 3));
+
+    Arc::new(DictionaryArray::new(keys, Arc::new(values)))
+}
+
 // cast array from specified primitive array type to desired data type
 fn cast_array(array: &ArrayRef, to_type: DataType) {
     criterion::black_box(cast(array, &to_type).unwrap());
@@ -138,6 +149,9 @@ fn add_benchmark(c: &mut Criterion) {
     let decimal128_array = build_decimal128_array(512, 10, 3);
     let decimal256_array = build_decimal256_array(512, 50, 3);
 
+    let dict_array = build_dict_array(10_000);
+    let string_view_array = cast(&dict_array, &DataType::Utf8View).unwrap();
+
     c.bench_function("cast int32 to int32 512", |b| {
         b.iter(|| cast_array(&i32_array, DataType::Int32))
     });
@@ -237,6 +251,17 @@ fn add_benchmark(c: &mut Criterion) {
     c.bench_function("cast decimal256 to decimal256 512 with same scale", |b| {
         b.iter(|| cast_array(&decimal256_array, DataType::Decimal256(60, 3)))
     });
+    c.bench_function("cast dict to string view", |b| {
+        b.iter(|| cast_array(&dict_array, DataType::Utf8View))
+    });
+    c.bench_function("cast string view to dict", |b| {
+        b.iter(|| {
+            cast_array(
+                &string_view_array,
+                DataType::Dictionary(Box::new(DataType::UInt64), Box::new(DataType::Utf8)),
+            )
+        })
+    });
 }
 
 criterion_group!(benches, add_benchmark);

Reply via email to