This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/master by this push:
new 4fc9ad126ff Benchmark for casting view to dict arrays (and the reverse) (#5874)
4fc9ad126ff is described below
commit 4fc9ad126ff26d2ad81d3b35f0c11d262cb104f4
Author: Xiangpeng Hao <[email protected]>
AuthorDate: Wed Jun 12 18:31:06 2024 -0400
Benchmark for casting view to dict arrays (and the reverse) (#5874)
* add benchmark
* make clippy happy
* move to arrow workspace
---
arrow/benches/cast_kernels.rs | 25 +++++++++++++++++++++++++
1 file changed, 25 insertions(+)
diff --git a/arrow/benches/cast_kernels.rs b/arrow/benches/cast_kernels.rs
index 228408e5711..8803e8eea87 100644
--- a/arrow/benches/cast_kernels.rs
+++ b/arrow/benches/cast_kernels.rs
@@ -114,6 +114,17 @@ fn build_decimal256_array(size: usize, precision: u8, scale: i8) -> ArrayRef {
)
}
+fn build_dict_array(size: usize) -> ArrayRef {
+ let values = StringArray::from_iter([
+ Some("small"),
+ Some("larger string more than 12 bytes"),
+ None,
+ ]);
+ let keys = UInt64Array::from_iter((0..size as u64).map(|v| v % 3));
+
+ Arc::new(DictionaryArray::new(keys, Arc::new(values)))
+}
+
// cast array from specified primitive array type to desired data type
fn cast_array(array: &ArrayRef, to_type: DataType) {
criterion::black_box(cast(array, &to_type).unwrap());
@@ -138,6 +149,9 @@ fn add_benchmark(c: &mut Criterion) {
let decimal128_array = build_decimal128_array(512, 10, 3);
let decimal256_array = build_decimal256_array(512, 50, 3);
+ let dict_array = build_dict_array(10_000);
+ let string_view_array = cast(&dict_array, &DataType::Utf8View).unwrap();
+
c.bench_function("cast int32 to int32 512", |b| {
b.iter(|| cast_array(&i32_array, DataType::Int32))
});
@@ -237,6 +251,17 @@ fn add_benchmark(c: &mut Criterion) {
c.bench_function("cast decimal256 to decimal256 512 with same scale", |b| {
b.iter(|| cast_array(&decimal256_array, DataType::Decimal256(60, 3)))
});
+ c.bench_function("cast dict to string view", |b| {
+ b.iter(|| cast_array(&dict_array, DataType::Utf8View))
+ });
+ c.bench_function("cast string view to dict", |b| {
+ b.iter(|| {
+ cast_array(
+ &string_view_array,
+ DataType::Dictionary(Box::new(DataType::UInt64), Box::new(DataType::Utf8)),
+ )
+ })
+ });
}
criterion_group!(benches, add_benchmark);