This is an automated email from the ASF dual-hosted git repository.

agrove pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion-comet.git


The following commit(s) were added to refs/heads/main by this push:
     new bc6b2cda chore: Add criterion benchmarks for casting between integer types (#401)
bc6b2cda is described below

commit bc6b2cda3efd2b0c6c48f932ce19da46456bcbd5
Author: Andy Grove <[email protected]>
AuthorDate: Thu May 9 12:04:55 2024 -0600

    chore: Add criterion benchmarks for casting between integer types (#401)
    
    * Add cargo bench for casting between int types
    
    * Update core/benches/cast_from_string.rs
    
    Co-authored-by: comphead <[email protected]>
    
    ---------
    
    Co-authored-by: comphead <[email protected]>
---
 core/Cargo.toml                               |  6 ++-
 core/benches/{cast.rs => cast_from_string.rs} | 34 ++++++++-------
 core/benches/{cast.rs => cast_numeric.rs}     | 60 ++++++++++++---------------
 3 files changed, 52 insertions(+), 48 deletions(-)

diff --git a/core/Cargo.toml b/core/Cargo.toml
index cbca7f62..ac565680 100644
--- a/core/Cargo.toml
+++ b/core/Cargo.toml
@@ -119,5 +119,9 @@ name = "row_columnar"
 harness = false
 
 [[bench]]
-name = "cast"
+name = "cast_from_string"
+harness = false
+
+[[bench]]
+name = "cast_numeric"
 harness = false
diff --git a/core/benches/cast.rs b/core/benches/cast_from_string.rs
similarity index 93%
copy from core/benches/cast.rs
copy to core/benches/cast_from_string.rs
index 281fe82e..5bfaebf3 100644
--- a/core/benches/cast.rs
+++ b/core/benches/cast_from_string.rs
@@ -23,19 +23,7 @@ use datafusion_physical_expr::{expressions::Column, PhysicalExpr};
 use std::sync::Arc;
 
 fn criterion_benchmark(c: &mut Criterion) {
-    let schema = Arc::new(Schema::new(vec![Field::new("a", DataType::Utf8, true)]));
-    let mut b = StringBuilder::new();
-    for i in 0..1000 {
-        if i % 10 == 0 {
-            b.append_null();
-        } else if i % 2 == 0 {
-            b.append_value(format!("{}", rand::random::<f64>()));
-        } else {
-            b.append_value(format!("{}", rand::random::<i64>()));
-        }
-    }
-    let array = b.finish();
-    let batch = RecordBatch::try_new(schema.clone(), vec![Arc::new(array)]).unwrap();
+    let batch = create_utf8_batch();
     let expr = Arc::new(Column::new("a", 0));
     let timezone = "".to_string();
     let cast_string_to_i8 = Cast::new(
@@ -58,7 +46,7 @@ fn criterion_benchmark(c: &mut Criterion) {
     );
     let cast_string_to_i64 = Cast::new(expr, DataType::Int64, EvalMode::Legacy, timezone);
 
-    let mut group = c.benchmark_group("cast");
+    let mut group = c.benchmark_group("cast_string_to_int");
     group.bench_function("cast_string_to_i8", |b| {
         b.iter(|| cast_string_to_i8.evaluate(&batch).unwrap());
     });
@@ -73,6 +61,24 @@ fn criterion_benchmark(c: &mut Criterion) {
     });
 }
 
+// Create UTF8 batch with strings representing ints, floats, nulls
+fn create_utf8_batch() -> RecordBatch {
+    let schema = Arc::new(Schema::new(vec![Field::new("a", DataType::Utf8, true)]));
+    let mut b = StringBuilder::new();
+    for i in 0..1000 {
+        if i % 10 == 0 {
+            b.append_null();
+        } else if i % 2 == 0 {
+            b.append_value(format!("{}", rand::random::<f64>()));
+        } else {
+            b.append_value(format!("{}", rand::random::<i64>()));
+        }
+    }
+    let array = b.finish();
+    let batch = RecordBatch::try_new(schema.clone(), vec![Arc::new(array)]).unwrap();
+    batch
+}
+
 fn config() -> Criterion {
     Criterion::default()
 }
diff --git a/core/benches/cast.rs b/core/benches/cast_numeric.rs
similarity index 62%
rename from core/benches/cast.rs
rename to core/benches/cast_numeric.rs
index 281fe82e..398be694 100644
--- a/core/benches/cast.rs
+++ b/core/benches/cast_numeric.rs
@@ -15,7 +15,7 @@
 // specific language governing permissions and limitations
 // under the License.
 
-use arrow_array::{builder::StringBuilder, RecordBatch};
+use arrow_array::{builder::Int32Builder, RecordBatch};
 use arrow_schema::{DataType, Field, Schema};
 use comet::execution::datafusion::expressions::cast::{Cast, EvalMode};
 use criterion::{criterion_group, criterion_main, Criterion};
@@ -23,56 +23,50 @@ use datafusion_physical_expr::{expressions::Column, PhysicalExpr};
 use std::sync::Arc;
 
 fn criterion_benchmark(c: &mut Criterion) {
-    let schema = Arc::new(Schema::new(vec![Field::new("a", DataType::Utf8, true)]));
-    let mut b = StringBuilder::new();
-    for i in 0..1000 {
-        if i % 10 == 0 {
-            b.append_null();
-        } else if i % 2 == 0 {
-            b.append_value(format!("{}", rand::random::<f64>()));
-        } else {
-            b.append_value(format!("{}", rand::random::<i64>()));
-        }
-    }
-    let array = b.finish();
-    let batch = RecordBatch::try_new(schema.clone(), vec![Arc::new(array)]).unwrap();
+    let batch = create_int32_batch();
     let expr = Arc::new(Column::new("a", 0));
     let timezone = "".to_string();
-    let cast_string_to_i8 = Cast::new(
+    let cast_i32_to_i8 = Cast::new(
         expr.clone(),
         DataType::Int8,
         EvalMode::Legacy,
         timezone.clone(),
     );
-    let cast_string_to_i16 = Cast::new(
+    let cast_i32_to_i16 = Cast::new(
         expr.clone(),
         DataType::Int16,
         EvalMode::Legacy,
         timezone.clone(),
     );
-    let cast_string_to_i32 = Cast::new(
-        expr.clone(),
-        DataType::Int32,
-        EvalMode::Legacy,
-        timezone.clone(),
-    );
-    let cast_string_to_i64 = Cast::new(expr, DataType::Int64, EvalMode::Legacy, timezone);
+    let cast_i32_to_i64 = Cast::new(expr, DataType::Int64, EvalMode::Legacy, timezone);
 
-    let mut group = c.benchmark_group("cast");
-    group.bench_function("cast_string_to_i8", |b| {
-        b.iter(|| cast_string_to_i8.evaluate(&batch).unwrap());
+    let mut group = c.benchmark_group("cast_int_to_int");
+    group.bench_function("cast_i32_to_i8", |b| {
+        b.iter(|| cast_i32_to_i8.evaluate(&batch).unwrap());
     });
-    group.bench_function("cast_string_to_i16", |b| {
-        b.iter(|| cast_string_to_i16.evaluate(&batch).unwrap());
+    group.bench_function("cast_i32_to_i16", |b| {
+        b.iter(|| cast_i32_to_i16.evaluate(&batch).unwrap());
     });
-    group.bench_function("cast_string_to_i32", |b| {
-        b.iter(|| cast_string_to_i32.evaluate(&batch).unwrap());
-    });
-    group.bench_function("cast_string_to_i64", |b| {
-        b.iter(|| cast_string_to_i64.evaluate(&batch).unwrap());
+    group.bench_function("cast_i32_to_i64", |b| {
+        b.iter(|| cast_i32_to_i64.evaluate(&batch).unwrap());
     });
 }
 
+fn create_int32_batch() -> RecordBatch {
+    let schema = Arc::new(Schema::new(vec![Field::new("a", DataType::Int32, true)]));
+    let mut b = Int32Builder::new();
+    for i in 0..1000 {
+        if i % 10 == 0 {
+            b.append_null();
+        } else {
+            b.append_value(rand::random::<i32>());
+        }
+    }
+    let array = b.finish();
+    let batch = RecordBatch::try_new(schema.clone(), vec![Arc::new(array)]).unwrap();
+    batch
+}
+
 fn config() -> Criterion {
     Criterion::default()
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to