This is an automated email from the ASF dual-hosted git repository.
agrove pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion-comet.git
The following commit(s) were added to refs/heads/main by this push:
new bc6b2cda chore: Add criterion benchmarks for casting between integer types (#401)
bc6b2cda is described below
commit bc6b2cda3efd2b0c6c48f932ce19da46456bcbd5
Author: Andy Grove <[email protected]>
AuthorDate: Thu May 9 12:04:55 2024 -0600
chore: Add criterion benchmarks for casting between integer types (#401)
* Add cargo bench for casting between int types
* Update core/benches/cast_from_string.rs
Co-authored-by: comphead <[email protected]>
---------
Co-authored-by: comphead <[email protected]>
---
core/Cargo.toml | 6 ++-
core/benches/{cast.rs => cast_from_string.rs} | 34 ++++++++-------
core/benches/{cast.rs => cast_numeric.rs} | 60 ++++++++++++---------------
3 files changed, 52 insertions(+), 48 deletions(-)
diff --git a/core/Cargo.toml b/core/Cargo.toml
index cbca7f62..ac565680 100644
--- a/core/Cargo.toml
+++ b/core/Cargo.toml
@@ -119,5 +119,9 @@ name = "row_columnar"
harness = false
[[bench]]
-name = "cast"
+name = "cast_from_string"
+harness = false
+
+[[bench]]
+name = "cast_numeric"
harness = false
diff --git a/core/benches/cast.rs b/core/benches/cast_from_string.rs
similarity index 93%
copy from core/benches/cast.rs
copy to core/benches/cast_from_string.rs
index 281fe82e..5bfaebf3 100644
--- a/core/benches/cast.rs
+++ b/core/benches/cast_from_string.rs
@@ -23,19 +23,7 @@ use datafusion_physical_expr::{expressions::Column, PhysicalExpr};
use std::sync::Arc;
fn criterion_benchmark(c: &mut Criterion) {
- let schema = Arc::new(Schema::new(vec![Field::new("a", DataType::Utf8, true)]));
- let mut b = StringBuilder::new();
- for i in 0..1000 {
- if i % 10 == 0 {
- b.append_null();
- } else if i % 2 == 0 {
- b.append_value(format!("{}", rand::random::<f64>()));
- } else {
- b.append_value(format!("{}", rand::random::<i64>()));
- }
- }
- let array = b.finish();
- let batch = RecordBatch::try_new(schema.clone(), vec![Arc::new(array)]).unwrap();
+ let batch = create_utf8_batch();
let expr = Arc::new(Column::new("a", 0));
let timezone = "".to_string();
let cast_string_to_i8 = Cast::new(
@@ -58,7 +46,7 @@ fn criterion_benchmark(c: &mut Criterion) {
);
let cast_string_to_i64 = Cast::new(expr, DataType::Int64, EvalMode::Legacy, timezone);
- let mut group = c.benchmark_group("cast");
+ let mut group = c.benchmark_group("cast_string_to_int");
group.bench_function("cast_string_to_i8", |b| {
b.iter(|| cast_string_to_i8.evaluate(&batch).unwrap());
});
@@ -73,6 +61,24 @@ fn criterion_benchmark(c: &mut Criterion) {
});
}
+// Create UTF8 batch with strings representing ints, floats, nulls
+fn create_utf8_batch() -> RecordBatch {
+ let schema = Arc::new(Schema::new(vec![Field::new("a", DataType::Utf8, true)]));
+ let mut b = StringBuilder::new();
+ for i in 0..1000 {
+ if i % 10 == 0 {
+ b.append_null();
+ } else if i % 2 == 0 {
+ b.append_value(format!("{}", rand::random::<f64>()));
+ } else {
+ b.append_value(format!("{}", rand::random::<i64>()));
+ }
+ }
+ let array = b.finish();
+ let batch = RecordBatch::try_new(schema.clone(), vec![Arc::new(array)]).unwrap();
+ batch
+}
+
fn config() -> Criterion {
Criterion::default()
}
diff --git a/core/benches/cast.rs b/core/benches/cast_numeric.rs
similarity index 62%
rename from core/benches/cast.rs
rename to core/benches/cast_numeric.rs
index 281fe82e..398be694 100644
--- a/core/benches/cast.rs
+++ b/core/benches/cast_numeric.rs
@@ -15,7 +15,7 @@
// specific language governing permissions and limitations
// under the License.
-use arrow_array::{builder::StringBuilder, RecordBatch};
+use arrow_array::{builder::Int32Builder, RecordBatch};
use arrow_schema::{DataType, Field, Schema};
use comet::execution::datafusion::expressions::cast::{Cast, EvalMode};
use criterion::{criterion_group, criterion_main, Criterion};
@@ -23,56 +23,50 @@ use datafusion_physical_expr::{expressions::Column, PhysicalExpr};
use std::sync::Arc;
fn criterion_benchmark(c: &mut Criterion) {
- let schema = Arc::new(Schema::new(vec![Field::new("a", DataType::Utf8, true)]));
- let mut b = StringBuilder::new();
- for i in 0..1000 {
- if i % 10 == 0 {
- b.append_null();
- } else if i % 2 == 0 {
- b.append_value(format!("{}", rand::random::<f64>()));
- } else {
- b.append_value(format!("{}", rand::random::<i64>()));
- }
- }
- let array = b.finish();
- let batch = RecordBatch::try_new(schema.clone(), vec![Arc::new(array)]).unwrap();
+ let batch = create_int32_batch();
let expr = Arc::new(Column::new("a", 0));
let timezone = "".to_string();
- let cast_string_to_i8 = Cast::new(
+ let cast_i32_to_i8 = Cast::new(
expr.clone(),
DataType::Int8,
EvalMode::Legacy,
timezone.clone(),
);
- let cast_string_to_i16 = Cast::new(
+ let cast_i32_to_i16 = Cast::new(
expr.clone(),
DataType::Int16,
EvalMode::Legacy,
timezone.clone(),
);
- let cast_string_to_i32 = Cast::new(
- expr.clone(),
- DataType::Int32,
- EvalMode::Legacy,
- timezone.clone(),
- );
- let cast_string_to_i64 = Cast::new(expr, DataType::Int64, EvalMode::Legacy, timezone);
+ let cast_i32_to_i64 = Cast::new(expr, DataType::Int64, EvalMode::Legacy, timezone);
- let mut group = c.benchmark_group("cast");
- group.bench_function("cast_string_to_i8", |b| {
- b.iter(|| cast_string_to_i8.evaluate(&batch).unwrap());
+ let mut group = c.benchmark_group("cast_int_to_int");
+ group.bench_function("cast_i32_to_i8", |b| {
+ b.iter(|| cast_i32_to_i8.evaluate(&batch).unwrap());
});
- group.bench_function("cast_string_to_i16", |b| {
- b.iter(|| cast_string_to_i16.evaluate(&batch).unwrap());
+ group.bench_function("cast_i32_to_i16", |b| {
+ b.iter(|| cast_i32_to_i16.evaluate(&batch).unwrap());
});
- group.bench_function("cast_string_to_i32", |b| {
- b.iter(|| cast_string_to_i32.evaluate(&batch).unwrap());
- });
- group.bench_function("cast_string_to_i64", |b| {
- b.iter(|| cast_string_to_i64.evaluate(&batch).unwrap());
+ group.bench_function("cast_i32_to_i64", |b| {
+ b.iter(|| cast_i32_to_i64.evaluate(&batch).unwrap());
});
}
+fn create_int32_batch() -> RecordBatch {
+ let schema = Arc::new(Schema::new(vec![Field::new("a", DataType::Int32, true)]));
+ let mut b = Int32Builder::new();
+ for i in 0..1000 {
+ if i % 10 == 0 {
+ b.append_null();
+ } else {
+ b.append_value(rand::random::<i32>());
+ }
+ }
+ let array = b.finish();
+ let batch = RecordBatch::try_new(schema.clone(), vec![Arc::new(array)]).unwrap();
+ batch
+}
+
fn config() -> Criterion {
Criterion::default()
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]