This is an automated email from the ASF dual-hosted git repository.
jorisvandenbossche pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new fc87fd75d6 GH-40357: [C++] Add benchmark for ToTensor conversions
(#40358)
fc87fd75d6 is described below
commit fc87fd75d6602562e64abf8744890332e35f979e
Author: Alenka Frim <[email protected]>
AuthorDate: Tue Mar 26 08:59:50 2024 +0100
GH-40357: [C++] Add benchmark for ToTensor conversions (#40358)
### Rationale for this change
We should add benchmarks to be sure not to cause regressions while working
on additional implementations of `RecordBatch::ToTensor` and `Table::ToTensor`.
### What changes are included in this PR?
New `cpp/src/arrow/tensor_benchmark.cc` file.
* GitHub Issue: #40357
Lead-authored-by: AlenkaF <[email protected]>
Co-authored-by: Alenka Frim <[email protected]>
Co-authored-by: Joris Van den Bossche <[email protected]>
Signed-off-by: Joris Van den Bossche <[email protected]>
---
cpp/src/arrow/CMakeLists.txt | 1 +
cpp/src/arrow/tensor_benchmark.cc | 68 +++++++++++++++++++++++++++++++++++++++
2 files changed, 69 insertions(+)
diff --git a/cpp/src/arrow/CMakeLists.txt b/cpp/src/arrow/CMakeLists.txt
index 3d1b621db0..4bf1008af4 100644
--- a/cpp/src/arrow/CMakeLists.txt
+++ b/cpp/src/arrow/CMakeLists.txt
@@ -1175,6 +1175,7 @@ add_arrow_benchmark(builder_benchmark)
add_arrow_benchmark(compare_benchmark)
add_arrow_benchmark(memory_pool_benchmark)
add_arrow_benchmark(type_benchmark)
+add_arrow_benchmark(tensor_benchmark)
#
# Recurse into sub-directories
diff --git a/cpp/src/arrow/tensor_benchmark.cc
b/cpp/src/arrow/tensor_benchmark.cc
new file mode 100644
index 0000000000..91a9270ef3
--- /dev/null
+++ b/cpp/src/arrow/tensor_benchmark.cc
@@ -0,0 +1,68 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "benchmark/benchmark.h"
+
+#include "arrow/record_batch.h"
+#include "arrow/testing/gtest_util.h"
+#include "arrow/testing/random.h"
+#include "arrow/type.h"
+#include "arrow/util/benchmark_util.h"
+
+namespace arrow {
+
+// Benchmarks RecordBatch::ToTensor() on a batch whose columns all share the
+// type given by ValueType.
+//
+// Benchmark arguments:
+//   state.range(0): total payload size in bytes
+//   state.range(1): number of columns
+// The row count is derived from both, so that
+// num_rows * num_cols * sizeof(c_type) does not exceed the requested size.
+template <typename ValueType>
+static void BatchToTensorSimple(benchmark::State& state) {
+  using CType = typename ValueType::c_type;
+  std::shared_ptr<DataType> ty = TypeTraits<ValueType>::type_singleton();
+
+  const int64_t num_cols = state.range(1);
+  // Keep the whole computation in signed 64-bit arithmetic instead of letting
+  // sizeof() silently convert the operands to an unsigned type.
+  const int64_t num_rows =
+      state.range(0) / num_cols / static_cast<int64_t>(sizeof(CType));
+  // Constructed with a constant argument so every run uses the same generator setup.
+  arrow::random::RandomArrayGenerator gen_{42};
+
+  std::vector<std::shared_ptr<Field>> fields = {};
+  std::vector<std::shared_ptr<Array>> columns = {};
+  // The final size is known up front; avoid repeated reallocation during setup.
+  fields.reserve(static_cast<size_t>(num_cols));
+  columns.reserve(static_cast<size_t>(num_cols));
+
+  for (int64_t i = 0; i < num_cols; ++i) {
+    fields.push_back(field("f" + std::to_string(i), ty));
+    columns.push_back(gen_.ArrayOf(ty, num_rows));
+  }
+  auto schema = std::make_shared<Schema>(std::move(fields));
+  auto batch = RecordBatch::Make(schema, num_rows, columns);
+
+  // Only the conversion itself is timed; all setup above happens once.
+  for (auto _ : state) {
+    ASSERT_OK_AND_ASSIGN(auto tensor, batch->ToTensor());
+    // Mark the result as observed so the conversion cannot be treated as dead code.
+    benchmark::DoNotOptimize(tensor);
+  }
+  state.SetItemsProcessed(state.iterations() * num_rows * num_cols);
+  state.SetBytesProcessed(state.iterations() * ty->byte_width() * num_rows * num_cols);
+}
+
+// Registers the argument grid used by the ToTensor benchmarks: every
+// combination of a size taken from {kL1Size, kL2Size} and a column count
+// taken from {3, 30, 300}, passed as the pair (size, num_columns).
+void SetArgs(benchmark::internal::Benchmark* bench) {
+  // The argument names do not depend on the loop variables; set them once
+  // instead of on every iteration.
+  bench->ArgNames({"size", "num_columns"});
+  for (int64_t size : {kL1Size, kL2Size}) {
+    for (int64_t num_columns : {3, 30, 300}) {
+      bench->Args({size, num_columns});
+    }
+  }
+}
+
+BENCHMARK_TEMPLATE(BatchToTensorSimple, Int8Type)->Apply(SetArgs);  // one registration per value type; SetArgs supplies the (size, num_columns) grid
+BENCHMARK_TEMPLATE(BatchToTensorSimple, Int16Type)->Apply(SetArgs);
+BENCHMARK_TEMPLATE(BatchToTensorSimple, Int32Type)->Apply(SetArgs);
+BENCHMARK_TEMPLATE(BatchToTensorSimple, Int64Type)->Apply(SetArgs);
+
+} // namespace arrow