(arrow) branch main updated: GH-40357: [C++] Add benchmark for ToTensor conversions (#40358)

jorisvandenbossche Tue, 26 Mar 2024 01:00:05 -0700

This is an automated email from the ASF dual-hosted git repository.

jorisvandenbossche pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git



The following commit(s) were added to refs/heads/main by this push:
     new fc87fd75d6 GH-40357: [C++] Add benchmark for ToTensor conversions (#40358)
fc87fd75d6 is described below

commit fc87fd75d6602562e64abf8744890332e35f979e
Author: Alenka Frim <[email protected]>
AuthorDate: Tue Mar 26 08:59:50 2024 +0100

    GH-40357: [C++] Add benchmark for ToTensor conversions (#40358)
    
    ### Rationale for this change
    
    We should add benchmarks to be sure not to cause regressions while working on additional implementations of `RecordBatch::ToTensor` and `Table::ToTensor`.
    
    ### What changes are included in this PR?
    
    New `cpp/src/arrow/tensor_benchmark.cc` file.
    * GitHub Issue: #40357
    
    Lead-authored-by: AlenkaF <[email protected]>
    Co-authored-by: Alenka Frim <[email protected]>
    Co-authored-by: Joris Van den Bossche <[email protected]>
    Signed-off-by: Joris Van den Bossche <[email protected]>
---
 cpp/src/arrow/CMakeLists.txt      |  1 +
 cpp/src/arrow/tensor_benchmark.cc | 68 +++++++++++++++++++++++++++++++++++++++
 2 files changed, 69 insertions(+)

diff --git a/cpp/src/arrow/CMakeLists.txt b/cpp/src/arrow/CMakeLists.txt
index 3d1b621db0..4bf1008af4 100644
--- a/cpp/src/arrow/CMakeLists.txt
+++ b/cpp/src/arrow/CMakeLists.txt
@@ -1175,6 +1175,7 @@ add_arrow_benchmark(builder_benchmark)
 add_arrow_benchmark(compare_benchmark)
 add_arrow_benchmark(memory_pool_benchmark)
 add_arrow_benchmark(type_benchmark)
+add_arrow_benchmark(tensor_benchmark)
 
 #
 # Recurse into sub-directories
diff --git a/cpp/src/arrow/tensor_benchmark.cc b/cpp/src/arrow/tensor_benchmark.cc
new file mode 100644
index 0000000000..91a9270ef3
--- /dev/null
+++ b/cpp/src/arrow/tensor_benchmark.cc
@@ -0,0 +1,68 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "benchmark/benchmark.h"
+
+#include "arrow/record_batch.h"
+#include "arrow/testing/gtest_util.h"
+#include "arrow/testing/random.h"
+#include "arrow/type.h"
+#include "arrow/util/benchmark_util.h"
+
+namespace arrow {
+
+template <typename ValueType>
+static void BatchToTensorSimple(benchmark::State& state) {
+  using CType = typename ValueType::c_type;
+  std::shared_ptr<DataType> ty = TypeTraits<ValueType>::type_singleton();
+
+  const int64_t num_cols = state.range(1);
+  const int64_t num_rows = state.range(0) / num_cols / sizeof(CType);
+  arrow::random::RandomArrayGenerator gen_{42};
+
+  std::vector<std::shared_ptr<Field>> fields = {};
+  std::vector<std::shared_ptr<Array>> columns = {};
+
+  for (int64_t i = 0; i < num_cols; ++i) {
+    fields.push_back(field("f" + std::to_string(i), ty));
+    columns.push_back(gen_.ArrayOf(ty, num_rows));
+  }
+  auto schema = std::make_shared<Schema>(std::move(fields));
+  auto batch = RecordBatch::Make(schema, num_rows, columns);
+
+  for (auto _ : state) {
+    ASSERT_OK_AND_ASSIGN(auto tensor, batch->ToTensor());
+  }
+  state.SetItemsProcessed(state.iterations() * num_rows * num_cols);
+  state.SetBytesProcessed(state.iterations() * ty->byte_width() * num_rows * num_cols);
+}
+
+void SetArgs(benchmark::internal::Benchmark* bench) {
+  for (int64_t size : {kL1Size, kL2Size}) {
+    for (int64_t num_columns : {3, 30, 300}) {
+      bench->Args({size, num_columns});
+      bench->ArgNames({"size", "num_columns"});
+    }
+  }
+}
+
+BENCHMARK_TEMPLATE(BatchToTensorSimple, Int8Type)->Apply(SetArgs);
+BENCHMARK_TEMPLATE(BatchToTensorSimple, Int16Type)->Apply(SetArgs);
+BENCHMARK_TEMPLATE(BatchToTensorSimple, Int32Type)->Apply(SetArgs);
+BENCHMARK_TEMPLATE(BatchToTensorSimple, Int64Type)->Apply(SetArgs);
+
+}  // namespace arrow

(arrow) branch main updated: GH-40357: [C++] Add benchmark for ToTensor conversions (#40358)

Reply via email to