zanmato1984 commented on code in PR #44053:
URL: https://github.com/apache/arrow/pull/44053#discussion_r1756557143
##########
cpp/src/arrow/acero/aggregate_benchmark.cc:
##########
@@ -866,5 +890,47 @@
BENCHMARK(TDigestKernelDoubleMedian)->Apply(QuantileKernelArgs);
BENCHMARK(TDigestKernelDoubleDeciles)->Apply(QuantileKernelArgs);
BENCHMARK(TDigestKernelDoubleCentiles)->Apply(QuantileKernelArgs);
+//
+// RowSegmenter
+//
+
+template <typename... Args>
+static void BenchmarkRowSegmenter(benchmark::State& state, Args&&...) {
Review Comment:
Updated.
##########
cpp/src/arrow/acero/aggregate_benchmark.cc:
##########
@@ -866,5 +890,47 @@
BENCHMARK(TDigestKernelDoubleMedian)->Apply(QuantileKernelArgs);
BENCHMARK(TDigestKernelDoubleDeciles)->Apply(QuantileKernelArgs);
BENCHMARK(TDigestKernelDoubleCentiles)->Apply(QuantileKernelArgs);
+//
+// RowSegmenter
+//
+
+template <typename... Args>
+static void BenchmarkRowSegmenter(benchmark::State& state, Args&&...) {
+ int64_t num_rows = state.range(0);
+ int64_t num_segments = state.range(1);
+ ASSERT_NE(num_segments, 0);
+ ASSERT_GE(num_rows, num_segments);
+ int64_t num_segment_keys = state.range(2);
+ // Adjust num_rows to be a multiple of num_segments.
+ num_rows = num_rows / num_segments * num_segments;
+
+ // A trivial column to count from.
+ auto arg = ConstantArrayGenerator::Zeroes(num_rows, int64());
+ // num_segments segments, each having identical num_rows / num_segments rows of the
+ // associated segment id.
+ ArrayVector segments(num_segments);
+ for (int i = 0; i < num_segments; ++i) {
+ ASSERT_OK_AND_ASSIGN(
+ segments[i],
+ Constant(std::make_shared<Int64Scalar>(i))->Generate(num_rows / num_segments));
+ }
+ // Concat all segments to form the segment key.
+ ASSERT_OK_AND_ASSIGN(auto segment_key, Concatenate(segments));
+ // num_segment_keys copies of the segment key.
+ ArrayVector segment_keys(num_segment_keys, segment_key);
Review Comment:
Done.
##########
cpp/src/arrow/acero/aggregate_benchmark.cc:
##########
@@ -866,5 +890,47 @@
BENCHMARK(TDigestKernelDoubleMedian)->Apply(QuantileKernelArgs);
BENCHMARK(TDigestKernelDoubleDeciles)->Apply(QuantileKernelArgs);
BENCHMARK(TDigestKernelDoubleCentiles)->Apply(QuantileKernelArgs);
+//
+// RowSegmenter
+//
+
+template <typename... Args>
+static void BenchmarkRowSegmenter(benchmark::State& state, Args&&...) {
+ int64_t num_rows = state.range(0);
+ int64_t num_segments = state.range(1);
+ ASSERT_NE(num_segments, 0);
+ ASSERT_GE(num_rows, num_segments);
+ int64_t num_segment_keys = state.range(2);
+ // Adjust num_rows to be a multiple of num_segments.
+ num_rows = num_rows / num_segments * num_segments;
+
+ // A trivial column to count from.
+ auto arg = ConstantArrayGenerator::Zeroes(num_rows, int64());
+ // num_segments segments, each having identical num_rows / num_segments rows of the
+ // associated segment id.
+ ArrayVector segments(num_segments);
+ for (int i = 0; i < num_segments; ++i) {
+ ASSERT_OK_AND_ASSIGN(
+ segments[i],
+ Constant(std::make_shared<Int64Scalar>(i))->Generate(num_rows / num_segments));
+ }
+ // Concat all segments to form the segment key.
+ ASSERT_OK_AND_ASSIGN(auto segment_key, Concatenate(segments));
+ // num_segment_keys copies of the segment key.
+ ArrayVector segment_keys(num_segment_keys, segment_key);
+
+ BenchmarkGroupBy(state, {{"count", ""}}, {arg}, /*keys=*/{}, segment_keys);
+
+ state.SetItemsProcessed(num_rows * state.iterations());
+}
+
+std::vector<std::string> row_segmenter_argnames = {"Rows", "Segments", "SegmentKeys"};
+std::vector<std::vector<int64_t>> row_segmenter_args = {
+ {32 * 1024}, benchmark::CreateRange(1, 256, 4), benchmark::CreateDenseRange(0, 3, 1)};
+
+BENCHMARK(BenchmarkRowSegmenter)
Review Comment:
Done.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]