lidavidm commented on code in PR #14352:
URL: https://github.com/apache/arrow/pull/14352#discussion_r1016640128
##########
cpp/src/arrow/compute/row/grouper.h:
##########
@@ -30,6 +30,40 @@
namespace arrow {
namespace compute {
+struct ARROW_EXPORT GroupingSegment {
+ int64_t offset;
+ int64_t length;
+ bool is_open;
+};
+
+inline bool operator==(const GroupingSegment& segment1, const GroupingSegment&
segment2) {
+ return segment1.offset == segment2.offset && segment1.length ==
segment2.length &&
+ segment1.is_open == segment2.is_open;
+}
+inline bool operator!=(const GroupingSegment& segment1, const GroupingSegment&
segment2) {
+ return !(segment1 == segment2);
+}
+
+class ARROW_EXPORT GroupingSegmenter {
Review Comment:
nit: docstring?
##########
cpp/src/arrow/compute/kernels/hash_aggregate_test.cc:
##########
@@ -253,17 +300,23 @@ Result<Datum> GroupByTest(const std::vector<Datum>&
arguments,
idx = idx + 1;
}
if (use_exec_plan) {
- return GroupByUsingExecPlan(arguments, keys, internal_aggregates,
use_threads,
- small_chunksize_context(use_threads));
+ return GroupByUsingExecPlan(arguments, keys, segment_keys,
internal_aggregates,
+ use_threads,
small_chunksize_context(use_threads));
} else {
- return internal::GroupBy(arguments, keys, internal_aggregates, use_threads,
- default_exec_context());
+ return AlternatorGroupBy(arguments, keys, segment_keys,
internal_aggregates,
+ use_threads, default_exec_context());
}
}
-} // namespace
+Result<Datum> GroupByTest(const std::vector<Datum>& arguments,
+ const std::vector<Datum>& keys,
+ const std::vector<TestAggregate>& aggregates, bool
use_threads,
+ bool use_exec_plan = false) {
+ return GroupByTest(arguments, keys, {}, aggregates, use_threads,
use_exec_plan);
+}
-TEST(Grouper, SupportedKeys) {
+template <typename GroupClass>
+void test_group_class_supported_keys() {
Review Comment:
nit: please follow CamelCase naming convention for functions
##########
cpp/src/arrow/compute/exec/aggregate.h:
##########
@@ -37,6 +37,7 @@ namespace internal {
/// and use an aggregate node.
Review Comment:
Hmm, yeah, plus "internal use only" things should go in an `_internal.h`
header. Maybe this is more like, "this is exposed for use by PyArrow but we
make no guarantees for use outside this codebase"
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]