pitrou commented on code in PR #44166:
URL: https://github.com/apache/arrow/pull/44166#discussion_r1766568861
##########
cpp/src/arrow/acero/hash_aggregate_test.cc:
##########
@@ -804,64 +804,143 @@ TEST(RowSegmenter, MultipleSegmentsMultipleBatches) {
namespace {
void TestRowSegmenterConstantBatch(
- std::function<ArgShape(size_t i)> shape_func,
+ const std::shared_ptr<DataType>& type,
+ std::function<ArgShape(int64_t key)> shape_func,
+ std::function<Result<std::shared_ptr<Scalar>>(int64_t key)> value_func,
std::function<Result<std::unique_ptr<RowSegmenter>>(const
std::vector<TypeHolder>&)>
make_segmenter) {
- constexpr size_t n = 3, repetitions = 3;
- std::vector<TypeHolder> types = {int32(), int32(), int32()};
- std::vector<ArgShape> shapes(n);
- for (size_t i = 0; i < n; i++) shapes[i] = shape_func(i);
- auto full_batch = ExecBatchFromJSON(types, shapes, "[[1, 1, 1], [1, 1, 1],
[1, 1, 1]]");
- auto test_by_size = [&](size_t size) -> Status {
- SCOPED_TRACE("constant-batch with " + ToChars(size) + " key(s)");
- std::vector<Datum> values(full_batch.values.begin(),
- full_batch.values.begin() + size);
- ExecBatch batch(values, full_batch.length);
- std::vector<TypeHolder> key_types(types.begin(), types.begin() + size);
+ constexpr int64_t n_keys = 3, n_rows = 3, repetitions = 3;
+ std::vector<TypeHolder> types(n_keys, type);
+ std::vector<Datum> full_values(n_keys);
+ for (int64_t i = 0; i < n_keys; i++) {
+ auto shape = shape_func(i);
+ ASSERT_OK_AND_ASSIGN(auto scalar, value_func(i));
+ if (shape == ArgShape::SCALAR) {
+ full_values[i] = std::move(scalar);
+ } else {
+ ASSERT_OK_AND_ASSIGN(full_values[i], MakeArrayFromScalar(*scalar,
n_rows));
+ }
+ }
+ auto test_with_keys = [&](int64_t keys) -> Status {
+ SCOPED_TRACE("constant-batch with " + ToChars(keys) + " key(s)");
+ std::vector<Datum> values(full_values.begin(), full_values.begin() + keys);
+ ExecBatch batch(values, n_rows);
+ std::vector<TypeHolder> key_types(types.begin(), types.begin() + keys);
ARROW_ASSIGN_OR_RAISE(auto segmenter, make_segmenter(key_types));
- for (size_t i = 0; i < repetitions; i++) {
- TestSegments(segmenter, ExecSpan(batch), {{0, 3, true, true}});
+ for (int64_t i = 0; i < repetitions; i++) {
+ TestSegments(segmenter, ExecSpan(batch), {{0, n_rows, true, true}});
ARROW_RETURN_NOT_OK(segmenter->Reset());
}
return Status::OK();
};
- for (size_t i = 0; i <= 3; i++) {
- ASSERT_OK(test_by_size(i));
+ for (int64_t i = 0; i <= n_keys; i++) {
+ ASSERT_OK(test_with_keys(i));
}
}
} // namespace
TEST(RowSegmenter, ConstantArrayBatch) {
- TestRowSegmenterConstantBatch([](size_t i) { return ArgShape::ARRAY; },
- MakeRowSegmenter);
+ TestRowSegmenterConstantBatch(
+ int32(), [](int64_t key) { return ArgShape::ARRAY; },
+ [](int64_t key) { return MakeScalar(1); }, MakeRowSegmenter);
}
TEST(RowSegmenter, ConstantScalarBatch) {
- TestRowSegmenterConstantBatch([](size_t i) { return ArgShape::SCALAR; },
- MakeRowSegmenter);
+ TestRowSegmenterConstantBatch(
+ int32(), [](int64_t key) { return ArgShape::SCALAR; },
+ [](int64_t key) { return MakeScalar(1); }, MakeRowSegmenter);
}
TEST(RowSegmenter, ConstantMixedBatch) {
TestRowSegmenterConstantBatch(
- [](size_t i) { return i % 2 == 0 ? ArgShape::SCALAR : ArgShape::ARRAY; },
- MakeRowSegmenter);
+ int32(),
+ [](int64_t key) { return key % 2 == 0 ? ArgShape::SCALAR :
ArgShape::ARRAY; },
+ [](int64_t key) { return MakeScalar(1); }, MakeRowSegmenter);
}
TEST(RowSegmenter, ConstantArrayBatchWithAnyKeysSegmenter) {
- TestRowSegmenterConstantBatch([](size_t i) { return ArgShape::ARRAY; },
- MakeGenericSegmenter);
+ TestRowSegmenterConstantBatch(
+ int32(), [](int64_t key) { return ArgShape::ARRAY; },
+ [](int64_t key) { return MakeScalar(1); }, MakeGenericSegmenter);
}
TEST(RowSegmenter, ConstantScalarBatchWithAnyKeysSegmenter) {
- TestRowSegmenterConstantBatch([](size_t i) { return ArgShape::SCALAR; },
- MakeGenericSegmenter);
+ TestRowSegmenterConstantBatch(
+ int32(), [](int64_t key) { return ArgShape::SCALAR; },
+ [](int64_t key) { return MakeScalar(1); }, MakeGenericSegmenter);
}
TEST(RowSegmenter, ConstantMixedBatchWithAnyKeysSegmenter) {
TestRowSegmenterConstantBatch(
- [](size_t i) { return i % 2 == 0 ? ArgShape::SCALAR : ArgShape::ARRAY; },
- MakeGenericSegmenter);
+ int32(),
+ [](int64_t key) { return key % 2 == 0 ? ArgShape::SCALAR :
ArgShape::ARRAY; },
+ [](int64_t key) { return MakeScalar(1); }, MakeGenericSegmenter);
+}
+
+TEST(RowSegmenter, ConstantFixedSizeBinaryArrayBatch) {
+ constexpr int fsb = 8;
+ auto type = fixed_size_binary(fsb);
+ ASSERT_OK_AND_ASSIGN(auto value, MakeScalar(type, std::string(fsb, 'X')));
+ TestRowSegmenterConstantBatch(
+ type, [](int64_t key) { return ArgShape::ARRAY; },
+ [&](int64_t key) { return value; }, MakeRowSegmenter);
Review Comment:
Should these additional tests also exercise `MakeGenericSegmenter`? Or is
that covered elsewhere?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]