R-JunmingChen commented on code in PR #37100:
URL: https://github.com/apache/arrow/pull/37100#discussion_r1364759407
##########
cpp/src/arrow/compute/kernels/aggregate_test.cc:
##########
@@ -2175,6 +2175,244 @@ TEST(TestFixedSizeBinaryMinMaxKernel, Basics) {
EXPECT_THAT(MinMax(ScalarFromJSON(ty, R"("aa")"), options),
ResultWith(null));
}
+TEST(TestDictionaryMinMaxKernel, DecimalsValue) {
+ ScalarAggregateOptions options;
+ std::shared_ptr<arrow::DataType> dict_ty;
+ std::shared_ptr<arrow::DataType> ty;
+ for (const auto& index_type : all_dictionary_index_types()) {
+ ARROW_SCOPED_TRACE("index_type = ", index_type->ToString());
+
+ for (const auto& value_ty_ : {decimal128(5, 2), decimal256(5, 2)}) {
+ dict_ty = dictionary(index_type, value_ty_);
+ ty = struct_({field("min", value_ty_), field("max", value_ty_)});
+
+ auto chunk1 = DictArrayFromJSON(dict_ty, R"([null, 0])", R"(["5.10"])");
+ auto chunk2 = DictArrayFromJSON(dict_ty, R"([0, 1, 1])", R"(["3.10",
"-1.23"])");
+ ASSERT_OK_AND_ASSIGN(auto chunked, ChunkedArray::Make({chunk1, chunk2}));
+
+ options = ScalarAggregateOptions(/*skip_nulls=*/true);
+ EXPECT_THAT(MinMax(chunked, options),
+ ResultWith(ScalarFromJSON(ty, R"({"min": "-1.23", "max":
"5.10"})")));
+ EXPECT_THAT(
+ MinMax(DictArrayFromJSON(dict_ty, R"([0, 1, 1, 0])", R"(["5.10",
"-1.23"])"),
+ options),
+ ResultWith(ScalarFromJSON(ty, R"({"min": "-1.23", "max":
"5.10"})")));
+ EXPECT_THAT(
+ MinMax(DictArrayFromJSON(dict_ty, R"([3, 1, 1, 4, 0, 2, null])",
+ R"(["5.10", "-1.23", "2.00", "3.45",
"4.56"])"),
+ options),
+ ResultWith(ScalarFromJSON(ty, R"({"min": "-1.23", "max":
"5.10"})")));
+ EXPECT_THAT(
+ MinMax(DictArrayFromJSON(dict_ty, R"([null, 3, null, 1, 4, 3, 0, 2,
null])",
+ R"(["-5.10", "-1.23", "-2.00", "-3.45",
"-4.56"])"),
+ options),
+ ResultWith(ScalarFromJSON(ty, R"({"min": "-5.10", "max":
"-1.23"})")));
+ EXPECT_THAT(MinMax(DictArrayFromJSON(dict_ty, R"([null, null])",
R"([])"), options),
+ ResultWith(ScalarFromJSON(ty, R"({"min": null, "max":
null})")));
+ EXPECT_THAT(MinMax(DictArrayFromJSON(dict_ty, R"([])", R"([])"),
options),
+ ResultWith(ScalarFromJSON(ty, R"({"min": null, "max":
null})")));
+ EXPECT_THAT(MinMax(DictArrayFromJSON(dict_ty, R"([0])", R"(["1.00"])"),
options),
+ ResultWith(ScalarFromJSON(ty, R"({"min": "1.00", "max":
"1.00"})")));
+
+ options = ScalarAggregateOptions(/*skip_nulls=*/false);
+ EXPECT_THAT(MinMax(chunked, options),
+ ResultWith(ScalarFromJSON(ty, R"({"min": null, "max":
null})")));
+ EXPECT_THAT(
+ MinMax(DictArrayFromJSON(dict_ty, R"([null, 3, 1, 1, 4, 0, 2,
null])",
+ R"(["5.10", "-1.23", "2.00", "3.45",
"4.56"])"),
+ options),
+ ResultWith(ScalarFromJSON(ty, R"({"min": null, "max": null})")));
+ EXPECT_THAT(
+ MinMax(DictArrayFromJSON(dict_ty, R"([3, 1, 1, 4, 0, 2])",
+ R"(["5.10", "-1.23", "2.00", "3.45",
"4.56"])"),
+ options),
+ ResultWith(ScalarFromJSON(ty, R"({"min": "-1.23", "max":
"5.10"})")));
+
+ options = ScalarAggregateOptions(/*skip_nulls=*/true, /*min_count=*/0);
+ EXPECT_THAT(MinMax(chunked, options),
+ ResultWith(ScalarFromJSON(ty, R"({"min": "-1.23", "max":
"5.10"})")));
+ EXPECT_THAT(
+ MinMax(DictArrayFromJSON(dict_ty, R"([null, null, null])", R"([])"),
options),
+ ResultWith(ScalarFromJSON(ty, R"({"min": null, "max": null})")));
+ EXPECT_THAT(MinMax(DictArrayFromJSON(dict_ty, R"([0])", R"(["1.00"])"),
options),
+ ResultWith(ScalarFromJSON(ty, R"({"min": "1.00", "max":
"1.00"})")));
+
+ options = ScalarAggregateOptions(/*skip_nulls=*/true, /*min_count=*/5);
+ EXPECT_THAT(MinMax(chunked, options),
+ ResultWith(ScalarFromJSON(ty, R"({"min": null, "max":
null})")));
+ EXPECT_THAT(
+ MinMax(DictArrayFromJSON(dict_ty, R"([0, 1, 1, 0])", R"(["5.10",
"-1.23"])"),
+ options),
+ ResultWith(ScalarFromJSON(ty, R"({"min": null, "max": null})")));
+ EXPECT_THAT(
+ MinMax(DictArrayFromJSON(dict_ty, R"([null, 3, 1, 1, 4, 0, 2, null,
null])",
+ R"(["5.10", "-1.23", "2.00", "3.45",
"4.56"])"),
+ options),
+ ResultWith(ScalarFromJSON(ty, R"({"min": "-1.23", "max":
"5.10"})")));
+
+ options = ScalarAggregateOptions(/*skip_nulls=*/true, /*min_count=*/1);
+ EXPECT_THAT(MinMax(chunked, options),
+ ResultWith(ScalarFromJSON(ty, R"({"min": "-1.23", "max":
"5.10"})")));
+ EXPECT_THAT(
+ MinMax(DictArrayFromJSON(dict_ty, R"([null, null, null])", R"([])"),
options),
+ ResultWith(ScalarFromJSON(ty, R"({"min": null, "max": null})")));
+ EXPECT_THAT(MinMax(DictArrayFromJSON(dict_ty, R"([0])", R"(["1.00"])"),
options),
+ ResultWith(ScalarFromJSON(ty, R"({"min": "1.00", "max":
"1.00"})")));
+
+ // compact dictionary
+ EXPECT_THAT(
+ MinMax(
+ DictArrayFromJSON(
+ dict_ty, R"([3, 1, 1, 4, 0, 2])",
+ R"(["5.10", "-1.23", "2.00", "3.45", "4.56", "8.20", "9.20",
"10.20"])"),
+ options),
+ ResultWith(ScalarFromJSON(ty, R"({"min": "-1.23", "max":
"5.10"})")));
+ EXPECT_THAT(
+ MinMax(
+ DictArrayFromJSON(
+ dict_ty, R"([5, 1, 1, 6, 0, 2])",
+ R"(["5.10", "-1.23", "2.00", "3.45", "4.56", "8.20", "9.20",
"10.20"])"),
+ options),
+ ResultWith(ScalarFromJSON(ty, R"({"min": "-1.23", "max":
"9.20"})")));
+ }
Review Comment:
Ok, I will add comments and more test cases when code changes are completed
##########
cpp/src/arrow/compute/kernels/aggregate_test.cc:
##########
@@ -2175,6 +2175,244 @@ TEST(TestFixedSizeBinaryMinMaxKernel, Basics) {
EXPECT_THAT(MinMax(ScalarFromJSON(ty, R"("aa")"), options),
ResultWith(null));
}
+TEST(TestDictionaryMinMaxKernel, DecimalsValue) {
+ ScalarAggregateOptions options;
+ std::shared_ptr<arrow::DataType> dict_ty;
+ std::shared_ptr<arrow::DataType> ty;
+ for (const auto& index_type : all_dictionary_index_types()) {
+ ARROW_SCOPED_TRACE("index_type = ", index_type->ToString());
+
+ for (const auto& value_ty_ : {decimal128(5, 2), decimal256(5, 2)}) {
+ dict_ty = dictionary(index_type, value_ty_);
+ ty = struct_({field("min", value_ty_), field("max", value_ty_)});
+
+ auto chunk1 = DictArrayFromJSON(dict_ty, R"([null, 0])", R"(["5.10"])");
+ auto chunk2 = DictArrayFromJSON(dict_ty, R"([0, 1, 1])", R"(["3.10",
"-1.23"])");
+ ASSERT_OK_AND_ASSIGN(auto chunked, ChunkedArray::Make({chunk1, chunk2}));
+
+ options = ScalarAggregateOptions(/*skip_nulls=*/true);
+ EXPECT_THAT(MinMax(chunked, options),
+ ResultWith(ScalarFromJSON(ty, R"({"min": "-1.23", "max":
"5.10"})")));
+ EXPECT_THAT(
+ MinMax(DictArrayFromJSON(dict_ty, R"([0, 1, 1, 0])", R"(["5.10",
"-1.23"])"),
+ options),
+ ResultWith(ScalarFromJSON(ty, R"({"min": "-1.23", "max":
"5.10"})")));
+ EXPECT_THAT(
+ MinMax(DictArrayFromJSON(dict_ty, R"([3, 1, 1, 4, 0, 2, null])",
+ R"(["5.10", "-1.23", "2.00", "3.45",
"4.56"])"),
+ options),
+ ResultWith(ScalarFromJSON(ty, R"({"min": "-1.23", "max":
"5.10"})")));
+ EXPECT_THAT(
+ MinMax(DictArrayFromJSON(dict_ty, R"([null, 3, null, 1, 4, 3, 0, 2,
null])",
+ R"(["-5.10", "-1.23", "-2.00", "-3.45",
"-4.56"])"),
+ options),
+ ResultWith(ScalarFromJSON(ty, R"({"min": "-5.10", "max":
"-1.23"})")));
+ EXPECT_THAT(MinMax(DictArrayFromJSON(dict_ty, R"([null, null])",
R"([])"), options),
+ ResultWith(ScalarFromJSON(ty, R"({"min": null, "max":
null})")));
+ EXPECT_THAT(MinMax(DictArrayFromJSON(dict_ty, R"([])", R"([])"),
options),
+ ResultWith(ScalarFromJSON(ty, R"({"min": null, "max":
null})")));
+ EXPECT_THAT(MinMax(DictArrayFromJSON(dict_ty, R"([0])", R"(["1.00"])"),
options),
+ ResultWith(ScalarFromJSON(ty, R"({"min": "1.00", "max":
"1.00"})")));
+
+ options = ScalarAggregateOptions(/*skip_nulls=*/false);
+ EXPECT_THAT(MinMax(chunked, options),
+ ResultWith(ScalarFromJSON(ty, R"({"min": null, "max":
null})")));
+ EXPECT_THAT(
+ MinMax(DictArrayFromJSON(dict_ty, R"([null, 3, 1, 1, 4, 0, 2,
null])",
+ R"(["5.10", "-1.23", "2.00", "3.45",
"4.56"])"),
+ options),
+ ResultWith(ScalarFromJSON(ty, R"({"min": null, "max": null})")));
+ EXPECT_THAT(
+ MinMax(DictArrayFromJSON(dict_ty, R"([3, 1, 1, 4, 0, 2])",
+ R"(["5.10", "-1.23", "2.00", "3.45",
"4.56"])"),
+ options),
+ ResultWith(ScalarFromJSON(ty, R"({"min": "-1.23", "max":
"5.10"})")));
+
+ options = ScalarAggregateOptions(/*skip_nulls=*/true, /*min_count=*/0);
+ EXPECT_THAT(MinMax(chunked, options),
+ ResultWith(ScalarFromJSON(ty, R"({"min": "-1.23", "max":
"5.10"})")));
+ EXPECT_THAT(
+ MinMax(DictArrayFromJSON(dict_ty, R"([null, null, null])", R"([])"),
options),
+ ResultWith(ScalarFromJSON(ty, R"({"min": null, "max": null})")));
+ EXPECT_THAT(MinMax(DictArrayFromJSON(dict_ty, R"([0])", R"(["1.00"])"),
options),
+ ResultWith(ScalarFromJSON(ty, R"({"min": "1.00", "max":
"1.00"})")));
+
+ options = ScalarAggregateOptions(/*skip_nulls=*/true, /*min_count=*/5);
+ EXPECT_THAT(MinMax(chunked, options),
+ ResultWith(ScalarFromJSON(ty, R"({"min": null, "max":
null})")));
+ EXPECT_THAT(
+ MinMax(DictArrayFromJSON(dict_ty, R"([0, 1, 1, 0])", R"(["5.10",
"-1.23"])"),
+ options),
+ ResultWith(ScalarFromJSON(ty, R"({"min": null, "max": null})")));
+ EXPECT_THAT(
+ MinMax(DictArrayFromJSON(dict_ty, R"([null, 3, 1, 1, 4, 0, 2, null,
null])",
+ R"(["5.10", "-1.23", "2.00", "3.45",
"4.56"])"),
+ options),
+ ResultWith(ScalarFromJSON(ty, R"({"min": "-1.23", "max":
"5.10"})")));
+
+ options = ScalarAggregateOptions(/*skip_nulls=*/true, /*min_count=*/1);
+ EXPECT_THAT(MinMax(chunked, options),
+ ResultWith(ScalarFromJSON(ty, R"({"min": "-1.23", "max":
"5.10"})")));
+ EXPECT_THAT(
+ MinMax(DictArrayFromJSON(dict_ty, R"([null, null, null])", R"([])"),
options),
+ ResultWith(ScalarFromJSON(ty, R"({"min": null, "max": null})")));
+ EXPECT_THAT(MinMax(DictArrayFromJSON(dict_ty, R"([0])", R"(["1.00"])"),
options),
+ ResultWith(ScalarFromJSON(ty, R"({"min": "1.00", "max":
"1.00"})")));
+
+ // compact dictionary
+ EXPECT_THAT(
+ MinMax(
+ DictArrayFromJSON(
+ dict_ty, R"([3, 1, 1, 4, 0, 2])",
+ R"(["5.10", "-1.23", "2.00", "3.45", "4.56", "8.20", "9.20",
"10.20"])"),
+ options),
+ ResultWith(ScalarFromJSON(ty, R"({"min": "-1.23", "max":
"5.10"})")));
+ EXPECT_THAT(
+ MinMax(
+ DictArrayFromJSON(
+ dict_ty, R"([5, 1, 1, 6, 0, 2])",
+ R"(["5.10", "-1.23", "2.00", "3.45", "4.56", "8.20", "9.20",
"10.20"])"),
+ options),
+ ResultWith(ScalarFromJSON(ty, R"({"min": "-1.23", "max":
"9.20"})")));
+ }
Review Comment:
Sure, I will add comments and more test cases when code changes are completed
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]