pitrou commented on code in PR #49937:
URL: https://github.com/apache/arrow/pull/49937#discussion_r3234520970
##########
cpp/src/arrow/array/array_dict_test.cc:
##########
@@ -1761,4 +1761,81 @@ TEST(TestDictionaryUnifier, TableZeroColumns) {
AssertTablesEqual(*table, *unified);
}
+// GH-49689: Ordered dictionary tests
+
+TEST(TestDictionaryBuilderOrdered, TypePreservesOrderedFlag) {
+ auto ordered_type = dictionary(int8(), utf8(), /*ordered=*/true);
+ std::unique_ptr<ArrayBuilder> boxed_builder;
+ ASSERT_OK(MakeBuilder(default_memory_pool(), ordered_type, &boxed_builder));
+
+ auto builder_type = boxed_builder->type();
+ ASSERT_TRUE(checked_cast<const DictionaryType&>(*builder_type).ordered());
+}
+
+TEST(TestDictionaryBuilderOrdered, UnorderedTypeStaysUnordered) {
+ auto unordered_type = dictionary(int8(), utf8(), /*ordered=*/false);
+ std::unique_ptr<ArrayBuilder> boxed_builder;
+ ASSERT_OK(MakeBuilder(default_memory_pool(), unordered_type,
&boxed_builder));
+
+ auto builder_type = boxed_builder->type();
+ ASSERT_FALSE(checked_cast<const DictionaryType&>(*builder_type).ordered());
+}
+
+TEST(TestDictionaryBuilderOrdered, FinishPreservesOrderedFlag) {
+ auto ordered_type = dictionary(int8(), utf8(), /*ordered=*/true);
+ std::unique_ptr<ArrayBuilder> boxed_builder;
+ ASSERT_OK(MakeBuilder(default_memory_pool(), ordered_type, &boxed_builder));
+ auto& builder = checked_cast<DictionaryBuilder<StringType>&>(*boxed_builder);
+
+ ASSERT_OK(builder.Append("a"));
+ ASSERT_OK(builder.Append("b"));
+ ASSERT_OK(builder.Append("a"));
+
+ std::shared_ptr<Array> result;
+ ASSERT_OK(builder.Finish(&result));
+
+ const auto& result_type = checked_cast<const
DictionaryType&>(*result->type());
+ ASSERT_TRUE(result_type.ordered());
+
+ auto ex_dict = ArrayFromJSON(utf8(), R"(["a", "b"])");
+ auto ex_indices = ArrayFromJSON(int8(), "[0, 1, 0]");
+ DictionaryArray expected(ordered_type, ex_indices, ex_dict);
+ AssertArraysEqual(expected, *result);
+}
+
+TEST(TestDictionaryBuilderOrdered, ListOfOrderedDictionary) {
+ auto ordered_dict_type = dictionary(int8(), utf8(), /*ordered=*/true);
Review Comment:
Same here.
##########
cpp/src/arrow/array/array_dict_test.cc:
##########
@@ -1761,4 +1761,81 @@ TEST(TestDictionaryUnifier, TableZeroColumns) {
AssertTablesEqual(*table, *unified);
}
+// GH-49689: Ordered dictionary tests
+
+TEST(TestDictionaryBuilderOrdered, TypePreservesOrderedFlag) {
+ auto ordered_type = dictionary(int8(), utf8(), /*ordered=*/true);
+ std::unique_ptr<ArrayBuilder> boxed_builder;
+ ASSERT_OK(MakeBuilder(default_memory_pool(), ordered_type, &boxed_builder));
+
+ auto builder_type = boxed_builder->type();
+ ASSERT_TRUE(checked_cast<const DictionaryType&>(*builder_type).ordered());
+}
+
+TEST(TestDictionaryBuilderOrdered, UnorderedTypeStaysUnordered) {
+ auto unordered_type = dictionary(int8(), utf8(), /*ordered=*/false);
+ std::unique_ptr<ArrayBuilder> boxed_builder;
+ ASSERT_OK(MakeBuilder(default_memory_pool(), unordered_type,
&boxed_builder));
+
+ auto builder_type = boxed_builder->type();
+ ASSERT_FALSE(checked_cast<const DictionaryType&>(*builder_type).ordered());
+}
+
+TEST(TestDictionaryBuilderOrdered, FinishPreservesOrderedFlag) {
+ auto ordered_type = dictionary(int8(), utf8(), /*ordered=*/true);
+ std::unique_ptr<ArrayBuilder> boxed_builder;
+ ASSERT_OK(MakeBuilder(default_memory_pool(), ordered_type, &boxed_builder));
+ auto& builder = checked_cast<DictionaryBuilder<StringType>&>(*boxed_builder);
+
+ ASSERT_OK(builder.Append("a"));
+ ASSERT_OK(builder.Append("b"));
+ ASSERT_OK(builder.Append("a"));
+
+ std::shared_ptr<Array> result;
+ ASSERT_OK(builder.Finish(&result));
+
+ const auto& result_type = checked_cast<const
DictionaryType&>(*result->type());
+ ASSERT_TRUE(result_type.ordered());
+
+ auto ex_dict = ArrayFromJSON(utf8(), R"(["a", "b"])");
+ auto ex_indices = ArrayFromJSON(int8(), "[0, 1, 0]");
+ DictionaryArray expected(ordered_type, ex_indices, ex_dict);
+ AssertArraysEqual(expected, *result);
+}
+
+TEST(TestDictionaryBuilderOrdered, ListOfOrderedDictionary) {
+ auto ordered_dict_type = dictionary(int8(), utf8(), /*ordered=*/true);
+ auto list_type = list(field("item", ordered_dict_type));
+
+ std::unique_ptr<ArrayBuilder> boxed_builder;
+ ASSERT_OK(MakeBuilder(default_memory_pool(), list_type, &boxed_builder));
+ auto& list_builder = checked_cast<ListBuilder&>(*boxed_builder);
+ auto& dict_builder =
+
checked_cast<DictionaryBuilder<StringType>&>(*list_builder.value_builder());
+
+ ASSERT_OK(list_builder.Append());
+ ASSERT_OK(dict_builder.Append("a"));
+ ASSERT_OK(dict_builder.Append("b"));
+ ASSERT_OK(list_builder.Append());
+ ASSERT_OK(dict_builder.Append("a"));
+
+ std::shared_ptr<Array> result;
+ ASSERT_OK(list_builder.Finish(&result));
+
+ const auto& result_list_type = checked_cast<const
ListType&>(*result->type());
+ const auto& result_dict_type =
+ checked_cast<const DictionaryType&>(*result_list_type.value_type());
+ ASSERT_TRUE(result_dict_type.ordered());
+}
+
+TEST(TestDictionaryBuilderOrdered, MakeDictionaryBuilderPreservesOrdered) {
+ auto ordered_type = dictionary(int8(), utf8(), /*ordered=*/true);
Review Comment:
Same here?
##########
cpp/src/arrow/array/array_dict_test.cc:
##########
@@ -1761,4 +1761,81 @@ TEST(TestDictionaryUnifier, TableZeroColumns) {
AssertTablesEqual(*table, *unified);
}
+// GH-49689: Ordered dictionary tests
+
+TEST(TestDictionaryBuilderOrdered, TypePreservesOrderedFlag) {
+ auto ordered_type = dictionary(int8(), utf8(), /*ordered=*/true);
+ std::unique_ptr<ArrayBuilder> boxed_builder;
+ ASSERT_OK(MakeBuilder(default_memory_pool(), ordered_type, &boxed_builder));
+
+ auto builder_type = boxed_builder->type();
+ ASSERT_TRUE(checked_cast<const DictionaryType&>(*builder_type).ordered());
+}
+
+TEST(TestDictionaryBuilderOrdered, UnorderedTypeStaysUnordered) {
+ auto unordered_type = dictionary(int8(), utf8(), /*ordered=*/false);
+ std::unique_ptr<ArrayBuilder> boxed_builder;
+ ASSERT_OK(MakeBuilder(default_memory_pool(), unordered_type,
&boxed_builder));
+
+ auto builder_type = boxed_builder->type();
+ ASSERT_FALSE(checked_cast<const DictionaryType&>(*builder_type).ordered());
+}
+
+TEST(TestDictionaryBuilderOrdered, FinishPreservesOrderedFlag) {
+ auto ordered_type = dictionary(int8(), utf8(), /*ordered=*/true);
Review Comment:
You could have a `for` loop on the `ordered` flag to test both possibilities.
##########
cpp/src/arrow/array/array_dict_test.cc:
##########
@@ -1761,4 +1761,81 @@ TEST(TestDictionaryUnifier, TableZeroColumns) {
AssertTablesEqual(*table, *unified);
}
+// GH-49689: Ordered dictionary tests
+
+TEST(TestDictionaryBuilderOrdered, TypePreservesOrderedFlag) {
+ auto ordered_type = dictionary(int8(), utf8(), /*ordered=*/true);
+ std::unique_ptr<ArrayBuilder> boxed_builder;
+ ASSERT_OK(MakeBuilder(default_memory_pool(), ordered_type, &boxed_builder));
+
+ auto builder_type = boxed_builder->type();
+ ASSERT_TRUE(checked_cast<const DictionaryType&>(*builder_type).ordered());
+}
+
+TEST(TestDictionaryBuilderOrdered, UnorderedTypeStaysUnordered) {
+ auto unordered_type = dictionary(int8(), utf8(), /*ordered=*/false);
Review Comment:
These two tests can be factored into a single one by using a `for` loop.
##########
cpp/src/arrow/array/builder_dict.h:
##########
@@ -659,11 +669,14 @@ class DictionaryBuilderBase<BuilderType, NullType> :
public ArrayBuilder {
Status Finish(std::shared_ptr<DictionaryArray>* out) { return
FinishTyped(out); }
std::shared_ptr<DataType> type() const override {
- return ::arrow::dictionary(indices_builder_.type(), null());
+ return ::arrow::dictionary(indices_builder_.type(), null(), ordered_);
}
+ void set_ordered(bool ordered) { ordered_ = ordered; }
Review Comment:
Instead of having this separate method, perhaps we can add a `bool ordered =
false` parameter at the end of DictionaryBuilderBase constructors?
##########
cpp/src/arrow/array/builder_dict.h:
##########
@@ -160,7 +160,8 @@ class DictionaryBuilderBase : public ArrayBuilder {
delta_offset_(0),
byte_width_(-1),
indices_builder_(start_int_size, pool, alignment),
- value_type_(value_type) {}
+ value_type_(value_type),
+ ordered_(false) {}
Review Comment:
Since `ordered_` is default-initialized to `false` in the class declaration
(I mean the `bool ordered_ = false;` part), we don't need to reiterate in the
constructors.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]