westonpace commented on a change in pull request #8984:
URL: https://github.com/apache/arrow/pull/8984#discussion_r554220491



##########
File path: cpp/src/arrow/array/concatenate_test.cc
##########
@@ -225,6 +225,86 @@ TEST_F(ConcatenateTest, DictionaryType) {
   });
 }
 
+TEST_F(ConcatenateTest, DictionaryTypeDifferentDictionaries) {
+  auto dict_type = dictionary(uint8(), utf8());
+  auto dict_one = DictArrayFromJSON(dict_type, "[1, 2, null, 3, 0]",
+                                    "[\"A0\", \"A1\", \"A2\", \"A3\"]");
+  auto dict_two = DictArrayFromJSON(dict_type, "[null, 4, 2, 1]",
+                                    "[\"B0\", \"B1\", \"B2\", \"B3\", 
\"B4\"]");
+  auto concat_expected = DictArrayFromJSON(
+      dict_type, "[1, 2, null, 3, 0, null, 8, 6, 5]",
+      "[\"A0\", \"A1\", \"A2\", \"A3\", \"B0\", \"B1\", \"B2\", \"B3\", 
\"B4\"]");
+  ASSERT_OK_AND_ASSIGN(auto concat_actual, Concatenate({dict_one, dict_two}));
+  AssertArraysEqual(*concat_expected, *concat_actual);
+}
+
+TEST_F(ConcatenateTest, DictionaryTypePartialOverlapDictionaries) {
+  auto dict_type = dictionary(uint8(), utf8());
+  auto dict_one = DictArrayFromJSON(dict_type, "[1, 2, null, 3, 0]",
+                                    "[\"A0\", \"A1\", \"C2\", \"C3\"]");
+  auto dict_two = DictArrayFromJSON(dict_type, "[null, 4, 2, 1]",
+                                    "[\"B0\", \"B1\", \"C2\", \"C3\", 
\"B4\"]");
+  auto concat_expected =
+      DictArrayFromJSON(dict_type, "[1, 2, null, 3, 0, null, 6, 2, 5]",
+                        "[\"A0\", \"A1\", \"C2\", \"C3\", \"B0\", \"B1\", 
\"B4\"]");
+  ASSERT_OK_AND_ASSIGN(auto concat_actual, Concatenate({dict_one, dict_two}));
+  AssertArraysEqual(*concat_expected, *concat_actual);
+}
+
+TEST_F(ConcatenateTest, DictionaryTypeDifferentSizeIndex) {
+  auto dict_type = dictionary(uint8(), utf8());
+  auto bigger_dict_type = dictionary(uint16(), utf8());
+  auto dict_one = DictArrayFromJSON(dict_type, "[0]", "[\"A0\"]");
+  auto dict_two = DictArrayFromJSON(bigger_dict_type, "[0]", "[\"B0\"]");
+  ASSERT_RAISES(Invalid, Concatenate({dict_one, dict_two}).status());
+}
+
+TEST_F(ConcatenateTest, DictionaryTypeCantUnifyNullInDictionary) {
+  auto dict_type = dictionary(uint8(), utf8());
+  auto dict_one = DictArrayFromJSON(dict_type, "[0, 1]", "[null, \"A\"]");
+  auto dict_two = DictArrayFromJSON(dict_type, "[0, 1]", "[null, \"B\"]");
+  ASSERT_RAISES(Invalid, Concatenate({dict_one, dict_two}).status());
+}
+
+TEST_F(ConcatenateTest, DictionaryTypeEnlargedIndices) {
+  auto size = std::numeric_limits<uint8_t>::max() + 1;
+  auto dict_type = dictionary(uint8(), uint16());
+
+  UInt8Builder index_builder;
+  ASSERT_OK(index_builder.Reserve(size));
+  for (auto i = 0; i < size; i++) {
+    index_builder.UnsafeAppend(i);
+  }
+  ASSERT_OK_AND_ASSIGN(auto indices, index_builder.Finish());
+
+  UInt16Builder values_builder;
+  ASSERT_OK(values_builder.Reserve(size));
+  for (auto i = 0; i < size; i++) {
+    values_builder.UnsafeAppend(i);
+  }
+  ASSERT_OK_AND_ASSIGN(auto dictionary_one, values_builder.Finish());
+
+  UInt16Builder values_builder_two;
+  ASSERT_OK(values_builder_two.Reserve(size));
+  for (auto i = size; i < 2 * size; i++) {

Review comment:
       I don't think `i*2` would work.  It's building up two arrays, the first 
has values [0,size) and the second has values [size,2*size).  I changed it a 
little so it is one loop if that is clearer.




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to