kiszk commented on a change in pull request #7507:
URL: https://github.com/apache/arrow/pull/7507#discussion_r576995848



##########
File path: cpp/src/arrow/array/array_test.cc
##########
@@ -2598,4 +2599,325 @@ TEST(TestRechunkArraysConsistently, Plain) {
   }
 }
 
+// ----------------------------------------------------------------------
+// Test SwapEndianArrayData
+
+/// \brief Indicate if fields are equals.
+///
+/// \param[in] target ArrayData to be converted and tested
+/// \param[in] expected result ArrayData
+void AssertArrayDataEqualsWithSwapEndian(const std::shared_ptr<ArrayData>& 
target,
+                                         const std::shared_ptr<ArrayData>& 
expected) {
+  auto swap_array = MakeArray(*::arrow::internal::SwapEndianArrayData(target));
+  auto expected_array = MakeArray(expected);
+  ASSERT_ARRAYS_EQUAL(*swap_array, *expected_array);
+  ASSERT_OK(swap_array->ValidateFull());
+}
+
+TEST(TestSwapEndianArrayData, PrimitiveType) {
+  auto null_buffer = Buffer::FromString("\xff");
+  auto data_int_buffer = Buffer::FromString("01234567");
+
+  auto data = ArrayData::Make(null(), 0, {nullptr}, 0);
+  auto expected_data = data;
+  AssertArrayDataEqualsWithSwapEndian(data, expected_data);
+
+  data = ArrayData::Make(boolean(), 8, {null_buffer, data_int_buffer}, 0);
+  expected_data = data;
+  AssertArrayDataEqualsWithSwapEndian(data, expected_data);
+
+  data = ArrayData::Make(int8(), 8, {null_buffer, data_int_buffer}, 0);
+  expected_data = data;
+  AssertArrayDataEqualsWithSwapEndian(data, expected_data);
+
+  data = ArrayData::Make(uint16(), 4, {null_buffer, data_int_buffer}, 0);
+  auto data_int16_buffer = Buffer::FromString("10325476");
+  expected_data = ArrayData::Make(uint16(), 4, {null_buffer, 
data_int16_buffer}, 0);
+  AssertArrayDataEqualsWithSwapEndian(data, expected_data);
+
+  data = ArrayData::Make(int32(), 2, {null_buffer, data_int_buffer}, 0);
+  auto data_int32_buffer = Buffer::FromString("32107654");
+  expected_data = ArrayData::Make(int32(), 2, {null_buffer, 
data_int32_buffer}, 0);
+  AssertArrayDataEqualsWithSwapEndian(data, expected_data);
+
+  data = ArrayData::Make(uint64(), 1, {null_buffer, data_int_buffer}, 0);
+  auto data_int64_buffer = Buffer::FromString("76543210");
+  expected_data = ArrayData::Make(uint64(), 1, {null_buffer, 
data_int64_buffer}, 0);
+  AssertArrayDataEqualsWithSwapEndian(data, expected_data);
+
+  auto data_16byte_buffer = Buffer::FromString("0123456789abcdef");
+  data = ArrayData::Make(decimal128(38, 10), 1, {null_buffer, 
data_16byte_buffer});
+  auto data_decimal128_buffer = Buffer::FromString("fedcba9876543210");
+  expected_data =
+      ArrayData::Make(decimal128(38, 10), 1, {null_buffer, 
data_decimal128_buffer}, 0);
+  AssertArrayDataEqualsWithSwapEndian(data, expected_data);
+
+  auto data_32byte_buffer = 
Buffer::FromString("0123456789abcdef123456789ABCDEF0");
+  data = ArrayData::Make(decimal256(76, 20), 1, {null_buffer, 
data_32byte_buffer});
+  auto data_decimal256_buffer = 
Buffer::FromString("0FEDCBA987654321fedcba9876543210");
+  expected_data =
+      ArrayData::Make(decimal256(76, 20), 1, {null_buffer, 
data_decimal256_buffer}, 0);
+  AssertArrayDataEqualsWithSwapEndian(data, expected_data);
+
+  auto data_float_buffer = Buffer::FromString("01200560");
+  data = ArrayData::Make(float32(), 2, {null_buffer, data_float_buffer}, 0);
+  auto data_float32_buffer = Buffer::FromString("02100650");
+  expected_data = ArrayData::Make(float32(), 2, {null_buffer, 
data_float32_buffer}, 0);
+  AssertArrayDataEqualsWithSwapEndian(data, expected_data);
+
+  data = ArrayData::Make(float64(), 1, {null_buffer, data_float_buffer});
+  auto data_float64_buffer = Buffer::FromString("06500210");
+  expected_data = ArrayData::Make(float64(), 1, {null_buffer, 
data_float64_buffer}, 0);
+  AssertArrayDataEqualsWithSwapEndian(data, expected_data);
+
+  // With offset > 0
+  data =
+      ArrayData::Make(int64(), 1, {null_buffer, data_int_buffer}, 
kUnknownNullCount, 1);
+  ASSERT_RAISES(Invalid, ::arrow::internal::SwapEndianArrayData(data));
+}
+
+std::shared_ptr<ArrayData> ReplaceBuffers(const std::shared_ptr<ArrayData>& 
data,
+                                          const int32_t buffer_index,
+                                          const std::vector<uint8_t>& 
buffer_data) {
+  const auto test_data = std::make_shared<ArrayData>(*data);
+  test_data->buffers[buffer_index] =
+      std::make_shared<Buffer>(buffer_data.data(), buffer_data.size());
+  return test_data;
+}
+
+std::shared_ptr<ArrayData> ReplaceBuffersInChild(const 
std::shared_ptr<ArrayData>& data,
+                                                 const int32_t child_index,
+                                                 const std::vector<uint8_t>& 
child_data) {
+  const auto test_data = std::make_shared<ArrayData>(*data);
+  // assume updating only buffer[1] in child_data
+  auto child_array_data =
+      std::make_shared<ArrayData>(*test_data->child_data[child_index]);
+  child_array_data->buffers[1] =
+      std::make_shared<Buffer>(child_data.data(), child_data.size());
+  test_data->child_data[child_index] = child_array_data;
+  return test_data;
+}
+
+std::shared_ptr<ArrayData> ReplaceBuffersInDictionary(
+    const std::shared_ptr<ArrayData>& data, const int32_t buffer_index,
+    const std::vector<uint8_t>& buffer_data) {
+  const auto test_data = std::make_shared<ArrayData>(*data);
+  auto dict_array_data = std::make_shared<ArrayData>(*test_data->dictionary);
+  dict_array_data->buffers[buffer_index] =
+      std::make_shared<Buffer>(buffer_data.data(), buffer_data.size());
+  test_data->dictionary = dict_array_data;
+  return test_data;
+}
+
+TEST(TestSwapEndianArrayData, NonPrimitiveType) {
+  auto array = ArrayFromJSON(binary(), R"(["0123", null, "45"])");
+  const std::vector<uint8_t> offset1 =
+#if ARROW_LITTLE_ENDIAN
+      {0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 4, 0, 0, 0, 6};
+#else
+      {0, 0, 0, 0, 4, 0, 0, 0, 4, 0, 0, 0, 6, 0, 0, 0};
+#endif
+  auto expected_data = array->data();
+  auto test_data = ReplaceBuffers(expected_data, 1, offset1);
+  AssertArrayDataEqualsWithSwapEndian(test_data, expected_data);
+
+  array = ArrayFromJSON(large_binary(), R"(["01234", null, "567"])");
+  const std::vector<uint8_t> offset2 =
+#if ARROW_LITTLE_ENDIAN
+      {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5,
+       0, 0, 0, 0, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 8};
+#else
+      {0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0,
+       5, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 0, 0, 0, 0};
+#endif
+  expected_data = array->data();
+  test_data = ReplaceBuffers(expected_data, 1, offset2);
+  AssertArrayDataEqualsWithSwapEndian(test_data, expected_data);
+
+  array = ArrayFromJSON(fixed_size_binary(3), R"(["012", null, "345"])");
+  expected_data = array->data();
+  AssertArrayDataEqualsWithSwapEndian(expected_data, expected_data);
+
+  array = ArrayFromJSON(utf8(), R"(["ABCD", null, "EF"])");
+  const std::vector<uint8_t> offset4 =
+#if ARROW_LITTLE_ENDIAN
+      {0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 4, 0, 0, 0, 6};
+#else
+      {0, 0, 0, 0, 4, 0, 0, 0, 4, 0, 0, 0, 6, 0, 0, 0};
+#endif
+  expected_data = array->data();
+  test_data = ReplaceBuffers(expected_data, 1, offset4);
+  AssertArrayDataEqualsWithSwapEndian(test_data, expected_data);
+
+  array = ArrayFromJSON(large_utf8(), R"(["ABCDE", null, "FGH"])");
+  const std::vector<uint8_t> offset5 =
+#if ARROW_LITTLE_ENDIAN
+      {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5,
+       0, 0, 0, 0, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 8};
+#else
+      {0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0,
+       5, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 0, 0, 0, 0};
+#endif
+  expected_data = array->data();
+  test_data = ReplaceBuffers(expected_data, 1, offset5);
+  AssertArrayDataEqualsWithSwapEndian(test_data, expected_data);
+
+  auto type = std::make_shared<ListType>(int32());
+  array = ArrayFromJSON(type, "[[0, 1, 2, 3], null, [4, 5]]");
+  const std::vector<uint8_t> offset6 =
+#if ARROW_LITTLE_ENDIAN
+      {0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 4, 0, 0, 0, 6};
+#else
+      {0, 0, 0, 0, 4, 0, 0, 0, 4, 0, 0, 0, 6, 0, 0, 0};
+#endif
+  const std::vector<uint8_t> data6 =
+#if ARROW_LITTLE_ENDIAN
+      {0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0, 4, 0, 0, 0, 5};
+#else
+      {0, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0, 4, 0, 0, 0, 5, 0, 0, 0};
+#endif
+  expected_data = array->data();
+  test_data = ReplaceBuffers(expected_data, 1, offset6);
+  test_data = ReplaceBuffersInChild(test_data, 0, data6);
+  AssertArrayDataEqualsWithSwapEndian(test_data, expected_data);
+
+  auto type7 = std::make_shared<LargeListType>(int64());
+  array = ArrayFromJSON(type7, "[[0, 1, 2], null, [3]]");
+  const std::vector<uint8_t> offset7 =
+#if ARROW_LITTLE_ENDIAN
+      {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3,
+       0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 4};
+#else
+      {0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0,
+       3, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0};
+#endif
+  const std::vector<uint8_t> data7 =
+#if ARROW_LITTLE_ENDIAN
+      {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
+       0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 3};
+#else
+      {0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0,
+       2, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0};
+#endif
+  expected_data = array->data();
+  test_data = ReplaceBuffers(expected_data, 1, offset7);
+  test_data = ReplaceBuffersInChild(test_data, 0, data7);
+  AssertArrayDataEqualsWithSwapEndian(test_data, expected_data);
+
+  auto type8 = std::make_shared<FixedSizeListType>(int32(), 2);
+  array = ArrayFromJSON(type8, "[[0, 1], null, [2, 3]]");
+  expected_data = array->data();
+  const std::vector<uint8_t> data8 =
+#if ARROW_LITTLE_ENDIAN
+      {0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 3};
+#else
+      {0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0};
+#endif
+  test_data = ReplaceBuffersInChild(expected_data, 0, data8);
+  AssertArrayDataEqualsWithSwapEndian(test_data, expected_data);
+
+  auto type9 = dictionary(int32(), int16());
+  auto dict = ArrayFromJSON(int16(), "[4, 5, 6, 7]");
+  DictionaryArray array9(type9, ArrayFromJSON(int32(), "[0, 2, 3]"), dict);
+  expected_data = array9.data();
+  const std::vector<uint8_t> data9a =
+#if ARROW_LITTLE_ENDIAN
+      {0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 3};
+#else
+      {0, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0};
+#endif
+  const std::vector<uint8_t> data9b =
+#if ARROW_LITTLE_ENDIAN
+      {0, 4, 0, 5, 0, 6, 0, 7};
+#else
+      {4, 0, 5, 0, 6, 0, 7, 0};
+#endif
+  test_data = ReplaceBuffers(expected_data, 1, data9a);
+  test_data = ReplaceBuffersInDictionary(test_data, 1, data9b);
+  // dictionary must be explicitly swapped
+  test_data->dictionary = 
*::arrow::internal::SwapEndianArrayData(test_data->dictionary);
+  AssertArrayDataEqualsWithSwapEndian(test_data, expected_data);
+
+  array = ArrayFromJSON(struct_({field("a", int32()), field("b", utf8())}),
+                        R"([{"a": 4, "b": null}, {"a": null, "b": "foo"}])");
+  expected_data = array->data();
+  const std::vector<uint8_t> data10a =
+#if ARROW_LITTLE_ENDIAN
+      {0, 0, 0, 4, 0, 0, 0, 0};
+#else
+      {4, 0, 0, 0, 0, 0, 0, 0};
+#endif
+  const std::vector<uint8_t> data10b =
+#if ARROW_LITTLE_ENDIAN
+      {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3};
+#else
+      {0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0};
+#endif
+  test_data = ReplaceBuffersInChild(expected_data, 0, data10a);
+  test_data = ReplaceBuffersInChild(test_data, 1, data10b);
+  AssertArrayDataEqualsWithSwapEndian(test_data, expected_data);
+
+  auto expected_i8 = ArrayFromJSON(int8(), "[127, null, null, null, null]");
+  auto expected_str = ArrayFromJSON(utf8(), R"([null, "abcd", null, null, 
""])");
+  auto expected_i32 = ArrayFromJSON(int32(), "[null, null, 1, 2, null]");
+  std::vector<uint8_t> expected_types_vector;
+  expected_types_vector.push_back(Type::INT8);
+  expected_types_vector.insert(expected_types_vector.end(), 2, Type::STRING);
+  expected_types_vector.insert(expected_types_vector.end(), 2, Type::INT32);
+  std::shared_ptr<Array> expected_types;
+  ArrayFromVector<Int8Type, uint8_t>(expected_types_vector, &expected_types);
+  auto arr11 = SparseUnionArray::Make(
+      *expected_types, {expected_i8, expected_str, expected_i32}, {"i8", 
"str", "i32"},
+      {Type::INT8, Type::STRING, Type::INT32});
+  expected_data = (*arr11)->data();
+  const std::vector<uint8_t> data11a =
+#if ARROW_LITTLE_ENDIAN
+      {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 4, 0, 0, 0, 4, 0, 0, 0, 4};
+#else
+      {0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 4, 0, 0, 0, 4, 0, 0, 0, 4, 0, 0, 0};
+#endif
+  const std::vector<uint8_t> data11b =
+#if ARROW_LITTLE_ENDIAN
+      {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 0};
+#else
+      {0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0};
+#endif
+  test_data = ReplaceBuffersInChild(expected_data, 1, data11a);
+  test_data = ReplaceBuffersInChild(test_data, 2, data11b);
+  AssertArrayDataEqualsWithSwapEndian(test_data, expected_data);
+
+  expected_i8 = ArrayFromJSON(int8(), "[33, 10, -10]");
+  expected_str = ArrayFromJSON(utf8(), R"(["abc", "", "def"])");
+  expected_i32 = ArrayFromJSON(int32(), "[1, -259, 2]");
+  auto expected_offsets = ArrayFromJSON(int32(), "[0, 0, 0, 1, 1, 1, 2, 2, 
2]");
+  auto arr12 = DenseUnionArray::Make(
+      *expected_types, *expected_offsets, {expected_i8, expected_str, 
expected_i32},
+      {"i8", "str", "i32"}, {Type::INT8, Type::STRING, Type::INT32});
+  expected_data = (*arr12)->data();
+  const std::vector<uint8_t> data12a =
+#if ARROW_LITTLE_ENDIAN
+      {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0,
+       0, 1, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 2, 0, 0, 0, 2};
+#else
+      {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0,
+       0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 2, 0, 0, 0, 2, 0, 0, 0};
+#endif
+  const std::vector<uint8_t> data12b =
+#if ARROW_LITTLE_ENDIAN
+      {0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 3, 0, 0, 0, 6};
+#else
+      {0, 0, 0, 0, 3, 0, 0, 0, 3, 0, 0, 0, 6, 0, 0, 0};
+#endif
+  const std::vector<uint8_t> data12c =
+#if ARROW_LITTLE_ENDIAN
+      {0, 0, 0, 1, 255, 255, 254, 253, 0, 0, 0, 2};
+#else
+      {1, 0, 0, 0, 253, 254, 255, 255, 2, 0, 0, 0};
+#endif
+  test_data = ReplaceBuffers(expected_data, 2, data12a);
+  test_data = ReplaceBuffersInChild(test_data, 1, data12b);
+  test_data = ReplaceBuffersInChild(test_data, 2, data12c);
+  AssertArrayDataEqualsWithSwapEndian(test_data, expected_data);
+}
+

Review comment:
       Sure, I will add it.




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


Reply via email to