westonpace commented on code in PR #35565:
URL: https://github.com/apache/arrow/pull/35565#discussion_r1205251598
##########
cpp/src/arrow/util/align_util_test.cc:
##########
@@ -278,4 +279,225 @@ TEST(EnsureAlignment, Table) {
ASSERT_EQ(util::CheckAlignment(*aligned_table, 2048, &needs_alignment),
true);
}
+using TypesRequiringSomeKindOfAlignment =
+ testing::Types<Int16Type, Int32Type, Int64Type, UInt16Type, UInt32Type,
UInt64Type,
+ FloatType, DoubleType, Date32Type, Date64Type, Time32Type,
Time64Type,
+ Decimal128Type, Decimal256Type, TimestampType,
DurationType, MapType,
+ DenseUnionType, LargeBinaryType, LargeListType,
LargeStringType,
+ MonthIntervalType, DayTimeIntervalType,
MonthDayNanoIntervalType>;
+
+using TypesNotRequiringAlignment =
+ testing::Types<NullType, Int8Type, UInt8Type, FixedSizeListType,
FixedSizeBinaryType,
+ BooleanType, SparseUnionType>;
+
+TEST(EnsureAlignment, Malloc) {}
+
+template <typename ArrowType>
+std::shared_ptr<DataType> sample_type() {
+ return TypeTraits<ArrowType>::type_singleton();
+}
+
+template <>
+std::shared_ptr<DataType> sample_type<FixedSizeBinaryType>() {
+ return fixed_size_binary(16);
+}
+
+template <>
+std::shared_ptr<DataType> sample_type<FixedSizeListType>() {
+ return fixed_size_list(uint8(), 16);
+}
+
+template <>
+std::shared_ptr<DataType> sample_type<Decimal128Type>() {
+ return decimal128(32, 6);
+}
+
+template <>
+std::shared_ptr<DataType> sample_type<Decimal256Type>() {
+ return decimal256(60, 10);
+}
+
+template <>
+std::shared_ptr<DataType> sample_type<LargeListType>() {
+ return large_list(int8());
+}
+
+template <>
+std::shared_ptr<DataType> sample_type<DenseUnionType>() {
+ return dense_union({field("x", int8()), field("y", uint8())});
+}
+
+template <>
+std::shared_ptr<DataType> sample_type<MapType>() {
+ return map(utf8(), field("item", utf8()));
+}
+
+template <>
+std::shared_ptr<DataType> sample_type<DurationType>() {
+ return duration(TimeUnit::NANO);
+}
+
+template <>
+std::shared_ptr<DataType> sample_type<TimestampType>() {
+ return timestamp(TimeUnit::NANO);
+}
+
+template <>
+std::shared_ptr<DataType> sample_type<Time32Type>() {
+ return time32(TimeUnit::SECOND);
+}
+
+template <>
+std::shared_ptr<DataType> sample_type<Time64Type>() {
+ return time64(TimeUnit::NANO);
+}
+
+template <>
+std::shared_ptr<DataType> sample_type<SparseUnionType>() {
+ return sparse_union({field("x", uint8()), field("y", int8())}, {1, 2});
+}
+
+template <typename ArrowType>
+std::shared_ptr<ArrayData> SampleArray() {
+ random::RandomArrayGenerator gen(42);
+ return gen.ArrayOf(sample_type<ArrowType>(), 100)->data();
+}
+
+template <>
+std::shared_ptr<ArrayData> SampleArray<SparseUnionType>() {
+ auto ty = sparse_union({field("ints", int64()), field("strs", utf8())}, {2,
7});
+ auto ints = ArrayFromJSON(int64(), "[0, 1, 2, 3]");
+ auto strs = ArrayFromJSON(utf8(), R"(["a", null, "c", "d"])");
+ auto ids = ArrayFromJSON(int8(), "[2, 7, 2, 7]")->data()->buffers[1];
+ const int length = 4;
+ SparseUnionArray arr(ty, length, {ints, strs}, ids);
+ return arr.data();
+}
+
+class MallocAlignment : public ::testing::Test {
+ public:
+ void CheckModified(const ArrayData& src, const ArrayData& dst) {
+ ASSERT_EQ(src.buffers.size(), dst.buffers.size());
+ for (std::size_t i = 0; i < src.buffers.size(); i++) {
+ if (!src.buffers[i] || !dst.buffers[i]) {
+ continue;
+ }
+ if (src.buffers[i]->address() != dst.buffers[i]->address()) {
+ return;
+ }
+ }
+ FAIL() << "Expected at least one buffer to have been modified by
EnsureAlignment";
+ }
+
+ void CheckUnmodified(const ArrayData& src, const ArrayData& dst) {
+ ASSERT_EQ(src.buffers.size(), dst.buffers.size());
+ for (std::size_t i = 0; i < src.buffers.size(); i++) {
+ if (!src.buffers[i] || !dst.buffers[i]) {
+ continue;
+ }
+ ASSERT_EQ(src.buffers[i]->address(), dst.buffers[i]->address());
+ }
+ }
+};
+
+template <typename T>
+class MallocAlignmentRequired : public MallocAlignment {};
+template <typename T>
+class MallocAlignmentNotRequired : public MallocAlignment {};
+
+TYPED_TEST_SUITE(MallocAlignmentRequired, TypesRequiringSomeKindOfAlignment);
+TYPED_TEST_SUITE(MallocAlignmentNotRequired, TypesNotRequiringAlignment);
+
+TYPED_TEST(MallocAlignmentRequired, RoundTrip) {
+ std::shared_ptr<ArrayData> data = SampleArray<TypeParam>();
+ std::shared_ptr<ArrayData> unaligned = UnalignValues(*data);
+ ASSERT_OK_AND_ASSIGN(
+ std::shared_ptr<ArrayData> aligned,
+ util::EnsureAlignment(unaligned, util::kValueAlignment,
default_memory_pool()));
+
+ AssertArraysEqual(*MakeArray(data), *MakeArray(aligned));
+ this->CheckModified(*unaligned, *aligned);
+}
+
+TYPED_TEST(MallocAlignmentNotRequired, RoundTrip) {
+ std::shared_ptr<ArrayData> data = SampleArray<TypeParam>();
+ std::shared_ptr<ArrayData> unaligned = UnalignValues(*data);
+ ASSERT_OK_AND_ASSIGN(
+ std::shared_ptr<ArrayData> aligned,
+ util::EnsureAlignment(unaligned, util::kValueAlignment,
default_memory_pool()));
+
+ AssertArraysEqual(*MakeArray(data), *MakeArray(aligned));
+ this->CheckUnmodified(*unaligned, *aligned);
+}
+
+TEST_F(MallocAlignment, RunEndEncoded) {
+ // Run end requires alignment, value type does not
+ std::shared_ptr<Array> run_ends = ArrayFromJSON(int32(), "[3, 5]");
+ std::shared_ptr<Array> values = ArrayFromJSON(int8(), "[50, 100]");
+ ASSERT_OK_AND_ASSIGN(std::shared_ptr<Array> array,
+ RunEndEncodedArray::Make(/*logical_length=*/5,
std::move(run_ends),
+ std::move(values), 0));
+
+ std::shared_ptr<ArrayData> unaligned_ree =
std::make_shared<ArrayData>(*array->data());
+ unaligned_ree->child_data[0] = UnalignValues(*unaligned_ree->child_data[0]);
+ unaligned_ree->child_data[1] = UnalignValues(*unaligned_ree->child_data[1]);
+
+ std::shared_ptr<ArrayData> aligned_ree =
std::make_shared<ArrayData>(*unaligned_ree);
+
+ ASSERT_OK_AND_ASSIGN(
+ aligned_ree,
+ util::EnsureAlignment(aligned_ree, util::kValueAlignment,
default_memory_pool()));
+
+ this->CheckModified(*unaligned_ree->child_data[0],
*aligned_ree->child_data[0]);
+ this->CheckUnmodified(*unaligned_ree->child_data[1],
*aligned_ree->child_data[1]);
+}
+
+TEST_F(MallocAlignment, Dictionary) {
+ // Dictionary values require alignment, dictionary keys do not
+ std::shared_ptr<DataType> int8_utf8 = dictionary(int8(), utf8());
+ std::shared_ptr<Array> array = ArrayFromJSON(int8_utf8, R"(["x", "x",
"y"])");
+
+ std::shared_ptr<ArrayData> unaligned_dict =
std::make_shared<ArrayData>(*array->data());
+ unaligned_dict->dictionary = UnalignValues(*unaligned_dict->dictionary);
+ unaligned_dict = UnalignValues(*unaligned_dict);
+
+ std::shared_ptr<ArrayData> aligned_dict =
std::make_shared<ArrayData>(*unaligned_dict);
+
+ ASSERT_OK_AND_ASSIGN(
+ aligned_dict,
+ util::EnsureAlignment(aligned_dict, util::kValueAlignment,
default_memory_pool()));
+
+ this->CheckUnmodified(*unaligned_dict, *aligned_dict);
+ this->CheckModified(*unaligned_dict->dictionary, *aligned_dict->dictionary);
+
+ // Dictionary values do not require alignment, dictionary keys do
Review Comment:
Fixed
##########
cpp/src/arrow/util/align_util_test.cc:
##########
@@ -278,4 +279,225 @@ TEST(EnsureAlignment, Table) {
ASSERT_EQ(util::CheckAlignment(*aligned_table, 2048, &needs_alignment),
true);
}
+using TypesRequiringSomeKindOfAlignment =
+ testing::Types<Int16Type, Int32Type, Int64Type, UInt16Type, UInt32Type,
UInt64Type,
+ FloatType, DoubleType, Date32Type, Date64Type, Time32Type,
Time64Type,
+ Decimal128Type, Decimal256Type, TimestampType,
DurationType, MapType,
+ DenseUnionType, LargeBinaryType, LargeListType,
LargeStringType,
+ MonthIntervalType, DayTimeIntervalType,
MonthDayNanoIntervalType>;
+
+using TypesNotRequiringAlignment =
+ testing::Types<NullType, Int8Type, UInt8Type, FixedSizeListType,
FixedSizeBinaryType,
+ BooleanType, SparseUnionType>;
+
+TEST(EnsureAlignment, Malloc) {}
+
+template <typename ArrowType>
+std::shared_ptr<DataType> sample_type() {
+ return TypeTraits<ArrowType>::type_singleton();
+}
+
+template <>
+std::shared_ptr<DataType> sample_type<FixedSizeBinaryType>() {
+ return fixed_size_binary(16);
+}
+
+template <>
+std::shared_ptr<DataType> sample_type<FixedSizeListType>() {
+ return fixed_size_list(uint8(), 16);
+}
+
+template <>
+std::shared_ptr<DataType> sample_type<Decimal128Type>() {
+ return decimal128(32, 6);
+}
+
+template <>
+std::shared_ptr<DataType> sample_type<Decimal256Type>() {
+ return decimal256(60, 10);
+}
+
+template <>
+std::shared_ptr<DataType> sample_type<LargeListType>() {
+ return large_list(int8());
+}
+
+template <>
+std::shared_ptr<DataType> sample_type<DenseUnionType>() {
+ return dense_union({field("x", int8()), field("y", uint8())});
+}
+
+template <>
+std::shared_ptr<DataType> sample_type<MapType>() {
+ return map(utf8(), field("item", utf8()));
+}
+
+template <>
+std::shared_ptr<DataType> sample_type<DurationType>() {
+ return duration(TimeUnit::NANO);
+}
+
+template <>
+std::shared_ptr<DataType> sample_type<TimestampType>() {
+ return timestamp(TimeUnit::NANO);
+}
+
+template <>
+std::shared_ptr<DataType> sample_type<Time32Type>() {
+ return time32(TimeUnit::SECOND);
+}
+
+template <>
+std::shared_ptr<DataType> sample_type<Time64Type>() {
+ return time64(TimeUnit::NANO);
+}
+
+template <>
+std::shared_ptr<DataType> sample_type<SparseUnionType>() {
+ return sparse_union({field("x", uint8()), field("y", int8())}, {1, 2});
+}
+
+template <typename ArrowType>
+std::shared_ptr<ArrayData> SampleArray() {
+ random::RandomArrayGenerator gen(42);
+ return gen.ArrayOf(sample_type<ArrowType>(), 100)->data();
+}
+
+template <>
+std::shared_ptr<ArrayData> SampleArray<SparseUnionType>() {
+ auto ty = sparse_union({field("ints", int64()), field("strs", utf8())}, {2,
7});
+ auto ints = ArrayFromJSON(int64(), "[0, 1, 2, 3]");
+ auto strs = ArrayFromJSON(utf8(), R"(["a", null, "c", "d"])");
+ auto ids = ArrayFromJSON(int8(), "[2, 7, 2, 7]")->data()->buffers[1];
+ const int length = 4;
+ SparseUnionArray arr(ty, length, {ints, strs}, ids);
+ return arr.data();
+}
+
+class MallocAlignment : public ::testing::Test {
+ public:
+ void CheckModified(const ArrayData& src, const ArrayData& dst) {
+ ASSERT_EQ(src.buffers.size(), dst.buffers.size());
+ for (std::size_t i = 0; i < src.buffers.size(); i++) {
+ if (!src.buffers[i] || !dst.buffers[i]) {
+ continue;
+ }
+ if (src.buffers[i]->address() != dst.buffers[i]->address()) {
+ return;
+ }
+ }
+ FAIL() << "Expected at least one buffer to have been modified by
EnsureAlignment";
+ }
+
+ void CheckUnmodified(const ArrayData& src, const ArrayData& dst) {
+ ASSERT_EQ(src.buffers.size(), dst.buffers.size());
+ for (std::size_t i = 0; i < src.buffers.size(); i++) {
+ if (!src.buffers[i] || !dst.buffers[i]) {
+ continue;
+ }
+ ASSERT_EQ(src.buffers[i]->address(), dst.buffers[i]->address());
+ }
+ }
+};
+
+template <typename T>
+class MallocAlignmentRequired : public MallocAlignment {};
+template <typename T>
+class MallocAlignmentNotRequired : public MallocAlignment {};
+
+TYPED_TEST_SUITE(MallocAlignmentRequired, TypesRequiringSomeKindOfAlignment);
+TYPED_TEST_SUITE(MallocAlignmentNotRequired, TypesNotRequiringAlignment);
+
+TYPED_TEST(MallocAlignmentRequired, RoundTrip) {
+ std::shared_ptr<ArrayData> data = SampleArray<TypeParam>();
+ std::shared_ptr<ArrayData> unaligned = UnalignValues(*data);
+ ASSERT_OK_AND_ASSIGN(
+ std::shared_ptr<ArrayData> aligned,
+ util::EnsureAlignment(unaligned, util::kValueAlignment,
default_memory_pool()));
+
+ AssertArraysEqual(*MakeArray(data), *MakeArray(aligned));
+ this->CheckModified(*unaligned, *aligned);
+}
+
+TYPED_TEST(MallocAlignmentNotRequired, RoundTrip) {
+ std::shared_ptr<ArrayData> data = SampleArray<TypeParam>();
+ std::shared_ptr<ArrayData> unaligned = UnalignValues(*data);
+ ASSERT_OK_AND_ASSIGN(
+ std::shared_ptr<ArrayData> aligned,
+ util::EnsureAlignment(unaligned, util::kValueAlignment,
default_memory_pool()));
+
+ AssertArraysEqual(*MakeArray(data), *MakeArray(aligned));
+ this->CheckUnmodified(*unaligned, *aligned);
+}
+
+TEST_F(MallocAlignment, RunEndEncoded) {
+ // Run end requires alignment, value type does not
+ std::shared_ptr<Array> run_ends = ArrayFromJSON(int32(), "[3, 5]");
+ std::shared_ptr<Array> values = ArrayFromJSON(int8(), "[50, 100]");
+ ASSERT_OK_AND_ASSIGN(std::shared_ptr<Array> array,
+ RunEndEncodedArray::Make(/*logical_length=*/5,
std::move(run_ends),
+ std::move(values), 0));
+
+ std::shared_ptr<ArrayData> unaligned_ree =
std::make_shared<ArrayData>(*array->data());
+ unaligned_ree->child_data[0] = UnalignValues(*unaligned_ree->child_data[0]);
+ unaligned_ree->child_data[1] = UnalignValues(*unaligned_ree->child_data[1]);
+
+ std::shared_ptr<ArrayData> aligned_ree =
std::make_shared<ArrayData>(*unaligned_ree);
+
+ ASSERT_OK_AND_ASSIGN(
+ aligned_ree,
+ util::EnsureAlignment(aligned_ree, util::kValueAlignment,
default_memory_pool()));
+
+ this->CheckModified(*unaligned_ree->child_data[0],
*aligned_ree->child_data[0]);
+ this->CheckUnmodified(*unaligned_ree->child_data[1],
*aligned_ree->child_data[1]);
+}
+
+TEST_F(MallocAlignment, Dictionary) {
+ // Dictionary values require alignment, dictionary keys do not
+ std::shared_ptr<DataType> int8_utf8 = dictionary(int8(), utf8());
+ std::shared_ptr<Array> array = ArrayFromJSON(int8_utf8, R"(["x", "x",
"y"])");
+
+ std::shared_ptr<ArrayData> unaligned_dict =
std::make_shared<ArrayData>(*array->data());
+ unaligned_dict->dictionary = UnalignValues(*unaligned_dict->dictionary);
+ unaligned_dict = UnalignValues(*unaligned_dict);
+
+ std::shared_ptr<ArrayData> aligned_dict =
std::make_shared<ArrayData>(*unaligned_dict);
+
+ ASSERT_OK_AND_ASSIGN(
+ aligned_dict,
+ util::EnsureAlignment(aligned_dict, util::kValueAlignment,
default_memory_pool()));
+
+ this->CheckUnmodified(*unaligned_dict, *aligned_dict);
+ this->CheckModified(*unaligned_dict->dictionary, *aligned_dict->dictionary);
+
+ // Dictionary values do not require alignment, dictionary keys do
+ std::shared_ptr<DataType> int16_int8 = dictionary(int16(), int8());
+ array = ArrayFromJSON(int16_int8, R"([7, 11])");
+
+ unaligned_dict = std::make_shared<ArrayData>(*array->data());
+ unaligned_dict->dictionary = UnalignValues(*unaligned_dict->dictionary);
+ unaligned_dict = UnalignValues(*unaligned_dict);
+
+ aligned_dict = std::make_shared<ArrayData>(*unaligned_dict);
+
+ ASSERT_OK_AND_ASSIGN(
+ aligned_dict,
+ util::EnsureAlignment(aligned_dict, util::kValueAlignment,
default_memory_pool()));
Review Comment:
Added a check
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]