mapleFU commented on code in PR #35345:
URL: https://github.com/apache/arrow/pull/35345#discussion_r1359469546
##########
cpp/src/arrow/array/data.cc:
##########
@@ -233,9 +233,22 @@ BufferSpan OffsetsForScalar(uint8_t* scratch_space,
offset_type value_size) {
auto* offsets = reinterpret_cast<offset_type*>(scratch_space);
offsets[0] = 0;
offsets[1] = static_cast<offset_type>(value_size);
+ static_assert(2 * sizeof(offset_type) <= 16);
return {scratch_space, sizeof(offset_type) * 2};
}
+template <typename offset_type>
+std::pair<BufferSpan, BufferSpan> OffsetsAndSizesForScalar(uint8_t*
scratch_space,
+ offset_type
value_size) {
+ auto* offsets = scratch_space;
+ auto* sizes = scratch_space + sizeof(offset_type);
+ reinterpret_cast<offset_type*>(offsets)[0] = 0;
+ reinterpret_cast<offset_type*>(sizes)[0] = value_size;
Review Comment:
could `mutable_data_as()` used here?
##########
cpp/src/arrow/array/array_nested.cc:
##########
@@ -137,6 +137,77 @@ Result<std::shared_ptr<typename
TypeTraits<TYPE>::ArrayType>> ListArrayFromArray
return std::make_shared<ArrayType>(std::move(data));
}
+template <typename TYPE>
+Result<std::shared_ptr<typename TypeTraits<TYPE>::ArrayType>>
ListViewArrayFromArrays(
+ std::shared_ptr<DataType> type, const Array& offsets, const Array& sizes,
+ const Array& values, MemoryPool* pool, std::shared_ptr<Buffer> null_bitmap
= NULLPTR,
+ int64_t null_count = kUnknownNullCount) {
+ using offset_type = typename TYPE::offset_type;
+ using ArrayType = typename TypeTraits<TYPE>::ArrayType;
+ using OffsetArrowType = typename CTypeTraits<offset_type>::ArrowType;
+
+ if (offsets.type_id() != OffsetArrowType::type_id) {
+ return Status::TypeError("List offsets must be ",
OffsetArrowType::type_name());
+ }
+
+ if (sizes.length() != offsets.length() && sizes.length() != offsets.length()
- 1) {
+ return Status::Invalid(
+ "List sizes must have the same length as offsets or one less than
offsets");
+ }
+ if (sizes.type_id() != OffsetArrowType::type_id) {
+ return Status::TypeError("List sizes must be ",
OffsetArrowType::type_name());
+ }
+
+ if (offsets.offset() != sizes.offset()) {
+ return Status::Invalid("List offsets and sizes must have the same offset");
+ }
+ const int64_t offset = sizes.offset();
Review Comment:
Maybe I'm a bit stupid, but this offset would be easiliy confused with
`offsets`... Can we have better way to distinct them or current impl is good
enough?
##########
cpp/src/arrow/array/array_nested.cc:
##########
@@ -189,11 +260,113 @@ Result<std::shared_ptr<Array>> FlattenListArray(const
ListArrayT& list_array,
return Concatenate(non_null_fragments, memory_pool);
}
+template <typename ListViewArrayT>
+Result<std::shared_ptr<Array>> FlattenListViewArray(const ListViewArrayT&
list_view_array,
+ MemoryPool* memory_pool) {
+ using offset_type = typename ListViewArrayT::offset_type;
+ const int64_t list_view_array_length = list_view_array.length();
+ std::shared_ptr<arrow::Array> value_array = list_view_array.values();
+
+ if (list_view_array_length == 0) {
+ return SliceArrayWithOffsets(*value_array, 0, 0);
+ }
+
+ // If the list array is *all* nulls, then just return an empty array.
+ if (list_view_array.null_count() == list_view_array.length()) {
+ return MakeEmptyArray(value_array->type(), memory_pool);
+ }
+
+ const auto* validity = list_view_array.data()->template
GetValues<uint8_t>(0, 0);
+ const auto* offsets = list_view_array.data()->template
GetValues<offset_type>(1);
+ const auto* sizes = list_view_array.data()->template
GetValues<offset_type>(2);
+
+ // If a ListViewArray:
+ //
+ // 1) does not contain nulls
+ // 2) has sorted offsets
+ // 3) has disjoint views which completely cover the values array
+ //
+ // then simply slice its value array with the first offset and end of the
last list
+ // view.
+ if (list_view_array.null_count() == 0) {
+ bool sorted_and_disjoint = true;
+ for (int64_t i = 1; sorted_and_disjoint && i < list_view_array_length;
++i) {
+ sorted_and_disjoint &=
+ sizes[i - 1] == 0 || offsets[i] - offsets[i - 1] == sizes[i - 1];
+ }
Review Comment:
Why do we skip offset check when `sizes[i - 1] == 0`?
```
offsets: 1 2 3 2 3
sizes: 1 1 0 1 1
```
does this introduce overlap and break (3)?
##########
cpp/src/arrow/array/array_nested.cc:
##########
@@ -137,6 +137,77 @@ Result<std::shared_ptr<typename
TypeTraits<TYPE>::ArrayType>> ListArrayFromArray
return std::make_shared<ArrayType>(std::move(data));
}
+template <typename TYPE>
+Result<std::shared_ptr<typename TypeTraits<TYPE>::ArrayType>>
ListViewArrayFromArrays(
+ std::shared_ptr<DataType> type, const Array& offsets, const Array& sizes,
+ const Array& values, MemoryPool* pool, std::shared_ptr<Buffer> null_bitmap
= NULLPTR,
+ int64_t null_count = kUnknownNullCount) {
+ using offset_type = typename TYPE::offset_type;
+ using ArrayType = typename TypeTraits<TYPE>::ArrayType;
+ using OffsetArrowType = typename CTypeTraits<offset_type>::ArrowType;
+
+ if (offsets.type_id() != OffsetArrowType::type_id) {
+ return Status::TypeError("List offsets must be ",
OffsetArrowType::type_name());
+ }
+
+ if (sizes.length() != offsets.length() && sizes.length() != offsets.length()
- 1) {
+ return Status::Invalid(
+ "List sizes must have the same length as offsets or one less than
offsets");
+ }
+ if (sizes.type_id() != OffsetArrowType::type_id) {
+ return Status::TypeError("List sizes must be ",
OffsetArrowType::type_name());
+ }
+
+ if (offsets.offset() != sizes.offset()) {
+ return Status::Invalid("List offsets and sizes must have the same offset");
+ }
Review Comment:
Hmmm should we denote it in comment or `array_nested.h`?
##########
cpp/src/arrow/array/array_nested.h:
##########
@@ -216,6 +231,171 @@ class ARROW_EXPORT LargeListArray : public
BaseListArray<LargeListType> {
void SetData(const std::shared_ptr<ArrayData>& data);
};
+// ----------------------------------------------------------------------
+// ListViewArray / LargeListViewArray
+
+template <typename TYPE>
+class BaseListViewArray : public VarLengthListLikeArray<TYPE> {
+ public:
+ using TypeClass = TYPE;
+ using offset_type = typename TYPE::offset_type;
+
+ const TypeClass* list_view_type() const { return
this->var_length_list_like_type(); }
+
+ /// Note that this buffer does not account for any slice offset or length.
+ const std::shared_ptr<Buffer>& value_sizes() const { return
this->data_->buffers[2]; }
+
+ /// Return pointer to raw value offsets accounting for any slice offset
+ const offset_type* raw_value_sizes() const {
+ return raw_value_sizes_ + this->data_->offset;
+ }
+
+ offset_type value_length(int64_t i) const final {
+ return this->raw_value_sizes_[i + this->data_->offset];
+ }
+
+ protected:
+ const offset_type* raw_value_sizes_ = NULLPTR;
+};
+
+/// \brief Concrete Array class for list-view data
+class ARROW_EXPORT ListViewArray : public BaseListViewArray<ListViewType> {
+ public:
+ explicit ListViewArray(std::shared_ptr<ArrayData> data);
+
+ ListViewArray(std::shared_ptr<DataType> type, int64_t length,
+ std::shared_ptr<Buffer> value_offsets,
+ std::shared_ptr<Buffer> value_sizes, std::shared_ptr<Array>
values,
+ std::shared_ptr<Buffer> null_bitmap = NULLPTR,
+ int64_t null_count = kUnknownNullCount, int64_t offset = 0);
+
+ /// \brief Construct ListViewArray from array of offsets, sizes, and child
+ /// value array
+ ///
+ /// Construct a ListViewArray using buffers from offsets and sizes arrays
+ /// that project views into the child values array.
+ ///
+ /// This function does the bare minimum of validation of the offsets/sizes
and
+ /// input types.
+ ///
+ /// Offsets of an Array's null bitmap can be present or an explicit
+ /// null_bitmap, but not both.
Review Comment:
Or here you just want to mean `Offsets Array's`
##########
cpp/src/arrow/array/array_nested.h:
##########
@@ -216,6 +231,171 @@ class ARROW_EXPORT LargeListArray : public
BaseListArray<LargeListType> {
void SetData(const std::shared_ptr<ArrayData>& data);
};
+// ----------------------------------------------------------------------
+// ListViewArray / LargeListViewArray
+
+template <typename TYPE>
+class BaseListViewArray : public VarLengthListLikeArray<TYPE> {
+ public:
+ using TypeClass = TYPE;
+ using offset_type = typename TYPE::offset_type;
+
+ const TypeClass* list_view_type() const { return
this->var_length_list_like_type(); }
+
+ /// Note that this buffer does not account for any slice offset or length.
+ const std::shared_ptr<Buffer>& value_sizes() const { return
this->data_->buffers[2]; }
+
+ /// Return pointer to raw value offsets accounting for any slice offset
+ const offset_type* raw_value_sizes() const {
+ return raw_value_sizes_ + this->data_->offset;
+ }
+
+ offset_type value_length(int64_t i) const final {
+ return this->raw_value_sizes_[i + this->data_->offset];
+ }
+
+ protected:
+ const offset_type* raw_value_sizes_ = NULLPTR;
+};
+
+/// \brief Concrete Array class for list-view data
+class ARROW_EXPORT ListViewArray : public BaseListViewArray<ListViewType> {
+ public:
+ explicit ListViewArray(std::shared_ptr<ArrayData> data);
+
+ ListViewArray(std::shared_ptr<DataType> type, int64_t length,
+ std::shared_ptr<Buffer> value_offsets,
+ std::shared_ptr<Buffer> value_sizes, std::shared_ptr<Array>
values,
+ std::shared_ptr<Buffer> null_bitmap = NULLPTR,
+ int64_t null_count = kUnknownNullCount, int64_t offset = 0);
+
+ /// \brief Construct ListViewArray from array of offsets, sizes, and child
+ /// value array
+ ///
+ /// Construct a ListViewArray using buffers from offsets and sizes arrays
+ /// that project views into the child values array.
+ ///
+ /// This function does the bare minimum of validation of the offsets/sizes
and
+ /// input types.
+ ///
+ /// Offsets of an Array's null bitmap can be present or an explicit
+ /// null_bitmap, but not both.
Review Comment:
After I go through the impl, I make clear what does this mean. But the
comment is a bit confusing, because this function has 3 arguments( specially
offsets, sizes ). I guess we can make it mroe clear?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]