mapleFU commented on code in PR #35345:
URL: https://github.com/apache/arrow/pull/35345#discussion_r1359469546


##########
cpp/src/arrow/array/data.cc:
##########
@@ -233,9 +233,22 @@ BufferSpan OffsetsForScalar(uint8_t* scratch_space, 
offset_type value_size) {
   auto* offsets = reinterpret_cast<offset_type*>(scratch_space);
   offsets[0] = 0;
   offsets[1] = static_cast<offset_type>(value_size);
+  static_assert(2 * sizeof(offset_type) <= 16);
   return {scratch_space, sizeof(offset_type) * 2};
 }
 
+template <typename offset_type>
+std::pair<BufferSpan, BufferSpan> OffsetsAndSizesForScalar(uint8_t* 
scratch_space,
+                                                           offset_type 
value_size) {
+  auto* offsets = scratch_space;
+  auto* sizes = scratch_space + sizeof(offset_type);
+  reinterpret_cast<offset_type*>(offsets)[0] = 0;
+  reinterpret_cast<offset_type*>(sizes)[0] = value_size;

Review Comment:
   could `mutable_data_as()` used here?



##########
cpp/src/arrow/array/array_nested.cc:
##########
@@ -137,6 +137,77 @@ Result<std::shared_ptr<typename 
TypeTraits<TYPE>::ArrayType>> ListArrayFromArray
   return std::make_shared<ArrayType>(std::move(data));
 }
 
+template <typename TYPE>
+Result<std::shared_ptr<typename TypeTraits<TYPE>::ArrayType>> 
ListViewArrayFromArrays(
+    std::shared_ptr<DataType> type, const Array& offsets, const Array& sizes,
+    const Array& values, MemoryPool* pool, std::shared_ptr<Buffer> null_bitmap 
= NULLPTR,
+    int64_t null_count = kUnknownNullCount) {
+  using offset_type = typename TYPE::offset_type;
+  using ArrayType = typename TypeTraits<TYPE>::ArrayType;
+  using OffsetArrowType = typename CTypeTraits<offset_type>::ArrowType;
+
+  if (offsets.type_id() != OffsetArrowType::type_id) {
+    return Status::TypeError("List offsets must be ", 
OffsetArrowType::type_name());
+  }
+
+  if (sizes.length() != offsets.length() && sizes.length() != offsets.length() 
- 1) {
+    return Status::Invalid(
+        "List sizes must have the same length as offsets or one less than 
offsets");
+  }
+  if (sizes.type_id() != OffsetArrowType::type_id) {
+    return Status::TypeError("List sizes must be ", 
OffsetArrowType::type_name());
+  }
+
+  if (offsets.offset() != sizes.offset()) {
+    return Status::Invalid("List offsets and sizes must have the same offset");
+  }
+  const int64_t offset = sizes.offset();

Review Comment:
   Maybe I'm a bit stupid, but this offset would be easiliy confused with 
`offsets`... Can we have better way to distinct them or current impl is good 
enough?



##########
cpp/src/arrow/array/array_nested.cc:
##########
@@ -189,11 +260,113 @@ Result<std::shared_ptr<Array>> FlattenListArray(const 
ListArrayT& list_array,
   return Concatenate(non_null_fragments, memory_pool);
 }
 
+template <typename ListViewArrayT>
+Result<std::shared_ptr<Array>> FlattenListViewArray(const ListViewArrayT& 
list_view_array,
+                                                    MemoryPool* memory_pool) {
+  using offset_type = typename ListViewArrayT::offset_type;
+  const int64_t list_view_array_length = list_view_array.length();
+  std::shared_ptr<arrow::Array> value_array = list_view_array.values();
+
+  if (list_view_array_length == 0) {
+    return SliceArrayWithOffsets(*value_array, 0, 0);
+  }
+
+  // If the list array is *all* nulls, then just return an empty array.
+  if (list_view_array.null_count() == list_view_array.length()) {
+    return MakeEmptyArray(value_array->type(), memory_pool);
+  }
+
+  const auto* validity = list_view_array.data()->template 
GetValues<uint8_t>(0, 0);
+  const auto* offsets = list_view_array.data()->template 
GetValues<offset_type>(1);
+  const auto* sizes = list_view_array.data()->template 
GetValues<offset_type>(2);
+
+  // If a ListViewArray:
+  //
+  //   1) does not contain nulls
+  //   2) has sorted offsets
+  //   3) has disjoint views which completely cover the values array
+  //
+  // then simply slice its value array with the first offset and end of the 
last list
+  // view.
+  if (list_view_array.null_count() == 0) {
+    bool sorted_and_disjoint = true;
+    for (int64_t i = 1; sorted_and_disjoint && i < list_view_array_length; 
++i) {
+      sorted_and_disjoint &=
+          sizes[i - 1] == 0 || offsets[i] - offsets[i - 1] == sizes[i - 1];
+    }

Review Comment:
   Why do we skip offset check when `sizes[i - 1] == 0`?
   
   ```
   offsets: 1 2 3 2 3
   sizes:    1 1 0 1 1
   ```
   
   does this introduce overlap and break (3)?



##########
cpp/src/arrow/array/array_nested.cc:
##########
@@ -137,6 +137,77 @@ Result<std::shared_ptr<typename 
TypeTraits<TYPE>::ArrayType>> ListArrayFromArray
   return std::make_shared<ArrayType>(std::move(data));
 }
 
+template <typename TYPE>
+Result<std::shared_ptr<typename TypeTraits<TYPE>::ArrayType>> 
ListViewArrayFromArrays(
+    std::shared_ptr<DataType> type, const Array& offsets, const Array& sizes,
+    const Array& values, MemoryPool* pool, std::shared_ptr<Buffer> null_bitmap 
= NULLPTR,
+    int64_t null_count = kUnknownNullCount) {
+  using offset_type = typename TYPE::offset_type;
+  using ArrayType = typename TypeTraits<TYPE>::ArrayType;
+  using OffsetArrowType = typename CTypeTraits<offset_type>::ArrowType;
+
+  if (offsets.type_id() != OffsetArrowType::type_id) {
+    return Status::TypeError("List offsets must be ", 
OffsetArrowType::type_name());
+  }
+
+  if (sizes.length() != offsets.length() && sizes.length() != offsets.length() 
- 1) {
+    return Status::Invalid(
+        "List sizes must have the same length as offsets or one less than 
offsets");
+  }
+  if (sizes.type_id() != OffsetArrowType::type_id) {
+    return Status::TypeError("List sizes must be ", 
OffsetArrowType::type_name());
+  }
+
+  if (offsets.offset() != sizes.offset()) {
+    return Status::Invalid("List offsets and sizes must have the same offset");
+  }

Review Comment:
   Hmmm should we denote it in comment or `array_nested.h`?



##########
cpp/src/arrow/array/array_nested.h:
##########
@@ -216,6 +231,171 @@ class ARROW_EXPORT LargeListArray : public 
BaseListArray<LargeListType> {
   void SetData(const std::shared_ptr<ArrayData>& data);
 };
 
+// ----------------------------------------------------------------------
+// ListViewArray / LargeListViewArray
+
+template <typename TYPE>
+class BaseListViewArray : public VarLengthListLikeArray<TYPE> {
+ public:
+  using TypeClass = TYPE;
+  using offset_type = typename TYPE::offset_type;
+
+  const TypeClass* list_view_type() const { return 
this->var_length_list_like_type(); }
+
+  /// Note that this buffer does not account for any slice offset or length.
+  const std::shared_ptr<Buffer>& value_sizes() const { return 
this->data_->buffers[2]; }
+
+  /// Return pointer to raw value offsets accounting for any slice offset
+  const offset_type* raw_value_sizes() const {
+    return raw_value_sizes_ + this->data_->offset;
+  }
+
+  offset_type value_length(int64_t i) const final {
+    return this->raw_value_sizes_[i + this->data_->offset];
+  }
+
+ protected:
+  const offset_type* raw_value_sizes_ = NULLPTR;
+};
+
+/// \brief Concrete Array class for list-view data
+class ARROW_EXPORT ListViewArray : public BaseListViewArray<ListViewType> {
+ public:
+  explicit ListViewArray(std::shared_ptr<ArrayData> data);
+
+  ListViewArray(std::shared_ptr<DataType> type, int64_t length,
+                std::shared_ptr<Buffer> value_offsets,
+                std::shared_ptr<Buffer> value_sizes, std::shared_ptr<Array> 
values,
+                std::shared_ptr<Buffer> null_bitmap = NULLPTR,
+                int64_t null_count = kUnknownNullCount, int64_t offset = 0);
+
+  /// \brief Construct ListViewArray from array of offsets, sizes, and child
+  /// value array
+  ///
+  /// Construct a ListViewArray using buffers from offsets and sizes arrays
+  /// that project views into the child values array.
+  ///
+  /// This function does the bare minimum of validation of the offsets/sizes 
and
+  /// input types.
+  ///
+  /// Offsets of an Array's null bitmap can be present or an explicit
+  /// null_bitmap, but not both.

Review Comment:
   Or here you just want to mean `Offsets Array's`



##########
cpp/src/arrow/array/array_nested.h:
##########
@@ -216,6 +231,171 @@ class ARROW_EXPORT LargeListArray : public 
BaseListArray<LargeListType> {
   void SetData(const std::shared_ptr<ArrayData>& data);
 };
 
+// ----------------------------------------------------------------------
+// ListViewArray / LargeListViewArray
+
+template <typename TYPE>
+class BaseListViewArray : public VarLengthListLikeArray<TYPE> {
+ public:
+  using TypeClass = TYPE;
+  using offset_type = typename TYPE::offset_type;
+
+  const TypeClass* list_view_type() const { return 
this->var_length_list_like_type(); }
+
+  /// Note that this buffer does not account for any slice offset or length.
+  const std::shared_ptr<Buffer>& value_sizes() const { return 
this->data_->buffers[2]; }
+
+  /// Return pointer to raw value offsets accounting for any slice offset
+  const offset_type* raw_value_sizes() const {
+    return raw_value_sizes_ + this->data_->offset;
+  }
+
+  offset_type value_length(int64_t i) const final {
+    return this->raw_value_sizes_[i + this->data_->offset];
+  }
+
+ protected:
+  const offset_type* raw_value_sizes_ = NULLPTR;
+};
+
+/// \brief Concrete Array class for list-view data
+class ARROW_EXPORT ListViewArray : public BaseListViewArray<ListViewType> {
+ public:
+  explicit ListViewArray(std::shared_ptr<ArrayData> data);
+
+  ListViewArray(std::shared_ptr<DataType> type, int64_t length,
+                std::shared_ptr<Buffer> value_offsets,
+                std::shared_ptr<Buffer> value_sizes, std::shared_ptr<Array> 
values,
+                std::shared_ptr<Buffer> null_bitmap = NULLPTR,
+                int64_t null_count = kUnknownNullCount, int64_t offset = 0);
+
+  /// \brief Construct ListViewArray from array of offsets, sizes, and child
+  /// value array
+  ///
+  /// Construct a ListViewArray using buffers from offsets and sizes arrays
+  /// that project views into the child values array.
+  ///
+  /// This function does the bare minimum of validation of the offsets/sizes 
and
+  /// input types.
+  ///
+  /// Offsets of an Array's null bitmap can be present or an explicit
+  /// null_bitmap, but not both.

Review Comment:
   After I go through the impl, I make clear what does this mean. But the 
comment is a bit confusing, because this function has 3 arguments( specially 
offsets, sizes ). I guess we can make it mroe clear?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to