bkietz commented on code in PR #35345:
URL: https://github.com/apache/arrow/pull/35345#discussion_r1350318670


##########
cpp/src/arrow/array/util.cc:
##########
@@ -666,12 +682,29 @@ class RepeatedArrayFactory {
     std::shared_ptr<Buffer> offsets_buffer;
     auto size = static_cast<typename T::offset_type>(value->length());
     RETURN_NOT_OK(CreateOffsetsBuffer(size, &offsets_buffer));
-
     out_ =
         std::make_shared<ArrayType>(scalar_.type, length_, offsets_buffer, 
value_array);
     return Status::OK();
   }
 
+  template <typename T>
+  enable_if_list_view<T, Status> Visit(const T& type) {
+    using ScalarType = typename TypeTraits<T>::ScalarType;
+    using ArrayType = typename TypeTraits<T>::ArrayType;
+
+    auto value = checked_cast<const ScalarType&>(scalar_).value;
+
+    auto size = static_cast<typename T::offset_type>(value->length());
+    BufferVector buffers(3);

Review Comment:
   ```suggestion
   ```



##########
cpp/src/arrow/array/concatenate.cc:
##########
@@ -159,16 +166,67 @@ Status PutOffsets(const std::shared_ptr<Buffer>& src, 
Offset first_offset, Offse
 
   // Write offsets into dst, ensuring that the first offset written is
   // first_offset
-  auto adjustment = first_offset - src_begin[0];
+  auto displacement = first_offset - src_begin[0];
   // NOTE: Concatenate can be called during IPC reads to append delta 
dictionaries.
   // Avoid UB on non-validated input by doing the addition in the unsigned 
domain.
   // (the result can later be validated using Array::ValidateFull)
-  std::transform(src_begin, src_end, dst, [adjustment](Offset offset) {
-    return SafeSignedAdd(offset, adjustment);
+  std::transform(src_begin, src_end, dst, [displacement](Offset offset) {
+    return SafeSignedAdd(offset, displacement);
   });
   return Status::OK();
 }
 
+template <typename offset_type>
+void PutListViewOffsets(const Buffer& src, offset_type displacement, 
offset_type* dst);
+
+// Concatenate buffers holding list-view offsets into a single buffer of 
offsets
+//
+// value_ranges contains the relevant ranges of values in the child array 
actually
+// referenced to by the views. Most commonly, these ranges will start from 0,
+// but when that is not the case, we need to adjust the displacement of 
offsets.
+// The concatenated child array does not contain values from the beginning
+// if they are not referenced to by any view.
+template <typename offset_type>
+Status ConcatenateListViewOffsets(const BufferVector& buffers,
+                                  const std::vector<Range>& value_ranges,
+                                  MemoryPool* pool, std::shared_ptr<Buffer>* 
out) {
+  const int64_t out_size = SumBufferSizes(buffers);
+  if (out_size > std::numeric_limits<offset_type>::max()) {
+    return Status::Invalid("offset overflow while concatenating arrays");
+  }

Review Comment:
   This doesn't seem right; we could have a ListView array with `2**34` empty 
lists even though that number is greater than the max of offset type. I think 
this check needs to inspect `value_ranges`, which can tell us what the maximum 
offset would need to be



##########
cpp/src/arrow/array/concatenate.cc:
##########
@@ -159,16 +166,67 @@ Status PutOffsets(const std::shared_ptr<Buffer>& src, 
Offset first_offset, Offse
 
   // Write offsets into dst, ensuring that the first offset written is
   // first_offset
-  auto adjustment = first_offset - src_begin[0];
+  auto displacement = first_offset - src_begin[0];
   // NOTE: Concatenate can be called during IPC reads to append delta 
dictionaries.
   // Avoid UB on non-validated input by doing the addition in the unsigned 
domain.
   // (the result can later be validated using Array::ValidateFull)
-  std::transform(src_begin, src_end, dst, [adjustment](Offset offset) {
-    return SafeSignedAdd(offset, adjustment);
+  std::transform(src_begin, src_end, dst, [displacement](Offset offset) {
+    return SafeSignedAdd(offset, displacement);
   });
   return Status::OK();
 }
 
+template <typename offset_type>
+void PutListViewOffsets(const Buffer& src, offset_type displacement, 
offset_type* dst);
+
+// Concatenate buffers holding list-view offsets into a single buffer of 
offsets
+//
+// value_ranges contains the relevant ranges of values in the child array 
actually
+// referenced to by the views. Most commonly, these ranges will start from 0,
+// but when that is not the case, we need to adjust the displacement of 
offsets.
+// The concatenated child array does not contain values from the beginning
+// if they are not referenced to by any view.
+template <typename offset_type>
+Status ConcatenateListViewOffsets(const BufferVector& buffers,
+                                  const std::vector<Range>& value_ranges,
+                                  MemoryPool* pool, std::shared_ptr<Buffer>* 
out) {
+  const int64_t out_size = SumBufferSizes(buffers);
+  if (out_size > std::numeric_limits<offset_type>::max()) {
+    return Status::Invalid("offset overflow while concatenating arrays");
+  }
+  ARROW_ASSIGN_OR_RAISE(*out, AllocateBuffer(out_size, pool));
+  auto* out_data = reinterpret_cast<offset_type*>((*out)->mutable_data());

Review Comment:
   ```suggestion
     auto* out_data = (*out)->mutable_data_as<offset_type>();
   ```



##########
cpp/src/arrow/testing/random.cc:
##########
@@ -594,6 +595,217 @@ std::shared_ptr<Array> 
OffsetsFromLengthsArray(OffsetArrayType* lengths,
       std::make_shared<typename OffsetArrayType::TypeClass>(), size, buffers, 
null_count);
   return std::make_shared<OffsetArrayType>(array_data);
 }
+
+// Helper for RandomArrayGenerator::ArrayOf: extract some C value from
+// a given metadata key.
+template <typename T, typename ArrowType = typename CTypeTraits<T>::ArrowType>
+enable_if_parameter_free<ArrowType, T> GetMetadata(const KeyValueMetadata* 
metadata,
+                                                   const std::string& key,
+                                                   T default_value) {
+  if (!metadata) return default_value;
+  const auto index = metadata->FindKey(key);
+  if (index < 0) return default_value;
+  const auto& value = metadata->value(index);
+  T output{};
+  if (!internal::ParseValue<ArrowType>(value.data(), value.length(), &output)) 
{
+    ABORT_NOT_OK(Status::Invalid("Could not parse ", key, " = ", value));

Review Comment:
   Nice helper
   ```suggestion
       ABORT_NOT_OK(Status::Invalid("Could not parse ", key, " = ", value, " as 
", ArrowType::type_name()));
   ```



##########
cpp/src/arrow/ipc/writer.cc:
##########
@@ -430,6 +492,36 @@ class RecordBatchSerializer {
     return Status::OK();
   }
 
+  template <typename T>
+  enable_if_list_view<typename T::TypeClass, Status> Visit(const T& array) {
+    using offset_type = typename T::offset_type;
+
+    offset_type min_offset = 0;
+    offset_type max_end = 0;
+    {
+      std::shared_ptr<Buffer> value_offsets;
+      RETURN_NOT_OK(
+          GetZeroBasedListViewOffsets<T>(array, &value_offsets, &min_offset, 
&max_end));
+      out_->body_buffers.push_back(std::move(value_offsets));
+    }
+    {
+      std::shared_ptr<Buffer> value_sizes;
+      RETURN_NOT_OK(GetListViewSizes<T>(array, &value_sizes));
+      out_->body_buffers.push_back(std::move(value_sizes));
+    }
+
+    --max_recursion_depth_;

Review Comment:
   Nit: please move this to just above the VisitArray call



##########
cpp/src/arrow/array/builder_nested.h:
##########
@@ -80,100 +89,91 @@ class BaseListBuilder : public ArrayBuilder {
     value_builder_->Reset();
   }
 
-  /// \brief Vector append
-  ///
-  /// If passed, valid_bytes is of equal length to values, and any zero byte
-  /// will be considered as a null for that slot
-  Status AppendValues(const offset_type* offsets, int64_t length,
-                      const uint8_t* valid_bytes = NULLPTR) {
-    ARROW_RETURN_NOT_OK(Reserve(length));
-    UnsafeAppendToBitmap(valid_bytes, length);
-    offsets_builder_.UnsafeAppend(offsets, length);
-    return Status::OK();
-  }
-
   /// \brief Start a new variable-length list slot
   ///
   /// This function should be called before beginning to append elements to the

Review Comment:
   That seems fine, then let's rewrite the comment as "Items appended to the 
value builder before this function is called will not be members of any list 
slot" or so



##########
cpp/src/arrow/c/bridge.cc:
##########
@@ -444,6 +444,10 @@ struct SchemaExporter {
 
   Status Visit(const LargeListType& type) { return SetFormat("+L"); }
 
+  Status Visit(const ListViewType& type) { return SetFormat("+lv"); }
+
+  Status Visit(const LargeListViewType& type) { return SetFormat("+Lv"); }

Review Comment:
   Looks like the vote has established
   ```suggestion
     Status Visit(const ListViewType& type) { return SetFormat("+vl"); }
   
     Status Visit(const LargeListViewType& type) { return SetFormat("+vL"); }
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to