felipecrv commented on code in PR #35345:
URL: https://github.com/apache/arrow/pull/35345#discussion_r1339218473


##########
cpp/src/arrow/array/builder_nested.h:
##########
@@ -191,20 +191,129 @@ class BaseListBuilder : public ArrayBuilder {
     return 
std::make_shared<TYPE>(value_field_->WithType(value_builder_->type()));
   }
 
+ private:
+  static constexpr const char* type_name() {
+    if constexpr (is_list_view(TYPE::type_id)) {
+      return "ListView";
+    } else {
+      return "List";
+    }
+  }
+
  protected:
+  /// \brief Append dimensions for num_values empty list slots.
+  ///
+  /// ListViewBuilder overrides this to also append the sizes.
+  virtual void UnsafeAppendEmptyDimensions(int64_t num_values) {
+    const int64_t offset = value_builder_->length();
+    for (int64_t i = 0; i < num_values; ++i) {
+      offsets_builder_.UnsafeAppend(static_cast<offset_type>(offset));
+    }
+  }
+
+  /// \brief Append dimensions for a single list slot.
+  ///
+  /// ListViewBuilder overrides this to also append the size.
+  virtual void UnsafeAppendDimensions(int64_t offset, int64_t size) {
+    offsets_builder_.UnsafeAppend(static_cast<offset_type>(offset));
+  }
+
   TypedBufferBuilder<offset_type> offsets_builder_;
   std::shared_ptr<ArrayBuilder> value_builder_;
   std::shared_ptr<Field> value_field_;
+};
+
+// ----------------------------------------------------------------------
+// ListBuilder / LargeListBuilder
+
+template <typename TYPE>
+class ARROW_EXPORT BaseListBuilder : public VarLengthListLikeBuilder<TYPE> {
+ private:
+  using BASE = VarLengthListLikeBuilder<TYPE>;
+
+ public:
+  using TypeClass = TYPE;
+  using offset_type = typename BASE::offset_type;
+
+  using BASE::BASE;
+
+  using BASE::Append;
+
+  ~BaseListBuilder() override = default;
+
+  /// \brief Start a new variable-length list slot
+  ///
+  /// This function should be called before beginning to append elements to the
+  /// value builder
+  ///
+  /// Prefer Append(is_valid, 0) as that works correctly for list-view types
+  /// as well as list types.
+  Status Append(bool is_valid = true) { return BASE::Append(is_valid, 0); }
+
+  /// \brief Vector append
+  ///
+  /// If passed, valid_bytes is of equal length to values, and any zero byte
+  /// will be considered as a null for that slot
+  Status AppendValues(const offset_type* offsets, int64_t length,
+                      const uint8_t* valid_bytes = NULLPTR) {
+    ARROW_RETURN_NOT_OK(this->Reserve(length));
+    this->UnsafeAppendToBitmap(valid_bytes, length);
+    this->offsets_builder_.UnsafeAppend(offsets, length);
+    return Status::OK();
+  }
+
+  Status AppendValues(const offset_type* offsets, const offset_type* sizes,
+                      int64_t length, const uint8_t* valid_bytes) final {
+    // offsets are assumed to be valid, but the first lenght-1 sizes have to be
+    // consistent with the offsets to rule out the possibility that the caller
+    // is passing sizes that could work if building a list-view, but don't work
+    // on building a list that requires offsets to be non-decreasing.
+    if (sizes) {

Review Comment:
   > What is the point of exposing this API on ListBuilder if sizes endures 
costly validation but is then entirely ignored? It doesn't seem to me like a 
very good idea, as builders are expected to be efficient.
   
   `ListBuilder` still exposes `AppendValues(offsets, length, valid_bytes)` 
with no loss of efficiency. `AppendValues` with `sizes` is overridden here so 
that we fulfill the generic contract of building var-length lists (requiring 
offsets + sizes).
   
   The expensive validation exists to catch people making mistakes of passing 
invalid `sizes` through the generic interface. They can gain more efficiency by 
passing `nullptr` on the sizes or using the `ListViewBuilder` directly. 
   
   > And if we want some compatible vector-append, then I suggest to do the 
reverse:
   expose a AppendValues(const offset_type* offsets, int64_t length, const 
uint8_t* valid_bytes) and let ListViewBuilder compute the sizes.
   
   Doing that has its problems as well: I can't delegate to a version that 
takes explicit `sizes` in list-view builders without allocating a temporary 
buffer. But I will implement like that and have you decide which way is better.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to