felipecrv commented on code in PR #35345:
URL: https://github.com/apache/arrow/pull/35345#discussion_r1339218473
##########
cpp/src/arrow/array/builder_nested.h:
##########
@@ -191,20 +191,129 @@ class BaseListBuilder : public ArrayBuilder {
return
std::make_shared<TYPE>(value_field_->WithType(value_builder_->type()));
}
+ private:
+ static constexpr const char* type_name() {
+ if constexpr (is_list_view(TYPE::type_id)) {
+ return "ListView";
+ } else {
+ return "List";
+ }
+ }
+
protected:
+ /// \brief Append dimensions for num_values empty list slots.
+ ///
+ /// ListViewBuilder overrides this to also append the sizes.
+ virtual void UnsafeAppendEmptyDimensions(int64_t num_values) {
+ const int64_t offset = value_builder_->length();
+ for (int64_t i = 0; i < num_values; ++i) {
+ offsets_builder_.UnsafeAppend(static_cast<offset_type>(offset));
+ }
+ }
+
+ /// \brief Append dimensions for a single list slot.
+ ///
+ /// ListViewBuilder overrides this to also append the size.
+ virtual void UnsafeAppendDimensions(int64_t offset, int64_t size) {
+ offsets_builder_.UnsafeAppend(static_cast<offset_type>(offset));
+ }
+
TypedBufferBuilder<offset_type> offsets_builder_;
std::shared_ptr<ArrayBuilder> value_builder_;
std::shared_ptr<Field> value_field_;
+};
+
+// ----------------------------------------------------------------------
+// ListBuilder / LargeListBuilder
+
+template <typename TYPE>
+class ARROW_EXPORT BaseListBuilder : public VarLengthListLikeBuilder<TYPE> {
+ private:
+ using BASE = VarLengthListLikeBuilder<TYPE>;
+
+ public:
+ using TypeClass = TYPE;
+ using offset_type = typename BASE::offset_type;
+
+ using BASE::BASE;
+
+ using BASE::Append;
+
+ ~BaseListBuilder() override = default;
+
+ /// \brief Start a new variable-length list slot
+ ///
+ /// This function should be called before beginning to append elements to the
+ /// value builder
+ ///
+ /// Prefer Append(is_valid, 0) as that works correctly for list-view types
+ /// as well as list types.
+ Status Append(bool is_valid = true) { return BASE::Append(is_valid, 0); }
+
+ /// \brief Vector append
+ ///
+ /// If passed, valid_bytes is of equal length to values, and any zero byte
+ /// will be considered as a null for that slot
+ Status AppendValues(const offset_type* offsets, int64_t length,
+ const uint8_t* valid_bytes = NULLPTR) {
+ ARROW_RETURN_NOT_OK(this->Reserve(length));
+ this->UnsafeAppendToBitmap(valid_bytes, length);
+ this->offsets_builder_.UnsafeAppend(offsets, length);
+ return Status::OK();
+ }
+
+ Status AppendValues(const offset_type* offsets, const offset_type* sizes,
+ int64_t length, const uint8_t* valid_bytes) final {
+ // offsets are assumed to be valid, but the first lenght-1 sizes have to be
+ // consistent with the offsets to rule out the possibility that the caller
+ // is passing sizes that could work if building a list-view, but don't work
+ // on building a list that requires offsets to be non-decreasing.
+ if (sizes) {
Review Comment:
> What is the point of exposing this API on ListBuilder if sizes endures
costly validation but is then entirely ignored? It doesn't seem to me like a
very good idea, as builders are expected to be efficient.
`ListBuilder` still exposes `AppendValues(offsets, length, valid_bytes)`
with no loss of efficiency. `AppendValues` with `sizes` is overridden here so
that we fulfill the generic contract of building var-length lists (requiring
offsets + sizes).
The expensive validation exists to catch people making mistakes of passing
invalid `sizes` through the generic interface. They can gain more efficiency by
passing `nullptr` on the sizes or using the `ListViewBuilder` directly.
> And if we want some compatible vector-append, then I suggest to do the
reverse:
expose a AppendValues(const offset_type* offsets, int64_t length, const
uint8_t* valid_bytes) and let ListViewBuilder compute the sizes.
Doing that has its problems as well: I can't delegate to a version that
takes explicit `sizes` in list-view builders without allocating a temporary
buffer. But I will implement like that and have you decide which way is better.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]