felipecrv commented on code in PR #35345:
URL: https://github.com/apache/arrow/pull/35345#discussion_r1388463702
##########
cpp/src/arrow/array/builder_nested.h:
##########
@@ -191,20 +191,129 @@ class BaseListBuilder : public ArrayBuilder {
return
std::make_shared<TYPE>(value_field_->WithType(value_builder_->type()));
}
+ private:
+ static constexpr const char* type_name() {
+ if constexpr (is_list_view(TYPE::type_id)) {
+ return "ListView";
+ } else {
+ return "List";
+ }
+ }
+
protected:
+ /// \brief Append dimensions for num_values empty list slots.
+ ///
+ /// ListViewBuilder overrides this to also append the sizes.
+ virtual void UnsafeAppendEmptyDimensions(int64_t num_values) {
+ const int64_t offset = value_builder_->length();
+ for (int64_t i = 0; i < num_values; ++i) {
+ offsets_builder_.UnsafeAppend(static_cast<offset_type>(offset));
+ }
+ }
+
+ /// \brief Append dimensions for a single list slot.
+ ///
+ /// ListViewBuilder overrides this to also append the size.
+ virtual void UnsafeAppendDimensions(int64_t offset, int64_t size) {
+ offsets_builder_.UnsafeAppend(static_cast<offset_type>(offset));
+ }
+
TypedBufferBuilder<offset_type> offsets_builder_;
std::shared_ptr<ArrayBuilder> value_builder_;
std::shared_ptr<Field> value_field_;
+};
+
+// ----------------------------------------------------------------------
+// ListBuilder / LargeListBuilder
+
+template <typename TYPE>
+class ARROW_EXPORT BaseListBuilder : public VarLengthListLikeBuilder<TYPE> {
+ private:
+ using BASE = VarLengthListLikeBuilder<TYPE>;
+
+ public:
+ using TypeClass = TYPE;
+ using offset_type = typename BASE::offset_type;
+
+ using BASE::BASE;
+
+ using BASE::Append;
+
+ ~BaseListBuilder() override = default;
+
+ /// \brief Start a new variable-length list slot
+ ///
+ /// This function should be called before beginning to append elements to the
+ /// value builder
+ ///
+ /// Prefer Append(is_valid, 0) as that works correctly for list-view types
+ /// as well as list types.
+ Status Append(bool is_valid = true) { return BASE::Append(is_valid, 0); }
+
+ /// \brief Vector append
+ ///
+ /// If passed, valid_bytes is of equal length to values, and any zero byte
+ /// will be considered as a null for that slot
+ Status AppendValues(const offset_type* offsets, int64_t length,
+ const uint8_t* valid_bytes = NULLPTR) {
+ ARROW_RETURN_NOT_OK(this->Reserve(length));
+ this->UnsafeAppendToBitmap(valid_bytes, length);
+ this->offsets_builder_.UnsafeAppend(offsets, length);
+ return Status::OK();
+ }
+
+ Status AppendValues(const offset_type* offsets, const offset_type* sizes,
+ int64_t length, const uint8_t* valid_bytes) final {
+ // offsets are assumed to be valid, but the first lenght-1 sizes have to be
+ // consistent with the offsets to rule out the possibility that the caller
+ // is passing sizes that could work if building a list-view, but don't work
+ // on building a list that requires offsets to be non-decreasing.
+ if (sizes) {
Review Comment:
When I started this PR, I had everything completely separate and then I saw
myself writing a lot of tedious repetitive code. Then I spent quite some time
refactoring everything from the beginning to leverage the commonalities between
lists and list-views. I'm aware the builder implementation is not so clear, but
the complexity of implementation pays itself by the ease of writing general
code dealing with lists. This was proven in the unit tests code and will be
even more important when we start talking about compute kernels dealing with
lists.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]