lidavidm commented on a change in pull request #10556:
URL: https://github.com/apache/arrow/pull/10556#discussion_r654450504



##########
File path: cpp/src/arrow/util/converter.h
##########
@@ -302,32 +305,59 @@ class Chunker {
     return status;
   }
 
-  // we could get bit smarter here since the whole batch of appendable values
-  // will be rejected if a capacity error is raised
-  Status Extend(InputType values, int64_t size) {
-    auto status = converter_->Extend(values, size);
-    if (ARROW_PREDICT_FALSE(status.IsCapacityError())) {
-      if (converter_->builder()->length() == 0) {
+  Status Extend(InputType values, int64_t size, int64_t offset = 0) {
+    while (offset < size) {
+      auto length_before = converter_->builder()->length();
+      auto status = converter_->Extend(values, size, offset);
+      auto length_after = converter_->builder()->length();
+      auto num_converted = length_after - length_before;
+
+      offset += num_converted;
+      length_ += num_converted;
+
+      if (status.IsCapacityError()) {
+        if (converter_->builder()->length() == 0) {
+          // Builder length == 0 means the individual element is too large to 
append.
+          // In this case, no need to try again.
+          return status;
+        } else if (converter_->rewind_on_capacity_error()) {

Review comment:
       Can we document this inline? I was going to question this since it's 
rather confusing looking at it.

##########
File path: cpp/src/arrow/python/python_to_arrow.cc
##########
@@ -655,6 +633,8 @@ class PyListConverter : public ListConverter<T, 
PyConverter, PyConverterTrait> {
     return ValidateBuilder(this->list_type_);
   }
 
+  bool rewind_on_capacity_error() const override { return true; }

Review comment:
       Is this correct? The binary builder checks capacity before append, right?

##########
File path: cpp/src/arrow/python/python_to_arrow.cc
##########
@@ -655,6 +633,8 @@ class PyListConverter : public ListConverter<T, 
PyConverter, PyConverterTrait> {
     return ValidateBuilder(this->list_type_);
   }
 
+  bool rewind_on_capacity_error() const override { return true; }

Review comment:
       Right - so we don't need to rewind, right?

##########
File path: cpp/src/arrow/python/python_to_arrow.cc
##########
@@ -655,6 +633,8 @@ class PyListConverter : public ListConverter<T, 
PyConverter, PyConverterTrait> {
     return ValidateBuilder(this->list_type_);
   }
 
+  bool rewind_on_capacity_error() const override { return true; }

Review comment:
       D'oh…sorry for the confusion.




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to