danepitkin commented on code in PR #40482:
URL: https://github.com/apache/arrow/pull/40482#discussion_r1520728304


##########
python/pyarrow/src/arrow/python/arrow_to_pandas.cc:
##########
@@ -826,6 +796,145 @@ Status ConvertListsLike(PandasOptions options, const ChunkedArray& data,
   return Status::OK();
 }
 
+template <typename T>
+enable_if_list_view<T, Status> ConvertListsLikeChunks(PyObject* numpy_array,
+                                                      const ChunkedArray& data,
+                                                      PyObject** out_values) {
+  using ListArrayT = typename TypeTraits<T>::ArrayType;
+  const auto& arr = static_cast<const ListArrayT&>(*data.chunk(0));
+  const bool has_nulls = data.null_count() > 0;
+  for (int64_t i = 0; i < arr.length(); ++i) {
+    if (has_nulls && arr.IsNull(i)) {
+      Py_INCREF(Py_None);
+      *out_values = Py_None;
+    } else {
+      // Need to subtract value_offset(0) since the original chunk might be a slice
+      // into another array.
+      OwnedRef start(PyLong_FromLongLong(arr.value_offset(i)));
+      OwnedRef end(PyLong_FromLongLong(arr.value_offset(i) + arr.value_length(i)));
+      OwnedRef slice(PySlice_New(start.obj(), end.obj(), nullptr));
+
+      if (ARROW_PREDICT_FALSE(slice.obj() == nullptr)) {
+        // Fall out of loop, will return from RETURN_IF_PYERROR
+        break;
+      }
+      *out_values = PyObject_GetItem(numpy_array, slice.obj());
+
+      if (*out_values == nullptr) {
+        // Fall out of loop, will return from RETURN_IF_PYERROR
+        break;
+      }
+    }
+    ++out_values;
+  }
+  RETURN_IF_PYERROR();
+
+  return Status::OK();
+}
+
+// template <typename T>
+// enable_if_list_like<T, Status> FlattenList(ArrayVector& value_arrays, const ChunkedArray& data, PandasOptions options) {
+//   using ListArrayT = typename TypeTraits<T>::ArrayType;
+//   for (int c = 0; c < data.num_chunks(); c++) {
+//     const auto& arr = checked_cast<const ListArrayT&>(*data.chunk(c));
+//     std::shared_ptr<Array> flattened_values = arr.values()->Slice(
+//         arr.value_offset(0), arr.value_offset(arr.length()) - arr.value_offset(0));
+//     if (arr.value_type()->id() == Type::EXTENSION) {
+//       const auto& arr_ext = checked_cast<const ExtensionArray&>(*flattened_values);
+//       value_arrays.emplace_back(arr_ext.storage());
+//     } else {
+//       value_arrays.emplace_back(flattened_values);
+//     }
+//   }
+//   return Status::OK();
+// }
+
+// template <typename T>
+// enable_if_list_view<T, Status> FlattenList(ArrayVector& value_arrays, const ChunkedArray& data, PandasOptions options) {
+//   using ListArrayT = typename TypeTraits<T>::ArrayType;
+//   for (int c = 0; c < data.num_chunks(); c++) {
+//     const auto& arr = checked_cast<const ListArrayT&>(*data.chunk(c));
+//     std::vector<std::shared_ptr<Array>> slices;
+//     for (int i = 0; i < arr.length(); i++) {
+//       slices.push_back(arr.value_slice(i));
+//     }
+//     ARROW_ASSIGN_OR_RAISE(auto flattened_values, Concatenate(slices, options.pool));
+//     if (arr.value_type()->id() == Type::EXTENSION) {
+//       const auto& arr_ext = checked_cast<const ExtensionArray&>(*flattened_values);
+//       value_arrays.emplace_back(arr_ext.storage());
+//     } else {
+//       value_arrays.emplace_back(flattened_values);
+//     }
+//   }
+//   return Status::OK();
+// }

Review Comment:
   If we don't want to reuse the C++ Flatten APIs, the commented-out functions
   above show what the duplicate implementations would look like in PyArrow.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to