jorisvandenbossche commented on code in PR #14238:
URL: https://github.com/apache/arrow/pull/14238#discussion_r983175968


##########
python/pyarrow/src/arrow_to_pandas.cc:
##########
@@ -730,31 +738,38 @@ Status DecodeDictionaries(MemoryPool* pool, const 
std::shared_ptr<DataType>& den
 template <typename ListArrayT>
 Status ConvertListsLike(PandasOptions options, const ChunkedArray& data,
                         PyObject** out_values) {
+  using ListArrayType = typename ListArrayT::TypeClass;
+  const auto& list_type = checked_cast<const ListArrayType&>(*data.type());
+  const auto& value_type = list_type.value_type();
+
+  const auto& val_type = checked_cast<const ExtensionType&>(*value_type);
+  const auto& storage_ty = val_type.storage_type();
+
   // Get column of underlying value arrays
   ArrayVector value_arrays;
   for (int c = 0; c < data.num_chunks(); c++) {
     const auto& arr = checked_cast<const ListArrayT&>(*data.chunk(c));
-    value_arrays.emplace_back(arr.values());
+    if (arr.values()->type()->id() == Type::EXTENSION) {
+      const auto& arr_ext = checked_cast<const ExtensionArray&>(*arr.values());
+      value_arrays.emplace_back(arr_ext.storage());
+    } else {
+      value_arrays.emplace_back(arr.values());
+    }
   }
-  using ListArrayType = typename ListArrayT::TypeClass;
-  const auto& list_type = checked_cast<const ListArrayType&>(*data.type());
-  auto value_type = list_type.value_type();
 
-  auto flat_column = std::make_shared<ChunkedArray>(value_arrays, value_type);
+  auto flat_column = std::make_shared<ChunkedArray>(value_arrays, storage_ty);

Review Comment:
   I suppose this will need to be done conditionally (storage_type if (id == 
EXTENSION) else value_type) to keep the non-extension case working?



##########
python/pyarrow/src/arrow_to_pandas.cc:
##########
@@ -191,6 +192,13 @@ static inline bool ListTypeSupported(const DataType& type) 
{
       const auto& list_type = checked_cast<const BaseListType&>(type);
       return ListTypeSupported(*list_type.value_type());
     }
+    case Type::EXTENSION: {
+      auto ext = std::dynamic_pointer_cast<ExtensionType>(type.GetSharedPtr());
+      if (ext == nullptr) {
+        return false;
+      }

Review Comment:
   I think you can do a `checked_cast` instead similar as a few lines above for 
the list type? (which will do a dynamic cast in debug mode, and static cast in 
release mode)



##########
python/pyarrow/src/arrow_to_pandas.cc:
##########
@@ -730,31 +738,38 @@ Status DecodeDictionaries(MemoryPool* pool, const 
std::shared_ptr<DataType>& den
 template <typename ListArrayT>
 Status ConvertListsLike(PandasOptions options, const ChunkedArray& data,
                         PyObject** out_values) {
+  using ListArrayType = typename ListArrayT::TypeClass;
+  const auto& list_type = checked_cast<const ListArrayType&>(*data.type());
+  const auto& value_type = list_type.value_type();
+
+  const auto& val_type = checked_cast<const ExtensionType&>(*value_type);
+  const auto& storage_ty = val_type.storage_type();
+
   // Get column of underlying value arrays
   ArrayVector value_arrays;
   for (int c = 0; c < data.num_chunks(); c++) {
     const auto& arr = checked_cast<const ListArrayT&>(*data.chunk(c));
-    value_arrays.emplace_back(arr.values());
+    if (arr.values()->type()->id() == Type::EXTENSION) {

Review Comment:
   ```suggestion
       if (arr.value_type()->id() == Type::EXTENSION) {
   ```
   
   (little bit indirection)



##########
python/pyarrow/src/arrow_to_pandas.cc:
##########
@@ -730,31 +738,38 @@ Status DecodeDictionaries(MemoryPool* pool, const 
std::shared_ptr<DataType>& den
 template <typename ListArrayT>
 Status ConvertListsLike(PandasOptions options, const ChunkedArray& data,
                         PyObject** out_values) {
+  using ListArrayType = typename ListArrayT::TypeClass;
+  const auto& list_type = checked_cast<const ListArrayType&>(*data.type());
+  const auto& value_type = list_type.value_type();
+
+  const auto& val_type = checked_cast<const ExtensionType&>(*value_type);
+  const auto& storage_ty = val_type.storage_type();

Review Comment:
   To reduce the size of the diff, I think you can leave this where it was 
originally after the loop (since you don't need to storage_type anymore in the 
for loop)



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to