rok commented on code in PR #38008:
URL: https://github.com/apache/arrow/pull/38008#discussion_r1409346282


##########
python/pyarrow/array.pxi:
##########
@@ -3586,6 +3586,156 @@ class FixedShapeTensorArray(ExtensionArray):
         )
 
 
+cdef class VariableShapeTensorArray(ExtensionArray):
+    """
+    Concrete class for variable shape tensor extension arrays.
+
+    Examples
+    --------
+    Define the extension type for tensor array
+
+    >>> import pyarrow as pa
+    >>> tensor_type = pa.variable_shape_tensor(pa.int32(), 2)
+
+    Create an extension array
+
+    >>> shapes = pa.array([[2, 3], [1, 2]], pa.list_(pa.uint32(), 2))
+    >>> values = pa.array([[1, 2, 3, 4, 5, 6], [7, 8]], pa.list_(pa.int32()))
+    >>> arr = pa.StructArray.from_arrays([shapes, values], names=["shape", "data"])
+    >>> pa.ExtensionArray.from_storage(tensor_type, arr)
+    <pyarrow.lib.VariableShapeTensorArray object at ...>
+    -- is_valid: all not null
+    -- child 0 type: fixed_size_list<item: uint32>[2]
+      [
+        [
+          2,
+          3
+        ],
+        [
+          1,
+          2
+        ]
+      ]
+    -- child 1 type: list<item: int32>
+      [
+        [
+          1,
+          2,
+          3,
+          4,
+          5,
+          6
+        ],
+        [
+          7,
+          8
+        ]
+      ]
+    """
+
+    def to_numpy_ndarray(self):
+        """
+        Convert variable shape tensor extension array to list of numpy arrays.
+        """
+        tensors = []
+        for i in range(len(self.storage)):
+            tensors.append(self.get_tensor(i).to_numpy())
+
+        return tensors
+
+    def get_tensor(self, int64_t i):
+        """
+        Get i-th tensor from variable shape tensor extension array.
+
+        Parameters
+        ----------
+        i : int64_t
+            The index of the tensor to get.
+
+        Returns
+        -------
+        tensor : pyarrow.Tensor
+        """
+        cdef:
+            CVariableShapeTensorArray* ext_array = 
<CVariableShapeTensorArray*>(self.ap)
+            CResult[shared_ptr[CTensor]] ctensor
+        with nogil:
+            ctensor = ext_array.GetTensor(i)
+        return pyarrow_wrap_tensor(GetResultValue(ctensor))
+
+    @staticmethod
+    def from_numpy_ndarray(obj):
+        """
+        Convert a list of numpy ndarrays to a variable shape tensor extension array.
+        The length of the list will become the length of the variable shape tensor array.
+
+        Numpy arrays need to be C-contiguous in memory (``obj.flags["C_CONTIGUOUS"]==True``).
+
+        Parameters
+        ----------
+        obj : list(numpy.ndarray)
+
+        Examples
+        --------
+        >>> import pyarrow as pa
+        >>> import numpy as np
+        >>> ndarray_list = [
+        ...         np.array([[1, 2, 3], [4, 5, 6]], dtype=np.float32),
+        ...         np.array([[7, 8]], dtype=np.float32),
+        ...     ]
+        >>> pa.VariableShapeTensorArray.from_numpy_ndarray(ndarray_list)
+        <pyarrow.lib.VariableShapeTensorArray object at ...>
+        -- is_valid: all not null
+        -- child 0 type: fixed_size_list<item: uint32>[2]
+          [
+            [
+              2,
+              3
+            ],
+            [
+              1,
+              2
+            ]
+          ]
+        -- child 1 type: list<item: float>
+          [
+            [
+              1,
+              2,
+              3,
+              4,
+              5,
+              6
+            ],
+            [
+              7,
+              8
+            ]
+          ]
+        """
+        if not all([o.flags["C_CONTIGUOUS"] for o in obj]):
+            raise ValueError('The data in the numpy arrays need to be in a 
single, '
+                             'C-style contiguous segment.')
+        numpy_type = obj[0].dtype
+        ndim = obj[0].ndim
+
+        if not all([o.dtype == numpy_type for o in obj]):
+            raise ValueError('All numpy arrays need to have the same dtype.')

Review Comment:
   Done.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to