rok commented on code in PR #50203:
URL: https://github.com/apache/arrow/pull/50203#discussion_r3481048408


##########
python/pyarrow/tests/test_extension_type.py:
##########
@@ -1730,6 +1730,82 @@ def test_tensor_array_from_numpy(np_type_str):
         pa.FixedShapeTensorArray.from_numpy_ndarray(arr, dim_names=[0, 1])
 
 
+@pytest.mark.numpy
+@pytest.mark.parametrize("np_type_str", ("int8", "int64", "float32"))
+def test_tensor_array_from_list_of_ndarrays(np_type_str):
+    # GH-49644

Review Comment:
   Comments like this one are not really necessary for tests. Ideally a test 
should be readable on its own, without comments. Sometimes agents will add far 
too many comments, which just makes things harder to read.



##########
python/pyarrow/tests/test_extension_type.py:
##########
@@ -1730,6 +1730,82 @@ def test_tensor_array_from_numpy(np_type_str):
         pa.FixedShapeTensorArray.from_numpy_ndarray(arr, dim_names=[0, 1])
 
 
+@pytest.mark.numpy
+@pytest.mark.parametrize("np_type_str", ("int8", "int64", "float32"))
+def test_tensor_array_from_list_of_ndarrays(np_type_str):
+    # GH-49644
+    np_dtype = np.dtype(np_type_str)
+    tensor_type = pa.fixed_shape_tensor(pa.from_numpy_dtype(np_dtype), (2, 3))
+
+    elements = [
+        np.arange(6, dtype=np_dtype).reshape(2, 3),
+        np.arange(6, 12, dtype=np_dtype).reshape(2, 3),
+    ]
+    result = pa.array(elements, type=tensor_type)
+    assert isinstance(result, pa.FixedShapeTensorArray)
+    assert result.type == tensor_type
+    assert len(result) == 2
+
+    # Must match the existing from_numpy_ndarray path on the same data
+    expected = pa.FixedShapeTensorArray.from_numpy_ndarray(np.stack(elements))
+    assert result.storage.equals(expected.storage)
+
+    # Each element round-trips back to the original ndarray (with its shape)
+    for scalar, original in zip(result, elements):
+        np.testing.assert_array_equal(scalar.to_numpy(), original)
+
+    # Higher-dimensional tensors work too
+    tensor_3d = pa.fixed_shape_tensor(pa.from_numpy_dtype(np_dtype), (2, 2, 3))
+    elements_3d = [np.arange(12, dtype=np_dtype).reshape(2, 2, 3)]
+    result_3d = pa.array(elements_3d, type=tensor_3d)
+    assert result_3d.type == tensor_3d
+    np.testing.assert_array_equal(result_3d[0].to_numpy(), elements_3d[0])
+
+    # None elements are allowed
+    result_with_null = pa.array([elements[0], None], type=tensor_type)
+    assert result_with_null.null_count == 1
+    assert result_with_null[1].as_py() is None
+
+    # A multi-dimensional element whose shape doesn't match the tensor shape is
+    # rejected, even when the total number of elements is the same (GH-49644).
+    with pytest.raises(ValueError, match="shape"):
+        pa.array([np.arange(6, dtype=np_dtype).reshape(3, 2)], 
type=tensor_type)
+
+    # Permuted tensor types can't be built from a sequence (the flatten would
+    # store the wrong layout), so they're rejected for now.
+    permuted_type = pa.fixed_shape_tensor(
+        pa.from_numpy_dtype(np_dtype), (2, 3), permutation=[1, 0])
+    with pytest.raises(NotImplementedError, match="permutation"):
+        pa.array(elements, type=permuted_type)
+
+
+@pytest.mark.numpy
+def test_tensor_array_from_list_mixed_layout():
+    # GH-49644: C- and F-ordered arrays with the same values must produce the
+    # same result, since the values are always flattened in C order.
+    tensor_type = pa.fixed_shape_tensor(pa.int64(), (2, 3))
+    raw = [[1, 2, 3], [4, 5, 6]]
+    c_arr = np.array(raw, order="C")
+    f_arr = np.array(raw, order="F")
+    assert np.array_equal(c_arr, f_arr)
+    assert c_arr.tobytes("A") != f_arr.tobytes("A")
+
+    same = pa.array([c_arr, c_arr], type=tensor_type)
+    mixed = pa.array([c_arr, f_arr], type=tensor_type)
+    assert mixed.equals(same)
+    assert mixed.storage.to_pylist() == [[1, 2, 3, 4, 5, 6], [1, 2, 3, 4, 5, 
6]]
+
+
+@pytest.mark.numpy
+def test_tensor_array_from_list_of_0d_arrays():
+    # GH-49644: a scalar (0-dimensional) tensor can be built from 0-D arrays.

Review Comment:
   The name of the test already says the same thing as the comment. Either 
move the test into `test_tensor_array_from_list_of_ndarrays` or remove the 
comment.



##########
python/pyarrow/array.pxi:
##########
@@ -266,6 +266,24 @@ def array(object obj, type=None, mask=None, size=None, 
from_pandas=None,
     if type is not None and type.id == _Type_EXTENSION:
         extension_type = type
         type = type.storage_type
+        # GH-49644: when building a fixed_shape_tensor from a sequence of 
arrays,
+        # the converter only sees the flat storage type, so validate the
+        # tensor-specific constraints here where the type is still known.
+        if (isinstance(extension_type, FixedShapeTensorType)
+                and isinstance(obj, (list, tuple))):

Review Comment:
   Looks good!



##########
python/pyarrow/tests/test_extension_type.py:
##########
@@ -1730,6 +1730,82 @@ def test_tensor_array_from_numpy(np_type_str):
         pa.FixedShapeTensorArray.from_numpy_ndarray(arr, dim_names=[0, 1])
 
 
+@pytest.mark.numpy
+@pytest.mark.parametrize("np_type_str", ("int8", "int64", "float32"))
+def test_tensor_array_from_list_of_ndarrays(np_type_str):
+    # GH-49644
+    np_dtype = np.dtype(np_type_str)
+    tensor_type = pa.fixed_shape_tensor(pa.from_numpy_dtype(np_dtype), (2, 3))
+
+    elements = [
+        np.arange(6, dtype=np_dtype).reshape(2, 3),
+        np.arange(6, 12, dtype=np_dtype).reshape(2, 3),
+    ]
+    result = pa.array(elements, type=tensor_type)
+    assert isinstance(result, pa.FixedShapeTensorArray)
+    assert result.type == tensor_type
+    assert len(result) == 2
+
+    # Must match the existing from_numpy_ndarray path on the same data
+    expected = pa.FixedShapeTensorArray.from_numpy_ndarray(np.stack(elements))
+    assert result.storage.equals(expected.storage)
+
+    # Each element round-trips back to the original ndarray (with its shape)
+    for scalar, original in zip(result, elements):
+        np.testing.assert_array_equal(scalar.to_numpy(), original)
+
+    # Higher-dimensional tensors work too
+    tensor_3d = pa.fixed_shape_tensor(pa.from_numpy_dtype(np_dtype), (2, 2, 3))
+    elements_3d = [np.arange(12, dtype=np_dtype).reshape(2, 2, 3)]
+    result_3d = pa.array(elements_3d, type=tensor_3d)
+    assert result_3d.type == tensor_3d
+    np.testing.assert_array_equal(result_3d[0].to_numpy(), elements_3d[0])
+
+    # None elements are allowed
+    result_with_null = pa.array([elements[0], None], type=tensor_type)
+    assert result_with_null.null_count == 1
+    assert result_with_null[1].as_py() is None
+
+    # A multi-dimensional element whose shape doesn't match the tensor shape is
+    # rejected, even when the total number of elements is the same (GH-49644).
+    with pytest.raises(ValueError, match="shape"):
+        pa.array([np.arange(6, dtype=np_dtype).reshape(3, 2)], 
type=tensor_type)
+
+    # Permuted tensor types can't be built from a sequence (the flatten would
+    # store the wrong layout), so they're rejected for now.
+    permuted_type = pa.fixed_shape_tensor(
+        pa.from_numpy_dtype(np_dtype), (2, 3), permutation=[1, 0])
+    with pytest.raises(NotImplementedError, match="permutation"):
+        pa.array(elements, type=permuted_type)
+
+
+@pytest.mark.numpy
+def test_tensor_array_from_list_mixed_layout():
+    # GH-49644: C- and F-ordered arrays with the same values must produce the
+    # same result, since the values are always flattened in C order.

Review Comment:
   Including this comment makes sense because it's less obvious why we are 
setting different orders. However, there's no need for the `GH-49644:` part; 
please remove it. Check all other comments in the tests and apply the same 
thinking.



##########
python/pyarrow/src/arrow/python/python_to_arrow.cc:
##########
@@ -908,13 +908,32 @@ class PyListConverter : public ListConverter<T, 
PyConverter, PyConverterTrait> {
 
   Status AppendNdarray(PyObject* value) {
     PyArrayObject* ndarray = reinterpret_cast<PyArrayObject*>(value);
-    if (PyArray_NDIM(ndarray) != 1) {
-      return Status::Invalid("Can only convert 1-dimensional array values");
-    }
     if (PyArray_ISBYTESWAPPED(ndarray)) {
       // TODO
       return Status::NotImplemented("Byte-swapped arrays not supported");
     }
+    OwnedRef flattened;
+    if (PyArray_NDIM(ndarray) != 1) {
+      // GH-49644: a fixed-size list (e.g. fixed-shape-tensor storage) can be
+      // built from a multi-dimensional array, always flattened in C order
+      // regardless of the input's memory layout.
+      if (PyArray_NDIM(ndarray) < 2 || this->list_type_->id() != 
Type::FIXED_SIZE_LIST) {
+        return Status::Invalid(
+            "Can only convert 1-dimensional array values to a variable-sized 
list");
+      }
+      // Get an aligned, C-contiguous array (copying only if needed), then view
+      // it as 1-D so its values can be read directly in C order.
+      PyObject* contiguous =
+          PyArray_CheckFromAny(value, nullptr, /*min_depth=*/0, 
/*max_depth=*/0,
+                               NPY_ARRAY_C_CONTIGUOUS | NPY_ARRAY_ALIGNED, 
nullptr);
+      RETURN_IF_PYERROR();
+      flattened.reset(
+          PyArray_Ravel(reinterpret_cast<PyArrayObject*>(contiguous), 
NPY_CORDER));

Review Comment:
   What do you want to do about this? @aboderinsamuel 



##########
python/pyarrow/tests/test_array.py:
##########
@@ -2924,6 +2924,36 @@ def test_array_from_invalid_dim_raises():
         pa.array(arr0d)
 
 
+@pytest.mark.numpy
+def test_fixed_size_list_from_multidim_ndarray():
+    # GH-49644: a fixed-size list can be built from multi-dimensional ndarray
+    # elements by flattening them in C order.

Review Comment:
   No need for this comment here.



##########
python/pyarrow/tests/test_extension_type.py:
##########
@@ -1730,6 +1730,60 @@ def test_tensor_array_from_numpy(np_type_str):
         pa.FixedShapeTensorArray.from_numpy_ndarray(arr, dim_names=[0, 1])
 
 
+@pytest.mark.numpy
+@pytest.mark.parametrize("np_type_str", ("int8", "int64", "float32"))
+def test_tensor_array_from_list_of_ndarrays(np_type_str):
+    # GH-49644
+    np_dtype = np.dtype(np_type_str)
+    tensor_type = pa.fixed_shape_tensor(pa.from_numpy_dtype(np_dtype), (2, 3))
+
+    elements = [
+        np.arange(6, dtype=np_dtype).reshape(2, 3),
+        np.arange(6, 12, dtype=np_dtype).reshape(2, 3),
+    ]
+    result = pa.array(elements, type=tensor_type)
+    assert isinstance(result, pa.FixedShapeTensorArray)
+    assert result.type == tensor_type
+    assert len(result) == 2
+
+    # Must match the existing from_numpy_ndarray path on the same data
+    expected = pa.FixedShapeTensorArray.from_numpy_ndarray(np.stack(elements))
+    assert result.storage.equals(expected.storage)
+
+    # Each element round-trips back to the original ndarray (with its shape)
+    for scalar, original in zip(result, elements):
+        np.testing.assert_array_equal(scalar.to_numpy(), original)
+
+    # Higher-dimensional tensors work too
+    tensor_3d = pa.fixed_shape_tensor(pa.from_numpy_dtype(np_dtype), (2, 2, 3))
+    elements_3d = [np.arange(12, dtype=np_dtype).reshape(2, 2, 3)]
+    result_3d = pa.array(elements_3d, type=tensor_3d)
+    assert result_3d.type == tensor_3d
+    np.testing.assert_array_equal(result_3d[0].to_numpy(), elements_3d[0])
+
+    # None elements are allowed
+    result_with_null = pa.array([elements[0], None], type=tensor_type)
+    assert result_with_null.null_count == 1
+    assert result_with_null[1].as_py() is None
+

Review Comment:
   Great!



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to