jorisvandenbossche commented on code in PR #37533:
URL: https://github.com/apache/arrow/pull/37533#discussion_r1412460696


##########
cpp/src/arrow/extension/fixed_shape_tensor.cc:
##########
@@ -82,6 +82,45 @@ Status ComputeStrides(const FixedWidthType& type, const 
std::vector<int64_t>& sh
 
 }  // namespace
 
+const Result<std::shared_ptr<Tensor>> FixedShapeTensorArray::GetTensor(
+    const int64_t i) const {
+  auto ext_arr = 
internal::checked_pointer_cast<FixedSizeListArray>(this->storage());
+  auto ext_type = 
internal::checked_pointer_cast<FixedShapeTensorType>(this->type());
+  auto value_type =
+      internal::checked_pointer_cast<FixedWidthType>(ext_type->value_type());
+  auto ndim = ext_type->ndim();
+  auto permutation = ext_type->permutation();
+  if (permutation.empty()) {
+    for (int64_t j = 0; j < static_cast<int64_t>(ndim); ++j) {
+      permutation.emplace_back(j);
+    }
+  }
+
+  std::vector<std::string> dim_names;
+  if (!ext_type->dim_names().empty()) {
+    for (auto j : permutation) {
+      dim_names.emplace_back(ext_type->dim_names()[j]);
+    }
+  } else {
+    dim_names = {};
+  }
+
+  std::vector<int64_t> shape;
+  for (int64_t& j : permutation) {
+    shape.emplace_back(ext_type->shape()[j]);
+  }
+
+  std::vector<int64_t> strides;
+  ARROW_CHECK_OK(ComputeStrides(*value_type.get(), shape, permutation, 
&strides));
+
+  // TODO: can this be done without copying?

Review Comment:
   Does this make a copy at the moment?



##########
cpp/src/arrow/extension/fixed_shape_tensor.cc:
##########
@@ -82,6 +82,45 @@ Status ComputeStrides(const FixedWidthType& type, const 
std::vector<int64_t>& sh
 
 }  // namespace
 
+const Result<std::shared_ptr<Tensor>> FixedShapeTensorArray::GetTensor(
+    const int64_t i) const {
+  auto ext_arr = 
internal::checked_pointer_cast<FixedSizeListArray>(this->storage());
+  auto ext_type = 
internal::checked_pointer_cast<FixedShapeTensorType>(this->type());
+  auto value_type =
+      internal::checked_pointer_cast<FixedWidthType>(ext_type->value_type());
+  auto ndim = ext_type->ndim();
+  auto permutation = ext_type->permutation();
+  if (permutation.empty()) {
+    for (int64_t j = 0; j < static_cast<int64_t>(ndim); ++j) {
+      permutation.emplace_back(j);
+    }
+  }
+
+  std::vector<std::string> dim_names;
+  if (!ext_type->dim_names().empty()) {
+    for (auto j : permutation) {
+      dim_names.emplace_back(ext_type->dim_names()[j]);
+    }
+  } else {
+    dim_names = {};
+  }
+
+  std::vector<int64_t> shape;
+  for (int64_t& j : permutation) {
+    shape.emplace_back(ext_type->shape()[j]);
+  }
+
+  std::vector<int64_t> strides;
+  ARROW_CHECK_OK(ComputeStrides(*value_type.get(), shape, permutation, 
&strides));
+
+  // TODO: can this be done without copying?
+  auto list_arr = ext_arr->value_slice(i)->data();

Review Comment:
   This is no longer a list array I think, so `list_arr` is a bit of a confusing name.



##########
python/pyarrow/array.pxi:
##########
@@ -3519,16 +3519,32 @@ class FixedShapeTensorArray(ExtensionArray):
     def to_numpy_ndarray(self):
         """
         Convert fixed shape tensor extension array to a numpy array (with 
dim+1).
+        """
+        cdef:
+            CFixedShapeTensorArray* ext_array = 
<CFixedShapeTensorArray*>(self.ap)
+            CResult[shared_ptr[CTensor]] ctensor
+        with nogil:
+            ctensor = ext_array.ToTensor()
+        return pyarrow_wrap_tensor(GetResultValue(ctensor)).to_numpy()

Review Comment:
   I don't remember exactly what `ToTensor` does by default with regard to 
permutation (assuming it takes it into account), but this expands the 
behaviour of `to_numpy_ndarray`, I think. In that case, can you expand the 
docstring and add some tests for this?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to