paleolimbot commented on PR #33925:
URL: https://github.com/apache/arrow/pull/33925#issuecomment-1413961867
This looks awesome!
A hacked-together R implementation (worked great!):
<details>
```
library(R6)
library(rlang)
library(arrow, warn.conflicts = FALSE)
#> Some features are not enabled in this build of Arrow. Run `arrow_info()`
#> for more information.
# R6 extension type class for tensors of a fixed shape whose elements are
# stored as a FixedSizeList. The shape is carried in the extension metadata as
# JSON (key "shape") and cached in `private$shape` once validated.
FixedShapeTensor <- R6Class(
  "FixedShapeTensor",
  inherit = ExtensionType,
  public = list(
    # Validate the storage type and the JSON metadata, then cache the shape.
    #
    # Errors if the storage type is not a FixedSizeList, if the metadata has
    # no "shape" key, or if the product of the shape does not equal the
    # storage list size.
    deserialize_instance = function() {
      storage <- self$storage_type()
      if (!inherits(storage, "FixedSizeListType")) {
        stop("storage for a FixedShapeTensor must be a FixedSizeList")
      }

      parsed <- jsonlite::fromJSON(self$extension_metadata_utf8())
      if (is.null(parsed$shape)) {
        stop("Key 'shape' missing in JSON metadata")
      }

      # Validate against the same integer shape that as_vector() will use, and
      # only store it once every check has passed.
      shape <- as.integer(parsed$shape)
      n_values <- prod(shape)
      if (n_values != storage$list_size) {
        stop(
          sprintf(
            paste0(
              "FixedShapeTensor of shape %s must have storage with ",
              "fixed sized list[%s]"
            ),
            paste(shape, collapse = ", "),
            n_values
          )
        )
      }

      private$shape <- shape
    },

    # Convert the extension array to a list with one R array per element,
    # each having dimensions `private$shape`.
    as_vector = function(extension_array) {
      result <- super$as_vector(extension_array)
      # R's arrays are column-major whereas this extension type stores its
      # values in row-major order, so each flat vector is shaped with the
      # reversed dimensions and its axes are then permuted back.
      lapply(result, function(x) {
        # NOTE(review): missing elements presumably come back as NULL, which
        # cannot take a dim attribute -- pass them through unchanged.
        if (is.null(x)) {
          return(x)
        }

        dim(x) <- rev(private$shape)
        # Assigning through x[] keeps x's other attributes while replacing
        # the values with the permuted ones.
        x[] <- aperm(x, rev(seq_along(dim(x))))
        dim(x) <- private$shape
        x
      })
    }
  ),
  private = list(
    # Integer vector of tensor dimensions; set by deserialize_instance().
    shape = NULL
  )
)
# Build the "arrow.fixed_shape_tensor" extension type.
#
# value_type: type of the individual tensor values.
# shape: tensor dimensions (coerced to integer); their product becomes the
#   list size of the FixedSizeList storage, and they are serialized as JSON
#   under the key "shape" in the extension metadata.
fixed_shape_tensor <- function(value_type, shape) {
  dims <- as.integer(shape)
  storage <- fixed_size_list_of(value_type, prod(dims))
  metadata <- jsonlite::toJSON(list(shape = dims))

  new_extension_type(
    storage_type = storage,
    extension_name = "arrow.fixed_shape_tensor",
    extension_metadata = metadata,
    type_class = FixedShapeTensor
  )
}
# Wrap a single R array `x` as a length-one fixed shape tensor extension
# array.
#
# x: an R array; its `dim()` is used as the tensor shape.
# value_type: type of the tensor values. When NULL it is inferred from a
#   zero-length slice of the flattened values.
fixed_shape_tensor_array <- function(x, value_type = NULL) {
  tensor_shape <- dim(x)
  n_values <- prod(tensor_shape)

  # Reverse the axis order so that dropping the dimensions leaves the values
  # flattened in the order the storage expects (see the 1, 3, 5, 2, 4, 6
  # output for a 2 x 3 matrix of 1:6).
  x[] <- aperm(x, rev(seq_along(tensor_shape)))
  dim(x) <- NULL

  if (is.null(value_type)) {
    value_type <- infer_type(x[integer(0)])
  }

  list_type <- fixed_size_list_of(value_type, list_size = n_values)
  storage <- as_arrow_array(list(x), type = list_type)

  new_extension_array(storage, fixed_shape_tensor(value_type, tensor_shape))
}
# Demo: create a tensor type with int32 values and shape 2 x 3.
(type <- fixed_shape_tensor(int32(), c(2, 3)))
#> FixedShapeTensor
#> FixedShapeTensor <{"shape":[2,3]}>
# A 2 x 3 R matrix filled column by column with 1:6.
(r_matrix <- matrix(1:6, nrow = 2, ncol = 3))
#> [,1] [,2] [,3]
#> [1,] 1 3 5
#> [2,] 2 4 6
# Convert the matrix to a tensor array; the printed storage lists the values
# row by row (1, 3, 5, 2, 4, 6).
(array <- fixed_shape_tensor_array(r_matrix))
#> ExtensionArray
#> <FixedShapeTensor <{"shape":[2,3]}>>
#> [
#> [
#> 1,
#> 3,
#> 5,
#> 2,
#> 4,
#> 6
#> ]
#> ]
# Round trip: converting back yields a list holding the original matrix.
array$as_vector()
#> [[1]]
#> [,1] [,2] [,3]
#> [1,] 1 3 5
#> [2,] 2 4 6
```
</details>
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]