kou commented on code in PR #14381:
URL: https://github.com/apache/arrow/pull/14381#discussion_r993996967
##########
ruby/red-arrow/test/test-tensor.rb:
##########
@@ -16,6 +16,103 @@
# under the License.
class TensorTest < Test::Unit::TestCase
+ sub_test_case("class methods") do
+ sub_test_case(".new") do
+ def setup
+ @raw_tensor = [
+ [
+ [1, 2, 3, 4],
+ [5, 6, 7, 8],
+ ],
+ [
+ [9, 10, 11, 12],
+ [13, 14, 15, 16],
+ ],
+ [
+ [17, 18, 19, 20],
+ [21, 22, 23, 24],
+ ],
+ ]
+ @shape = [3, 2, 4]
+ @strides = [8, 4, 1]
+ end
+
+ test("Array") do
+ tensor = Arrow::Tensor.new(@raw_tensor)
+ assert_equal({
+ value_data_type: Arrow::UInt8DataType.new,
+ buffer: @raw_tensor.flatten.pack("C*"),
+ shape: @shape,
+ strides: @strides,
+ dimension_names: ["", "", ""],
+ },
+ {
+ value_data_type: tensor.value_data_type,
+ buffer: tensor.buffer.data.to_s,
+ shape: tensor.shape,
+ strides: tensor.strides,
+ dimension_names: tensor.dimension_names,
+ })
+ end
+
+ test("Array, data_type: Symbol") do
+ tensor = Arrow::Tensor.new(@raw_tensor, data_type: :int32)
+ assert_equal({
+ value_data_type: Arrow::Int32DataType.new,
+ buffer: @raw_tensor.flatten.pack("l*"),
+ shape: @shape,
+ strides: @strides.collect {|x| x * 4},
+ dimension_names: ["", "", ""],
+ },
+ {
+ value_data_type: tensor.value_data_type,
+ buffer: tensor.buffer.data.to_s,
+ shape: tensor.shape,
+ strides: tensor.strides,
+ dimension_names: tensor.dimension_names,
+ })
+ end
+
+ test("Array, dimension_names: Array<String>") do
+ tensor = Arrow::Tensor.new(@raw_tensor,
+ dimension_names: ["a", "b", "c"])
+ assert_equal({
+ value_data_type: Arrow::UInt8DataType.new,
+ buffer: @raw_tensor.flatten.pack("C*"),
+ shape: @shape,
+ strides: @strides,
+ dimension_names: ["a", "b", "c"],
+ },
+ {
+ value_data_type: tensor.value_data_type,
+ buffer: tensor.buffer.data.to_s,
+ shape: tensor.shape,
+ strides: tensor.strides,
+ dimension_names: tensor.dimension_names,
+ })
+ end
+
+ test("Array, dimension_names: Array<Symbol>") do
+ tensor = Arrow::Tensor.new(@raw_tensor,
+ dimension_names: [:a, :b, :c])
+ assert_equal({
+ value_data_type: Arrow::UInt8DataType.new,
+ buffer: @raw_tensor.flatten.pack("C*"),
+ shape: @shape,
+ strides: @strides,
+ dimension_names: ["a", "b", "c"],
+ },
+ {
+ value_data_type: tensor.value_data_type,
+ buffer: tensor.buffer.data.to_s,
+ shape: tensor.shape,
+ strides: tensor.strides,
+ dimension_names: tensor.dimension_names,
+ })
+ end
Review Comment:
Ah, I forgot to add it. `strides` is for passing `Arrow::Buffer` (not
`Array`) as the first argument.
I'll add some tests for `Arrow::Buffer`.
##########
ruby/red-arrow/lib/arrow/tensor.rb:
##########
@@ -15,8 +15,140 @@
# specific language governing permissions and limitations
# under the License.
+require_relative "raw-tensor-converter"
+
module Arrow
class Tensor
+ alias_method :initialize_raw, :initialize
+ # Creates a new {Arrow::Tensor}.
+ #
+ # @overload initialize(raw_tensor, data_type: nil, shape: nil, strides: nil, dimension_names: nil)
+ #
+ # @param raw_tensor [::Array<Numeric>] The tensor represented by
+ # raw `Array` (not `Arrow::Array`) and `Numeric`s. You can
+ # nest `Array`s to represent a multi-dimensional array.
+ #
+ # @param data_type [Arrow::DataType, String, Symbol, ::Array<String>,
+ # ::Array<Symbol>, Hash, nil] The element data type of the tensor.
+ #
+ # If you specify `nil`, data type is guessed from `raw_tensor`.
+ #
+ # See {Arrow::DataType.resolve} how to specify data type.
+ #
+ # @param shape [::Array<Integer>, nil] The array of dimension sizes.
+ #
+ # If you specify `nil`, shape is guessed from `raw_tensor`.
+ #
+ # @param strides [::Array<Integer>, nil] The array of the number of
+ # bytes in each dimension.
Review Comment:
> Not really, these are the increments in bytes.
This is a problem with my English skills...
@mrkn Could you provide a description of `strides` for this documentation?
> But I'm surprised you can pass `strides` here. I would expect this method
> to simply always create a contiguous tensor.
You're right. I added the `strides` keyword argument for the case where an
`Arrow::Buffer` is passed as the `raw_tensor`. But I forgot to describe
`Arrow::Buffer` in the documentation and to add tests for the `Arrow::Buffer`
case. I'll add them.
##########
ruby/red-arrow/lib/arrow/tensor.rb:
##########
@@ -15,8 +15,140 @@
# specific language governing permissions and limitations
# under the License.
+require_relative "raw-tensor-converter"
+
module Arrow
class Tensor
+ alias_method :initialize_raw, :initialize
+ # Creates a new {Arrow::Tensor}.
+ #
+ # @overload initialize(raw_tensor, data_type: nil, shape: nil, strides: nil, dimension_names: nil)
+ #
+ # @param raw_tensor [::Array<Numeric>] The tensor represented by
+ # raw `Array` (not `Arrow::Array`) and `Numeric`s. You can
+ # nest `Array`s to represent a multi-dimensional array.
+ #
+ # @param data_type [Arrow::DataType, String, Symbol, ::Array<String>,
+ # ::Array<Symbol>, Hash, nil] The element data type of the tensor.
+ #
+ # If you specify `nil`, data type is guessed from `raw_tensor`.
+ #
+ # See {Arrow::DataType.resolve} how to specify data type.
+ #
+ # @param shape [::Array<Integer>, nil] The array of dimension sizes.
+ #
+ # If you specify `nil`, shape is guessed from `raw_tensor`.
+ #
+ # @param strides [::Array<Integer>, nil] The array of the number of
+ # bytes in each dimension.
+ #
+ # If you specify `nil` or an empty `Array`, strides are
+ # guessed from `raw_tensor`.
+ #
+ # @param dimension_names [::Array<String>, ::Array<Symbol>, nil]
+ # The array of the dimension names.
+ #
+ # If you specify `nil`, none of the dimensions have names.
+ #
+ # @example Create a tensor from Ruby's Array
+ # raw_tensor = [
+ # [
+ # [1, 2, 3, 4],
+ # [5, 6, 7, 8],
+ # ],
+ # [
+ # [9, 10, 11, 12],
+ # [13, 14, 15, 16],
+ # ],
+ # [
+ # [17, 18, 19, 20],
+ # [21, 22, 23, 24],
+ # ],
+ # ]
+ # Arrow::Tensor.new(raw_tensor)
+ #
+ # @since 10.0.0
+ #
+ # @overload initialize(data_type, data, shape, strides, dimension_names)
+ #
+ # @param data_type [Arrow::DataType, String, Symbol, ::Array<String>,
+ # ::Array<Symbol>, Hash] The element data type of the tensor.
+ #
+ # See {Arrow::DataType.resolve} how to specify data type.
+ #
+ # @param data [Arrow::Buffer] The data of the tensor.
+ #
+ # @param shape [::Array<Integer>] The array of dimension sizes.
+ #
+ # @param strides [::Array<Integer>, nil] The array of the number of
+ # bytes in each dimension.
+ #
+ # If you specify `nil` or an empty `Array`, strides are
+ # guessed from `data_type` and `data`.
+ #
+ # @param dimension_names [::Array<String>, ::Array<Symbol>, nil]
+ # The array of the dimension names.
+ #
+ # If you specify `nil`, none of the dimensions have names.
+ #
+ # @example Create a tensor from Arrow::Buffer
+ # raw_data = [
+ # 1, 2,
+ # 3, 4,
+ #
+ # 5, 6,
+ # 7, 8,
+ #
+ # 9, 10,
+ # 11, 12,
+ # ]
+ # data = Arrow::Buffer.new(raw_data.pack("c*").freeze)
+ # shape = [3, 2, 2]
+ # strides = []
+ # names = ["a", "b", "c"]
+ # Arrow::Tensor.new(:int8, data, shape, strides, names)
+ def initialize(*args,
+ data_type: nil,
+ data: nil,
+ shape: nil,
+ strides: nil,
+ dimension_names: nil)
+ n_args = args.size
+ case n_args
+ when 1
+ converter = RawTensorConverter.new(args[0],
+ data_type: data_type,
+ shape: shape,
+ strides: strides,
+ dimension_names: dimension_names)
+ data_type = converter.data_type
+ data = converter.data
+ shape = converter.shape
+ strides = converter.strides
+ dimension_names = converter.dimension_names
+ when 5
Review Comment:
What does the "this" mean? Dispatching by the number of positional arguments?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]