This is an automated email from the ASF dual-hosted git repository.
kou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new fdcc6472bd GH-48537: [Ruby] Add support for reading fixed size binary array (#48538)
fdcc6472bd is described below
commit fdcc6472bd94626a423f192d8fa067c59eacdc70
Author: Sutou Kouhei <[email protected]>
AuthorDate: Tue Dec 16 09:30:47 2025 +0900
GH-48537: [Ruby] Add support for reading fixed size binary array (#48538)
### Rationale for this change
It's a fixed size variant of binary array.
### What changes are included in this PR?
* Add `ArrowFormat::FixedSizeBinaryType`
* Add `ArrowFormat::FixedSizeBinaryArray`
### Are these changes tested?
Yes.
### Are there any user-facing changes?
Yes.
* GitHub Issue: #48537
Authored-by: Sutou Kouhei <[email protected]>
Signed-off-by: Sutou Kouhei <[email protected]>
---
ruby/red-arrow-format/lib/arrow-format/array.rb | 15 +++++++++++++++
ruby/red-arrow-format/lib/arrow-format/file-reader.rb | 18 ++++++++++++------
ruby/red-arrow-format/lib/arrow-format/type.rb | 13 ++++++++++++-
ruby/red-arrow-format/test/test-file-reader.rb | 12 ++++++++++++
4 files changed, 51 insertions(+), 7 deletions(-)
diff --git a/ruby/red-arrow-format/lib/arrow-format/array.rb b/ruby/red-arrow-format/lib/arrow-format/array.rb
index 4788df341a..23969bc24b 100644
--- a/ruby/red-arrow-format/lib/arrow-format/array.rb
+++ b/ruby/red-arrow-format/lib/arrow-format/array.rb
@@ -235,6 +235,21 @@ module ArrowFormat
end
end
+ class FixedSizeBinaryArray < Array
+ def initialize(type, size, validity_buffer, values_buffer)
+ super(type, size, validity_buffer)
+ @values_buffer = values_buffer
+ end
+
+ def to_a
+ byte_width = @type.byte_width
+ values = 0.step(@size * byte_width - 1, byte_width).collect do |offset|
+ @values_buffer.get_string(offset, byte_width)
+ end
+ apply_validity(values)
+ end
+ end
+
class VariableSizeListArray < Array
def initialize(type, size, validity_buffer, offsets_buffer, child)
super(type, size, validity_buffer)
diff --git a/ruby/red-arrow-format/lib/arrow-format/file-reader.rb b/ruby/red-arrow-format/lib/arrow-format/file-reader.rb
index fb1c9fb8a8..8b149cd175 100644
--- a/ruby/red-arrow-format/lib/arrow-format/file-reader.rb
+++ b/ruby/red-arrow-format/lib/arrow-format/file-reader.rb
@@ -232,6 +232,8 @@ module ArrowFormat
type = LargeBinaryType.singleton
when Org::Apache::Arrow::Flatbuf::Utf8
type = UTF8Type.singleton
+ when Org::Apache::Arrow::Flatbuf::FixedSizeBinary
+ type = FixedSizeBinaryType.new(fb_type.byte_width)
end
Field.new(fb_field.name, type, fb_field.nullable?)
end
@@ -263,6 +265,16 @@ module ArrowFormat
values_buffer = buffers.shift
values = body.slice(values_buffer.offset, values_buffer.length)
field.type.build_array(length, validity, values)
+ when VariableSizeBinaryType
+ offsets_buffer = buffers.shift
+ values_buffer = buffers.shift
+ offsets = body.slice(offsets_buffer.offset, offsets_buffer.length)
+ values = body.slice(values_buffer.offset, values_buffer.length)
+ field.type.build_array(length, validity, offsets, values)
+ when FixedSizeBinaryType
+ values_buffer = buffers.shift
+ values = body.slice(values_buffer.offset, values_buffer.length)
+ field.type.build_array(length, validity, values)
when VariableSizeListType
offsets_buffer = buffers.shift
offsets = body.slice(offsets_buffer.offset, offsets_buffer.length)
@@ -289,12 +301,6 @@ module ArrowFormat
read_column(child, nodes, buffers, body)
end
field.type.build_array(length, types, children)
- when VariableSizeBinaryType
- offsets_buffer = buffers.shift
- values_buffer = buffers.shift
- offsets = body.slice(offsets_buffer.offset, offsets_buffer.length)
- values = body.slice(values_buffer.offset, values_buffer.length)
- field.type.build_array(length, validity, offsets, values)
end
end
end
diff --git a/ruby/red-arrow-format/lib/arrow-format/type.rb b/ruby/red-arrow-format/lib/arrow-format/type.rb
index 87d85f3419..055c0890c4 100644
--- a/ruby/red-arrow-format/lib/arrow-format/type.rb
+++ b/ruby/red-arrow-format/lib/arrow-format/type.rb
@@ -344,7 +344,6 @@ module ArrowFormat
end
end
- attr_reader :name
def initialize
super("UTF8")
end
@@ -354,6 +353,18 @@ module ArrowFormat
end
end
+ class FixedSizeBinaryType < Type
+ attr_reader :byte_width
+ def initialize(byte_width)
+ super("FixedSizeBinary")
+ @byte_width = byte_width
+ end
+
+ def build_array(size, validity_buffer, values_buffer)
+ FixedSizeBinaryArray.new(self, size, validity_buffer, values_buffer)
+ end
+ end
+
class VariableSizeListType < Type
attr_reader :child
def initialize(name, child)
diff --git a/ruby/red-arrow-format/test/test-file-reader.rb b/ruby/red-arrow-format/test/test-file-reader.rb
index fb715ea2c4..b31e894045 100644
--- a/ruby/red-arrow-format/test/test-file-reader.rb
+++ b/ruby/red-arrow-format/test/test-file-reader.rb
@@ -358,6 +358,18 @@ class TestFileReader < Test::Unit::TestCase
end
end
+ sub_test_case("FixedSizeBinary") do
+ def build_array
+ data_type = Arrow::FixedSizeBinaryDataType.new(4)
+ Arrow::FixedSizeBinaryArray.new(data_type, ["0124".b, nil, "abcd".b])
+ end
+
+ def test_read
+ assert_equal([{"value" => ["0124".b, nil, "abcd".b]}],
+ read)
+ end
+ end
+
sub_test_case("List") do
def build_array
data_type = Arrow::ListDataType.new(name: "count", type: :int8)