This is an automated email from the ASF dual-hosted git repository.
kou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new 4723b79d81 GH-48388: [Ruby] Add support for reading map array (#48389)
4723b79d81 is described below
commit 4723b79d812daff41f761e8452f7d936ad165548
Author: Sutou Kouhei <[email protected]>
AuthorDate: Mon Dec 8 10:48:33 2025 +0900
GH-48388: [Ruby] Add support for reading map array (#48389)
### Rationale for this change
A map array is a list array whose items are structs (key/value entries), so it can be read with the same logic as a list array.
### What changes are included in this PR?
* Add `ArrowFormat::MapType`
* Add `ArrowFormat::MapArray`
### Are these changes tested?
Yes.
### Are there any user-facing changes?
Yes.
* GitHub Issue: #48388
Authored-by: Sutou Kouhei <[email protected]>
Signed-off-by: Sutou Kouhei <[email protected]>
---
ruby/red-arrow-format/lib/arrow-format/array.rb | 21 +++++++++++-
ruby/red-arrow-format/lib/arrow-format/error.rb | 5 ++-
ruby/red-arrow-format/lib/arrow-format/field.rb | 7 +++-
.../lib/arrow-format/file-reader.rb | 7 ++--
ruby/red-arrow-format/lib/arrow-format/type.rb | 39 ++++++++++++++++++++--
ruby/red-arrow-format/test/test-file-reader.rb | 25 ++++++++++++++
6 files changed, 96 insertions(+), 8 deletions(-)
diff --git a/ruby/red-arrow-format/lib/arrow-format/array.rb b/ruby/red-arrow-format/lib/arrow-format/array.rb
index c5b636dc75..ea728ce8ce 100644
--- a/ruby/red-arrow-format/lib/arrow-format/array.rb
+++ b/ruby/red-arrow-format/lib/arrow-format/array.rb
@@ -162,7 +162,7 @@ module ArrowFormat
end
end
- class ListArray < Array
+ class VariableSizeListArray < Array
def initialize(type, size, validity_buffer, offsets_buffer, child)
super(type, size, validity_buffer)
@offsets_buffer = offsets_buffer
@@ -181,6 +181,9 @@ module ArrowFormat
end
end
+ class ListArray < VariableSizeListArray
+ end
+
class StructArray < Array
def initialize(type, size, validity_buffer, children)
super(type, size, validity_buffer)
@@ -197,4 +200,20 @@ module ArrowFormat
apply_validity(values)
end
end
+
+ class MapArray < VariableSizeListArray
+ def to_a
+ super.collect do |entries|
+ if entries.nil?
+ entries
+ else
+ hash = {}
+ entries.each do |key, value|
+ hash[key] = value
+ end
+ hash
+ end
+ end
+ end
+ end
end
diff --git a/ruby/red-arrow-format/lib/arrow-format/error.rb b/ruby/red-arrow-format/lib/arrow-format/error.rb
index 125c0cbcf8..39b0b8af15 100644
--- a/ruby/red-arrow-format/lib/arrow-format/error.rb
+++ b/ruby/red-arrow-format/lib/arrow-format/error.rb
@@ -18,11 +18,14 @@ module ArrowFormat
class Error < StandardError
end
- class ReadError < StandardError
+ class ReadError < Error
attr_reader :buffer
def initialize(buffer, message)
@buffer = buffer
super("#{message}: #{@buffer}")
end
end
+
+ class TypeError < Error
+ end
end
diff --git a/ruby/red-arrow-format/lib/arrow-format/field.rb b/ruby/red-arrow-format/lib/arrow-format/field.rb
index a5e146bcbd..ac531750f7 100644
--- a/ruby/red-arrow-format/lib/arrow-format/field.rb
+++ b/ruby/red-arrow-format/lib/arrow-format/field.rb
@@ -18,9 +18,14 @@ module ArrowFormat
class Field
attr_reader :name
attr_reader :type
- def initialize(name, type)
+ def initialize(name, type, nullable)
@name = name
@type = type
+ @nullable = nullable
+ end
+
+ def nullable?
+ @nullable
end
end
end
diff --git a/ruby/red-arrow-format/lib/arrow-format/file-reader.rb b/ruby/red-arrow-format/lib/arrow-format/file-reader.rb
index 79ea86cd33..3b2dc22823 100644
--- a/ruby/red-arrow-format/lib/arrow-format/file-reader.rb
+++ b/ruby/red-arrow-format/lib/arrow-format/file-reader.rb
@@ -29,6 +29,7 @@ require_relative "org/apache/arrow/flatbuf/footer"
require_relative "org/apache/arrow/flatbuf/int"
require_relative "org/apache/arrow/flatbuf/large_binary"
require_relative "org/apache/arrow/flatbuf/list"
+require_relative "org/apache/arrow/flatbuf/map"
require_relative "org/apache/arrow/flatbuf/message"
require_relative "org/apache/arrow/flatbuf/null"
require_relative "org/apache/arrow/flatbuf/precision"
@@ -163,6 +164,8 @@ module ArrowFormat
when Org::Apache::Arrow::Flatbuf::Struct
children = fb_field.children.collect {|child| read_field(child)}
type = StructType.new(children)
+ when Org::Apache::Arrow::Flatbuf::Map
+ type = MapType.new(read_field(fb_field.children[0]))
when Org::Apache::Arrow::Flatbuf::Binary
type = BinaryType.singleton
when Org::Apache::Arrow::Flatbuf::LargeBinary
@@ -170,7 +173,7 @@ module ArrowFormat
when Org::Apache::Arrow::Flatbuf::Utf8
type = UTF8Type.singleton
end
- Field.new(fb_field.name, type)
+ Field.new(fb_field.name, type, fb_field.nullable?)
end
def read_schema(fb_schema)
@@ -199,7 +202,7 @@ module ArrowFormat
values_buffer = buffers.shift
values = body.slice(values_buffer.offset, values_buffer.length)
field.type.build_array(length, validity, values)
- when ListType
+ when VariableSizeListType
offsets_buffer = buffers.shift
offsets = body.slice(offsets_buffer.offset, offsets_buffer.length)
child = read_column(field.type.child, nodes, buffers, body)
diff --git a/ruby/red-arrow-format/lib/arrow-format/type.rb b/ruby/red-arrow-format/lib/arrow-format/type.rb
index 179a89ff4c..7726c23325 100644
--- a/ruby/red-arrow-format/lib/arrow-format/type.rb
+++ b/ruby/red-arrow-format/lib/arrow-format/type.rb
@@ -195,13 +195,20 @@ module ArrowFormat
end
end
- class ListType < Type
+ class VariableSizeListType < Type
attr_reader :child
- def initialize(child)
- super("List")
+ def initialize(name, child)
+ super(name)
@child = child
end
+ end
+
+ class ListType < VariableSizeListType
+ def initialize(child)
+ super("List", child)
+ end
+
def build_array(size, validity_buffer, offsets_buffer, child)
ListArray.new(self, size, validity_buffer, offsets_buffer, child)
end
@@ -218,4 +225,30 @@ module ArrowFormat
StructArray.new(self, size, validity_buffer, children)
end
end
+
+ class MapType < VariableSizeListType
+ def initialize(child)
+ if child.nullable?
+ raise TypeError.new("Map entry field must not be nullable: " +
+ child.inspect)
+ end
+ type = child.type
+ unless type.is_a?(StructType)
+ raise TypeError.new("Map entry type must be struct: #{type.inspect}")
+ end
+ unless type.children.size == 2
+ raise TypeError.new("Map entry struct type must have 2 children: " +
+ type.inspect)
+ end
+ if type.children[0].nullable?
+ raise TypeError.new("Map key field must not be nullable: " +
+ type.children[0].inspect)
+ end
+ super("Map", child)
+ end
+
+ def build_array(size, validity_buffer, offsets_buffer, child)
+ MapArray.new(self, size, validity_buffer, offsets_buffer, child)
+ end
+ end
end
diff --git a/ruby/red-arrow-format/test/test-file-reader.rb b/ruby/red-arrow-format/test/test-file-reader.rb
index 48c20d1f3f..9748ede47d 100644
--- a/ruby/red-arrow-format/test/test-file-reader.rb
+++ b/ruby/red-arrow-format/test/test-file-reader.rb
@@ -160,4 +160,29 @@ class TestFileReader < Test::Unit::TestCase
read)
end
end
+
+ sub_test_case("Map") do
+ def build_array
+ data_type = Arrow::MapDataType.new(:string, :int8)
+ Arrow::MapArray.new(data_type,
+ [
+ {"a" => -128, "b" => 127},
+ nil,
+ {"c" => nil},
+ ])
+ end
+
+ def test_read
+ assert_equal([
+ {
+ "value" => [
+ {"a" => -128, "b" => 127},
+ nil,
+ {"c" => nil},
+ ],
+ },
+ ],
+ read)
+ end
+ end
end