This is an automated email from the ASF dual-hosted git repository.
kou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new dbca9584c0 GH-49096: [Ruby] Add support for writing struct array (#49097)
dbca9584c0 is described below
commit dbca9584c0d3b1c0df6abf7259dc2e62f612d6af
Author: Sutou Kouhei <[email protected]>
AuthorDate: Mon Feb 2 20:35:00 2026 +0900
GH-49096: [Ruby] Add support for writing struct array (#49097)
### Rationale for this change
A struct array is a nested array type, and the writer did not support it yet.
### What changes are included in this PR?
* Add `ArrowFormat::StructType#to_flatbuffers`
* Add `ArrowFormat::StructArray#each_buffer`
* Add `ArrowFormat::StructArray#children`
* Fix `ArrowFormat::Array#n_nulls` so that it counts null (invalid) slots rather than valid ones
### Are these changes tested?
Yes.
### Are there any user-facing changes?
Yes.
* GitHub Issue: #49096
Authored-by: Sutou Kouhei <[email protected]>
Signed-off-by: Sutou Kouhei <[email protected]>
---
ruby/red-arrow-format/lib/arrow-format/array.rb | 22 ++++++++++-------
ruby/red-arrow-format/lib/arrow-format/bitmap.rb | 4 ++--
ruby/red-arrow-format/lib/arrow-format/type.rb | 4 ++++
ruby/red-arrow-format/test/test-writer.rb | 30 ++++++++++++++++++++++++
4 files changed, 50 insertions(+), 10 deletions(-)
diff --git a/ruby/red-arrow-format/lib/arrow-format/array.rb b/ruby/red-arrow-format/lib/arrow-format/array.rb
index df1356c614..8c0620cdfb 100644
--- a/ruby/red-arrow-format/lib/arrow-format/array.rb
+++ b/ruby/red-arrow-format/lib/arrow-format/array.rb
@@ -23,6 +23,7 @@ module ArrowFormat
attr_reader :type
attr_reader :size
alias_method :length, :size
+ attr_reader :validity_buffer
def initialize(type, size, validity_buffer)
@type = type
@size = size
@@ -31,7 +32,7 @@ module ArrowFormat
def valid?(i)
return true if @validity_buffer.nil?
- validity_bitmap[i] == 1
+ validity_bitmap[i]
end
def null?(i)
@@ -43,8 +44,8 @@ module ArrowFormat
0
else
# TODO: popcount
- validity_bitmap.count do |bit|
- bit == 1
+ validity_bitmap.count do |is_valid|
+ not is_valid
end
end
end
@@ -56,8 +57,8 @@ module ArrowFormat
def apply_validity(array)
return array if @validity_buffer.nil?
- validity_bitmap.each_with_index do |bit, i|
- array[i] = nil if bit.zero?
+ validity_bitmap.each_with_index do |is_valid, i|
+ array[i] = nil unless is_valid
end
array
end
@@ -94,9 +95,7 @@ module ArrowFormat
class BooleanArray < PrimitiveArray
def to_a
@values_bitmap ||= Bitmap.new(@values_buffer, @size)
- values = @values_bitmap.each.collect do |bit|
- not bit.zero?
- end
+ values = @values_bitmap.to_a
apply_validity(values)
end
end
@@ -411,11 +410,18 @@ module ArrowFormat
end
class StructArray < Array
+ attr_reader :children
def initialize(type, size, validity_buffer, children)
super(type, size, validity_buffer)
@children = children
end
+ def each_buffer(&block)
+ return to_enum(__method__) unless block_given?
+
+ yield(@validity_buffer)
+ end
+
def to_a
if @children.empty?
values = [[]] * @size
diff --git a/ruby/red-arrow-format/lib/arrow-format/bitmap.rb b/ruby/red-arrow-format/lib/arrow-format/bitmap.rb
index 5cff7e63d2..0cd517a37f 100644
--- a/ruby/red-arrow-format/lib/arrow-format/bitmap.rb
+++ b/ruby/red-arrow-format/lib/arrow-format/bitmap.rb
@@ -33,14 +33,14 @@ module ArrowFormat
n_bytes = @n_values / 8
@buffer.each(:U8, 0, n_bytes) do |offset, value|
7.times do |i|
- yield(value & (1 << (i % 8)))
+ yield((value & (1 << (i % 8))) > 0)
end
end
remained_bits = @n_values % 8
unless remained_bits.zero?
value = @buffer.get_value(:U8, n_bytes)
remained_bits.times do |i|
- yield(value & (1 << (i % 8)))
+ yield((value & (1 << (i % 8))) > 0)
end
end
end
diff --git a/ruby/red-arrow-format/lib/arrow-format/type.rb b/ruby/red-arrow-format/lib/arrow-format/type.rb
index 50c392f270..5be6a506d7 100644
--- a/ruby/red-arrow-format/lib/arrow-format/type.rb
+++ b/ruby/red-arrow-format/lib/arrow-format/type.rb
@@ -751,6 +751,10 @@ module ArrowFormat
def build_array(size, validity_buffer, children)
StructArray.new(self, size, validity_buffer, children)
end
+
+ def to_flatbuffers
+ FB::Struct::Data.new
+ end
end
class MapType < VariableSizeListType
diff --git a/ruby/red-arrow-format/test/test-writer.rb b/ruby/red-arrow-format/test/test-writer.rb
index bf05f20e4e..17c0b9ede1 100644
--- a/ruby/red-arrow-format/test/test-writer.rb
+++ b/ruby/red-arrow-format/test/test-writer.rb
@@ -87,6 +87,11 @@ module WriterTests
ArrowFormat::ListType.new(convert_field(red_arrow_type.field))
when Arrow::LargeListDataType
ArrowFormat::LargeListType.new(convert_field(red_arrow_type.field))
+ when Arrow::StructDataType
+ fields = red_arrow_type.fields.collect do |field|
+ convert_field(field)
+ end
+ ArrowFormat::StructType.new(fields)
else
raise "Unsupported type: #{red_arrow_type.inspect}"
end
@@ -127,6 +132,13 @@ module WriterTests
convert_buffer(red_arrow_array.null_bitmap),
convert_buffer(red_arrow_array.value_offsets_buffer),
convert_array(red_arrow_array.values_raw))
+ when ArrowFormat::StructType
+ children = red_arrow_array.fields.collect do |red_arrow_field|
+ convert_array(red_arrow_field)
+ end
+ type.build_array(red_arrow_array.size,
+ convert_buffer(red_arrow_array.null_bitmap),
+ children)
else
raise "Unsupported array #{red_arrow_array.inspect}"
end
@@ -748,6 +760,24 @@ module WriterTests
@values)
end
end
+
+ sub_test_case("Struct") do
+ def build_array
+ data_type = Arrow::StructDataType.new(count: :int8,
+ visible: :boolean)
+ Arrow::StructArray.new(data_type,
+ [[-128, nil], nil, [nil, true]])
+ end
+
+ def test_write
+ assert_equal([
+ {"count" => -128, "visible" => nil},
+ nil,
+ {"count" => nil, "visible" => true},
+ ],
+ @values)
+ end
+ end
end
end
end