This is an automated email from the ASF dual-hosted git repository.
kou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new 6a37b9c0d5 GH-48346: [Ruby] Add support for reading boolean array
(#48348)
6a37b9c0d5 is described below
commit 6a37b9c0d51bcc7a224efcf8a39fda521ca14688
Author: Sutou Kouhei <[email protected]>
AuthorDate: Fri Dec 5 15:04:18 2025 +0900
GH-48346: [Ruby] Add support for reading boolean array (#48348)
### Rationale for this change
This is a primitive type but we need to handle bitmap for boolean values.
### What changes are included in this PR?
* Add `ArrowFormat::BooleanType`
* Add `ArrowFormat::BooleanArray`
### Are these changes tested?
Yes.
### Are there any user-facing changes?
Yes.
* GitHub Issue: #48346
Authored-by: Sutou Kouhei <[email protected]>
Signed-off-by: Sutou Kouhei <[email protected]>
---
ruby/red-arrow-format/lib/arrow-format/array.rb | 32 ++++++++++------
ruby/red-arrow-format/lib/arrow-format/bitmap.rb | 44 ++++++++++++++++++++++
.../lib/arrow-format/file-reader.rb | 6 ++-
ruby/red-arrow-format/lib/arrow-format/type.rb | 16 ++++++++
ruby/red-arrow-format/test/test-file-reader.rb | 11 ++++++
5 files changed, 96 insertions(+), 13 deletions(-)
diff --git a/ruby/red-arrow-format/lib/arrow-format/array.rb
b/ruby/red-arrow-format/lib/arrow-format/array.rb
index bf54cf8759..6d164cc0b5 100644
--- a/ruby/red-arrow-format/lib/arrow-format/array.rb
+++ b/ruby/red-arrow-format/lib/arrow-format/array.rb
@@ -14,6 +14,8 @@
# specific language governing permissions and limitations
# under the License.
+require_relative "bitmap"
+
module ArrowFormat
class Array
attr_reader :type
@@ -37,18 +39,9 @@ module ArrowFormat
private
def apply_validity(array)
return array if @validity_buffer.nil?
- n_bytes = @size / 8
- @validity_buffer.each(:U8, 0, n_bytes) do |offset, value|
- 7.times do |i|
- array[offset * 8 + i] = nil if (value & (1 << (i % 8))).zero?
- end
- end
- remained_bits = @size % 8
- unless remained_bits.zero?
- value = @validity_buffer.get_value(:U8, n_bytes)
- remained_bits.times do |i|
- array[n_bytes * 8 + i] = nil if (value & (1 << (i % 8))).zero?
- end
+ @validity_bitmap ||= Bitmap.new(@validity_buffer, @size)
+ @validity_bitmap.each_with_index do |bit, i|
+ array[i] = nil if bit.zero?
end
array
end
@@ -64,6 +57,21 @@ module ArrowFormat
end
end
+  # Array whose values arrive as a bit-packed buffer rather than one
+  # byte (or more) per element.
+  class BooleanArray < Array
+    # Passes type, size and validity buffer to the base class and keeps
+    # the values buffer for lazy decoding in #to_a.
+    def initialize(type, size, validity_buffer, values_buffer)
+      super(type, size, validity_buffer)
+      @values_buffer = values_buffer
+    end
+
+    # Decodes every value bit into true/false, then lets apply_validity
+    # post-process the resulting Ruby array.
+    def to_a
+      @values_bitmap ||= Bitmap.new(@values_buffer, @size)
+      flags = @values_bitmap.map { |bit| !bit.zero? }
+      apply_validity(flags)
+    end
+  end
+
class IntArray < Array
def initialize(type, size, validity_buffer, values_buffer)
super(type, size, validity_buffer)
diff --git a/ruby/red-arrow-format/lib/arrow-format/bitmap.rb
b/ruby/red-arrow-format/lib/arrow-format/bitmap.rb
new file mode 100644
index 0000000000..6f5b7ea801
--- /dev/null
+++ b/ruby/red-arrow-format/lib/arrow-format/bitmap.rb
@@ -0,0 +1,44 @@
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+module ArrowFormat
+  # Enumerates the bits packed into a byte buffer, least-significant bit
+  # first within each byte.
+  class Bitmap
+    include Enumerable
+
+    # buffer:   object answering each(:U8, offset, count) and
+    #           get_value(:U8, offset) -- presumably an IO::Buffer slice;
+    #           confirm against the caller.
+    # n_values: number of leading bits that are meaningful.
+    def initialize(buffer, n_values)
+      @buffer = buffer
+      @n_values = n_values
+    end
+
+    # Yields one Integer per bit, in order: 0 when the bit is clear, the
+    # non-zero mask (1 << position) when it is set. Returns an Enumerator
+    # when called without a block.
+    def each
+      return to_enum(__method__) unless block_given?
+
+      n_bytes = @n_values / 8
+      @buffer.each(:U8, 0, n_bytes) do |_offset, value|
+        # A whole byte holds 8 bits. Iterating only 7 times dropped the
+        # top bit of every byte and misaligned every value after it.
+        8.times do |i|
+          yield(value & (1 << i))
+        end
+      end
+      remained_bits = @n_values % 8
+      unless remained_bits.zero?
+        # The trailing partial byte contributes only its low bits.
+        value = @buffer.get_value(:U8, n_bytes)
+        remained_bits.times do |i|
+          yield(value & (1 << i))
+        end
+      end
+    end
+  end
+end
diff --git a/ruby/red-arrow-format/lib/arrow-format/file-reader.rb
b/ruby/red-arrow-format/lib/arrow-format/file-reader.rb
index 408343d7aa..733140a10b 100644
--- a/ruby/red-arrow-format/lib/arrow-format/file-reader.rb
+++ b/ruby/red-arrow-format/lib/arrow-format/file-reader.rb
@@ -22,6 +22,7 @@ require_relative "record-batch"
require_relative "schema"
require_relative "type"
+require_relative "org/apache/arrow/flatbuf/bool"
require_relative "org/apache/arrow/flatbuf/footer"
require_relative "org/apache/arrow/flatbuf/message"
require_relative "org/apache/arrow/flatbuf/binary"
@@ -134,6 +135,8 @@ module ArrowFormat
case fb_type
when Org::Apache::Arrow::Flatbuf::Null
type = NullType.singleton
+ when Org::Apache::Arrow::Flatbuf::Bool
+ type = BooleanType.singleton
when Org::Apache::Arrow::Flatbuf::Int
case fb_type.bit_width
when 8
@@ -164,7 +167,8 @@ module ArrowFormat
end
case field.type
- when Int8Type,
+ when BooleanType,
+ Int8Type,
UInt8Type
values_buffer = buffers.shift
values = body.slice(values_buffer.offset, values_buffer.length)
diff --git a/ruby/red-arrow-format/lib/arrow-format/type.rb
b/ruby/red-arrow-format/lib/arrow-format/type.rb
index aa36fa2bae..22a246aeab 100644
--- a/ruby/red-arrow-format/lib/arrow-format/type.rb
+++ b/ruby/red-arrow-format/lib/arrow-format/type.rb
@@ -38,6 +38,22 @@ module ArrowFormat
end
end
+  # Type object describing boolean columns.
+  class BooleanType < Type
+    # Returns the single shared instance, creating it on first use.
+    def self.singleton
+      @singleton ||= new
+    end
+
+    def initialize
+      super("Boolean")
+    end
+
+    # Wraps the given buffers in a BooleanArray that reports this type.
+    def build_array(size, validity_buffer, values_buffer)
+      BooleanArray.new(self, size, validity_buffer, values_buffer)
+    end
+  end
+
class IntType < Type
attr_reader :bit_width
attr_reader :signed
diff --git a/ruby/red-arrow-format/test/test-file-reader.rb
b/ruby/red-arrow-format/test/test-file-reader.rb
index 67dba14cb6..0029a57887 100644
--- a/ruby/red-arrow-format/test/test-file-reader.rb
+++ b/ruby/red-arrow-format/test/test-file-reader.rb
@@ -51,6 +51,17 @@ class TestFileReader < Test::Unit::TestCase
end
end
+ # Boolean column containing a null; `read` is defined outside this hunk
+ # (presumably it writes build_array to a file and reads it back -- confirm).
+ sub_test_case("Boolean") do
+ def build_array
+ Arrow::BooleanArray.new([true, nil, false])
+ end
+
+ def test_read
+ # NOTE(review): only three values, so the whole-byte loop in
+ # ArrowFormat::Bitmap#each runs zero times here; a fixture with 8 or
+ # more values would also cover the full-byte path.
+ assert_equal([{"value" => [true, nil, false]}],
+ read)
+ end
+ end
+
sub_test_case("Int8") do
def build_array
Arrow::Int8Array.new([-128, nil, 127])