This is an automated email from the ASF dual-hosted git repository.
kou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new c5b3622b46 GH-48910: [Ruby] Add support for writing int8/uint8 arrays
(#48911)
c5b3622b46 is described below
commit c5b3622b46c8520ee3822fb10014523f892fed8b
Author: Sutou Kouhei <[email protected]>
AuthorDate: Wed Jan 21 10:19:18 2026 +0900
GH-48910: [Ruby] Add support for writing int8/uint8 arrays (#48911)
### Rationale for this change
Int8 and UInt8 arrays are the 8-bit integer array variants.
### What changes are included in this PR?
* Rename `#to_flat_buffers` to `#to_flatbuffers`
* Add `ArrowFormat::Int8Type#to_flatbuffers`
* Add `ArrowFormat::UInt8Type#to_flatbuffers`
### Are these changes tested?
Yes.
### Are there any user-facing changes?
Yes. `#to_flat_buffers` is renamed to `#to_flatbuffers`, and int8/uint8 arrays can now be written.
* GitHub Issue: #48910
Authored-by: Sutou Kouhei <[email protected]>
Signed-off-by: Sutou Kouhei <[email protected]>
---
ruby/red-arrow-format/lib/arrow-format/field.rb | 8 +++---
.../lib/arrow-format/file-writer.rb | 2 +-
.../lib/arrow-format/record-batch.rb | 2 +-
ruby/red-arrow-format/lib/arrow-format/schema.rb | 4 +--
.../lib/arrow-format/streaming-writer.rb | 4 +--
ruby/red-arrow-format/lib/arrow-format/type.rb | 25 ++++++++++++++---
ruby/red-arrow-format/test/test-writer.rb | 32 +++++++++++++++++++++-
7 files changed, 62 insertions(+), 15 deletions(-)
diff --git a/ruby/red-arrow-format/lib/arrow-format/field.rb
b/ruby/red-arrow-format/lib/arrow-format/field.rb
index f2c9181c3d..fc5639bb66 100644
--- a/ruby/red-arrow-format/lib/arrow-format/field.rb
+++ b/ruby/red-arrow-format/lib/arrow-format/field.rb
@@ -30,12 +30,12 @@ module ArrowFormat
@nullable
end
- def to_flat_buffers
+ def to_flatbuffers
fb_field = FB::Field::Data.new
fb_field.name = @name
fb_field.nullable = @nullable
if @type.is_a?(DictionaryType)
- fb_field.type = @type.value_type.to_flat_buffers
+ fb_field.type = @type.value_type.to_flatbuffers
dictionary_encoding = FB::DictionaryEncoding::Data.new
dictionary_encoding.id = @dictionary_id
int = FB::Int::Data.new
@@ -47,10 +47,10 @@ module ArrowFormat
FB::DictionaryKind::DENSE_ARRAY
fb_field.dictionary = dictionary
else
- fb_field.type = @type.to_flat_buffers
+ fb_field.type = @type.to_flatbuffers
end
if @type.respond_to?(:children)
- fb_field.children = @type.children.collect(&:to_flat_buffers)
+ fb_field.children = @type.children.collect(&:to_flatbuffers)
end
# fb_field.custom_metadata = @custom_metadata
fb_field
diff --git a/ruby/red-arrow-format/lib/arrow-format/file-writer.rb
b/ruby/red-arrow-format/lib/arrow-format/file-writer.rb
index b33de02fe0..8509be59b6 100644
--- a/ruby/red-arrow-format/lib/arrow-format/file-writer.rb
+++ b/ruby/red-arrow-format/lib/arrow-format/file-writer.rb
@@ -23,7 +23,7 @@ module ArrowFormat
MAGIC_PADDING = "\x00\x00"
def start(schema)
- @fb_schema = schema.to_flat_buffers
+ @fb_schema = schema.to_flatbuffers
write_data(MAGIC)
write_data(MAGIC_PADDING)
super
diff --git a/ruby/red-arrow-format/lib/arrow-format/record-batch.rb
b/ruby/red-arrow-format/lib/arrow-format/record-batch.rb
index 51e0583f0e..cf925eebdf 100644
--- a/ruby/red-arrow-format/lib/arrow-format/record-batch.rb
+++ b/ruby/red-arrow-format/lib/arrow-format/record-batch.rb
@@ -37,7 +37,7 @@ module ArrowFormat
hash
end
- def to_flat_buffers
+ def to_flatbuffers
fb_record_batch = FB::RecordBatch::Data.new
fb_record_batch.length = @n_rows
fb_record_batch.nodes = all_columns_enumerator.collect do |array|
diff --git a/ruby/red-arrow-format/lib/arrow-format/schema.rb
b/ruby/red-arrow-format/lib/arrow-format/schema.rb
index 841f60afad..aba175a39c 100644
--- a/ruby/red-arrow-format/lib/arrow-format/schema.rb
+++ b/ruby/red-arrow-format/lib/arrow-format/schema.rb
@@ -21,10 +21,10 @@ module ArrowFormat
@fields = fields
end
- def to_flat_buffers
+ def to_flatbuffers
fb_schema = FB::Schema::Data.new
fb_schema.endianness = FB::Endianness::LITTLE
- fb_schema.fields = fields.collect(&:to_flat_buffers)
+ fb_schema.fields = fields.collect(&:to_flatbuffers)
# fb_schema.custom_metadata = @custom_metadata
# fb_schema.features = @features
fb_schema
diff --git a/ruby/red-arrow-format/lib/arrow-format/streaming-writer.rb
b/ruby/red-arrow-format/lib/arrow-format/streaming-writer.rb
index a9e323b675..313c1b38ad 100644
--- a/ruby/red-arrow-format/lib/arrow-format/streaming-writer.rb
+++ b/ruby/red-arrow-format/lib/arrow-format/streaming-writer.rb
@@ -33,7 +33,7 @@ module ArrowFormat
end
def start(schema)
- write_message(build_metadata(schema.to_flat_buffers))
+ write_message(build_metadata(schema.to_flatbuffers))
# TODO: Write dictionaries
end
@@ -42,7 +42,7 @@ module ArrowFormat
record_batch.all_buffers_enumerator.each do |buffer|
body_length += aligned_buffer_size(buffer) if buffer
end
- metadata = build_metadata(record_batch.to_flat_buffers, body_length)
+ metadata = build_metadata(record_batch.to_flatbuffers, body_length)
fb_block = FB::Block::Data.new
fb_block.offset = @offset
fb_block.meta_data_length =
diff --git a/ruby/red-arrow-format/lib/arrow-format/type.rb
b/ruby/red-arrow-format/lib/arrow-format/type.rb
index b6db79327d..109d944254 100644
--- a/ruby/red-arrow-format/lib/arrow-format/type.rb
+++ b/ruby/red-arrow-format/lib/arrow-format/type.rb
@@ -33,12 +33,15 @@ module ArrowFormat
NullArray.new(self, size)
end
- def to_flat_buffers
+ def to_flatbuffers
FB::Null::Data.new
end
end
- class BooleanType < Type
+ class PrimitiveType < Type
+ end
+
+ class BooleanType < PrimitiveType
class << self
def singleton
@singleton ||= new
@@ -53,12 +56,12 @@ module ArrowFormat
BooleanArray.new(self, size, validity_buffer, values_buffer)
end
- def to_flat_buffers
+ def to_flatbuffers
FB::Bool::Data.new
end
end
- class NumberType < Type
+ class NumberType < PrimitiveType
end
class IntType < NumberType
@@ -96,6 +99,13 @@ module ArrowFormat
def build_array(size, validity_buffer, values_buffer)
Int8Array.new(self, size, validity_buffer, values_buffer)
end
+
+ def to_flatbuffers
+ fb_type = FB::Int::Data.new
+ fb_type.bit_width = 8
+ fb_type.signed = true
+ fb_type
+ end
end
class UInt8Type < IntType
@@ -120,6 +130,13 @@ module ArrowFormat
def build_array(size, validity_buffer, values_buffer)
UInt8Array.new(self, size, validity_buffer, values_buffer)
end
+
+ def to_flatbuffers
+ fb_type = FB::Int::Data.new
+ fb_type.bit_width = 8
+ fb_type.signed = false
+ fb_type
+ end
end
class Int16Type < IntType
diff --git a/ruby/red-arrow-format/test/test-writer.rb
b/ruby/red-arrow-format/test/test-writer.rb
index 82b27301e7..f2313e64a7 100644
--- a/ruby/red-arrow-format/test/test-writer.rb
+++ b/ruby/red-arrow-format/test/test-writer.rb
@@ -22,6 +22,12 @@ module WriterTests
ArrowFormat::NullType.singleton
when Arrow::BooleanDataType
ArrowFormat::BooleanType.singleton
+ when Arrow::Int8DataType
+ ArrowFormat::Int8Type.singleton
+ when Arrow::UInt8DataType
+ ArrowFormat::UInt8Type.singleton
+ else
+ raise "Unsupported type: #{red_arrow_type.inspect}"
end
end
@@ -35,10 +41,12 @@ module WriterTests
case type
when ArrowFormat::NullType
type.build_array(red_arrow_array.size)
- when ArrowFormat::BooleanType
+ when ArrowFormat::PrimitiveType
type.build_array(red_arrow_array.size,
convert_buffer(red_arrow_array.null_bitmap),
convert_buffer(red_arrow_array.data_buffer))
+ else
+ raise "Unsupported array #{red_arrow_array.inspect}"
end
end
@@ -66,6 +74,28 @@ module WriterTests
@values)
end
end
+
+ sub_test_case("Int8") do
+ def build_array
+ Arrow::Int8Array.new([-128, nil, 127])
+ end
+
+ def test_write
+ assert_equal([-128, nil, 127],
+ @values)
+ end
+ end
+
+ sub_test_case("UInt8") do
+ def build_array
+ Arrow::UInt8Array.new([0, nil, 255])
+ end
+
+ def test_write
+ assert_equal([0, nil, 255],
+ @values)
+ end
+ end
end
end
end