This is an automated email from the ASF dual-hosted git repository.

kou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
     new bc48921816 GH-49208: [Ruby] Add support for writing dictionary delta 
message (#49209)
bc48921816 is described below

commit bc489218164218387c9728b89497c80d41eb1c00
Author: Sutou Kouhei <[email protected]>
AuthorDate: Wed Feb 11 09:07:38 2026 +0900

    GH-49208: [Ruby] Add support for writing dictionary delta message (#49209)
    
    ### Rationale for this change
    
    This change focuses on implementing the basic mechanism for dictionary 
delta message support, so it only supports UTF-8 arrays as dictionaries. 
Other array types will be supported in follow-up tasks.
    
    ### What changes are included in this PR?
    
    * Add support for `ArrowFormat::Array#slice` (it isn't complete yet; it 
only works partially).
    * If the second record batch includes an updated dictionary (new entries 
are appended), only the appended entries are sliced out and written as a 
delta.
    
    ### Are these changes tested?
    
    Yes.
    
    ### Are there any user-facing changes?
    
    Yes.
    * GitHub Issue: #49208
    
    Authored-by: Sutou Kouhei <[email protected]>
    Signed-off-by: Sutou Kouhei <[email protected]>
---
 ruby/red-arrow-format/lib/arrow-format/array.rb    |  310 +++--
 ruby/red-arrow-format/lib/arrow-format/bitmap.rb   |   12 +-
 .../lib/arrow-format/streaming-writer.rb           |    3 +-
 ruby/red-arrow-format/lib/arrow-format/type.rb     |   88 ++
 ruby/red-arrow-format/test/test-writer.rb          | 1325 +++++++++-----------
 5 files changed, 926 insertions(+), 812 deletions(-)

diff --git a/ruby/red-arrow-format/lib/arrow-format/array.rb 
b/ruby/red-arrow-format/lib/arrow-format/array.rb
index 73e87cf721..87dbd0e0d6 100644
--- a/ruby/red-arrow-format/lib/arrow-format/array.rb
+++ b/ruby/red-arrow-format/lib/arrow-format/array.rb
@@ -23,11 +23,20 @@ module ArrowFormat
     attr_reader :type
     attr_reader :size
     alias_method :length, :size
+    attr_reader :offset
     attr_reader :validity_buffer
     def initialize(type, size, validity_buffer)
       @type = type
       @size = size
+      @offset = 0
       @validity_buffer = validity_buffer
+      @sliced_buffers = {}
+    end
+
+    def slice(offset, size=nil)
+      sliced = dup
+      sliced.slice!(@offset + offset, size || @size - offset)
+      sliced
     end
 
     def valid?(i)
@@ -43,16 +52,20 @@ module ArrowFormat
       if @validity_buffer.nil?
         0
       else
-        # TODO: popcount
-        validity_bitmap.count do |is_valid|
-          not is_valid
-        end
+        @size - validity_bitmap.popcount
       end
     end
 
+    protected
+    def slice!(offset, size)
+      @offset = offset
+      @size = size
+      clear_cache
+    end
+
     private
     def validity_bitmap
-      @validity_bitmap ||= Bitmap.new(@validity_buffer, @size)
+      @validity_bitmap ||= Bitmap.new(@validity_buffer, @offset, @size)
     end
 
     def apply_validity(array)
@@ -62,6 +75,63 @@ module ArrowFormat
       end
       array
     end
+
+    def clear_cache
+      @validity_bitmap = nil
+      @sliced_buffers = {}
+    end
+
+    def slice_buffer(id, buffer)
+      return buffer if buffer.nil?
+      return buffer if @offset.zero?
+
+      @sliced_buffers[id] ||= yield(buffer)
+    end
+
+    def slice_bitmap_buffer(id, buffer)
+      slice_buffer(id, buffer) do
+        if (@offset % 8).zero?
+          buffer.slice(@offset / 8)
+        else
+          # We need to copy because we can't do bit level slice.
+          # TODO: Optimize.
+          valid_bytes = []
+          Bitmap.new(buffer, @offset, @size).each_slice(8) do |valids|
+            valid_byte = 0
+            valids.each_with_index do |valid, i|
+              valid_byte |= 1 << (i % 8) if valid
+            end
+            valid_bytes << valid_byte
+          end
+          IO::Buffer.for(valid_bytes.pack("C*"))
+        end
+      end
+    end
+
+    def slice_fixed_element_size_buffer(id, buffer, element_size)
+      slice_buffer(id, buffer) do
+        buffer.slice(element_size * @offset)
+      end
+    end
+
+    def slice_offsets_buffer(id, buffer, buffer_type)
+      slice_buffer(id, buffer) do
+        offset_size = IO::Buffer.size_of(buffer_type)
+        buffer_offset = offset_size * (@offset - 1)
+        first_offset = buffer.get_value(buffer_type, buffer_offset)
+        # TODO: Optimize
+        sliced_buffer = IO::Buffer.new(offset_size * (@size + 1))
+        buffer.each(buffer_type,
+                    buffer_offset,
+                    @size + 1).with_index do |(_, offset), i|
+          new_offset = offset - first_offset
+          sliced_buffer.set_value(buffer_type,
+                                  offset_size * i,
+                                  new_offset)
+        end
+        sliced_buffer
+      end
+    end
   end
 
   class NullArray < Array
@@ -84,26 +154,48 @@ module ArrowFormat
       @values_buffer = values_buffer
     end
 
+    def to_a
+      offset = element_size * @offset
+      apply_validity(@values_buffer.values(@type.buffer_type, offset, @size))
+    end
+
     def each_buffer
       return to_enum(__method__) unless block_given?
 
-      yield(@validity_buffer)
-      yield(@values_buffer)
+      yield(slice_bitmap_buffer(:validity, @validity_buffer))
+      yield(slice_fixed_element_size_buffer(:values,
+                                            @values_buffer,
+                                            element_size))
+    end
+
+    private
+    def element_size
+      IO::Buffer.size_of(@type.buffer_type)
     end
   end
 
   class BooleanArray < PrimitiveArray
     def to_a
-      @values_bitmap ||= Bitmap.new(@values_buffer, @size)
+      @values_bitmap ||= Bitmap.new(@values_buffer, @offset, @size)
       values = @values_bitmap.to_a
       apply_validity(values)
     end
+
+    def each_buffer
+      return to_enum(__method__) unless block_given?
+
+      yield(slice_bitmap_buffer(:validity, @validity_buffer))
+      yield(slice_bitmap_buffer(:values, @values_buffer))
+    end
+
+    private
+    def clear_cache
+      super
+      @values_bitmap = nil
+    end
   end
 
   class IntArray < PrimitiveArray
-    def to_a
-      apply_validity(@values_buffer.values(@type.buffer_type, 0, @size))
-    end
   end
 
   class Int8Array < IntArray
@@ -134,15 +226,9 @@ module ArrowFormat
   end
 
   class Float32Array < FloatingPointArray
-    def to_a
-      apply_validity(@values_buffer.values(:f32, 0, @size))
-    end
   end
 
   class Float64Array < FloatingPointArray
-    def to_a
-      apply_validity(@values_buffer.values(:f64, 0, @size))
-    end
   end
 
   class TemporalArray < PrimitiveArray
@@ -152,51 +238,34 @@ module ArrowFormat
   end
 
   class Date32Array < DateArray
-    def to_a
-      apply_validity(@values_buffer.values(:s32, 0, @size))
-    end
   end
 
   class Date64Array < DateArray
-    def to_a
-      apply_validity(@values_buffer.values(:s64, 0, @size))
-    end
   end
 
   class TimeArray < TemporalArray
   end
 
   class Time32Array < TimeArray
-    def to_a
-      apply_validity(@values_buffer.values(:s32, 0, @size))
-    end
   end
 
   class Time64Array < TimeArray
-    def to_a
-      apply_validity(@values_buffer.values(:s64, 0, @size))
-    end
   end
 
   class TimestampArray < TemporalArray
-    def to_a
-      apply_validity(@values_buffer.values(:s64, 0, @size))
-    end
   end
 
   class IntervalArray < TemporalArray
   end
 
   class YearMonthIntervalArray < IntervalArray
-    def to_a
-      apply_validity(@values_buffer.values(:s32, 0, @size))
-    end
   end
 
   class DayTimeIntervalArray < IntervalArray
     def to_a
+      offset = element_size * @offset
       values = @values_buffer.
-                 each(:s32, 0, @size * 2).
+                 each(@type.buffer_type, offset, @size * 2).
                  each_slice(2).
                  collect do |(_, day), (_, time)|
         [day, time]
@@ -207,20 +276,23 @@ module ArrowFormat
 
   class MonthDayNanoIntervalArray < IntervalArray
     def to_a
-      buffer_types = [:s32, :s32, :s64]
+      buffer_types = @type.buffer_types
       value_size = IO::Buffer.size_of(buffer_types)
+      base_offset = value_size * @offset
       values = @size.times.collect do |i|
-        offset = value_size * i
+        offset = base_offset + value_size * i
         @values_buffer.get_values(buffer_types, offset)
       end
       apply_validity(values)
     end
+
+    private
+    def element_size
+      IO::Buffer.size_of(@type.buffer_types)
+    end
   end
 
   class DurationArray < TemporalArray
-    def to_a
-      apply_validity(@values_buffer.values(:s64, 0, @size))
-    end
   end
 
   class VariableSizeBinaryLayoutArray < Array
@@ -233,65 +305,45 @@ module ArrowFormat
     def each_buffer
       return to_enum(__method__) unless block_given?
 
-      yield(@validity_buffer)
-      yield(@offsets_buffer)
-      yield(@values_buffer)
+      yield(slice_bitmap_buffer(:validity, @validity_buffer))
+      yield(slice_offsets_buffer(:offsets,
+                                 @offsets_buffer,
+                                 @type.offset_buffer_type))
+      sliced_values_buffer = slice_buffer(:values, @values_buffer) do
+        first_offset = @offsets_buffer.get_value(@type.offset_buffer_type,
+                                                 offset_size * @offset)
+        @values_buffer.slice(first_offset)
+      end
+      yield(sliced_values_buffer)
     end
 
     def to_a
       values = @offsets_buffer.
-        each(buffer_type, 0, @size + 1).
+        each(@type.offset_buffer_type, offset_size * @offset, @size + 1).
         each_cons(2).
         collect do |(_, offset), (_, next_offset)|
         length = next_offset - offset
-        @values_buffer.get_string(offset, length, encoding)
+        @values_buffer.get_string(offset, length, @type.encoding)
       end
       apply_validity(values)
     end
-  end
 
-  class BinaryArray < VariableSizeBinaryLayoutArray
     private
-    def buffer_type
-      :s32 # TODO: big endian support
+    def offset_size
+      IO::Buffer.size_of(@type.offset_buffer_type)
     end
+  end
 
-    def encoding
-      Encoding::ASCII_8BIT
-    end
+  class BinaryArray < VariableSizeBinaryLayoutArray
   end
 
   class LargeBinaryArray < VariableSizeBinaryLayoutArray
-    private
-    def buffer_type
-      :s64 # TODO: big endian support
-    end
-
-    def encoding
-      Encoding::ASCII_8BIT
-    end
   end
 
   class UTF8Array < VariableSizeBinaryLayoutArray
-    private
-    def buffer_type
-      :s32 # TODO: big endian support
-    end
-
-    def encoding
-      Encoding::UTF_8
-    end
   end
 
   class LargeUTF8Array < VariableSizeBinaryLayoutArray
-    private
-    def buffer_type
-      :s64 # TODO: big endian support
-    end
-
-    def encoding
-      Encoding::UTF_8
-    end
   end
 
   class FixedSizeBinaryArray < Array
@@ -303,8 +355,10 @@ module ArrowFormat
     def each_buffer
       return to_enum(__method__) unless block_given?
 
-      yield(@validity_buffer)
-      yield(@values_buffer)
+      yield(slice_bitmap_buffer(:validity, @validity_buffer))
+      yield(slice_fixed_element_size_buffer(:values,
+                                            @values_buffer,
+                                            @type.byte_width))
     end
 
     def to_a
@@ -320,8 +374,9 @@ module ArrowFormat
     def to_a
       byte_width = @type.byte_width
       buffer_types = [:u64] * (byte_width / 8 - 1) + [:s64]
+      base_offset = byte_width * @offset
       values = 0.step(@size * byte_width - 1, byte_width).collect do |offset|
-        @values_buffer.get_values(buffer_types, offset)
+        @values_buffer.get_values(buffer_types, base_offset + offset)
       end
       apply_validity(values).collect do |value|
         if value.nil?
@@ -379,34 +434,44 @@ module ArrowFormat
     def each_buffer(&block)
       return to_enum(__method__) unless block_given?
 
-      yield(@validity_buffer)
-      yield(@offsets_buffer)
+      yield(slice_bitmap_buffer(:validity, @validity_buffer))
+      yield(slice_offsets_buffer(:offsets,
+                                 @offsets_buffer,
+                                 @type.offset_buffer_type))
     end
 
     def to_a
       child_values = @child.to_a
       values = @offsets_buffer.
-        each(offset_type, 0, @size + 1).
+        each(@type.offset_buffer_type, offset_size * @offset, @size + 1).
         each_cons(2).
         collect do |(_, offset), (_, next_offset)|
         child_values[offset...next_offset]
       end
       apply_validity(values)
     end
-  end
 
-  class ListArray < VariableSizeListArray
     private
-    def offset_type
-      :s32 # TODO: big endian support
+    def offset_size
+      IO::Buffer.size_of(@type.offset_buffer_type)
     end
+
+    def slice!(offset, size)
+      super
+      first_offset =
+        @offsets_buffer.get_value(@type.offset_buffer_type,
+                                  offset_size * @offset)
+      last_offset =
+        @offsets_buffer.get_value(@type.offset_buffer_type,
+                                  offset_size * (@offset + @size + 1))
+      @child = @child.slice(first_offset, last_offset - first_offset)
+    end
+  end
+
+  class ListArray < VariableSizeListArray
   end
 
   class LargeListArray < VariableSizeListArray
-    private
-    def offset_type
-      :s64 # TODO: big endian support
-    end
   end
 
   class StructArray < Array
@@ -419,7 +484,7 @@ module ArrowFormat
     def each_buffer(&block)
       return to_enum(__method__) unless block_given?
 
-      yield(@validity_buffer)
+      yield(slice_bitmap_buffer(:validity, @validity_buffer))
     end
 
     def to_a
@@ -431,6 +496,14 @@ module ArrowFormat
       end
       apply_validity(values)
     end
+
+    private
+    def slice!(offset, size)
+      super
+      @children = @children.collect do |child|
+        child.slice(offset, size)
+      end
+    end
   end
 
   class MapArray < VariableSizeListArray
@@ -447,11 +520,6 @@ module ArrowFormat
         end
       end
     end
-
-    private
-    def offset_type
-      :s32 # TODO: big endian support
-    end
   end
 
   class UnionArray < Array
@@ -461,6 +529,15 @@ module ArrowFormat
       @types_buffer = types_buffer
       @children = children
     end
+
+    private
+    def type_buffer_type
+      :S8
+    end
+
+    def type_element_size
+      IO::Buffer.size_of(type_buffer_type)
+    end
   end
 
   class DenseUnionArray < UnionArray
@@ -476,35 +553,61 @@ module ArrowFormat
     def each_buffer(&block)
       return to_enum(__method__) unless block_given?
 
+      # TODO: Dictionary delta support (slice support)
       yield(@types_buffer)
       yield(@offsets_buffer)
     end
 
     def to_a
       children_values = @children.collect(&:to_a)
-      types = @types_buffer.each(:S8, 0, @size)
-      offsets = @offsets_buffer.each(:s32, 0, @size)
+      types = @types_buffer.each(type_buffer_type,
+                                 type_element_size * @offset,
+                                 @size)
+      offsets = @offsets_buffer.each(:s32,
+                                     offset_element_size * @offset,
+                                     @size)
       types.zip(offsets).collect do |(_, type), (_, offset)|
         index = @type.resolve_type_index(type)
         children_values[index][offset]
       end
     end
+
+    private
+    def offset_buffer_type
+      :s32
+    end
+
+    def offset_element_size
+      IO::Buffer.size_of(offset_buffer_type)
+    end
   end
 
   class SparseUnionArray < UnionArray
     def each_buffer(&block)
       return to_enum(__method__) unless block_given?
 
-      yield(@types_buffer)
+      yield(slice_fixed_element_size_buffer(:types,
+                                            @types_buffer,
+                                            type_element_size))
     end
 
     def to_a
       children_values = @children.collect(&:to_a)
-      @types_buffer.each(:S8, 0, @size).with_index.collect do |(_, type), i|
+      @types_buffer.each(type_buffer_type,
+                         type_element_size * @offset,
+                         @size).with_index.collect do |(_, type), i|
         index = @type.resolve_type_index(type)
         children_values[index][i]
       end
     end
+
+    private
+    def slice!(offset, size)
+      super
+      @children = @children.collect do |child|
+        child.slice(offset, size)
+      end
+    end
   end
 
   class DictionaryArray < Array
@@ -516,6 +619,7 @@ module ArrowFormat
       @dictionary = dictionary
     end
 
+    # TODO: Slice support
     def each_buffer
       return to_enum(__method__) unless block_given?
 
@@ -529,7 +633,9 @@ module ArrowFormat
         values.concat(dictionary_chunk.to_a)
       end
       buffer_type = @type.index_type.buffer_type
-      indices = apply_validity(@indices_buffer.values(buffer_type, 0, @size))
+      offset = IO::Buffer.size_of(buffer_type) * @offset
+      indices =
+        apply_validity(@indices_buffer.values(buffer_type, offset, @size))
       indices.collect do |index|
         if index.nil?
           nil
diff --git a/ruby/red-arrow-format/lib/arrow-format/bitmap.rb 
b/ruby/red-arrow-format/lib/arrow-format/bitmap.rb
index 88a1ab2ff4..17d7db872e 100644
--- a/ruby/red-arrow-format/lib/arrow-format/bitmap.rb
+++ b/ruby/red-arrow-format/lib/arrow-format/bitmap.rb
@@ -18,15 +18,18 @@ module ArrowFormat
   class Bitmap
     include Enumerable
 
-    def initialize(buffer, n_values)
+    def initialize(buffer, offset, n_values)
       @buffer = buffer
+      @offset = offset
       @n_values = n_values
     end
 
     def [](i)
+      i += @offset
       (@buffer.get_value(:U8, i / 8) & (1 << (i % 8))) > 0
     end
 
+    # TODO: offset support
     def each
       return to_enum(__method__) unless block_given?
 
@@ -44,5 +47,12 @@ module ArrowFormat
         end
       end
     end
+
+    def popcount
+      # TODO: Optimize
+      count do |flaged|
+        flaged
+      end
+    end
   end
 end
diff --git a/ruby/red-arrow-format/lib/arrow-format/streaming-writer.rb 
b/ruby/red-arrow-format/lib/arrow-format/streaming-writer.rb
index 2f8f90b706..d63016a25b 100644
--- a/ruby/red-arrow-format/lib/arrow-format/streaming-writer.rb
+++ b/ruby/red-arrow-format/lib/arrow-format/streaming-writer.rb
@@ -118,8 +118,7 @@ module ArrowFormat
         is_delta = false
       else
         is_delta = true
-        raise NotImplementedError,
-              "Delta dictionary message isn't implemented yet"
+        dictionary = dictionary.slice(offset)
       end
 
       schema = Schema.new([Field.new("dummy", value_type, true, nil)])
diff --git a/ruby/red-arrow-format/lib/arrow-format/type.rb 
b/ruby/red-arrow-format/lib/arrow-format/type.rb
index 4ea41a2538..8d49b3810b 100644
--- a/ruby/red-arrow-format/lib/arrow-format/type.rb
+++ b/ruby/red-arrow-format/lib/arrow-format/type.rb
@@ -305,6 +305,10 @@ module ArrowFormat
       "Float32"
     end
 
+    def buffer_type
+      :f32
+    end
+
     def build_array(size, validity_buffer, values_buffer)
       Float32Array.new(self, size, validity_buffer, values_buffer)
     end
@@ -325,6 +329,10 @@ module ArrowFormat
       "Float64"
     end
 
+    def buffer_type
+      :f64
+    end
+
     def build_array(size, validity_buffer, values_buffer)
       Float64Array.new(self, size, validity_buffer, values_buffer)
     end
@@ -362,6 +370,10 @@ module ArrowFormat
       "Date32"
     end
 
+    def buffer_type
+      :s32
+    end
+
     def build_array(size, validity_buffer, values_buffer)
       Date32Array.new(self, size, validity_buffer, values_buffer)
     end
@@ -382,6 +394,10 @@ module ArrowFormat
       "Date64"
     end
 
+    def buffer_type
+      :s64
+    end
+
     def build_array(size, validity_buffer, values_buffer)
       Date64Array.new(self, size, validity_buffer, values_buffer)
     end
@@ -413,6 +429,10 @@ module ArrowFormat
       "Time32"
     end
 
+    def buffer_type
+      :s32
+    end
+
     def build_array(size, validity_buffer, values_buffer)
       Time32Array.new(self, size, validity_buffer, values_buffer)
     end
@@ -427,6 +447,10 @@ module ArrowFormat
       "Time64"
     end
 
+    def buffer_type
+      :s64
+    end
+
     def build_array(size, validity_buffer, values_buffer)
       Time64Array.new(self, size, validity_buffer, values_buffer)
     end
@@ -445,6 +469,10 @@ module ArrowFormat
       "Timestamp"
     end
 
+    def buffer_type
+      :s64
+    end
+
     def build_array(size, validity_buffer, values_buffer)
       TimestampArray.new(self, size, validity_buffer, values_buffer)
     end
@@ -486,6 +514,10 @@ module ArrowFormat
       "YearMonthInterval"
     end
 
+    def buffer_type
+      :s32
+    end
+
     def build_array(size, validity_buffer, values_buffer)
       YearMonthIntervalArray.new(self, size, validity_buffer, values_buffer)
     end
@@ -500,6 +532,10 @@ module ArrowFormat
       "DayTimeInterval"
     end
 
+    def buffer_type
+      :s32
+    end
+
     def build_array(size, validity_buffer, values_buffer)
       DayTimeIntervalArray.new(self, size, validity_buffer, values_buffer)
     end
@@ -514,6 +550,10 @@ module ArrowFormat
       "MonthDayNanoInterval"
     end
 
+    def buffer_types
+      @buffer_types ||= [:s32, :s32, :s64]
+    end
+
     def build_array(size, validity_buffer, values_buffer)
       MonthDayNanoIntervalArray.new(self,
                                     size,
@@ -533,6 +573,10 @@ module ArrowFormat
       "Duration"
     end
 
+    def buffer_type
+      :s64
+    end
+
     def build_array(size, validity_buffer, values_buffer)
       DurationArray.new(self, size, validity_buffer, values_buffer)
     end
@@ -558,6 +602,14 @@ module ArrowFormat
       "Binary"
     end
 
+    def offset_buffer_type
+      :s32 # TODO: big endian support
+    end
+
+    def encoding
+      Encoding::ASCII_8BIT
+    end
+
     def build_array(size, validity_buffer, offsets_buffer, values_buffer)
       BinaryArray.new(self,
                       size,
@@ -582,6 +634,14 @@ module ArrowFormat
       "LargeBinary"
     end
 
+    def offset_buffer_type
+      :s64 # TODO: big endian support
+    end
+
+    def encoding
+      Encoding::ASCII_8BIT
+    end
+
     def build_array(size, validity_buffer, offsets_buffer, values_buffer)
       LargeBinaryArray.new(self,
                            size,
@@ -606,6 +666,14 @@ module ArrowFormat
       "UTF8"
     end
 
+    def offset_buffer_type
+      :s32 # TODO: big endian support
+    end
+
+    def encoding
+      Encoding::UTF_8
+    end
+
     def build_array(size, validity_buffer, offsets_buffer, values_buffer)
       UTF8Array.new(self, size, validity_buffer, offsets_buffer, values_buffer)
     end
@@ -626,6 +694,14 @@ module ArrowFormat
       "LargeUTF8"
     end
 
+    def offset_buffer_type
+      :s64 # TODO: big endian support
+    end
+
+    def encoding
+      Encoding::UTF_8
+    end
+
     def build_array(size, validity_buffer, offsets_buffer, values_buffer)
       LargeUTF8Array.new(self,
                          size,
@@ -720,6 +796,10 @@ module ArrowFormat
       "List"
     end
 
+    def offset_buffer_type
+      :s32 # TODO: big endian support
+    end
+
     def build_array(size, validity_buffer, offsets_buffer, child)
       ListArray.new(self, size, validity_buffer, offsets_buffer, child)
     end
@@ -734,6 +814,10 @@ module ArrowFormat
       "LargeList"
     end
 
+    def offset_buffer_type
+      :s64 # TODO: big endian support
+    end
+
     def build_array(size, validity_buffer, offsets_buffer, child)
       LargeListArray.new(self, size, validity_buffer, offsets_buffer, child)
     end
@@ -788,6 +872,10 @@ module ArrowFormat
       "Map"
     end
 
+    def offset_buffer_type
+      :s32 # TODO: big endian support
+    end
+
     def build_array(size, validity_buffer, offsets_buffer, child)
       MapArray.new(self, size, validity_buffer, offsets_buffer, child)
     end
diff --git a/ruby/red-arrow-format/test/test-writer.rb 
b/ruby/red-arrow-format/test/test-writer.rb
index 3e4b5bedba..33b3c2db22 100644
--- a/ruby/red-arrow-format/test/test-writer.rb
+++ b/ruby/red-arrow-format/test/test-writer.rb
@@ -198,465 +198,391 @@ module WriterTests
     end
   end
 
-  def write(writer)
-    red_arrow_array = build_array
-    array = convert_array(red_arrow_array)
-    red_arrow_field = Arrow::Field.new("value",
-                                       red_arrow_array.value_data_type,
-                                       true)
-    fields = [convert_field(red_arrow_field)]
-    schema = ArrowFormat::Schema.new(fields)
-    record_batch = ArrowFormat::RecordBatch.new(schema, array.size, [array])
-    writer.start(schema)
-    writer.write_record_batch(record_batch)
+  def write(writer, *inputs)
+    inputs.each_with_index do |input, i|
+      case input
+      when ArrowFormat::RecordBatch
+        record_batch = input
+      else
+        red_arrow_array = input
+        array = convert_array(red_arrow_array)
+        red_arrow_field = Arrow::Field.new("value",
+                                           red_arrow_array.value_data_type,
+                                           true)
+        fields = [convert_field(red_arrow_field)]
+        schema = ArrowFormat::Schema.new(fields)
+        record_batch = ArrowFormat::RecordBatch.new(schema,
+                                                    array.size,
+                                                    [array])
+      end
+      writer.start(record_batch.schema) if i.zero?
+      writer.write_record_batch(record_batch)
+    end
     writer.finish
   end
 
+  def roundtrip(*inputs)
+    Dir.mktmpdir do |tmp_dir|
+      path = File.join(tmp_dir, "data.#{file_extension}")
+      File.open(path, "wb") do |output|
+        writer = writer_class.new(output)
+        write(writer, *inputs)
+      end
+      data = File.open(path, "rb", &:read).freeze
+      table = Arrow::Table.load(Arrow::Buffer.new(data), format: :arrow)
+      [table.value.data_type, table.value.values]
+    end
+  end
+
   class << self
     def included(base)
       base.class_eval do
-        sub_test_case("Null") do
-          def build_array
-            Arrow::NullArray.new(3)
-          end
-
-          def test_write
-            assert_equal([nil, nil, nil],
-                         @values)
-          end
+        def test_null
+          array = Arrow::NullArray.new(3)
+          type, values = roundtrip(array)
+          assert_equal(["null", [nil, nil, nil]],
+                       [type.to_s, values])
         end
 
-        sub_test_case("Boolean") do
-          def build_array
-            Arrow::BooleanArray.new([true, nil, false])
-          end
-
-          def test_write
-            assert_equal([true, nil, false],
-                         @values)
-          end
+        def test_boolean
+          array = Arrow::BooleanArray.new([true, nil, false])
+          type, values = roundtrip(array)
+          assert_equal(["bool", [true, nil, false]],
+                       [type.to_s, values])
         end
 
-        sub_test_case("Int8") do
-          def build_array
-            Arrow::Int8Array.new([-128, nil, 127])
-          end
-
-          def test_write
-            assert_equal([-128, nil, 127],
-                         @values)
-          end
+        def test_int8
+          array = Arrow::Int8Array.new([-128, nil, 127])
+          type, values = roundtrip(array)
+          assert_equal(["int8", [-128, nil, 127]],
+                       [type.to_s, values])
         end
 
-        sub_test_case("UInt8") do
-          def build_array
-            Arrow::UInt8Array.new([0, nil, 255])
-          end
-
-          def test_write
-            assert_equal([0, nil, 255],
-                         @values)
-          end
+        def test_uint8
+          array = Arrow::UInt8Array.new([0, nil, 255])
+          type, values = roundtrip(array)
+          assert_equal(["uint8", [0, nil, 255]],
+                       [type.to_s, values])
         end
 
-        sub_test_case("Int16") do
-          def build_array
-            Arrow::Int16Array.new([-32768, nil, 32767])
-          end
-
-          def test_write
-            assert_equal([-32768, nil, 32767],
-                         @values)
-          end
+        def test_int16
+          array = Arrow::Int16Array.new([-32768, nil, 32767])
+          type, values = roundtrip(array)
+          assert_equal(["int16", [-32768, nil, 32767]],
+                       [type.to_s, values])
         end
 
-        sub_test_case("UInt16") do
-          def build_array
-            Arrow::UInt16Array.new([0, nil, 65535])
-          end
-
-          def test_write
-            assert_equal([0, nil, 65535],
-                         @values)
-          end
+        def test_uint16
+          array = Arrow::UInt16Array.new([0, nil, 65535])
+          type, values = roundtrip(array)
+          assert_equal(["uint16", [0, nil, 65535]],
+                       [type.to_s, values])
         end
 
-        sub_test_case("Int32") do
-          def build_array
-            Arrow::Int32Array.new([-2147483648, nil, 2147483647])
-          end
-
-          def test_write
-            assert_equal([-2147483648, nil, 2147483647],
-                         @values)
-          end
+        def test_int32
+          array = Arrow::Int32Array.new([-2147483648, nil, 2147483647])
+          type, values = roundtrip(array)
+          assert_equal(["int32", [-2147483648, nil, 2147483647]],
+                       [type.to_s, values])
         end
 
-        sub_test_case("UInt32") do
-          def build_array
-            Arrow::UInt32Array.new([0, nil, 4294967295])
-          end
-
-          def test_write
-            assert_equal([0, nil, 4294967295],
-                         @values)
-          end
+        def test_uint32
+          array = Arrow::UInt32Array.new([0, nil, 4294967295])
+          type, values = roundtrip(array)
+          assert_equal(["uint32", [0, nil, 4294967295]],
+                       [type.to_s, values])
         end
 
-        sub_test_case("Int64") do
-          def build_array
-            Arrow::Int64Array.new([
-                                    -9223372036854775808,
-                                    nil,
-                                    9223372036854775807
-                                  ])
-          end
-
-          def test_write
-            assert_equal([
+        def test_int64
+          array = Arrow::Int64Array.new([
+                                          -9223372036854775808,
+                                          nil,
+                                          9223372036854775807
+                                        ])
+          type, values = roundtrip(array)
+          assert_equal([
+                         "int64",
+                         [
                            -9223372036854775808,
                            nil,
                            9223372036854775807
                          ],
-                         @values)
-          end
-        end
-
-        sub_test_case("UInt64") do
-          def build_array
-            Arrow::UInt64Array.new([0, nil, 18446744073709551615])
-          end
-
-          def test_write
-            assert_equal([0, nil, 18446744073709551615],
-                         @values)
-          end
-        end
-
-        sub_test_case("Float32") do
-          def build_array
-            Arrow::FloatArray.new([-0.5, nil, 0.5])
-          end
-
-          def test_write
-            assert_equal([-0.5, nil, 0.5],
-                         @values)
-          end
-        end
-
-        sub_test_case("Float64") do
-          def build_array
-            Arrow::DoubleArray.new([-0.5, nil, 0.5])
-          end
-
-          def test_write
-            assert_equal([-0.5, nil, 0.5],
-                         @values)
-          end
-        end
-
-        sub_test_case("Date32") do
-          def setup(&block)
-            @date_2017_08_28 = 17406
-            @date_2025_12_09 = 20431
-            super(&block)
-          end
-
-          def build_array
-            Arrow::Date32Array.new([@date_2017_08_28, nil, @date_2025_12_09])
-          end
-
-          def test_write
-            assert_equal([Date.new(2017, 8, 28), nil, Date.new(2025, 12, 9)],
-                         @values)
-          end
-        end
-
-        sub_test_case("Date64") do
-          def setup(&block)
-            @date_2017_08_28_00_00_00 = 1503878400000
-            @date_2025_12_10_00_00_00 = 1765324800000
-            super(&block)
-          end
-
-          def build_array
-            Arrow::Date64Array.new([
-                                     @date_2017_08_28_00_00_00,
-                                     nil,
-                                     @date_2025_12_10_00_00_00,
-                                   ])
-          end
-
-          def test_write
-            assert_equal([
+                       ],
+                       [type.to_s, values])
+        end
+
+        def test_uint64
+          array = Arrow::UInt64Array.new([0, nil, 18446744073709551615])
+          type, values = roundtrip(array)
+          assert_equal(["uint64", [0, nil, 18446744073709551615]],
+                       [type.to_s, values])
+        end
+
+        def test_float32
+          array = Arrow::FloatArray.new([-0.5, nil, 0.5])
+          type, values = roundtrip(array)
+          assert_equal(["float", [-0.5, nil, 0.5]],
+                       [type.to_s, values])
+        end
+
+        def test_float64
+          array = Arrow::DoubleArray.new([-0.5, nil, 0.5])
+          type, values = roundtrip(array)
+          assert_equal(["double", [-0.5, nil, 0.5]],
+                       [type.to_s, values])
+        end
+
+        def test_date32
+          date_2017_08_28 = 17406
+          date_2025_12_09 = 20431
+          array = Arrow::Date32Array.new([
+                                           date_2017_08_28,
+                                           nil,
+                                           date_2025_12_09,
+                                         ])
+          type, values = roundtrip(array)
+          assert_equal([
+                         "date32[day]",
+                         [Date.new(2017, 8, 28), nil, Date.new(2025, 12, 9)],
+                       ],
+                       [type.to_s, values])
+        end
+
+        def test_date64
+          date_2017_08_28_00_00_00 = 1503878400000
+          date_2025_12_10_00_00_00 = 1765324800000
+          array = Arrow::Date64Array.new([
+                                           date_2017_08_28_00_00_00,
+                                           nil,
+                                           date_2025_12_10_00_00_00,
+                                         ])
+          type, values = roundtrip(array)
+          assert_equal([
+                         "date64[ms]",
+                         [
                            DateTime.new(2017, 8, 28, 0, 0, 0),
                            nil,
                            DateTime.new(2025, 12, 10, 0, 0, 0),
                          ],
-                         @values)
-          end
-        end
-
-        sub_test_case("Time32(:second)") do
-          def setup(&block)
-            @time_00_00_10 = 10
-            @time_00_01_10 = 60 + 10
-            super(&block)
-          end
-
-          def build_array
-            Arrow::Time32Array.new(:second,
-                                   [@time_00_00_10, nil, @time_00_01_10])
-          end
-
-          def test_write
-            assert_equal([
-                           Arrow::Time.new(:second, @time_00_00_10),
+                       ],
+                       [type.to_s, values])
+        end
+
+        def test_time32_second
+          time_00_00_10 = 10
+          time_00_01_10 = 60 + 10
+          array = Arrow::Time32Array.new(:second,
+                                         [time_00_00_10, nil, time_00_01_10])
+          type, values = roundtrip(array)
+          assert_equal([
+                         "time32[s]",
+                         [
+                           Arrow::Time.new(:second, time_00_00_10),
                            nil,
-                           Arrow::Time.new(:second, @time_00_01_10),
+                           Arrow::Time.new(:second, time_00_01_10),
                          ],
-                         @values)
-          end
-        end
-
-        sub_test_case("Time32(:millisecond)") do
-          def setup(&block)
-            @time_00_00_10_000 = 10 * 1000
-            @time_00_01_10_000 = (60 + 10) * 1000
-            super(&block)
-          end
-
-          def build_array
-            Arrow::Time32Array.new(:milli,
-                                   [
-                                     @time_00_00_10_000,
-                                     nil,
-                                     @time_00_01_10_000,
-                                   ])
-          end
-
-          def test_write
-            assert_equal([
-                           Arrow::Time.new(:milli, @time_00_00_10_000),
+                       ],
+                       [type.to_s, values])
+        end
+
+        def test_time32_millisecond
+          time_00_00_10_000 = 10 * 1000
+          time_00_01_10_000 = (60 + 10) * 1000
+          array = Arrow::Time32Array.new(:milli,
+                                         [
+                                           time_00_00_10_000,
+                                           nil,
+                                           time_00_01_10_000,
+                                         ])
+          type, values = roundtrip(array)
+          assert_equal([
+                         "time32[ms]",
+                         [
+                           Arrow::Time.new(:milli, time_00_00_10_000),
                            nil,
-                           Arrow::Time.new(:milli, @time_00_01_10_000),
+                           Arrow::Time.new(:milli, time_00_01_10_000),
                          ],
-                         @values)
-          end
-        end
-
-        sub_test_case("Time64(:microsecond)") do
-          def setup(&block)
-            @time_00_00_10_000_000 = 10 * 1_000_000
-            @time_00_01_10_000_000 = (60 + 10) * 1_000_000
-            super(&block)
-          end
-
-          def build_array
-            Arrow::Time64Array.new(:micro,
-                                   [
-                                     @time_00_00_10_000_000,
-                                     nil,
-                                     @time_00_01_10_000_000,
-                                   ])
-          end
-
-          def test_write
-            assert_equal([
-                           Arrow::Time.new(:micro, @time_00_00_10_000_000),
+                       ],
+                       [type.to_s, values])
+        end
+
+        def test_time64_microsecond
+          time_00_00_10_000_000 = 10 * 1_000_000
+          time_00_01_10_000_000 = (60 + 10) * 1_000_000
+          array = Arrow::Time64Array.new(:micro,
+                                         [
+                                           time_00_00_10_000_000,
+                                           nil,
+                                           time_00_01_10_000_000,
+                                         ])
+          type, values = roundtrip(array)
+          assert_equal([
+                         "time64[us]",
+                         [
+                           Arrow::Time.new(:micro, time_00_00_10_000_000),
                            nil,
-                           Arrow::Time.new(:micro, @time_00_01_10_000_000),
+                           Arrow::Time.new(:micro, time_00_01_10_000_000),
                          ],
-                         @values)
-          end
-        end
-
-        sub_test_case("Time64(:nanosecond)") do
-          def setup(&block)
-            @time_00_00_10_000_000_000 = 10 * 1_000_000_000
-            @time_00_01_10_000_000_000 = (60 + 10) * 1_000_000_000
-            super(&block)
-          end
-
-          def build_array
-            Arrow::Time64Array.new(:nano,
-                                   [
-                                     @time_00_00_10_000_000_000,
-                                     nil,
-                                     @time_00_01_10_000_000_000,
-                                   ])
-          end
-
-          def test_write
-            assert_equal([
-                           Arrow::Time.new(:nano, @time_00_00_10_000_000_000),
+                       ],
+                       [type.to_s, values])
+        end
+
+        def test_time64_nanosecond
+          time_00_00_10_000_000_000 = 10 * 1_000_000_000
+          time_00_01_10_000_000_000 = (60 + 10) * 1_000_000_000
+          array = Arrow::Time64Array.new(:nano,
+                                         [
+                                           time_00_00_10_000_000_000,
+                                           nil,
+                                           time_00_01_10_000_000_000,
+                                         ])
+          type, values = roundtrip(array)
+          assert_equal([
+                         "time64[ns]",
+                         [
+                           Arrow::Time.new(:nano, time_00_00_10_000_000_000),
                            nil,
-                           Arrow::Time.new(:nano, @time_00_01_10_000_000_000),
+                           Arrow::Time.new(:nano, time_00_01_10_000_000_000),
                          ],
-                         @values)
-          end
+                       ],
+                       [type.to_s, values])
         end
 
-        sub_test_case("Timestamp(:second)") do
-          def setup(&block)
-            @timestamp_2019_11_17_15_09_11 = 1574003351
-            @timestamp_2025_12_16_05_33_58 = 1765863238
-            super(&block)
-          end
-
-          def build_array
-            Arrow::TimestampArray.new(:second,
-                                      [
-                                        @timestamp_2019_11_17_15_09_11,
-                                        nil,
-                                        @timestamp_2025_12_16_05_33_58,
-                                      ])
-          end
-
-          def test_write
-            assert_equal([
-                           Time.at(@timestamp_2019_11_17_15_09_11),
+        def test_timestamp_second
+          timestamp_2019_11_17_15_09_11 = 1574003351
+          timestamp_2025_12_16_05_33_58 = 1765863238
+          array = Arrow::TimestampArray.new(:second,
+                                            [
+                                              timestamp_2019_11_17_15_09_11,
+                                              nil,
+                                              timestamp_2025_12_16_05_33_58,
+                                            ])
+          type, values = roundtrip(array)
+          assert_equal([
+                         "timestamp[s]",
+                         [
+                           Time.at(timestamp_2019_11_17_15_09_11),
                            nil,
-                           Time.at(@timestamp_2025_12_16_05_33_58),
+                           Time.at(timestamp_2025_12_16_05_33_58),
                          ],
-                         @values)
-          end
+                       ],
+                       [type.to_s, values])
         end
 
-        sub_test_case("Timestamp(:millisecond)") do
-          def setup(&block)
-            @timestamp_2019_11_17_15_09_11 = 1574003351 * 1_000
-            @timestamp_2025_12_16_05_33_58 = 1765863238 * 1_000
-            super(&block)
-          end
-
-          def build_array
-            Arrow::TimestampArray.new(:milli,
-                                      [
-                                        @timestamp_2019_11_17_15_09_11,
-                                        nil,
-                                        @timestamp_2025_12_16_05_33_58,
-                                      ])
-          end
-
-          def test_write
-            assert_equal([
-                           Time.at(@timestamp_2019_11_17_15_09_11 / 1_000),
+        def test_timestamp_millisecond
+          timestamp_2019_11_17_15_09_11 = 1574003351 * 1_000
+          timestamp_2025_12_16_05_33_58 = 1765863238 * 1_000
+          array = Arrow::TimestampArray.new(:milli,
+                                            [
+                                              timestamp_2019_11_17_15_09_11,
+                                              nil,
+                                              timestamp_2025_12_16_05_33_58,
+                                            ])
+          type, values = roundtrip(array)
+          assert_equal([
+                         "timestamp[ms]",
+                         [
+                           Time.at(timestamp_2019_11_17_15_09_11 / 1_000),
                            nil,
-                           Time.at(@timestamp_2025_12_16_05_33_58 / 1_000),
+                           Time.at(timestamp_2025_12_16_05_33_58 / 1_000),
                          ],
-                         @values)
-          end
+                       ],
+                       [type.to_s, values])
         end
 
-        sub_test_case("Timestamp(:microsecond)") do
-          def setup(&block)
-            @timestamp_2019_11_17_15_09_11 = 1574003351 * 1_000_000
-            @timestamp_2025_12_16_05_33_58 = 1765863238 * 1_000_000
-            super(&block)
-          end
-
-          def build_array
-            Arrow::TimestampArray.new(:micro,
-                                      [
-                                        @timestamp_2019_11_17_15_09_11,
-                                        nil,
-                                        @timestamp_2025_12_16_05_33_58,
-                                      ])
-          end
-
-          def test_write
-            assert_equal([
-                           Time.at(@timestamp_2019_11_17_15_09_11 / 1_000_000),
+        def test_timestamp_microsecond
+          timestamp_2019_11_17_15_09_11 = 1574003351 * 1_000_000
+          timestamp_2025_12_16_05_33_58 = 1765863238 * 1_000_000
+          array = Arrow::TimestampArray.new(:micro,
+                                            [
+                                              timestamp_2019_11_17_15_09_11,
+                                              nil,
+                                              timestamp_2025_12_16_05_33_58,
+                                            ])
+          type, values = roundtrip(array)
+          assert_equal([
+                         "timestamp[us]",
+                         [
+                           Time.at(timestamp_2019_11_17_15_09_11 / 1_000_000),
                            nil,
-                           Time.at(@timestamp_2025_12_16_05_33_58 / 1_000_000),
+                           Time.at(timestamp_2025_12_16_05_33_58 / 1_000_000),
                          ],
-                         @values)
-          end
+                       ],
+                       [type.to_s, values])
         end
 
-        sub_test_case("Timestamp(:nanosecond)") do
-          def setup(&block)
-            @timestamp_2019_11_17_15_09_11 = 1574003351 * 1_000_000_000
-            @timestamp_2025_12_16_05_33_58 = 1765863238 * 1_000_000_000
-            super(&block)
-          end
-
-          def build_array
-            Arrow::TimestampArray.new(:nano,
-                                      [
-                                        @timestamp_2019_11_17_15_09_11,
-                                        nil,
-                                        @timestamp_2025_12_16_05_33_58,
-                                      ])
-          end
-
-          def test_write
-            assert_equal([
-                           Time.at(@timestamp_2019_11_17_15_09_11 / 1_000_000_000),
+        def test_timestamp_nanosecond
+          timestamp_2019_11_17_15_09_11 = 1574003351 * 1_000_000_000
+          timestamp_2025_12_16_05_33_58 = 1765863238 * 1_000_000_000
+          array = Arrow::TimestampArray.new(:nano,
+                                            [
+                                              timestamp_2019_11_17_15_09_11,
+                                              nil,
+                                              timestamp_2025_12_16_05_33_58,
+                                            ])
+          type, values = roundtrip(array)
+          assert_equal([
+                         "timestamp[ns]",
+                         [
+                           Time.at(timestamp_2019_11_17_15_09_11 / 1_000_000_000),
                            nil,
-                           Time.at(@timestamp_2025_12_16_05_33_58 / 1_000_000_000),
+                           Time.at(timestamp_2025_12_16_05_33_58 / 1_000_000_000),
                          ],
-                         @values)
-          end
-        end
-
-        sub_test_case("Timestamp(time_zone)") do
-          def setup(&block)
-            @time_zone = "UTC"
-            @timestamp_2019_11_17_15_09_11 = 1574003351
-            @timestamp_2025_12_16_05_33_58 = 1765863238
-            super(&block)
-          end
-
-          def build_array
-            data_type = Arrow::TimestampDataType.new(:second, @time_zone)
-            Arrow::TimestampArray.new(data_type,
-                                      [
-                                        @timestamp_2019_11_17_15_09_11,
-                                        nil,
-                                        @timestamp_2025_12_16_05_33_58,
-                                      ])
-          end
-
-          def test_type
-            assert_equal([Arrow::TimeUnit::SECOND, @time_zone],
-                         [@type.unit, @type.time_zone&.identifier])
-          end
+                       ],
+                       [type.to_s, values])
+        end
+
+        def test_timestamp_time_zone
+          time_zone = "UTC"
+          timestamp_2019_11_17_15_09_11 = 1574003351
+          timestamp_2025_12_16_05_33_58 = 1765863238
+          data_type = Arrow::TimestampDataType.new(:second, time_zone)
+          array = Arrow::TimestampArray.new(data_type,
+                                            [
+                                              timestamp_2019_11_17_15_09_11,
+                                              nil,
+                                              timestamp_2025_12_16_05_33_58,
+                                            ])
+          type, values = roundtrip(array)
+          assert_equal([
+                         "timestamp[s, tz=#{time_zone}]",
+                         [
+                           Time.at(timestamp_2019_11_17_15_09_11),
+                           nil,
+                           Time.at(timestamp_2025_12_16_05_33_58),
+                         ],
+                       ],
+                       [type.to_s, values])
         end
 
-        sub_test_case("YearMonthInterval") do
-          def build_array
-            Arrow::MonthIntervalArray.new([0, nil, 100])
-          end
-
-          def test_write
-            assert_equal([0, nil, 100],
-                         @values)
-          end
+        def test_year_month_interval
+          array = Arrow::MonthIntervalArray.new([0, nil, 100])
+          type, values = roundtrip(array)
+          assert_equal(["month_interval", [0, nil, 100]],
+                       [type.to_s, values])
         end
 
-        sub_test_case("DayTimeInterval") do
-          def build_array
+        def test_day_time_interval
+          array =
             Arrow::DayTimeIntervalArray.new([
                                               {day: 1, millisecond: 100},
                                               nil,
                                               {day: 3, millisecond: 300},
                                             ])
-          end
-
-          def test_write
-            assert_equal([
+          type, values = roundtrip(array)
+          assert_equal([
+                         "day_time_interval",
+                         [
                            {day: 1, millisecond: 100},
                            nil,
                            {day: 3, millisecond: 300},
                          ],
-                         @values)
-          end
+                       ],
+                       [type.to_s, values])
         end
 
-        sub_test_case("MonthDayNanoInterval") do
-          def build_array
+        def test_month_day_nano_interval
+          array =
             Arrow::MonthDayNanoIntervalArray.new([
                                                    {
                                                      month: 1,
@@ -670,10 +596,10 @@ module WriterTests
                                                      nanosecond: 300,
                                                    },
                                                  ])
-          end
-
-          def test_write
-            assert_equal([
+          type, values = roundtrip(array)
+          assert_equal([
+                         "month_day_nano_interval",
+                         [
                            {
                              month: 1,
                              day: 1,
@@ -686,307 +612,304 @@ module WriterTests
                              nanosecond: 300,
                            },
                          ],
-                         @values)
-          end
-        end
-
-        sub_test_case("Duration(:second)") do
-          def build_array
-            Arrow::DurationArray.new(:second, [0, nil, 100])
-          end
-
-          def test_write
-            assert_equal([0, nil, 100],
-                         @values)
-          end
-
-          def test_type
-            assert_equal(Arrow::TimeUnit::SECOND, @type.unit)
-          end
-        end
-
-        sub_test_case("Duration(:millisecond)") do
-          def build_array
-            Arrow::DurationArray.new(:milli, [0, nil, 100])
-          end
-
-          def test_write
-            assert_equal([0, nil, 100],
-                         @values)
-          end
-
-          def test_type
-            assert_equal(Arrow::TimeUnit::MILLI, @type.unit)
-          end
-        end
-
-        sub_test_case("Duration(:microsecond)") do
-          def build_array
-            Arrow::DurationArray.new(:micro, [0, nil, 100])
-          end
-
-          def test_write
-            assert_equal([0, nil, 100],
-                         @values)
-          end
-
-          def test_type
-            assert_equal(Arrow::TimeUnit::MICRO, @type.unit)
-          end
-        end
-
-        sub_test_case("Duration(:nanosecond)") do
-          def build_array
-            Arrow::DurationArray.new(:nano, [0, nil, 100])
-          end
-
-          def test_write
-            assert_equal([0, nil, 100],
-                         @values)
-          end
-
-          def test_type
-            assert_equal(Arrow::TimeUnit::NANO, @type.unit)
-          end
-        end
-
-        sub_test_case("Binary") do
-          def build_array
-            Arrow::BinaryArray.new(["Hello".b, nil, "World".b])
-          end
-
-          def test_write
-            assert_equal(["Hello".b, nil, "World".b],
-                         @values)
-          end
-        end
-
-        sub_test_case("LargeBinary") do
-          def build_array
-            Arrow::LargeBinaryArray.new(["Hello".b, nil, "World".b])
-          end
-
-          def test_write
-            assert_equal(["Hello".b, nil, "World".b],
-                         @values)
-          end
-        end
-
-        sub_test_case("String") do
-          def build_array
-            Arrow::StringArray.new(["Hello", nil, "World"])
-          end
-
-          def test_write
-            assert_equal(["Hello", nil, "World"],
-                         @values)
-          end
-        end
-
-        sub_test_case("LargeString") do
-          def build_array
-            Arrow::LargeStringArray.new(["Hello", nil, "World"])
-          end
-
-          def test_write
-            assert_equal(["Hello", nil, "World"],
-                         @values)
-          end
-        end
-
-        sub_test_case("FixedSizeBinary") do
-          def build_array
-            data_type = Arrow::FixedSizeBinaryDataType.new(4)
-            Arrow::FixedSizeBinaryArray.new(data_type,
-                                            ["0124".b, nil, "abcd".b])
-          end
-
-          def test_write
-            assert_equal(["0124".b, nil, "abcd".b],
-                         @values)
-          end
-        end
-
-        sub_test_case("Decimal128") do
-          def build_array
-            @positive_small = "1.200"
-            @positive_large = ("1234567890" * 3) + "12345.678"
-            @negative_small = "-1.200"
-            @negative_large = "-" + ("1234567890" * 3) + "12345.678"
-            Arrow::Decimal128Array.new({precision: 38, scale: 3},
-                                       [
-                                         @positive_large,
-                                         @positive_small,
-                                         nil,
-                                         @negative_small,
-                                         @negative_large,
-                                       ])
-          end
-
-          def test_write
-            assert_equal([
-                           BigDecimal(@positive_large),
-                           BigDecimal(@positive_small),
+                       ],
+                       [type.to_s, values])
+        end
+
+        def test_duration_second
+          array = Arrow::DurationArray.new(:second, [0, nil, 100])
+          type, values = roundtrip(array)
+          assert_equal(["duration[s]", [0, nil, 100]],
+                       [type.to_s, values])
+        end
+
+        def test_duration_millisecond
+          array = Arrow::DurationArray.new(:milli, [0, nil, 100])
+          type, values = roundtrip(array)
+          assert_equal(["duration[ms]", [0, nil, 100]],
+                       [type.to_s, values])
+        end
+
+        def test_duration_microsecond
+          array = Arrow::DurationArray.new(:micro, [0, nil, 100])
+          type, values = roundtrip(array)
+          assert_equal(["duration[us]", [0, nil, 100]],
+                       [type.to_s, values])
+        end
+
+        def test_duration_nanosecond
+          array = Arrow::DurationArray.new(:nano, [0, nil, 100])
+          type, values = roundtrip(array)
+          assert_equal(["duration[ns]", [0, nil, 100]],
+                       [type.to_s, values])
+        end
+
+        def test_binary
+          array = Arrow::BinaryArray.new(["Hello".b, nil, "World".b])
+          type, values = roundtrip(array)
+          assert_equal(["binary", ["Hello".b, nil, "World".b]],
+                       [type.to_s, values])
+        end
+
+        def test_large_binary
+          array = Arrow::LargeBinaryArray.new(["Hello".b, nil, "World".b])
+          type, values = roundtrip(array)
+          assert_equal(["large_binary", ["Hello".b, nil, "World".b]],
+                       [type.to_s, values])
+        end
+
+        def test_utf8
+          array = Arrow::StringArray.new(["Hello", nil, "World"])
+          type, values = roundtrip(array)
+          assert_equal(["string", ["Hello", nil, "World"]],
+                       [type.to_s, values])
+        end
+
+        def test_large_utf8
+          array = Arrow::LargeStringArray.new(["Hello", nil, "World"])
+          type, values = roundtrip(array)
+          assert_equal(["large_string", ["Hello", nil, "World"]],
+                       [type.to_s, values])
+        end
+
+        def test_fixed_size_binary
+          data_type = Arrow::FixedSizeBinaryDataType.new(4)
+          array = Arrow::FixedSizeBinaryArray.new(data_type,
+                                                  ["0124".b, nil, "abcd".b])
+          type, values = roundtrip(array)
+          assert_equal(["fixed_size_binary[4]", ["0124".b, nil, "abcd".b]],
+                       [type.to_s, values])
+        end
+
+        def test_decimal128
+          positive_small = "1.200"
+          positive_large = ("1234567890" * 3) + "12345.678"
+          negative_small = "-1.200"
+          negative_large = "-" + ("1234567890" * 3) + "12345.678"
+          array = Arrow::Decimal128Array.new({precision: 38, scale: 3},
+                                             [
+                                               positive_large,
+                                               positive_small,
+                                               nil,
+                                               negative_small,
+                                               negative_large,
+                                             ])
+          type, values = roundtrip(array)
+          assert_equal([
+                         "decimal128(38, 3)",
+                         [
+                           BigDecimal(positive_large),
+                           BigDecimal(positive_small),
                            nil,
-                           BigDecimal(@negative_small),
-                           BigDecimal(@negative_large),
+                           BigDecimal(negative_small),
+                           BigDecimal(negative_large),
                          ],
-                         @values)
-          end
-        end
-
-        sub_test_case("Decimal256") do
-          def build_array
-            @positive_small = "1.200"
-            @positive_large = ("1234567890" * 7) + "123.456"
-            @negative_small = "-1.200"
-            @negative_large = "-" + ("1234567890" * 7) + "123.456"
-            Arrow::Decimal256Array.new({precision: 76, scale: 3},
-                                       [
-                                         @positive_large,
-                                         @positive_small,
-                                         nil,
-                                         @negative_small,
-                                         @negative_large,
-                                       ])
-          end
-
-          def test_write
-            assert_equal([
-                           BigDecimal(@positive_large),
-                           BigDecimal(@positive_small),
+                       ],
+                       [type.to_s, values])
+        end
+
+        def test_decimal256
+          positive_small = "1.200"
+          positive_large = ("1234567890" * 7) + "123.456"
+          negative_small = "-1.200"
+          negative_large = "-" + ("1234567890" * 7) + "123.456"
+          array = Arrow::Decimal256Array.new({precision: 76, scale: 3},
+                                             [
+                                               positive_large,
+                                               positive_small,
+                                               nil,
+                                               negative_small,
+                                               negative_large,
+                                             ])
+          type, values = roundtrip(array)
+          assert_equal([
+                         "decimal256(76, 3)",
+                         [
+                           BigDecimal(positive_large),
+                           BigDecimal(positive_small),
                            nil,
-                           BigDecimal(@negative_small),
-                           BigDecimal(@negative_large),
+                           BigDecimal(negative_small),
+                           BigDecimal(negative_large),
                          ],
-                         @values)
-          end
-        end
-
-        sub_test_case("List") do
-          def build_array
-            data_type = Arrow::ListDataType.new(name: "count", type: :int8)
-            Arrow::ListArray.new(data_type, [[-128, 127], nil, [-1, 0, 1]])
-          end
-
-          def test_write
-            assert_equal([[-128, 127], nil, [-1, 0, 1]],
-                         @values)
-          end
-        end
-
-        sub_test_case("LargeList") do
-          def build_array
-            data_type = Arrow::LargeListDataType.new(name: "count",
-                                                     type: :int8)
-            Arrow::LargeListArray.new(data_type,
-                                      [[-128, 127], nil, [-1, 0, 1]])
-          end
-
-          def test_write
-            assert_equal([[-128, 127], nil, [-1, 0, 1]],
-                         @values)
-          end
-        end
-
-        sub_test_case("Map") do
-          def build_array
-            data_type = Arrow::MapDataType.new(:string, :int8)
-            Arrow::MapArray.new(data_type,
-                                [
-                                  {"a" => -128, "b" => 127},
-                                  nil,
-                                  {"c" => nil},
-                                ])
-          end
-
-          def test_write
-            assert_equal([
+                       ],
+                       [type.to_s, values])
+        end
+
+        def test_list
+          data_type = Arrow::ListDataType.new(name: "count", type: :int8)
+          array = Arrow::ListArray.new(data_type,
+                                       [[-128, 127], nil, [-1, 0, 1]])
+          type, values = roundtrip(array)
+          assert_equal([
+                         "list<count: int8>",
+                         [[-128, 127], nil, [-1, 0, 1]],
+                       ],
+                       [type.to_s, values])
+        end
+
+        def test_large_list
+          data_type = Arrow::LargeListDataType.new(name: "count",
+                                                   type: :int8)
+          array = Arrow::LargeListArray.new(data_type,
+                                            [[-128, 127], nil, [-1, 0, 1]])
+          type, values = roundtrip(array)
+          assert_equal([
+                         "large_list<count: int8>",
+                         [[-128, 127], nil, [-1, 0, 1]],
+                       ],
+                       [type.to_s, values])
+        end
+
+        def test_map
+          data_type = Arrow::MapDataType.new(:string, :int8)
+          array = Arrow::MapArray.new(data_type,
+                                      [
+                                        {"a" => -128, "b" => 127},
+                                        nil,
+                                        {"c" => nil},
+                                      ])
+          type, values = roundtrip(array)
+          assert_equal([
+                         "map<string, int8>",
+                         [
                            {"a" => -128, "b" => 127},
                            nil,
                            {"c" => nil},
                          ],
-                         @values)
-          end
-        end
-
-        sub_test_case("Struct") do
-          def build_array
-            data_type = Arrow::StructDataType.new(count: :int8,
-                                                  visible: :boolean)
-            Arrow::StructArray.new(data_type,
-                                   [[-128, nil], nil, [nil, true]])
-          end
-
-          def test_write
-            assert_equal([
+                       ],
+                       [type.to_s, values])
+        end
+
+        def test_struct
+          data_type = Arrow::StructDataType.new(count: :int8,
+                                                visible: :boolean)
+          array = Arrow::StructArray.new(data_type,
+                                         [[-128, nil], nil, [nil, true]])
+          type, values = roundtrip(array)
+          assert_equal([
+                         "struct<count: int8, visible: bool>",
+                         [
                            {"count" => -128, "visible" => nil},
                            nil,
                            {"count" => nil, "visible" => true},
                          ],
-                         @values)
-          end
-        end
-
-        sub_test_case("DenseUnion") do
-          def build_array
-            fields = [
-              Arrow::Field.new("number", :int8),
-              Arrow::Field.new("text", :string),
-            ]
-            type_ids = [11, 13]
-            data_type = Arrow::DenseUnionDataType.new(fields, type_ids)
-            types = Arrow::Int8Array.new([11, 13, 11, 13, 13])
-            value_offsets = Arrow::Int32Array.new([0, 0, 1, 1, 2])
-            children = [
-              Arrow::Int8Array.new([1, nil]),
-              Arrow::StringArray.new(["a", "b", "c"])
-            ]
-            Arrow::DenseUnionArray.new(data_type,
-                                       types,
-                                       value_offsets,
-                                       children)
-          end
-
-          def test_write
-            assert_equal([1, "a", nil, "b", "c"],
-                         @values)
-          end
-        end
-
-        sub_test_case("SparseUnion") do
-          def build_array
-            fields = [
-              Arrow::Field.new("number", :int8),
-              Arrow::Field.new("text", :string),
-            ]
-            type_ids = [11, 13]
-            data_type = Arrow::SparseUnionDataType.new(fields, type_ids)
-            types = Arrow::Int8Array.new([11, 13, 11, 13, 11])
-            children = [
-              Arrow::Int8Array.new([1, nil, nil, nil, 5]),
-              Arrow::StringArray.new([nil, "b", nil, "d", nil])
-            ]
-            Arrow::SparseUnionArray.new(data_type, types, children)
-          end
-
-          def test_write
-            assert_equal([1, "b", nil, "d", 5],
-                         @values)
-          end
-        end
-
-        sub_test_case("Dictionary") do
-          def build_array
-            values = ["a", "b", "c", nil, "a"]
-            string_array = Arrow::StringArray.new(values)
-            string_array.dictionary_encode
-          end
-
-          def test_write
-            assert_equal(["a", "b", "c", nil, "a"],
-                         @values)
-          end
+                       ],
+                       [type.to_s, values])
+        end
+
+        def test_dense_union
+          fields = [
+            Arrow::Field.new("number", :int8),
+            Arrow::Field.new("text", :string),
+          ]
+          type_ids = [11, 13]
+          data_type = Arrow::DenseUnionDataType.new(fields, type_ids)
+          types = Arrow::Int8Array.new([11, 13, 11, 13, 13])
+          value_offsets = Arrow::Int32Array.new([0, 0, 1, 1, 2])
+          children = [
+            Arrow::Int8Array.new([1, nil]),
+            Arrow::StringArray.new(["a", "b", "c"])
+          ]
+          array = Arrow::DenseUnionArray.new(data_type,
+                                             types,
+                                             value_offsets,
+                                             children)
+          type, values = roundtrip(array)
+          assert_equal([
+                         "dense_union<number: int8=11, text: string=13>",
+                         [1, "a", nil, "b", "c"],
+                       ],
+                       [type.to_s, values])
+        end
+
+        def test_sparse_union
+          fields = [
+            Arrow::Field.new("number", :int8),
+            Arrow::Field.new("text", :string),
+          ]
+          type_ids = [11, 13]
+          data_type = Arrow::SparseUnionDataType.new(fields, type_ids)
+          types = Arrow::Int8Array.new([11, 13, 11, 13, 11])
+          children = [
+            Arrow::Int8Array.new([1, nil, nil, nil, 5]),
+            Arrow::StringArray.new([nil, "b", nil, "d", nil])
+          ]
+          array = Arrow::SparseUnionArray.new(data_type, types, children)
+          type, values = roundtrip(array)
+          assert_equal([
+                         "sparse_union<number: int8=11, text: string=13>",
+                         [1, "b", nil, "d", 5],
+                       ],
+                       [type.to_s, values])
+        end
+
+        def test_dictionary
+          values = ["a", "b", "c", nil, "a"]
+          string_array = Arrow::StringArray.new(values)
+          array = string_array.dictionary_encode
+          type, values = roundtrip(array)
+          assert_equal([
+                         "dictionary<values=string, " +
+                         "indices=int32, " +
+                         "ordered=0>",
+                         ["a", "b", "c", nil, "a"],
+                       ],
+                       [type.to_s, values])
+        end
+
+        def build_dictionary_delta_schema(value_type)
+          index_type = ArrowFormat::Int32Type.singleton
+          ordered = false
+          type = ArrowFormat::DictionaryType.new(index_type,
+                                                 value_type,
+                                                 ordered)
+          nullable = true
+          dictionary_id = 1
+          field = ArrowFormat::Field.new("value",
+                                         type,
+                                         nullable,
+                                         dictionary_id)
+          ArrowFormat::Schema.new([field])
+        end
+
+        def build_dictionary_array(type, indices, dictionary)
+          indices_buffer = IO::Buffer.for(indices.pack("l<*"))
+          ArrowFormat::DictionaryArray.new(type,
+                                           indices.size,
+                                           nil,
+                                           indices_buffer,
+                                           dictionary)
+        end
+
+        def test_dictionary_delta_utf8
+          value_type = ArrowFormat::UTF8Type.singleton
+          schema = build_dictionary_delta_schema(value_type)
+          type = schema.fields[0].type
+
+          dictionary = convert_array(Arrow::StringArray.new(["a", "b", "c"]))
+          # ["c", "a", "b", "a", "a"]
+          indices = [2, 0, 1, 0, 0]
+          array = build_dictionary_array(type, indices, dictionary)
+          record_batch =
+            ArrowFormat::RecordBatch.new(schema, array.size, [array])
+
+          dictionary_more =
+            convert_array(Arrow::StringArray.new(["a", "b", "c", "d", "e"]))
+          # ["e", "a", "c", "d", "b", "d"]
+          indices = [4, 0, 2, 3, 1, 3]
+          array = build_dictionary_array(type, indices, dictionary_more)
+          record_batch_delta =
+            ArrowFormat::RecordBatch.new(schema, array.size, [array])
+
+          type, values = roundtrip(record_batch, record_batch_delta)
+          assert_equal([
+                         "dictionary<values=string, " +
+                         "indices=int32, " +
+                         "ordered=0>",
+                         ["c", "a", "b", "a", "a"] +
+                         ["e", "a", "c", "d", "b", "d"],
+                       ],
+                       [type.to_s, values])
         end
       end
     end
@@ -996,35 +919,23 @@ end
 class TestFileWriter < Test::Unit::TestCase
   include WriterTests
 
-  def setup
-    Dir.mktmpdir do |tmp_dir|
-      path = File.join(tmp_dir, "data.arrow")
-      File.open(path, "wb") do |output|
-        writer = ArrowFormat::FileWriter.new(output)
-        write(writer)
-      end
-      data = File.open(path, "rb", &:read).freeze
-      table = Arrow::Table.load(Arrow::Buffer.new(data), format: :arrow)
-      @type = table.value.data_type
-      @values = table.value.values
-    end
+  def file_extension
+    "arrow"
+  end
+
+  def writer_class
+    ArrowFormat::FileWriter
   end
 end
 
 class TestStreamingWriter < Test::Unit::TestCase
   include WriterTests
 
-  def setup
-    Dir.mktmpdir do |tmp_dir|
-      path = File.join(tmp_dir, "data.arrows")
-      File.open(path, "wb") do |output|
-        writer = ArrowFormat::StreamingWriter.new(output)
-        write(writer)
-      end
-      data = File.open(path, "rb", &:read).freeze
-      table = Arrow::Table.load(Arrow::Buffer.new(data), format: :arrows)
-      @type = table.value.data_type
-      @values = table.value.values
-    end
+  def file_extension
+    "arrows"
+  end
+
+  def writer_class
+    ArrowFormat::StreamingWriter
   end
 end

Reply via email to