This is an automated email from the ASF dual-hosted git repository.
kou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new a272046852 GH-49225: [Ruby] Add support for writing dictionary delta
for primitive dictionary (#49226)
a272046852 is described below
commit a272046852b7c1e0927263a31d019d3d6b8752db
Author: Sutou Kouhei <[email protected]>
AuthorDate: Fri Feb 13 06:44:29 2026 +0900
GH-49225: [Ruby] Add support for writing dictionary delta for primitive
dictionary (#49226)
### Rationale for this change
Nested types and the dictionary type are out of scope for this change.
### What changes are included in this PR?
* Fix `ArrowFormat::Array#slice_offsets_buffer`
* Add `ArrowFormat::DayTimeIntervalArray#element_size`
* `ArrowFormat::Bitmap#each`: Add support for offset
* Add support for chunked dictionaries
* Add support for `Arrow::DictionaryArray#raw_records` with large binary
and large UTF-8 dictionaries
### Are these changes tested?
Yes.
### Are there any user-facing changes?
Yes.
* GitHub Issue: #49225
Authored-by: Sutou Kouhei <[email protected]>
Signed-off-by: Sutou Kouhei <[email protected]>
---
ruby/red-arrow-format/lib/arrow-format/array.rb | 24 +-
ruby/red-arrow-format/lib/arrow-format/bitmap.rb | 11 +-
.../lib/arrow-format/file-reader.rb | 2 +-
ruby/red-arrow-format/lib/arrow-format/readable.rb | 4 +-
.../lib/arrow-format/streaming-pull-reader.rb | 2 +-
.../lib/arrow-format/streaming-writer.rb | 43 +-
ruby/red-arrow-format/lib/arrow-format/type.rb | 4 +-
ruby/red-arrow-format/test/test-reader.rb | 42 +-
ruby/red-arrow-format/test/test-writer.rb | 1845 +++++++++++++-------
ruby/red-arrow/ext/arrow/converters.hpp | 2 +
.../test/raw-records/test-dictionary-array.rb | 20 +
.../red-arrow/test/values/test-dictionary-array.rb | 20 +
12 files changed, 1320 insertions(+), 699 deletions(-)
diff --git a/ruby/red-arrow-format/lib/arrow-format/array.rb b/ruby/red-arrow-format/lib/arrow-format/array.rb
index 87dbd0e0d6..5bc7588f3a 100644
--- a/ruby/red-arrow-format/lib/arrow-format/array.rb
+++ b/ruby/red-arrow-format/lib/arrow-format/array.rb
@@ -117,13 +117,14 @@ module ArrowFormat
def slice_offsets_buffer(id, buffer, buffer_type)
slice_buffer(id, buffer) do
offset_size = IO::Buffer.size_of(buffer_type)
- buffer_offset = offset_size * (@offset - 1)
- first_offset = buffer.get_value(buffer_type, buffer_offset)
+ buffer_offset = offset_size * @offset
+ first_offset = nil
# TODO: Optimize
sliced_buffer = IO::Buffer.new(offset_size * (@size + 1))
buffer.each(buffer_type,
buffer_offset,
@size + 1).with_index do |(_, offset), i|
+ first_offset ||= offset
new_offset = offset - first_offset
sliced_buffer.set_value(buffer_type,
offset_size * i,
@@ -272,6 +273,11 @@ module ArrowFormat
end
apply_validity(values)
end
+
+ private
+ def element_size
+ super * 2
+ end
end
class MonthDayNanoIntervalArray < IntervalArray
@@ -612,11 +618,15 @@ module ArrowFormat
class DictionaryArray < Array
attr_reader :indices_buffer
- attr_reader :dictionary
- def initialize(type, size, validity_buffer, indices_buffer, dictionary)
+ attr_reader :dictionaries
+ def initialize(type,
+ size,
+ validity_buffer,
+ indices_buffer,
+ dictionaries)
super(type, size, validity_buffer)
@indices_buffer = indices_buffer
- @dictionary = dictionary
+ @dictionaries = dictionaries
end
# TODO: Slice support
@@ -629,8 +639,8 @@ module ArrowFormat
def to_a
values = []
- @dictionary.each do |dictionary_chunk|
- values.concat(dictionary_chunk.to_a)
+ @dictionaries.each do |dictionary|
+ values.concat(dictionary.to_a)
end
buffer_type = @type.index_type.buffer_type
offset = IO::Buffer.size_of(buffer_type) * @offset
diff --git a/ruby/red-arrow-format/lib/arrow-format/bitmap.rb b/ruby/red-arrow-format/lib/arrow-format/bitmap.rb
index 17d7db872e..e4a0dc76d3 100644
--- a/ruby/red-arrow-format/lib/arrow-format/bitmap.rb
+++ b/ruby/red-arrow-format/lib/arrow-format/bitmap.rb
@@ -29,20 +29,25 @@ module ArrowFormat
(@buffer.get_value(:U8, i / 8) & (1 << (i % 8))) > 0
end
- # TODO: offset support
def each
return to_enum(__method__) unless block_given?
- n_bytes = @n_values / 8
+ # TODO: Optimize
+ current = -1
+ n_bytes = (@offset + @n_values) / 8
@buffer.each(:U8, 0, n_bytes) do |offset, value|
7.times do |i|
+ current += 1
+ next if current < @offset
yield((value & (1 << (i % 8))) > 0)
end
end
- remained_bits = @n_values % 8
+ remained_bits = (@offset + @n_values) % 8
unless remained_bits.zero?
value = @buffer.get_value(:U8, n_bytes)
remained_bits.times do |i|
+ current += 1
+ next if current < @offset
yield((value & (1 << (i % 8))) > 0)
end
end
diff --git a/ruby/red-arrow-format/lib/arrow-format/file-reader.rb b/ruby/red-arrow-format/lib/arrow-format/file-reader.rb
index 6218fbcf14..03514a3cc2 100644
--- a/ruby/red-arrow-format/lib/arrow-format/file-reader.rb
+++ b/ruby/red-arrow-format/lib/arrow-format/file-reader.rb
@@ -203,7 +203,7 @@ module ArrowFormat
dictionaries
end
- def find_dictionary(id)
+ def find_dictionaries(id)
@dictionaries[id]
end
end
diff --git a/ruby/red-arrow-format/lib/arrow-format/readable.rb b/ruby/red-arrow-format/lib/arrow-format/readable.rb
index 867a54c17b..ff09c6129d 100644
--- a/ruby/red-arrow-format/lib/arrow-format/readable.rb
+++ b/ruby/red-arrow-format/lib/arrow-format/readable.rb
@@ -233,8 +233,8 @@ module ArrowFormat
when DictionaryType
indices_buffer = buffers.shift
indices = body.slice(indices_buffer.offset, indices_buffer.length)
- dictionary = find_dictionary(field.dictionary_id)
- field.type.build_array(length, validity, indices, dictionary)
+ dictionaries = find_dictionaries(field.dictionary_id)
+ field.type.build_array(length, validity, indices, dictionaries)
end
end
end
diff --git a/ruby/red-arrow-format/lib/arrow-format/streaming-pull-reader.rb b/ruby/red-arrow-format/lib/arrow-format/streaming-pull-reader.rb
index ffa4cb5534..98263de77e 100644
--- a/ruby/red-arrow-format/lib/arrow-format/streaming-pull-reader.rb
+++ b/ruby/red-arrow-format/lib/arrow-format/streaming-pull-reader.rb
@@ -231,7 +231,7 @@ module ArrowFormat
end
end
- def find_dictionary(id)
+ def find_dictionaries(id)
@dictionaries[id]
end
diff --git a/ruby/red-arrow-format/lib/arrow-format/streaming-writer.rb b/ruby/red-arrow-format/lib/arrow-format/streaming-writer.rb
index d63016a25b..11f2b4375a 100644
--- a/ruby/red-arrow-format/lib/arrow-format/streaming-writer.rb
+++ b/ruby/red-arrow-format/lib/arrow-format/streaming-writer.rb
@@ -111,27 +111,32 @@ module ArrowFormat
def write_dictionary(id, dictionary_array)
value_type = dictionary_array.type.value_type
- dictionary = dictionary_array.dictionary
+ base_offset = 0
+ dictionary_array.dictionaries.each do |dictionary|
+ written_offset = @written_dictionary_offsets[id] || 0
+ current_base_offset = base_offset
+ next_base_offset = base_offset + dictionary.size
+ base_offset = next_base_offset
+
+ next if next_base_offset <= written_offset
+
+ is_delta = (not written_offset.zero?)
+ if current_base_offset < written_offset
+ dictionary = dictionary.slice(written_offset - current_base_offset)
+ end
- offset = @written_dictionary_offsets[id]
- if offset.nil?
- is_delta = false
- else
- is_delta = true
- dictionary = dictionary.slice(offset)
+ schema = Schema.new([Field.new("dummy", value_type, true, nil)])
+ size = dictionary.size
+ record_batch = RecordBatch.new(schema, size, [dictionary])
+ fb_dictionary_batch = FB::DictionaryBatch::Data.new
+ fb_dictionary_batch.id = id
+ fb_dictionary_batch.data = record_batch.to_flatbuffers
+ fb_dictionary_batch.delta = is_delta
+ write_record_batch_based_message(record_batch,
+ fb_dictionary_batch,
+ @fb_dictionary_blocks)
+ @written_dictionary_offsets[id] = written_offset + dictionary.size
end
-
- schema = Schema.new([Field.new("dummy", value_type, true, nil)])
- size = dictionary.size
- record_batch = RecordBatch.new(schema, size, [dictionary])
- fb_dictionary_batch = FB::DictionaryBatch::Data.new
- fb_dictionary_batch.id = id
- fb_dictionary_batch.data = record_batch.to_flatbuffers
- fb_dictionary_batch.delta = is_delta
- write_record_batch_based_message(record_batch,
- fb_dictionary_batch,
- @fb_dictionary_blocks)
- @written_dictionary_offsets[id] = dictionary_array.dictionary.size
end
def write_message(metadata)
diff --git a/ruby/red-arrow-format/lib/arrow-format/type.rb b/ruby/red-arrow-format/lib/arrow-format/type.rb
index 8d49b3810b..bc2b313285 100644
--- a/ruby/red-arrow-format/lib/arrow-format/type.rb
+++ b/ruby/red-arrow-format/lib/arrow-format/type.rb
@@ -954,12 +954,12 @@ module ArrowFormat
"Dictionary"
end
- def build_array(size, validity_buffer, indices_buffer, dictionary)
+ def build_array(size, validity_buffer, indices_buffer, dictionaries)
DictionaryArray.new(self,
size,
validity_buffer,
indices_buffer,
- dictionary)
+ dictionaries)
end
def build_fb_field(fb_field, field)
diff --git a/ruby/red-arrow-format/test/test-reader.rb b/ruby/red-arrow-format/test/test-reader.rb
index e2e27d3dbc..10a2597f4a 100644
--- a/ruby/red-arrow-format/test/test-reader.rb
+++ b/ruby/red-arrow-format/test/test-reader.rb
@@ -16,6 +16,20 @@
# under the License.
module ReaderTests
+ def read
+ @reader.collect do |record_batch|
+ record_batch.to_h.tap do |hash|
+ hash.each do |key, value|
+ hash[key] = value.to_a
+ end
+ end
+ end
+ end
+
+ def type
+ @type ||= @reader.first.schema.fields[0].type
+ end
+
class << self
def included(base)
base.class_eval do
@@ -901,20 +915,6 @@ class TestFileReader < Test::Unit::TestCase
GC.start
end
end
-
- def read
- @reader.to_a.collect do |record_batch|
- record_batch.to_h.tap do |hash|
- hash.each do |key, value|
- hash[key] = value.to_a
- end
- end
- end
- end
-
- def type
- @type ||= @reader.first.schema.fields[0].type
- end
end
class TestStreamingReader < Test::Unit::TestCase
@@ -933,18 +933,4 @@ class TestStreamingReader < Test::Unit::TestCase
GC.start
end
end
-
- def read
- @reader.collect do |record_batch|
- record_batch.to_h.tap do |hash|
- hash.each do |key, value|
- hash[key] = value.to_a
- end
- end
- end
- end
-
- def type
- @type ||= @reader.first.schema.fields[0].type
- end
end
diff --git a/ruby/red-arrow-format/test/test-writer.rb b/ruby/red-arrow-format/test/test-writer.rb
index 33b3c2db22..3b97d08fc4 100644
--- a/ruby/red-arrow-format/test/test-writer.rb
+++ b/ruby/red-arrow-format/test/test-writer.rb
@@ -15,7 +15,7 @@
# specific language governing permissions and limitations
# under the License.
-module WriterTests
+module WriterHelper
def convert_time_unit(red_arrow_time_unit)
if red_arrow_time_unit.nick == "second"
red_arrow_time_unit.nick.to_sym
@@ -134,7 +134,7 @@ module WriterTests
def convert_buffer(buffer)
return nil if buffer.nil?
- IO::Buffer.for(buffer.data.to_s)
+ IO::Buffer.for(buffer.data.to_s.dup)
end
def convert_array(red_arrow_array)
@@ -192,7 +192,7 @@ module WriterTests
type.build_array(red_arrow_array.size,
validity_buffer,
indices_buffer,
- dictionary)
+ [dictionary])
else
raise "Unsupported array #{red_arrow_array.inspect}"
end
@@ -228,696 +228,1203 @@ module WriterTests
writer = writer_class.new(output)
write(writer, *inputs)
end
+ # pp(read(path)) # debug
data = File.open(path, "rb", &:read).freeze
table = Arrow::Table.load(Arrow::Buffer.new(data), format: :arrow)
[table.value.data_type, table.value.values]
end
end
+end
- class << self
- def included(base)
- base.class_eval do
- def test_null
- array = Arrow::NullArray.new(3)
- type, values = roundtrip(array)
- assert_equal(["null", [nil, nil, nil]],
- [type.to_s, values])
- end
+module WriterTests
+ def test_null
+ array = Arrow::NullArray.new(3)
+ type, values = roundtrip(array)
+ assert_equal(["null", [nil, nil, nil]],
+ [type.to_s, values])
+ end
- def test_boolean
- array = Arrow::BooleanArray.new([true, nil, false])
- type, values = roundtrip(array)
- assert_equal(["bool", [true, nil, false]],
- [type.to_s, values])
- end
+ def test_boolean
+ array = Arrow::BooleanArray.new([true, nil, false])
+ type, values = roundtrip(array)
+ assert_equal(["bool", [true, nil, false]],
+ [type.to_s, values])
+ end
- def test_int8
- array = Arrow::Int8Array.new([-128, nil, 127])
- type, values = roundtrip(array)
- assert_equal(["int8", [-128, nil, 127]],
- [type.to_s, values])
- end
+ def test_int8
+ array = Arrow::Int8Array.new([-128, nil, 127])
+ type, values = roundtrip(array)
+ assert_equal(["int8", [-128, nil, 127]],
+ [type.to_s, values])
+ end
- def test_uint8
- array = Arrow::UInt8Array.new([0, nil, 255])
- type, values = roundtrip(array)
- assert_equal(["uint8", [0, nil, 255]],
- [type.to_s, values])
- end
+ def test_uint8
+ array = Arrow::UInt8Array.new([0, nil, 255])
+ type, values = roundtrip(array)
+ assert_equal(["uint8", [0, nil, 255]],
+ [type.to_s, values])
+ end
- def test_int16
- array = Arrow::Int16Array.new([-32768, nil, 32767])
- type, values = roundtrip(array)
- assert_equal(["int16", [-32768, nil, 32767]],
- [type.to_s, values])
- end
+ def test_int16
+ array = Arrow::Int16Array.new([-32768, nil, 32767])
+ type, values = roundtrip(array)
+ assert_equal(["int16", [-32768, nil, 32767]],
+ [type.to_s, values])
+ end
- def test_uint16
- array = Arrow::UInt16Array.new([0, nil, 65535])
- type, values = roundtrip(array)
- assert_equal(["uint16", [0, nil, 65535]],
- [type.to_s, values])
- end
+ def test_uint16
+ array = Arrow::UInt16Array.new([0, nil, 65535])
+ type, values = roundtrip(array)
+ assert_equal(["uint16", [0, nil, 65535]],
+ [type.to_s, values])
+ end
- def test_int32
- array = Arrow::Int32Array.new([-2147483648, nil, 2147483647])
- type, values = roundtrip(array)
- assert_equal(["int32", [-2147483648, nil, 2147483647]],
- [type.to_s, values])
- end
+ def test_int32
+ array = Arrow::Int32Array.new([-2147483648, nil, 2147483647])
+ type, values = roundtrip(array)
+ assert_equal(["int32", [-2147483648, nil, 2147483647]],
+ [type.to_s, values])
+ end
- def test_uint32
- array = Arrow::UInt32Array.new([0, nil, 4294967295])
- type, values = roundtrip(array)
- assert_equal(["uint32", [0, nil, 4294967295]],
- [type.to_s, values])
- end
+ def test_uint32
+ array = Arrow::UInt32Array.new([0, nil, 4294967295])
+ type, values = roundtrip(array)
+ assert_equal(["uint32", [0, nil, 4294967295]],
+ [type.to_s, values])
+ end
- def test_int64
- array = Arrow::Int64Array.new([
- -9223372036854775808,
- nil,
- 9223372036854775807
- ])
- type, values = roundtrip(array)
- assert_equal([
- "int64",
- [
- -9223372036854775808,
- nil,
- 9223372036854775807
- ],
- ],
- [type.to_s, values])
- end
+ def test_int64
+ array = Arrow::Int64Array.new([
+ -9223372036854775808,
+ nil,
+ 9223372036854775807
+ ])
+ type, values = roundtrip(array)
+ assert_equal([
+ "int64",
+ [
+ -9223372036854775808,
+ nil,
+ 9223372036854775807
+ ],
+ ],
+ [type.to_s, values])
+ end
- def test_uint64
- array = Arrow::UInt64Array.new([0, nil, 18446744073709551615])
- type, values = roundtrip(array)
- assert_equal(["uint64", [0, nil, 18446744073709551615]],
- [type.to_s, values])
- end
+ def test_uint64
+ array = Arrow::UInt64Array.new([0, nil, 18446744073709551615])
+ type, values = roundtrip(array)
+ assert_equal(["uint64", [0, nil, 18446744073709551615]],
+ [type.to_s, values])
+ end
- def test_float32
- array = Arrow::FloatArray.new([-0.5, nil, 0.5])
- type, values = roundtrip(array)
- assert_equal(["float", [-0.5, nil, 0.5]],
- [type.to_s, values])
- end
+ def test_float32
+ array = Arrow::FloatArray.new([-0.5, nil, 0.5])
+ type, values = roundtrip(array)
+ assert_equal(["float", [-0.5, nil, 0.5]],
+ [type.to_s, values])
+ end
- def test_float64
- array = Arrow::DoubleArray.new([-0.5, nil, 0.5])
- type, values = roundtrip(array)
- assert_equal(["double", [-0.5, nil, 0.5]],
- [type.to_s, values])
- end
+ def test_float64
+ array = Arrow::DoubleArray.new([-0.5, nil, 0.5])
+ type, values = roundtrip(array)
+ assert_equal(["double", [-0.5, nil, 0.5]],
+ [type.to_s, values])
+ end
- def test_date32
- date_2017_08_28 = 17406
- date_2025_12_09 = 20431
- array = Arrow::Date32Array.new([
- date_2017_08_28,
- nil,
- date_2025_12_09,
- ])
- type, values = roundtrip(array)
- assert_equal([
- "date32[day]",
- [Date.new(2017, 8, 28), nil, Date.new(2025, 12, 9)],
- ],
- [type.to_s, values])
- end
+ def test_date32
+ date_2017_08_28 = 17406
+ date_2025_12_09 = 20431
+ array = Arrow::Date32Array.new([
+ date_2017_08_28,
+ nil,
+ date_2025_12_09,
+ ])
+ type, values = roundtrip(array)
+ assert_equal([
+ "date32[day]",
+ [Date.new(2017, 8, 28), nil, Date.new(2025, 12, 9)],
+ ],
+ [type.to_s, values])
+ end
- def test_date64
- date_2017_08_28_00_00_00 = 1503878400000
- date_2025_12_10_00_00_00 = 1765324800000
- array = Arrow::Date64Array.new([
- date_2017_08_28_00_00_00,
- nil,
- date_2025_12_10_00_00_00,
- ])
- type, values = roundtrip(array)
- assert_equal([
- "date64[ms]",
- [
- DateTime.new(2017, 8, 28, 0, 0, 0),
- nil,
- DateTime.new(2025, 12, 10, 0, 0, 0),
- ],
- ],
- [type.to_s, values])
- end
+ def test_date64
+ date_2017_08_28_00_00_00 = 1503878400000
+ date_2025_12_10_00_00_00 = 1765324800000
+ array = Arrow::Date64Array.new([
+ date_2017_08_28_00_00_00,
+ nil,
+ date_2025_12_10_00_00_00,
+ ])
+ type, values = roundtrip(array)
+ assert_equal([
+ "date64[ms]",
+ [
+ DateTime.new(2017, 8, 28, 0, 0, 0),
+ nil,
+ DateTime.new(2025, 12, 10, 0, 0, 0),
+ ],
+ ],
+ [type.to_s, values])
+ end
- def test_time32_second
- time_00_00_10 = 10
- time_00_01_10 = 60 + 10
- array = Arrow::Time32Array.new(:second,
- [time_00_00_10, nil, time_00_01_10])
- type, values = roundtrip(array)
- assert_equal([
- "time32[s]",
- [
- Arrow::Time.new(:second, time_00_00_10),
- nil,
- Arrow::Time.new(:second, time_00_01_10),
- ],
- ],
- [type.to_s, values])
- end
+ def test_time32_second
+ time_00_00_10 = 10
+ time_00_01_10 = 60 + 10
+ array = Arrow::Time32Array.new(:second,
+ [time_00_00_10, nil, time_00_01_10])
+ type, values = roundtrip(array)
+ assert_equal([
+ "time32[s]",
+ [
+ Arrow::Time.new(:second, time_00_00_10),
+ nil,
+ Arrow::Time.new(:second, time_00_01_10),
+ ],
+ ],
+ [type.to_s, values])
+ end
- def test_time32_millisecond
- time_00_00_10_000 = 10 * 1000
- time_00_01_10_000 = (60 + 10) * 1000
- array = Arrow::Time32Array.new(:milli,
- [
- time_00_00_10_000,
- nil,
- time_00_01_10_000,
- ])
- type, values = roundtrip(array)
- assert_equal([
- "time32[ms]",
- [
- Arrow::Time.new(:milli, time_00_00_10_000),
- nil,
- Arrow::Time.new(:milli, time_00_01_10_000),
- ],
- ],
- [type.to_s, values])
- end
+ def test_time32_millisecond
+ time_00_00_10_000 = 10 * 1000
+ time_00_01_10_000 = (60 + 10) * 1000
+ array = Arrow::Time32Array.new(:milli,
+ [
+ time_00_00_10_000,
+ nil,
+ time_00_01_10_000,
+ ])
+ type, values = roundtrip(array)
+ assert_equal([
+ "time32[ms]",
+ [
+ Arrow::Time.new(:milli, time_00_00_10_000),
+ nil,
+ Arrow::Time.new(:milli, time_00_01_10_000),
+ ],
+ ],
+ [type.to_s, values])
+ end
- def test_time64_microsecond
- time_00_00_10_000_000 = 10 * 1_000_000
- time_00_01_10_000_000 = (60 + 10) * 1_000_000
- array = Arrow::Time64Array.new(:micro,
- [
- time_00_00_10_000_000,
- nil,
- time_00_01_10_000_000,
- ])
- type, values = roundtrip(array)
- assert_equal([
- "time64[us]",
- [
- Arrow::Time.new(:micro, time_00_00_10_000_000),
- nil,
- Arrow::Time.new(:micro, time_00_01_10_000_000),
- ],
- ],
- [type.to_s, values])
- end
+ def test_time64_microsecond
+ time_00_00_10_000_000 = 10 * 1_000_000
+ time_00_01_10_000_000 = (60 + 10) * 1_000_000
+ array = Arrow::Time64Array.new(:micro,
+ [
+ time_00_00_10_000_000,
+ nil,
+ time_00_01_10_000_000,
+ ])
+ type, values = roundtrip(array)
+ assert_equal([
+ "time64[us]",
+ [
+ Arrow::Time.new(:micro, time_00_00_10_000_000),
+ nil,
+ Arrow::Time.new(:micro, time_00_01_10_000_000),
+ ],
+ ],
+ [type.to_s, values])
+ end
- def test_time64_nanosecond
- time_00_00_10_000_000_000 = 10 * 1_000_000_000
- time_00_01_10_000_000_000 = (60 + 10) * 1_000_000_000
- array = Arrow::Time64Array.new(:nano,
- [
- time_00_00_10_000_000_000,
- nil,
- time_00_01_10_000_000_000,
- ])
- type, values = roundtrip(array)
- assert_equal([
- "time64[ns]",
- [
- Arrow::Time.new(:nano, time_00_00_10_000_000_000),
- nil,
- Arrow::Time.new(:nano, time_00_01_10_000_000_000),
- ],
- ],
- [type.to_s, values])
- end
+ def test_time64_nanosecond
+ time_00_00_10_000_000_000 = 10 * 1_000_000_000
+ time_00_01_10_000_000_000 = (60 + 10) * 1_000_000_000
+ array = Arrow::Time64Array.new(:nano,
+ [
+ time_00_00_10_000_000_000,
+ nil,
+ time_00_01_10_000_000_000,
+ ])
+ type, values = roundtrip(array)
+ assert_equal([
+ "time64[ns]",
+ [
+ Arrow::Time.new(:nano, time_00_00_10_000_000_000),
+ nil,
+ Arrow::Time.new(:nano, time_00_01_10_000_000_000),
+ ],
+ ],
+ [type.to_s, values])
+ end
- def test_timestamp_second
- timestamp_2019_11_17_15_09_11 = 1574003351
- timestamp_2025_12_16_05_33_58 = 1765863238
- array = Arrow::TimestampArray.new(:second,
- [
- timestamp_2019_11_17_15_09_11,
- nil,
- timestamp_2025_12_16_05_33_58,
- ])
- type, values = roundtrip(array)
- assert_equal([
- "timestamp[s]",
- [
- Time.at(timestamp_2019_11_17_15_09_11),
- nil,
- Time.at(timestamp_2025_12_16_05_33_58),
- ],
- ],
- [type.to_s, values])
- end
+ def test_timestamp_second
+ timestamp_2019_11_17_15_09_11 = 1574003351
+ timestamp_2025_12_16_05_33_58 = 1765863238
+ array = Arrow::TimestampArray.new(:second,
+ [
+ timestamp_2019_11_17_15_09_11,
+ nil,
+ timestamp_2025_12_16_05_33_58,
+ ])
+ type, values = roundtrip(array)
+ assert_equal([
+ "timestamp[s]",
+ [
+ Time.at(timestamp_2019_11_17_15_09_11),
+ nil,
+ Time.at(timestamp_2025_12_16_05_33_58),
+ ],
+ ],
+ [type.to_s, values])
+ end
- def test_timestamp_millisecond
- timestamp_2019_11_17_15_09_11 = 1574003351 * 1_000
- timestamp_2025_12_16_05_33_58 = 1765863238 * 1_000
- array = Arrow::TimestampArray.new(:milli,
- [
- timestamp_2019_11_17_15_09_11,
- nil,
- timestamp_2025_12_16_05_33_58,
- ])
- type, values = roundtrip(array)
- assert_equal([
- "timestamp[ms]",
- [
- Time.at(timestamp_2019_11_17_15_09_11 / 1_000),
- nil,
- Time.at(timestamp_2025_12_16_05_33_58 / 1_000),
- ],
- ],
- [type.to_s, values])
- end
+ def test_timestamp_millisecond
+ timestamp_2019_11_17_15_09_11 = 1574003351 * 1_000
+ timestamp_2025_12_16_05_33_58 = 1765863238 * 1_000
+ array = Arrow::TimestampArray.new(:milli,
+ [
+ timestamp_2019_11_17_15_09_11,
+ nil,
+ timestamp_2025_12_16_05_33_58,
+ ])
+ type, values = roundtrip(array)
+ assert_equal([
+ "timestamp[ms]",
+ [
+ Time.at(timestamp_2019_11_17_15_09_11 / 1_000),
+ nil,
+ Time.at(timestamp_2025_12_16_05_33_58 / 1_000),
+ ],
+ ],
+ [type.to_s, values])
+ end
- def test_timestamp_microsecond
- timestamp_2019_11_17_15_09_11 = 1574003351 * 1_000_000
- timestamp_2025_12_16_05_33_58 = 1765863238 * 1_000_000
- array = Arrow::TimestampArray.new(:micro,
- [
- timestamp_2019_11_17_15_09_11,
- nil,
- timestamp_2025_12_16_05_33_58,
- ])
- type, values = roundtrip(array)
- assert_equal([
- "timestamp[us]",
- [
- Time.at(timestamp_2019_11_17_15_09_11 / 1_000_000),
- nil,
- Time.at(timestamp_2025_12_16_05_33_58 / 1_000_000),
- ],
- ],
- [type.to_s, values])
- end
+ def test_timestamp_microsecond
+ timestamp_2019_11_17_15_09_11 = 1574003351 * 1_000_000
+ timestamp_2025_12_16_05_33_58 = 1765863238 * 1_000_000
+ array = Arrow::TimestampArray.new(:micro,
+ [
+ timestamp_2019_11_17_15_09_11,
+ nil,
+ timestamp_2025_12_16_05_33_58,
+ ])
+ type, values = roundtrip(array)
+ assert_equal([
+ "timestamp[us]",
+ [
+ Time.at(timestamp_2019_11_17_15_09_11 / 1_000_000),
+ nil,
+ Time.at(timestamp_2025_12_16_05_33_58 / 1_000_000),
+ ],
+ ],
+ [type.to_s, values])
+ end
- def test_timestamp_nanosecond
- timestamp_2019_11_17_15_09_11 = 1574003351 * 1_000_000_000
- timestamp_2025_12_16_05_33_58 = 1765863238 * 1_000_000_000
- array = Arrow::TimestampArray.new(:nano,
- [
- timestamp_2019_11_17_15_09_11,
- nil,
- timestamp_2025_12_16_05_33_58,
- ])
- type, values = roundtrip(array)
- assert_equal([
- "timestamp[ns]",
- [
- Time.at(timestamp_2019_11_17_15_09_11 / 1_000_000_000),
- nil,
- Time.at(timestamp_2025_12_16_05_33_58 / 1_000_000_000),
- ],
- ],
- [type.to_s, values])
- end
+ def test_timestamp_nanosecond
+ timestamp_2019_11_17_15_09_11 = 1574003351 * 1_000_000_000
+ timestamp_2025_12_16_05_33_58 = 1765863238 * 1_000_000_000
+ array = Arrow::TimestampArray.new(:nano,
+ [
+ timestamp_2019_11_17_15_09_11,
+ nil,
+ timestamp_2025_12_16_05_33_58,
+ ])
+ type, values = roundtrip(array)
+ assert_equal([
+ "timestamp[ns]",
+ [
+ Time.at(timestamp_2019_11_17_15_09_11 / 1_000_000_000),
+ nil,
+ Time.at(timestamp_2025_12_16_05_33_58 / 1_000_000_000),
+ ],
+ ],
+ [type.to_s, values])
+ end
- def test_timestamp_time_zone
- time_zone = "UTC"
- timestamp_2019_11_17_15_09_11 = 1574003351
- timestamp_2025_12_16_05_33_58 = 1765863238
- data_type = Arrow::TimestampDataType.new(:second, time_zone)
- array = Arrow::TimestampArray.new(data_type,
- [
- timestamp_2019_11_17_15_09_11,
- nil,
- timestamp_2025_12_16_05_33_58,
- ])
- type, values = roundtrip(array)
- assert_equal([
- "timestamp[s, tz=#{time_zone}]",
- [
- Time.at(timestamp_2019_11_17_15_09_11),
- nil,
- Time.at(timestamp_2025_12_16_05_33_58),
- ],
- ],
- [type.to_s, values])
- end
+ def test_timestamp_time_zone
+ time_zone = "UTC"
+ timestamp_2019_11_17_15_09_11 = 1574003351
+ timestamp_2025_12_16_05_33_58 = 1765863238
+ data_type = Arrow::TimestampDataType.new(:second, time_zone)
+ array = Arrow::TimestampArray.new(data_type,
+ [
+ timestamp_2019_11_17_15_09_11,
+ nil,
+ timestamp_2025_12_16_05_33_58,
+ ])
+ type, values = roundtrip(array)
+ assert_equal([
+ "timestamp[s, tz=#{time_zone}]",
+ [
+ Time.at(timestamp_2019_11_17_15_09_11),
+ nil,
+ Time.at(timestamp_2025_12_16_05_33_58),
+ ],
+ ],
+ [type.to_s, values])
+ end
- def test_year_month_interval
- array = Arrow::MonthIntervalArray.new([0, nil, 100])
- type, values = roundtrip(array)
- assert_equal(["month_interval", [0, nil, 100]],
- [type.to_s, values])
- end
+ def test_year_month_interval
+ array = Arrow::MonthIntervalArray.new([0, nil, 100])
+ type, values = roundtrip(array)
+ assert_equal(["month_interval", [0, nil, 100]],
+ [type.to_s, values])
+ end
- def test_day_time_interval
- array =
- Arrow::DayTimeIntervalArray.new([
- {day: 1, millisecond: 100},
- nil,
- {day: 3, millisecond: 300},
- ])
- type, values = roundtrip(array)
- assert_equal([
- "day_time_interval",
- [
- {day: 1, millisecond: 100},
- nil,
- {day: 3, millisecond: 300},
- ],
- ],
- [type.to_s, values])
- end
+ def test_day_time_interval
+ array =
+ Arrow::DayTimeIntervalArray.new([
+ {day: 1, millisecond: 100},
+ nil,
+ {day: 3, millisecond: 300},
+ ])
+ type, values = roundtrip(array)
+ assert_equal([
+ "day_time_interval",
+ [
+ {day: 1, millisecond: 100},
+ nil,
+ {day: 3, millisecond: 300},
+ ],
+ ],
+ [type.to_s, values])
+ end
- def test_month_day_nano_interval
- array =
- Arrow::MonthDayNanoIntervalArray.new([
- {
- month: 1,
- day: 1,
- nanosecond: 100,
- },
- nil,
- {
- month: 3,
- day: 3,
- nanosecond: 300,
- },
- ])
- type, values = roundtrip(array)
- assert_equal([
- "month_day_nano_interval",
- [
- {
- month: 1,
- day: 1,
- nanosecond: 100,
- },
- nil,
- {
- month: 3,
- day: 3,
- nanosecond: 300,
- },
- ],
- ],
- [type.to_s, values])
- end
+ def test_month_day_nano_interval
+ array =
+ Arrow::MonthDayNanoIntervalArray.new([
+ {
+ month: 1,
+ day: 1,
+ nanosecond: 100,
+ },
+ nil,
+ {
+ month: 3,
+ day: 3,
+ nanosecond: 300,
+ },
+ ])
+ type, values = roundtrip(array)
+ assert_equal([
+ "month_day_nano_interval",
+ [
+ {
+ month: 1,
+ day: 1,
+ nanosecond: 100,
+ },
+ nil,
+ {
+ month: 3,
+ day: 3,
+ nanosecond: 300,
+ },
+ ],
+ ],
+ [type.to_s, values])
+ end
- def test_duration_second
- array = Arrow::DurationArray.new(:second, [0, nil, 100])
- type, values = roundtrip(array)
- assert_equal(["duration[s]", [0, nil, 100]],
- [type.to_s, values])
- end
+ def test_duration_second
+ array = Arrow::DurationArray.new(:second, [0, nil, 100])
+ type, values = roundtrip(array)
+ assert_equal(["duration[s]", [0, nil, 100]],
+ [type.to_s, values])
+ end
- def test_duration_millisecond
- array = Arrow::DurationArray.new(:milli, [0, nil, 100])
- type, values = roundtrip(array)
- assert_equal(["duration[ms]", [0, nil, 100]],
- [type.to_s, values])
- end
+ def test_duration_millisecond
+ array = Arrow::DurationArray.new(:milli, [0, nil, 100])
+ type, values = roundtrip(array)
+ assert_equal(["duration[ms]", [0, nil, 100]],
+ [type.to_s, values])
+ end
- def test_duration_microsecond
- array = Arrow::DurationArray.new(:micro, [0, nil, 100])
- type, values = roundtrip(array)
- assert_equal(["duration[us]", [0, nil, 100]],
- [type.to_s, values])
- end
+ def test_duration_microsecond
+ array = Arrow::DurationArray.new(:micro, [0, nil, 100])
+ type, values = roundtrip(array)
+ assert_equal(["duration[us]", [0, nil, 100]],
+ [type.to_s, values])
+ end
- def test_duration_nanosecond
- array = Arrow::DurationArray.new(:nano, [0, nil, 100])
- type, values = roundtrip(array)
- assert_equal(["duration[ns]", [0, nil, 100]],
- [type.to_s, values])
- end
+ def test_duration_nanosecond
+ array = Arrow::DurationArray.new(:nano, [0, nil, 100])
+ type, values = roundtrip(array)
+ assert_equal(["duration[ns]", [0, nil, 100]],
+ [type.to_s, values])
+ end
- def test_binary
- array = Arrow::BinaryArray.new(["Hello".b, nil, "World".b])
- type, values = roundtrip(array)
- assert_equal(["binary", ["Hello".b, nil, "World".b]],
- [type.to_s, values])
- end
+ def test_binary
+ array = Arrow::BinaryArray.new(["Hello".b, nil, "World".b])
+ type, values = roundtrip(array)
+ assert_equal(["binary", ["Hello".b, nil, "World".b]],
+ [type.to_s, values])
+ end
- def test_large_binary
- array = Arrow::LargeBinaryArray.new(["Hello".b, nil, "World".b])
- type, values = roundtrip(array)
- assert_equal(["large_binary", ["Hello".b, nil, "World".b]],
- [type.to_s, values])
- end
+ def test_large_binary
+ array = Arrow::LargeBinaryArray.new(["Hello".b, nil, "World".b])
+ type, values = roundtrip(array)
+ assert_equal(["large_binary", ["Hello".b, nil, "World".b]],
+ [type.to_s, values])
+ end
- def test_utf8
- array = Arrow::StringArray.new(["Hello", nil, "World"])
- type, values = roundtrip(array)
- assert_equal(["string", ["Hello", nil, "World"]],
- [type.to_s, values])
- end
+ def test_utf8
+ array = Arrow::StringArray.new(["Hello", nil, "World"])
+ type, values = roundtrip(array)
+ assert_equal(["string", ["Hello", nil, "World"]],
+ [type.to_s, values])
+ end
- def test_large_utf8
- array = Arrow::LargeStringArray.new(["Hello", nil, "World"])
- type, values = roundtrip(array)
- assert_equal(["large_string", ["Hello", nil, "World"]],
- [type.to_s, values])
- end
+ def test_large_utf8
+ array = Arrow::LargeStringArray.new(["Hello", nil, "World"])
+ type, values = roundtrip(array)
+ assert_equal(["large_string", ["Hello", nil, "World"]],
+ [type.to_s, values])
+ end
- def test_fixed_size_binary
- data_type = Arrow::FixedSizeBinaryDataType.new(4)
- array = Arrow::FixedSizeBinaryArray.new(data_type,
- ["0124".b, nil, "abcd".b])
- type, values = roundtrip(array)
- assert_equal(["fixed_size_binary[4]", ["0124".b, nil, "abcd".b]],
- [type.to_s, values])
- end
+ def test_fixed_size_binary
+ data_type = Arrow::FixedSizeBinaryDataType.new(4)
+ array = Arrow::FixedSizeBinaryArray.new(data_type,
+ ["0124".b, nil, "abcd".b])
+ type, values = roundtrip(array)
+ assert_equal(["fixed_size_binary[4]", ["0124".b, nil, "abcd".b]],
+ [type.to_s, values])
+ end
- def test_decimal128
- positive_small = "1.200"
- positive_large = ("1234567890" * 3) + "12345.678"
- negative_small = "-1.200"
- negative_large = "-" + ("1234567890" * 3) + "12345.678"
- array = Arrow::Decimal128Array.new({precision: 38, scale: 3},
- [
- positive_large,
- positive_small,
- nil,
- negative_small,
- negative_large,
- ])
- type, values = roundtrip(array)
- assert_equal([
- "decimal128(38, 3)",
- [
- BigDecimal(positive_large),
- BigDecimal(positive_small),
- nil,
- BigDecimal(negative_small),
- BigDecimal(negative_large),
- ],
- ],
- [type.to_s, values])
- end
+ def test_decimal128
+ positive_small = "1.200"
+ positive_large = ("1234567890" * 3) + "12345.678"
+ negative_small = "-1.200"
+ negative_large = "-" + ("1234567890" * 3) + "12345.678"
+ array = Arrow::Decimal128Array.new({precision: 38, scale: 3},
+ [
+ positive_large,
+ positive_small,
+ nil,
+ negative_small,
+ negative_large,
+ ])
+ type, values = roundtrip(array)
+ assert_equal([
+ "decimal128(38, 3)",
+ [
+ BigDecimal(positive_large),
+ BigDecimal(positive_small),
+ nil,
+ BigDecimal(negative_small),
+ BigDecimal(negative_large),
+ ],
+ ],
+ [type.to_s, values])
+ end
- def test_decimal256
- positive_small = "1.200"
- positive_large = ("1234567890" * 7) + "123.456"
- negative_small = "-1.200"
- negative_large = "-" + ("1234567890" * 7) + "123.456"
- array = Arrow::Decimal256Array.new({precision: 76, scale: 3},
- [
- positive_large,
- positive_small,
- nil,
- negative_small,
- negative_large,
- ])
- type, values = roundtrip(array)
- assert_equal([
- "decimal256(76, 3)",
- [
- BigDecimal(positive_large),
- BigDecimal(positive_small),
- nil,
- BigDecimal(negative_small),
- BigDecimal(negative_large),
- ],
- ],
- [type.to_s, values])
- end
+ def test_decimal256
+ positive_small = "1.200"
+ positive_large = ("1234567890" * 7) + "123.456"
+ negative_small = "-1.200"
+ negative_large = "-" + ("1234567890" * 7) + "123.456"
+ array = Arrow::Decimal256Array.new({precision: 76, scale: 3},
+ [
+ positive_large,
+ positive_small,
+ nil,
+ negative_small,
+ negative_large,
+ ])
+ type, values = roundtrip(array)
+ assert_equal([
+ "decimal256(76, 3)",
+ [
+ BigDecimal(positive_large),
+ BigDecimal(positive_small),
+ nil,
+ BigDecimal(negative_small),
+ BigDecimal(negative_large),
+ ],
+ ],
+ [type.to_s, values])
+ end
- def test_list
- data_type = Arrow::ListDataType.new(name: "count", type: :int8)
- array = Arrow::ListArray.new(data_type,
- [[-128, 127], nil, [-1, 0, 1]])
- type, values = roundtrip(array)
- assert_equal([
- "list<count: int8>",
- [[-128, 127], nil, [-1, 0, 1]],
- ],
- [type.to_s, values])
- end
+ def test_list
+ data_type = Arrow::ListDataType.new(name: "count", type: :int8)
+ array = Arrow::ListArray.new(data_type,
+ [[-128, 127], nil, [-1, 0, 1]])
+ type, values = roundtrip(array)
+ assert_equal([
+ "list<count: int8>",
+ [[-128, 127], nil, [-1, 0, 1]],
+ ],
+ [type.to_s, values])
+ end
- def test_large_lsit
- data_type = Arrow::LargeListDataType.new(name: "count",
- type: :int8)
- array = Arrow::LargeListArray.new(data_type,
- [[-128, 127], nil, [-1, 0, 1]])
- type, values = roundtrip(array)
- assert_equal([
- "large_list<count: int8>",
- [[-128, 127], nil, [-1, 0, 1]],
- ],
- [type.to_s, values])
- end
+ def test_large_list
+ data_type = Arrow::LargeListDataType.new(name: "count",
+ type: :int8)
+ array = Arrow::LargeListArray.new(data_type,
+ [[-128, 127], nil, [-1, 0, 1]])
+ type, values = roundtrip(array)
+ assert_equal([
+ "large_list<count: int8>",
+ [[-128, 127], nil, [-1, 0, 1]],
+ ],
+ [type.to_s, values])
+ end
- def test_map
- data_type = Arrow::MapDataType.new(:string, :int8)
- array = Arrow::MapArray.new(data_type,
- [
- {"a" => -128, "b" => 127},
- nil,
- {"c" => nil},
- ])
- type, values = roundtrip(array)
- assert_equal([
- "map<string, int8>",
- [
- {"a" => -128, "b" => 127},
- nil,
- {"c" => nil},
- ],
- ],
- [type.to_s, values])
- end
+ def test_map
+ data_type = Arrow::MapDataType.new(:string, :int8)
+ array = Arrow::MapArray.new(data_type,
+ [
+ {"a" => -128, "b" => 127},
+ nil,
+ {"c" => nil},
+ ])
+ type, values = roundtrip(array)
+ assert_equal([
+ "map<string, int8>",
+ [
+ {"a" => -128, "b" => 127},
+ nil,
+ {"c" => nil},
+ ],
+ ],
+ [type.to_s, values])
+ end
- def test_struct
- data_type = Arrow::StructDataType.new(count: :int8,
- visible: :boolean)
- array = Arrow::StructArray.new(data_type,
- [[-128, nil], nil, [nil, true]])
- type, values = roundtrip(array)
- assert_equal([
- "struct<count: int8, visible: bool>",
- [
- {"count" => -128, "visible" => nil},
- nil,
- {"count" => nil, "visible" => true},
- ],
- ],
- [type.to_s, values])
- end
+ def test_struct
+ data_type = Arrow::StructDataType.new(count: :int8,
+ visible: :boolean)
+ array = Arrow::StructArray.new(data_type,
+ [[-128, nil], nil, [nil, true]])
+ type, values = roundtrip(array)
+ assert_equal([
+ "struct<count: int8, visible: bool>",
+ [
+ {"count" => -128, "visible" => nil},
+ nil,
+ {"count" => nil, "visible" => true},
+ ],
+ ],
+ [type.to_s, values])
+ end
- def test_dense_union
- fields = [
- Arrow::Field.new("number", :int8),
- Arrow::Field.new("text", :string),
- ]
- type_ids = [11, 13]
- data_type = Arrow::DenseUnionDataType.new(fields, type_ids)
- types = Arrow::Int8Array.new([11, 13, 11, 13, 13])
- value_offsets = Arrow::Int32Array.new([0, 0, 1, 1, 2])
- children = [
- Arrow::Int8Array.new([1, nil]),
- Arrow::StringArray.new(["a", "b", "c"])
- ]
- array = Arrow::DenseUnionArray.new(data_type,
- types,
- value_offsets,
- children)
- type, values = roundtrip(array)
- assert_equal([
- "dense_union<number: int8=11, text: string=13>",
- [1, "a", nil, "b", "c"],
- ],
- [type.to_s, values])
- end
+ def test_dense_union
+ fields = [
+ Arrow::Field.new("number", :int8),
+ Arrow::Field.new("text", :string),
+ ]
+ type_ids = [11, 13]
+ data_type = Arrow::DenseUnionDataType.new(fields, type_ids)
+ types = Arrow::Int8Array.new([11, 13, 11, 13, 13])
+ value_offsets = Arrow::Int32Array.new([0, 0, 1, 1, 2])
+ children = [
+ Arrow::Int8Array.new([1, nil]),
+ Arrow::StringArray.new(["a", "b", "c"])
+ ]
+ array = Arrow::DenseUnionArray.new(data_type,
+ types,
+ value_offsets,
+ children)
+ type, values = roundtrip(array)
+ assert_equal([
+ "dense_union<number: int8=11, text: string=13>",
+ [1, "a", nil, "b", "c"],
+ ],
+ [type.to_s, values])
+ end
- def test_sparse_union
- fields = [
- Arrow::Field.new("number", :int8),
- Arrow::Field.new("text", :string),
- ]
- type_ids = [11, 13]
- data_type = Arrow::SparseUnionDataType.new(fields, type_ids)
- types = Arrow::Int8Array.new([11, 13, 11, 13, 11])
- children = [
- Arrow::Int8Array.new([1, nil, nil, nil, 5]),
- Arrow::StringArray.new([nil, "b", nil, "d", nil])
- ]
- array = Arrow::SparseUnionArray.new(data_type, types, children)
- type, values = roundtrip(array)
- assert_equal([
- "sparse_union<number: int8=11, text: string=13>",
- [1, "b", nil, "d", 5],
- ],
- [type.to_s, values])
- end
+ def test_sparse_union
+ fields = [
+ Arrow::Field.new("number", :int8),
+ Arrow::Field.new("text", :string),
+ ]
+ type_ids = [11, 13]
+ data_type = Arrow::SparseUnionDataType.new(fields, type_ids)
+ types = Arrow::Int8Array.new([11, 13, 11, 13, 11])
+ children = [
+ Arrow::Int8Array.new([1, nil, nil, nil, 5]),
+ Arrow::StringArray.new([nil, "b", nil, "d", nil])
+ ]
+ array = Arrow::SparseUnionArray.new(data_type, types, children)
+ type, values = roundtrip(array)
+ assert_equal([
+ "sparse_union<number: int8=11, text: string=13>",
+ [1, "b", nil, "d", 5],
+ ],
+ [type.to_s, values])
+ end
- def test_dictionary
- values = ["a", "b", "c", nil, "a"]
- string_array = Arrow::StringArray.new(values)
- array = string_array.dictionary_encode
- type, values = roundtrip(array)
- assert_equal([
- "dictionary<values=string, " +
- "indices=int32, " +
- "ordered=0>",
- ["a", "b", "c", nil, "a"],
- ],
- [type.to_s, values])
- end
+ def test_dictionary
+ values = ["a", "b", "c", nil, "a"]
+ string_array = Arrow::StringArray.new(values)
+ array = string_array.dictionary_encode
+ type, values = roundtrip(array)
+ assert_equal([
+ "dictionary<values=string, " +
+ "indices=int32, " +
+ "ordered=0>",
+ ["a", "b", "c", nil, "a"],
+ ],
+ [type.to_s, values])
+ end
+end
- def build_dictionary_delta_schema(value_type)
- index_type = ArrowFormat::Int32Type.singleton
- ordered = false
- type = ArrowFormat::DictionaryType.new(index_type,
- value_type,
- ordered)
- nullable = true
- dictionary_id = 1
- field = ArrowFormat::Field.new("value",
- type,
- nullable,
- dictionary_id)
- ArrowFormat::Schema.new([field])
- end
+module WriterDictionaryDeltaTests
+ def build_schema(value_type)
+ index_type = ArrowFormat::Int32Type.singleton
+ ordered = false
+ type = ArrowFormat::DictionaryType.new(index_type,
+ value_type,
+ ordered)
+ nullable = true
+ dictionary_id = 1
+ field = ArrowFormat::Field.new("value",
+ type,
+ nullable,
+ dictionary_id)
+ ArrowFormat::Schema.new([field])
+ end
- def build_dictionary_array(type, indices, dictionary)
- indices_buffer = IO::Buffer.for(indices.pack("l<*"))
- ArrowFormat::DictionaryArray.new(type,
- indices.size,
- nil,
- indices_buffer,
- dictionary)
- end
+ def build_dictionary_array(type, indices, dictionaries)
+ indices_buffer = IO::Buffer.for(indices.pack("l<*"))
+ ArrowFormat::DictionaryArray.new(type,
+ indices.size,
+ nil,
+ indices_buffer,
+ dictionaries)
+ end
- def test_dictionary_delta_utf8
- value_type = ArrowFormat::UTF8Type.singleton
- schema = build_dictionary_delta_schema(value_type)
- type = schema.fields[0].type
-
- dictionary = convert_array(Arrow::StringArray.new(["a", "b", "c"]))
- # ["c", "a", "b", "a", "a"]
- indices = [2, 0, 1, 0, 0]
- array = build_dictionary_array(type, indices, dictionary)
- record_batch =
- ArrowFormat::RecordBatch.new(schema, array.size, [array])
-
- dictionary_more =
- convert_array(Arrow::StringArray.new(["a", "b", "c", "d", "e"]))
- # ["e", "a", "c", "d", "b", "d"]
- indices = [4, 0, 2, 3, 1, 3]
- array = build_dictionary_array(type, indices, dictionary_more)
- record_batch_delta =
- ArrowFormat::RecordBatch.new(schema, array.size, [array])
-
- type, values = roundtrip(record_batch, record_batch_delta)
- assert_equal([
- "dictionary<values=string, " +
- "indices=int32, " +
- "ordered=0>",
- ["c", "a", "b", "a", "a"] +
- ["e", "a", "c", "d", "b", "d"],
- ],
- [type.to_s, values])
- end
+ def build_record_batches(red_arrow_value_type, values1, values2)
+ value_type = convert_type(red_arrow_value_type)
+ schema = build_schema(value_type)
+ type = schema.fields[0].type
+
+ # The first record batch with new dictionary.
+ raw_dictionary = values1.uniq
+ red_arrow_dictionary =
+ red_arrow_value_type.build_array(raw_dictionary)
+ dictionary = convert_array(red_arrow_dictionary)
+ indices1 = values1.collect do |value|
+ raw_dictionary.index(value)
+ end
+ array1 = build_dictionary_array(type, indices1, [dictionary])
+ record_batch =
+ ArrowFormat::RecordBatch.new(schema, array1.size, [array1])
+
+ if chunked_dictionaries?
+ # The second record batch with the first dictionary and
+ # a delta dictionary.
+ raw_dictionary_delta = (values2.uniq - raw_dictionary)
+ raw_dictionary_more = raw_dictionary + raw_dictionary_delta
+ red_arrow_dictionary_delta =
+ red_arrow_value_type.build_array(raw_dictionary_delta)
+ dictionary_delta = convert_array(red_arrow_dictionary_delta)
+ indices2 = values2.collect do |value|
+ raw_dictionary_more.index(value)
+ end
+ array2 = build_dictionary_array(type,
+ indices2,
+ [dictionary, dictionary_delta])
+ else
+ # The second record batch with the combined dictionary.
+ raw_dictionary_more = raw_dictionary | values2.uniq
+ red_arrow_dictionary_more =
+ red_arrow_value_type.build_array(raw_dictionary_more)
+ dictionary_more = convert_array(red_arrow_dictionary_more)
+ indices2 = values2.collect do |value|
+ raw_dictionary_more.index(value)
end
+ array2 = build_dictionary_array(type,
+ indices2,
+ [dictionary_more])
end
+ record_batch_delta =
+ ArrowFormat::RecordBatch.new(schema, array2.size, [array2])
+
+ [record_batch, record_batch_delta]
+ end
+
+ def roundtrip(value_type, values1, values2)
+ r = build_record_batches(value_type, values1, values2)
+ GC.start
+ super(*r)
+ end
+
+ def test_boolean
+ value_type = Arrow::BooleanDataType.new
+ values1 = [true, true]
+ values2 = [false, true, false]
+ type, values = roundtrip(value_type, values1, values2)
+ assert_equal([
+ "dictionary<values=bool, " +
+ "indices=int32, " +
+ "ordered=0>",
+ values1 + values2,
+ ],
+ [type.to_s, values])
+ end
+
+ def test_int8
+ value_type = Arrow::Int8DataType.new
+ values1 = [-128, 0, -128]
+ values2 = [127, -128, 0, 127]
+ type, values = roundtrip(value_type, values1, values2)
+ assert_equal([
+ "dictionary<values=int8, " +
+ "indices=int32, " +
+ "ordered=0>",
+ values1 + values2,
+ ],
+ [type.to_s, values])
+ end
+
+ def test_uint8
+ value_type = Arrow::UInt8DataType.new
+ values1 = [1, 0, 1]
+ values2 = [255, 0, 1, 255]
+ type, values = roundtrip(value_type, values1, values2)
+ assert_equal([
+ "dictionary<values=uint8, " +
+ "indices=int32, " +
+ "ordered=0>",
+ values1 + values2,
+ ],
+ [type.to_s, values])
+ end
+
+ def test_int16
+ value_type = Arrow::Int16DataType.new
+ values1 = [-32768, 0, -32768]
+ values2 = [32767, -32768, 0, 32767]
+ type, values = roundtrip(value_type, values1, values2)
+ assert_equal([
+ "dictionary<values=int16, " +
+ "indices=int32, " +
+ "ordered=0>",
+ values1 + values2,
+ ],
+ [type.to_s, values])
+ end
+
+ def test_uint16
+ value_type = Arrow::UInt16DataType.new
+ values1 = [1, 0, 1]
+ values2 = [65535, 0, 1, 65535]
+ type, values = roundtrip(value_type, values1, values2)
+ assert_equal([
+ "dictionary<values=uint16, " +
+ "indices=int32, " +
+ "ordered=0>",
+ values1 + values2,
+ ],
+ [type.to_s, values])
+ end
+
+ def test_int32
+ value_type = Arrow::Int32DataType.new
+ values1 = [-2147483648, 0, -2147483648]
+ values2 = [2147483647, -2147483648, 0, 2147483647]
+ type, values = roundtrip(value_type, values1, values2)
+ assert_equal([
+ "dictionary<values=int32, " +
+ "indices=int32, " +
+ "ordered=0>",
+ values1 + values2,
+ ],
+ [type.to_s, values])
+ end
+
+ def test_uint32
+ value_type = Arrow::UInt32DataType.new
+ values1 = [1, 0, 1]
+ values2 = [4294967295, 0, 1, 4294967295]
+ type, values = roundtrip(value_type, values1, values2)
+ assert_equal([
+ "dictionary<values=uint32, " +
+ "indices=int32, " +
+ "ordered=0>",
+ values1 + values2,
+ ],
+ [type.to_s, values])
+ end
+
+ def test_int64
+ value_type = Arrow::Int64DataType.new
+ values1 = [
+ -9223372036854775808,
+ 0,
+ -9223372036854775808,
+ ]
+ values2 = [
+ 9223372036854775807,
+ -9223372036854775808,
+ 0,
+ 9223372036854775807,
+ ]
+ type, values = roundtrip(value_type, values1, values2)
+ assert_equal([
+ "dictionary<values=int64, " +
+ "indices=int32, " +
+ "ordered=0>",
+ values1 + values2,
+ ],
+ [type.to_s, values])
+ end
+
+ def test_uint64
+ value_type = Arrow::UInt64DataType.new
+ values1 = [1, 0, 1]
+ values2 = [
+ 18446744073709551615,
+ 0,
+ 1,
+ 18446744073709551615,
+ ]
+ type, values = roundtrip(value_type, values1, values2)
+ assert_equal([
+ "dictionary<values=uint64, " +
+ "indices=int32, " +
+ "ordered=0>",
+ values1 + values2,
+ ],
+ [type.to_s, values])
+ end
+
+ def test_float32
+ value_type = Arrow::FloatDataType.new
+ values1 = [-0.5, 0.0, -0.5]
+ values2 = [0.5, -0.5, 0.0, 0.5]
+ type, values = roundtrip(value_type, values1, values2)
+ assert_equal([
+ "dictionary<values=float, " +
+ "indices=int32, " +
+ "ordered=0>",
+ values1 + values2,
+ ],
+ [type.to_s, values])
+ end
+
+ def test_float64
+ value_type = Arrow::DoubleDataType.new
+ values1 = [-0.5, 0.0, -0.5]
+ values2 = [0.5, -0.5, 0.0, 0.5]
+ type, values = roundtrip(value_type, values1, values2)
+ assert_equal([
+ "dictionary<values=double, " +
+ "indices=int32, " +
+ "ordered=0>",
+ values1 + values2,
+ ],
+ [type.to_s, values])
+ end
+
+ def test_date32
+ date_2017_08_28 = 17406
+ date_2025_12_09 = 20431
+ value_type = Arrow::Date32DataType.new
+ values1 = [date_2017_08_28, date_2017_08_28]
+ values2 = [date_2025_12_09, date_2017_08_28, date_2025_12_09]
+ type, values = roundtrip(value_type, values1, values2)
+ assert_equal([
+ "dictionary<values=date32[day], " +
+ "indices=int32, " +
+ "ordered=0>",
+ [
+ Date.new(2017, 8, 28),
+ Date.new(2017, 8, 28),
+ Date.new(2025, 12, 9),
+ Date.new(2017, 8, 28),
+ Date.new(2025, 12, 9),
+ ],
+ ],
+ [type.to_s, values])
+ end
+
+ def test_date64
+ date_2017_08_28_00_00_00 = 1503878400000
+ date_2025_12_10_00_00_00 = 1765324800000
+ value_type = Arrow::Date64DataType.new
+ values1 = [date_2017_08_28_00_00_00, date_2017_08_28_00_00_00]
+ values2 = [
+ date_2025_12_10_00_00_00,
+ date_2017_08_28_00_00_00,
+ date_2025_12_10_00_00_00,
+ ]
+ type, values = roundtrip(value_type, values1, values2)
+ assert_equal([
+ "dictionary<values=date64[ms], " +
+ "indices=int32, " +
+ "ordered=0>",
+ [
+ DateTime.new(2017, 8, 28),
+ DateTime.new(2017, 8, 28),
+ DateTime.new(2025, 12, 10),
+ DateTime.new(2017, 8, 28),
+ DateTime.new(2025, 12, 10),
+ ],
+ ],
+ [type.to_s, values])
+ end
+
+ def test_time32
+ time_00_00_10 = 10
+ time_00_01_10 = 60 + 10
+ value_type = Arrow::Time32DataType.new(:second)
+ values1 = [time_00_00_10, time_00_00_10]
+ values2 = [time_00_01_10, time_00_00_10, time_00_01_10]
+ type, values = roundtrip(value_type, values1, values2)
+ assert_equal([
+ "dictionary<values=time32[s], " +
+ "indices=int32, " +
+ "ordered=0>",
+ [
+ Arrow::Time.new(:second, time_00_00_10),
+ Arrow::Time.new(:second, time_00_00_10),
+ Arrow::Time.new(:second, time_00_01_10),
+ Arrow::Time.new(:second, time_00_00_10),
+ Arrow::Time.new(:second, time_00_01_10),
+ ],
+ ],
+ [type.to_s, values])
+ end
+
+ def test_time64
+ time_00_00_10_000_000 = 10 * 1_000_000
+ time_00_01_10_000_000 = (60 + 10) * 1_000_000
+ value_type = Arrow::Time64DataType.new(:micro)
+ values1 = [time_00_00_10_000_000, time_00_00_10_000_000]
+ values2 = [
+ time_00_01_10_000_000,
+ time_00_00_10_000_000,
+ time_00_01_10_000_000,
+ ]
+ type, values = roundtrip(value_type, values1, values2)
+ assert_equal([
+ "dictionary<values=time64[us], " +
+ "indices=int32, " +
+ "ordered=0>",
+ [
+ Arrow::Time.new(:micro, time_00_00_10_000_000),
+ Arrow::Time.new(:micro, time_00_00_10_000_000),
+ Arrow::Time.new(:micro, time_00_01_10_000_000),
+ Arrow::Time.new(:micro, time_00_00_10_000_000),
+ Arrow::Time.new(:micro, time_00_01_10_000_000),
+ ],
+ ],
+ [type.to_s, values])
+ end
+
+ def test_timestamp
+ timestamp_2019_11_17_15_09_11 = 1574003351
+ timestamp_2025_12_16_05_33_58 = 1765863238
+ value_type = Arrow::TimestampDataType.new(:second)
+ values1 = [
+ timestamp_2019_11_17_15_09_11,
+ timestamp_2019_11_17_15_09_11,
+ ]
+ values2 = [
+ timestamp_2025_12_16_05_33_58,
+ timestamp_2019_11_17_15_09_11,
+ timestamp_2025_12_16_05_33_58,
+ ]
+ type, values = roundtrip(value_type, values1, values2)
+ assert_equal([
+ "dictionary<values=timestamp[s], " +
+ "indices=int32, " +
+ "ordered=0>",
+ [
+ Time.at(timestamp_2019_11_17_15_09_11),
+ Time.at(timestamp_2019_11_17_15_09_11),
+ Time.at(timestamp_2025_12_16_05_33_58),
+ Time.at(timestamp_2019_11_17_15_09_11),
+ Time.at(timestamp_2025_12_16_05_33_58),
+ ],
+ ],
+ [type.to_s, values])
+ end
+
+ def test_year_month_interval
+ value_type = Arrow::MonthIntervalDataType.new
+ values1 = [100, 0, 100]
+ values2 = [1000, 100, 0, 1000]
+ type, values = roundtrip(value_type, values1, values2)
+ assert_equal([
+ "dictionary<values=month_interval, " +
+ "indices=int32, " +
+ "ordered=0>",
+ values1 + values2,
+ ],
+ [type.to_s, values])
+ end
+
+ def test_day_time_interval
+ value_type = Arrow::DayTimeIntervalDataType.new
+ values1 = [
+ {day: 1, millisecond: 100},
+ {day: 1, millisecond: 100},
+ ]
+ values2 = [
+ {day: 3, millisecond: 300},
+ {day: 1, millisecond: 100},
+ {day: 3, millisecond: 300},
+ ]
+ type, values = roundtrip(value_type, values1, values2)
+ assert_equal([
+ "dictionary<values=day_time_interval, " +
+ "indices=int32, " +
+ "ordered=0>",
+ values1 + values2,
+ ],
+ [type.to_s, values])
+ end
+
+ def test_month_day_nano_interval
+ value_type = Arrow::MonthDayNanoIntervalDataType.new
+ values1 = [
+ {month: 1, day: 1, nanosecond: 100},
+ {month: 1, day: 1, nanosecond: 100},
+ ]
+ values2 = [
+ {month: 3, day: 3, nanosecond: 300},
+ {month: 1, day: 1, nanosecond: 100},
+ {month: 3, day: 3, nanosecond: 300},
+ ]
+ type, values = roundtrip(value_type, values1, values2)
+ assert_equal([
+ "dictionary<values=month_day_nano_interval, " +
+ "indices=int32, " +
+ "ordered=0>",
+ values1 + values2,
+ ],
+ [type.to_s, values])
+ end
+
+ def test_duration
+ value_type = Arrow::DurationDataType.new(:second)
+ values1 = [100, 0, 100]
+ values2 = [1000, 100, 0, 1000]
+ type, values = roundtrip(value_type, values1, values2)
+ assert_equal([
+ "dictionary<values=duration[s], " +
+ "indices=int32, " +
+ "ordered=0>",
+ values1 + values2,
+ ],
+ [type.to_s, values])
+ end
+
+ def test_binary
+ value_type = Arrow::BinaryDataType.new
+ values1 = ["ab".b, "c".b, "ab".b]
+ values2 = ["c".b, "de".b, "ab".b, "de".b]
+ type, values = roundtrip(value_type, values1, values2)
+ assert_equal([
+ "dictionary<values=binary, " +
+ "indices=int32, " +
+ "ordered=0>",
+ values1 + values2,
+ ],
+ [type.to_s, values])
+ end
+
+ def test_large_binary
+ value_type = Arrow::LargeBinaryDataType.new
+ values1 = ["ab".b, "c".b, "ab".b]
+ values2 = ["c".b, "de".b, "ab".b, "de".b]
+ type, values = roundtrip(value_type, values1, values2)
+ assert_equal([
+ "dictionary<values=large_binary, " +
+ "indices=int32, " +
+ "ordered=0>",
+ values1 + values2,
+ ],
+ [type.to_s, values])
+ end
+
+ def test_utf8
+ value_type = Arrow::StringDataType.new
+ values1 = ["ab", "c", "ab"]
+ values2 = ["c", "de", "ab", "de"]
+ type, values = roundtrip(value_type, values1, values2)
+ assert_equal([
+ "dictionary<values=string, " +
+ "indices=int32, " +
+ "ordered=0>",
+ values1 + values2,
+ ],
+ [type.to_s, values])
+ end
+
+ def test_large_utf8
+ value_type = Arrow::LargeStringDataType.new
+ values1 = ["ab", "c", "ab"]
+ values2 = ["c", "de", "ab", "de"]
+ type, values = roundtrip(value_type, values1, values2)
+ assert_equal([
+ "dictionary<values=large_string, " +
+ "indices=int32, " +
+ "ordered=0>",
+ values1 + values2,
+ ],
+ [type.to_s, values])
+ end
+
+ def test_fixed_size_binary
+ value_type = Arrow::FixedSizeBinaryDataType.new(2)
+ values1 = ["ab".b, "cd".b, "ab".b]
+ values2 = ["ef".b, "cd".b, "ab".b, "ef".b]
+ type, values = roundtrip(value_type, values1, values2)
+ assert_equal([
+ "dictionary<values=fixed_size_binary[2], " +
+ "indices=int32, " +
+ "ordered=0>",
+ values1 + values2,
+ ],
+ [type.to_s, values])
+ end
+
+ def test_decimal128
+ positive_small = "1.200"
+ positive_large = ("1234567890" * 3) + "12345.678"
+ negative_small = "-1.200"
+ negative_large = "-" + ("1234567890" * 3) + "12345.678"
+ value_type = Arrow::Decimal128DataType.new(precision: 38,
+ scale: 3)
+ values1 = [positive_small, negative_small, positive_small]
+ values2 = [
+ positive_large,
+ positive_small,
+ negative_small,
+ positive_large,
+ negative_large,
+ ]
+ type, values = roundtrip(value_type, values1, values2)
+ assert_equal([
+ "dictionary<values=decimal128(38, 3), " +
+ "indices=int32, " +
+ "ordered=0>",
+ (values1 + values2).collect {|v| BigDecimal(v)},
+ ],
+ [type.to_s, values])
+ end
+
+ def test_decimal256
+ positive_small = "1.200"
+ positive_large = ("1234567890" * 7) + "123.456"
+ negative_small = "-1.200"
+ negative_large = "-" + ("1234567890" * 7) + "123.456"
+ value_type = Arrow::Decimal256DataType.new(precision: 76,
+ scale: 3)
+ values1 = [positive_small, negative_small, positive_small]
+ values2 = [
+ positive_large,
+ positive_small,
+ negative_small,
+ positive_large,
+ negative_large,
+ ]
+ type, values = roundtrip(value_type, values1, values2)
+ assert_equal([
+ "dictionary<values=decimal256(76, 3), " +
+ "indices=int32, " +
+ "ordered=0>",
+ (values1 + values2).collect {|v| BigDecimal(v)},
+ ],
+ [type.to_s, values])
end
end
class TestFileWriter < Test::Unit::TestCase
- include WriterTests
+ include WriterHelper
def file_extension
"arrow"
@@ -926,10 +1433,43 @@ class TestFileWriter < Test::Unit::TestCase
def writer_class
ArrowFormat::FileWriter
end
+
+ def read(path)
+ File.open(path, "rb") do |input|
+ reader = ArrowFormat::FileReader.new(input)
+ reader.to_a.collect do |record_batch|
+ record_batch.to_h.tap do |hash|
+ hash.each do |key, value|
+ hash[key] = value.to_a
+ end
+ end
+ end
+ end
+ end
+
+ sub_test_case("Basic") do
+ include WriterTests
+ end
+
+ sub_test_case("Dictionary: delta") do
+ include WriterDictionaryDeltaTests
+
+ def chunked_dictionaries?
+ true
+ end
+ end
+
+ sub_test_case("Dictionary: delta: slice") do
+ include WriterDictionaryDeltaTests
+
+ def chunked_dictionaries?
+ false
+ end
+ end
end
class TestStreamingWriter < Test::Unit::TestCase
- include WriterTests
+ include WriterHelper
def file_extension
"arrows"
@@ -938,4 +1478,37 @@ class TestStreamingWriter < Test::Unit::TestCase
def writer_class
ArrowFormat::StreamingWriter
end
+
+ def read(path)
+ File.open(path, "rb") do |input|
+ reader = ArrowFormat::StreamingReader.new(input)
+ reader.collect do |record_batch|
+ record_batch.to_h.tap do |hash|
+ hash.each do |key, value|
+ hash[key] = value.to_a
+ end
+ end
+ end
+ end
+ end
+
+ sub_test_case("Basic") do
+ include WriterTests
+ end
+
+ sub_test_case("Dictionary: delta") do
+ include WriterDictionaryDeltaTests
+
+ def chunked_dictionaries?
+ true
+ end
+ end
+
+ sub_test_case("Dictionary: delta: slice") do
+ include WriterDictionaryDeltaTests
+
+ def chunked_dictionaries?
+ false
+ end
+ end
end
diff --git a/ruby/red-arrow/ext/arrow/converters.hpp b/ruby/red-arrow/ext/arrow/converters.hpp
index b4838c8f79..099aa91686 100644
--- a/ruby/red-arrow/ext/arrow/converters.hpp
+++ b/ruby/red-arrow/ext/arrow/converters.hpp
@@ -902,7 +902,9 @@ namespace red_arrow {
VISIT(Float)
VISIT(Double)
VISIT(Binary)
+ VISIT(LargeBinary)
VISIT(String)
+ VISIT(LargeString)
VISIT(FixedSizeBinary)
VISIT(Date32)
VISIT(Date64)
diff --git a/ruby/red-arrow/test/raw-records/test-dictionary-array.rb b/ruby/red-arrow/test/raw-records/test-dictionary-array.rb
index 09d472b215..2a4966316a 100644
--- a/ruby/red-arrow/test/raw-records/test-dictionary-array.rb
+++ b/ruby/red-arrow/test/raw-records/test-dictionary-array.rb
@@ -153,6 +153,16 @@ module RawRecordsDictionaryArrayTests
assert_equal(records, actual_records(target))
end
+ def test_large_binary
+ records = [
+ ["\x00".b],
+ [nil],
+ ["\xff".b],
+ ]
+ target = build(Arrow::LargeBinaryArray.new(records.collect(&:first)))
+ assert_equal(records, actual_records(target))
+ end
+
def test_string
records = [
["Ruby"],
@@ -163,6 +173,16 @@ module RawRecordsDictionaryArrayTests
assert_equal(records, actual_records(target))
end
+ def test_large_string
+ records = [
+ ["Ruby"],
+ [nil],
+ ["\u3042"], # U+3042 HIRAGANA LETTER A
+ ]
+ target = build(Arrow::LargeStringArray.new(records.collect(&:first)))
+ assert_equal(records, actual_records(target))
+ end
+
def test_date32
records = [
[Date.new(1960, 1, 1)],
diff --git a/ruby/red-arrow/test/values/test-dictionary-array.rb b/ruby/red-arrow/test/values/test-dictionary-array.rb
index 115656b7d7..f06c0427fc 100644
--- a/ruby/red-arrow/test/values/test-dictionary-array.rb
+++ b/ruby/red-arrow/test/values/test-dictionary-array.rb
@@ -137,6 +137,16 @@ module ValuesDictionaryArrayTests
assert_equal(values, target.values)
end
+ def test_large_binary
+ values = [
+ "\x00".b,
+ nil,
+ "\xff".b,
+ ]
+ target = build(Arrow::LargeBinaryArray.new(values))
+ assert_equal(values, target.values)
+ end
+
def test_string
values = [
"Ruby",
@@ -147,6 +157,16 @@ module ValuesDictionaryArrayTests
assert_equal(values, target.values)
end
+ def test_large_string
+ values = [
+ "Ruby",
+ nil,
+ "\u3042", # U+3042 HIRAGANA LETTER A
+ ]
+ target = build(Arrow::LargeStringArray.new(values))
+ assert_equal(values, target.values)
+ end
+
def test_date32
values = [
Date.new(1960, 1, 1),