This is an automated email from the ASF dual-hosted git repository.
kou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new 07c1c44967 GH-48945: [Ruby] Add support for writing large binary array
(#48946)
07c1c44967 is described below
commit 07c1c44967c564bb399ff1e7ef5341e8bedb279e
Author: Sutou Kouhei <[email protected]>
AuthorDate: Fri Jan 23 12:11:17 2026 +0900
GH-48945: [Ruby] Add support for writing large binary array (#48946)
### Rationale for this change
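The large binary array is the large variant of the binary array, so it should be supported in the same places where the binary array is already supported.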
### What changes are included in this PR?
* Add `ArrowFormat::LargeBinaryType#to_flatbuffers`
* Add support for `Arrow::LargeBinaryArray#values`
* Add support for `Arrow::LargeBinaryArray` in
`Arrow::{RecordBatch,Table}#raw_records`
* Add support for `Arrow::LargeBinaryArray` in
`Arrow::{RecordBatch,Table}#each_record`
### Are these changes tested?
Yes.
### Are there any user-facing changes?
Yes.
* GitHub Issue: #48945
Authored-by: Sutou Kouhei <[email protected]>
Signed-off-by: Sutou Kouhei <[email protected]>
---
ruby/red-arrow-format/lib/arrow-format/type.rb | 4 ++++
ruby/red-arrow-format/test/test-writer.rb | 13 +++++++++++++
ruby/red-arrow/ext/arrow/converters.hpp | 10 +++++++++-
ruby/red-arrow/ext/arrow/raw-records.cpp | 2 ++
ruby/red-arrow/ext/arrow/values.cpp | 1 +
ruby/red-arrow/test/raw-records/test-basic-arrays.rb | 10 ++++++++++
ruby/red-arrow/test/values/test-basic-arrays.rb | 12 +++++++++++-
7 files changed, 50 insertions(+), 2 deletions(-)
diff --git a/ruby/red-arrow-format/lib/arrow-format/type.rb
b/ruby/red-arrow-format/lib/arrow-format/type.rb
index 7e863b0517..c648e5b631 100644
--- a/ruby/red-arrow-format/lib/arrow-format/type.rb
+++ b/ruby/red-arrow-format/lib/arrow-format/type.rb
@@ -511,6 +511,10 @@ module ArrowFormat
offsets_buffer,
values_buffer)
end
+
+ def to_flatbuffers
+ FB::LargeBinary::Data.new
+ end
end
class UTF8Type < VariableSizeBinaryType
diff --git a/ruby/red-arrow-format/test/test-writer.rb
b/ruby/red-arrow-format/test/test-writer.rb
index 183329336e..24a49b3777 100644
--- a/ruby/red-arrow-format/test/test-writer.rb
+++ b/ruby/red-arrow-format/test/test-writer.rb
@@ -44,6 +44,8 @@ module WriterTests
ArrowFormat::Float64Type.singleton
when Arrow::BinaryDataType
ArrowFormat::BinaryType.singleton
+ when Arrow::LargeBinaryDataType
+ ArrowFormat::LargeBinaryType.singleton
when Arrow::StringDataType
ArrowFormat::UTF8Type.singleton
else
@@ -229,6 +231,17 @@ module WriterTests
end
end
+ sub_test_case("LargeBinary") do
+ def build_array
+ Arrow::LargeBinaryArray.new(["Hello".b, nil, "World".b])
+ end
+
+ def test_write
+ assert_equal(["Hello".b, nil, "World".b],
+ @values)
+ end
+ end
+
sub_test_case("String") do
def build_array
Arrow::StringArray.new(["Hello", nil, "World"])
diff --git a/ruby/red-arrow/ext/arrow/converters.hpp
b/ruby/red-arrow/ext/arrow/converters.hpp
index 1689a6805b..9525700eba 100644
--- a/ruby/red-arrow/ext/arrow/converters.hpp
+++ b/ruby/red-arrow/ext/arrow/converters.hpp
@@ -153,7 +153,15 @@ namespace red_arrow {
const int64_t i) {
int32_t length;
const auto value = array.GetValue(i, &length);
- // TODO: encoding support
+ return rb_enc_str_new(reinterpret_cast<const char*>(value),
+ length,
+ rb_ascii8bit_encoding());
+ }
+
+ inline VALUE convert(const arrow::LargeBinaryArray& array,
+ const int64_t i) {
+ int64_t length;
+ const auto value = array.GetValue(i, &length);
return rb_enc_str_new(reinterpret_cast<const char*>(value),
length,
rb_ascii8bit_encoding());
diff --git a/ruby/red-arrow/ext/arrow/raw-records.cpp
b/ruby/red-arrow/ext/arrow/raw-records.cpp
index bbe421971d..25a95379ef 100644
--- a/ruby/red-arrow/ext/arrow/raw-records.cpp
+++ b/ruby/red-arrow/ext/arrow/raw-records.cpp
@@ -88,6 +88,7 @@ namespace red_arrow {
VISIT(Float)
VISIT(Double)
VISIT(Binary)
+ VISIT(LargeBinary)
VISIT(String)
VISIT(FixedSizeBinary)
VISIT(Date32)
@@ -224,6 +225,7 @@ namespace red_arrow {
VISIT(Float)
VISIT(Double)
VISIT(Binary)
+ VISIT(LargeBinary)
VISIT(String)
VISIT(FixedSizeBinary)
VISIT(Date32)
diff --git a/ruby/red-arrow/ext/arrow/values.cpp
b/ruby/red-arrow/ext/arrow/values.cpp
index cd92e04d56..783cdb3d7d 100644
--- a/ruby/red-arrow/ext/arrow/values.cpp
+++ b/ruby/red-arrow/ext/arrow/values.cpp
@@ -69,6 +69,7 @@ namespace red_arrow {
VISIT(Float)
VISIT(Double)
VISIT(Binary)
+ VISIT(LargeBinary)
VISIT(String)
VISIT(FixedSizeBinary)
VISIT(Date32)
diff --git a/ruby/red-arrow/test/raw-records/test-basic-arrays.rb
b/ruby/red-arrow/test/raw-records/test-basic-arrays.rb
index cf4fdde401..f09b2e8b71 100644
--- a/ruby/red-arrow/test/raw-records/test-basic-arrays.rb
+++ b/ruby/red-arrow/test/raw-records/test-basic-arrays.rb
@@ -157,6 +157,16 @@ module RawRecordsBasicArraysTests
assert_equal(records, actual_records(target))
end
+ def test_large_binary
+ records = [
+ ["\x00".b],
+ [nil],
+ ["\xff".b],
+ ]
+ target = build({column: :large_binary}, records)
+ assert_equal(records, actual_records(target))
+ end
+
def test_string
records = [
["Ruby"],
diff --git a/ruby/red-arrow/test/values/test-basic-arrays.rb
b/ruby/red-arrow/test/values/test-basic-arrays.rb
index ae469d1bf0..ed96a61bd0 100644
--- a/ruby/red-arrow/test/values/test-basic-arrays.rb
+++ b/ruby/red-arrow/test/values/test-basic-arrays.rb
@@ -147,7 +147,17 @@ module ValuesBasicArraysTests
assert_equal(values, target.values)
end
- def test_tring
+ def test_large_binary
+ values = [
+ "\x00".b,
+ nil,
+ "\xff".b,
+ ]
+ target = build(Arrow::LargeBinaryArray.new(values))
+ assert_equal(values, target.values)
+ end
+
+ def test_string
values = [
"Ruby",
nil,