This is an automated email from the ASF dual-hosted git repository.
kou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new b85f20bdb1 GH-48992: [Ruby] Add support for writing large UTF-8 array
(#48993)
b85f20bdb1 is described below
commit b85f20bdb19d5e7dce0987ba844424ed4a0e47f2
Author: Sutou Kouhei <[email protected]>
AuthorDate: Tue Jan 27 20:02:36 2026 +0900
GH-48992: [Ruby] Add support for writing large UTF-8 array (#48993)
### Rationale for this change
Large UTF-8 is the large variant of the UTF-8 array type: it uses 64-bit offsets instead of 32-bit ones.
### What changes are included in this PR?
* Add `ArrowFormat::LargeUTF8Type#to_flatbuffers`
* Add support for large UTF-8 arrays to `#values` and `#raw_records`
### Are these changes tested?
Yes.
### Are there any user-facing changes?
Yes.
* GitHub Issue: #48992
Authored-by: Sutou Kouhei <[email protected]>
Signed-off-by: Sutou Kouhei <[email protected]>
---
ruby/red-arrow-format/lib/arrow-format/type.rb | 4 ++++
ruby/red-arrow-format/test/test-writer.rb | 13 +++++++++++++
ruby/red-arrow/ext/arrow/converters.hpp | 8 ++++++++
ruby/red-arrow/ext/arrow/raw-records.cpp | 2 ++
ruby/red-arrow/ext/arrow/values.cpp | 1 +
ruby/red-arrow/test/raw-records/test-basic-arrays.rb | 10 ++++++++++
ruby/red-arrow/test/values/test-basic-arrays.rb | 10 ++++++++++
7 files changed, 48 insertions(+)
diff --git a/ruby/red-arrow-format/lib/arrow-format/type.rb
b/ruby/red-arrow-format/lib/arrow-format/type.rb
index b3b5bf7aba..a114ef225b 100644
--- a/ruby/red-arrow-format/lib/arrow-format/type.rb
+++ b/ruby/red-arrow-format/lib/arrow-format/type.rb
@@ -574,6 +574,10 @@ module ArrowFormat
offsets_buffer,
values_buffer)
end
+
+ def to_flatbuffers
+ FB::LargeUtf8::Data.new
+ end
end
class FixedSizeBinaryType < Type
diff --git a/ruby/red-arrow-format/test/test-writer.rb
b/ruby/red-arrow-format/test/test-writer.rb
index 31c2bef299..6eb1273b7a 100644
--- a/ruby/red-arrow-format/test/test-writer.rb
+++ b/ruby/red-arrow-format/test/test-writer.rb
@@ -52,6 +52,8 @@ module WriterTests
ArrowFormat::LargeBinaryType.singleton
when Arrow::StringDataType
ArrowFormat::UTF8Type.singleton
+ when Arrow::LargeStringDataType
+ ArrowFormat::LargeUTF8Type.singleton
else
raise "Unsupported type: #{red_arrow_type.inspect}"
end
@@ -298,6 +300,17 @@ module WriterTests
@values)
end
end
+
+ sub_test_case("LargeString") do
+ def build_array
+ Arrow::LargeStringArray.new(["Hello", nil, "World"])
+ end
+
+ def test_write
+ assert_equal(["Hello", nil, "World"],
+ @values)
+ end
+ end
end
end
end
diff --git a/ruby/red-arrow/ext/arrow/converters.hpp
b/ruby/red-arrow/ext/arrow/converters.hpp
index 9525700eba..6a1ceb20b8 100644
--- a/ruby/red-arrow/ext/arrow/converters.hpp
+++ b/ruby/red-arrow/ext/arrow/converters.hpp
@@ -175,6 +175,14 @@ namespace red_arrow {
length);
}
+ inline VALUE convert(const arrow::LargeStringArray& array,
+ const int64_t i) {
+ int64_t length;
+ const auto value = array.GetValue(i, &length);
+ return rb_utf8_str_new(reinterpret_cast<const char*>(value),
+ length);
+ }
+
inline VALUE convert(const arrow::FixedSizeBinaryArray& array,
const int64_t i) {
return rb_enc_str_new(reinterpret_cast<const char*>(array.Value(i)),
diff --git a/ruby/red-arrow/ext/arrow/raw-records.cpp
b/ruby/red-arrow/ext/arrow/raw-records.cpp
index 25a95379ef..67f1dab13e 100644
--- a/ruby/red-arrow/ext/arrow/raw-records.cpp
+++ b/ruby/red-arrow/ext/arrow/raw-records.cpp
@@ -90,6 +90,7 @@ namespace red_arrow {
VISIT(Binary)
VISIT(LargeBinary)
VISIT(String)
+ VISIT(LargeString)
VISIT(FixedSizeBinary)
VISIT(Date32)
VISIT(Date64)
@@ -227,6 +228,7 @@ namespace red_arrow {
VISIT(Binary)
VISIT(LargeBinary)
VISIT(String)
+ VISIT(LargeString)
VISIT(FixedSizeBinary)
VISIT(Date32)
VISIT(Date64)
diff --git a/ruby/red-arrow/ext/arrow/values.cpp
b/ruby/red-arrow/ext/arrow/values.cpp
index 783cdb3d7d..9a26baf1d5 100644
--- a/ruby/red-arrow/ext/arrow/values.cpp
+++ b/ruby/red-arrow/ext/arrow/values.cpp
@@ -71,6 +71,7 @@ namespace red_arrow {
VISIT(Binary)
VISIT(LargeBinary)
VISIT(String)
+ VISIT(LargeString)
VISIT(FixedSizeBinary)
VISIT(Date32)
VISIT(Date64)
diff --git a/ruby/red-arrow/test/raw-records/test-basic-arrays.rb
b/ruby/red-arrow/test/raw-records/test-basic-arrays.rb
index f09b2e8b71..1c21a493c5 100644
--- a/ruby/red-arrow/test/raw-records/test-basic-arrays.rb
+++ b/ruby/red-arrow/test/raw-records/test-basic-arrays.rb
@@ -177,6 +177,16 @@ module RawRecordsBasicArraysTests
assert_equal(records, actual_records(target))
end
+ def test_large_string
+ records = [
+ ["Ruby"],
+ [nil],
+ ["\u3042"], # U+3042 HIRAGANA LETTER A
+ ]
+ target = build({column: :large_string}, records)
+ assert_equal(records, actual_records(target))
+ end
+
def test_date32
records = [
[Date.new(1960, 1, 1)],
diff --git a/ruby/red-arrow/test/values/test-basic-arrays.rb
b/ruby/red-arrow/test/values/test-basic-arrays.rb
index ed96a61bd0..ddaaa3db64 100644
--- a/ruby/red-arrow/test/values/test-basic-arrays.rb
+++ b/ruby/red-arrow/test/values/test-basic-arrays.rb
@@ -167,6 +167,16 @@ module ValuesBasicArraysTests
assert_equal(values, target.values)
end
+ def test_large_string
+ values = [
+ "Ruby",
+ nil,
+ "\u3042", # U+3042 HIRAGANA LETTER A
+ ]
+ target = build(Arrow::LargeStringArray.new(values))
+ assert_equal(values, target.values)
+ end
+
def test_date32
values = [
Date.new(1960, 1, 1),