This is an automated email from the ASF dual-hosted git repository.

kou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
     new b85f20bdb1 GH-48992: [Ruby] Add support for writing large UTF-8 array (#48993)
b85f20bdb1 is described below

commit b85f20bdb19d5e7dce0987ba844424ed4a0e47f2
Author: Sutou Kouhei <[email protected]>
AuthorDate: Tue Jan 27 20:02:36 2026 +0900

    GH-48992: [Ruby] Add support for writing large UTF-8 array (#48993)
    
    ### Rationale for this change
    
    It's the large variant of the UTF-8 array.
    
    ### What changes are included in this PR?
    
    * Add `ArrowFormat::LargeUTF8Type#to_flatbuffers`
    * Add support for large UTF-8 arrays in `#values` and `#raw_records`
    
    ### Are these changes tested?
    
    Yes.
    
    ### Are there any user-facing changes?
    
    Yes.
    * GitHub Issue: #48992
    
    Authored-by: Sutou Kouhei <[email protected]>
    Signed-off-by: Sutou Kouhei <[email protected]>
---
 ruby/red-arrow-format/lib/arrow-format/type.rb       |  4 ++++
 ruby/red-arrow-format/test/test-writer.rb            | 13 +++++++++++++
 ruby/red-arrow/ext/arrow/converters.hpp              |  8 ++++++++
 ruby/red-arrow/ext/arrow/raw-records.cpp             |  2 ++
 ruby/red-arrow/ext/arrow/values.cpp                  |  1 +
 ruby/red-arrow/test/raw-records/test-basic-arrays.rb | 10 ++++++++++
 ruby/red-arrow/test/values/test-basic-arrays.rb      | 10 ++++++++++
 7 files changed, 48 insertions(+)

diff --git a/ruby/red-arrow-format/lib/arrow-format/type.rb b/ruby/red-arrow-format/lib/arrow-format/type.rb
index b3b5bf7aba..a114ef225b 100644
--- a/ruby/red-arrow-format/lib/arrow-format/type.rb
+++ b/ruby/red-arrow-format/lib/arrow-format/type.rb
@@ -574,6 +574,10 @@ module ArrowFormat
                          offsets_buffer,
                          values_buffer)
     end
+
+    def to_flatbuffers
+      FB::LargeUtf8::Data.new
+    end
   end
 
   class FixedSizeBinaryType < Type
diff --git a/ruby/red-arrow-format/test/test-writer.rb b/ruby/red-arrow-format/test/test-writer.rb
index 31c2bef299..6eb1273b7a 100644
--- a/ruby/red-arrow-format/test/test-writer.rb
+++ b/ruby/red-arrow-format/test/test-writer.rb
@@ -52,6 +52,8 @@ module WriterTests
       ArrowFormat::LargeBinaryType.singleton
     when Arrow::StringDataType
       ArrowFormat::UTF8Type.singleton
+    when Arrow::LargeStringDataType
+      ArrowFormat::LargeUTF8Type.singleton
     else
       raise "Unsupported type: #{red_arrow_type.inspect}"
     end
@@ -298,6 +300,17 @@ module WriterTests
                          @values)
           end
         end
+
+        sub_test_case("LargeString") do
+          def build_array
+            Arrow::LargeStringArray.new(["Hello", nil, "World"])
+          end
+
+          def test_write
+            assert_equal(["Hello", nil, "World"],
+                         @values)
+          end
+        end
       end
     end
   end
diff --git a/ruby/red-arrow/ext/arrow/converters.hpp b/ruby/red-arrow/ext/arrow/converters.hpp
index 9525700eba..6a1ceb20b8 100644
--- a/ruby/red-arrow/ext/arrow/converters.hpp
+++ b/ruby/red-arrow/ext/arrow/converters.hpp
@@ -175,6 +175,14 @@ namespace red_arrow {
                              length);
     }
 
+    inline VALUE convert(const arrow::LargeStringArray& array,
+                         const int64_t i) {
+      int64_t length;
+      const auto value = array.GetValue(i, &length);
+      return rb_utf8_str_new(reinterpret_cast<const char*>(value),
+                             length);
+    }
+
     inline VALUE convert(const arrow::FixedSizeBinaryArray& array,
                          const int64_t i) {
       return rb_enc_str_new(reinterpret_cast<const char*>(array.Value(i)),
diff --git a/ruby/red-arrow/ext/arrow/raw-records.cpp b/ruby/red-arrow/ext/arrow/raw-records.cpp
index 25a95379ef..67f1dab13e 100644
--- a/ruby/red-arrow/ext/arrow/raw-records.cpp
+++ b/ruby/red-arrow/ext/arrow/raw-records.cpp
@@ -90,6 +90,7 @@ namespace red_arrow {
       VISIT(Binary)
       VISIT(LargeBinary)
       VISIT(String)
+      VISIT(LargeString)
       VISIT(FixedSizeBinary)
       VISIT(Date32)
       VISIT(Date64)
@@ -227,6 +228,7 @@ namespace red_arrow {
       VISIT(Binary)
       VISIT(LargeBinary)
       VISIT(String)
+      VISIT(LargeString)
       VISIT(FixedSizeBinary)
       VISIT(Date32)
       VISIT(Date64)
diff --git a/ruby/red-arrow/ext/arrow/values.cpp b/ruby/red-arrow/ext/arrow/values.cpp
index 783cdb3d7d..9a26baf1d5 100644
--- a/ruby/red-arrow/ext/arrow/values.cpp
+++ b/ruby/red-arrow/ext/arrow/values.cpp
@@ -71,6 +71,7 @@ namespace red_arrow {
       VISIT(Binary)
       VISIT(LargeBinary)
       VISIT(String)
+      VISIT(LargeString)
       VISIT(FixedSizeBinary)
       VISIT(Date32)
       VISIT(Date64)
diff --git a/ruby/red-arrow/test/raw-records/test-basic-arrays.rb b/ruby/red-arrow/test/raw-records/test-basic-arrays.rb
index f09b2e8b71..1c21a493c5 100644
--- a/ruby/red-arrow/test/raw-records/test-basic-arrays.rb
+++ b/ruby/red-arrow/test/raw-records/test-basic-arrays.rb
@@ -177,6 +177,16 @@ module RawRecordsBasicArraysTests
     assert_equal(records, actual_records(target))
   end
 
+  def test_large_string
+    records = [
+      ["Ruby"],
+      [nil],
+      ["\u3042"], # U+3042 HIRAGANA LETTER A
+    ]
+    target = build({column: :large_string}, records)
+    assert_equal(records, actual_records(target))
+  end
+
   def test_date32
     records = [
       [Date.new(1960, 1, 1)],
diff --git a/ruby/red-arrow/test/values/test-basic-arrays.rb b/ruby/red-arrow/test/values/test-basic-arrays.rb
index ed96a61bd0..ddaaa3db64 100644
--- a/ruby/red-arrow/test/values/test-basic-arrays.rb
+++ b/ruby/red-arrow/test/values/test-basic-arrays.rb
@@ -167,6 +167,16 @@ module ValuesBasicArraysTests
     assert_equal(values, target.values)
   end
 
+  def test_large_string
+    values = [
+      "Ruby",
+      nil,
+      "\u3042", # U+3042 HIRAGANA LETTER A
+    ]
+    target = build(Arrow::LargeStringArray.new(values))
+    assert_equal(values, target.values)
+  end
+
   def test_date32
     values = [
       Date.new(1960, 1, 1),

Reply via email to