This is an automated email from the ASF dual-hosted git repository.
kou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new 0249a2e4ca GH-50225: [Ruby] Move merge implementation to ColumnContainable (#50226)
0249a2e4ca is described below
commit 0249a2e4ca1f159108359cdae8b8f7f2c8eca557
Author: Aaditya Srinivasan <[email protected]>
AuthorDate: Mon Jun 22 14:00:26 2026 +0530
GH-50225: [Ruby] Move merge implementation to ColumnContainable (#50226)
### Rationale for this change
`Arrow::Table#merge` and `Arrow::RecordBatch#merge` had similar
implementations, resulting in duplicated merge logic.
### What changes are included in this PR?
This PR moves the common merge implementation to `Arrow::ColumnContainable`.
Container-specific behavior such as column conversion and creating the
merged container remains implemented in `Arrow::Table` and `Arrow::RecordBatch`.
### Are these changes tested?
Yes.
The existing `RecordBatch` and `Table` test suites were run and passed
successfully.
### Are there any user-facing changes?
No.
* GitHub Issue: #50225
Authored-by: Aaditya Srinivasan <[email protected]>
Signed-off-by: Sutou Kouhei <[email protected]>
---
ruby/red-arrow/lib/arrow/column-containable.rb | 65 ++++++++++++++++++++++++++
ruby/red-arrow/lib/arrow/record-batch.rb | 64 +++----------------------
ruby/red-arrow/lib/arrow/table.rb | 54 ---------------------
3 files changed, 71 insertions(+), 112 deletions(-)
diff --git a/ruby/red-arrow/lib/arrow/column-containable.rb b/ruby/red-arrow/lib/arrow/column-containable.rb
index 32cdb7b372..150c126b0c 100644
--- a/ruby/red-arrow/lib/arrow/column-containable.rb
+++ b/ruby/red-arrow/lib/arrow/column-containable.rb
@@ -152,5 +152,70 @@ module Arrow
def column_names
@column_names ||= columns.collect(&:name)
end
+
+ # Merges columns from the given container or Hash and creates
+ # a new container.
+ #
+ # @param other [Hash, self]
+ # The columns to be merged.
+ #
+ # @return [self]
+ def merge(other)
+ added_columns = {}
+ removed_columns = {}
+
+ case other
+ when Hash
+ other.each do |name, value|
+ name = name.to_s
+ if value
+ added_columns[name] = ensure_raw_column(name, value)
+ else
+ removed_columns[name] = true
+ end
+ end
+ when self.class
+ other.columns.each do |column|
+ name = column.name
+ added_columns[name] = ensure_raw_column(name, column)
+ end
+ else
+ message = "merge target must be Hash or #{self.class}: " +
+ "<#{other.inspect}>: #{inspect}"
+ raise ArgumentError, message
+ end
+
+ new_columns = []
+
+ columns.each do |column|
+ column_name = column.name
+ new_column = added_columns.delete(column_name)
+
+ if new_column
+ new_columns << new_column
+ next
+ end
+
+ next if removed_columns.key?(column_name)
+
+ new_columns << ensure_raw_column(column_name, column)
+ end
+
+ added_columns.each_value do |new_column|
+ new_columns << new_column
+ end
+
+ new_fields = []
+ new_arrays = []
+
+ new_columns.each do |new_column|
+ new_fields << new_column[:field]
+ new_arrays << new_column[:data]
+ end
+
+ merged = self.class.new(new_fields, new_arrays)
+ share_input(merged)
+ merged
+ end
end
end
diff --git a/ruby/red-arrow/lib/arrow/record-batch.rb b/ruby/red-arrow/lib/arrow/record-batch.rb
index 0a2ba359b3..b765edc32e 100644
--- a/ruby/red-arrow/lib/arrow/record-batch.rb
+++ b/ruby/red-arrow/lib/arrow/record-batch.rb
@@ -37,7 +37,12 @@ module Arrow
super(schema, n_rows, values)
when 2
schema, data = args
- RecordBatchBuilder.build(schema, data)
+ schema = Schema.new(schema) unless schema.is_a?(Schema)
+ if !data.empty? and data.all? {|array| array.is_a?(Arrow::Array)}
+ super(schema, data[0].size, data)
+ else
+ RecordBatchBuilder.build(schema, data)
+ end
when 3
super
else
@@ -63,63 +68,6 @@ module Arrow
table
end
- def merge(other)
- added_columns = {}
- removed_columns = {}
-
- case other
- when Hash
- other.each do |name, value|
- name = name.to_s
- if value
- added_columns[name] = ensure_raw_column(name, value)
- else
- removed_columns[name] = true
- end
- end
- when RecordBatch
- other.columns.each do |column|
- name = column.name
- added_columns[name] = ensure_raw_column(name, column)
- end
- else
- message = "merge target must be Hash or Arrow::RecordBatch: " +
- "<#{other.inspect}>: #{inspect}"
- raise ArgumentError, message
- end
-
- new_columns = []
- columns.each do |column|
- column_name = column.name
- new_column = added_columns.delete(column_name)
- if new_column
- new_columns << new_column
- next
- end
- next if removed_columns.key?(column_name)
- new_columns << ensure_raw_column(column_name, column)
- end
-
- added_columns.each_value do |new_column|
- new_columns << new_column
- end
-
- new_fields = []
- new_arrays = []
- new_columns.each do |new_column|
- new_fields << new_column[:field]
- new_arrays << new_column[:data]
- end
-
- record_batch = self.class.new(
- Schema.new(new_fields),
- n_rows,
- new_arrays,
- )
- share_input(record_batch)
- record_batch
- end
-
def respond_to_missing?(name, include_private)
return true if find_column(name)
super
diff --git a/ruby/red-arrow/lib/arrow/table.rb b/ruby/red-arrow/lib/arrow/table.rb
index 0ce5962fce..45d54b7109 100644
--- a/ruby/red-arrow/lib/arrow/table.rb
+++ b/ruby/red-arrow/lib/arrow/table.rb
@@ -354,60 +354,6 @@ module Arrow
sliced_table
end
- # TODO
- #
- # @return [Arrow::Table]
- def merge(other)
- added_columns = {}
- removed_columns = {}
-
- case other
- when Hash
- other.each do |name, value|
- name = name.to_s
- if value
- added_columns[name] = ensure_raw_column(name, value)
- else
- removed_columns[name] = true
- end
- end
- when Table
- added_columns = {}
- other.columns.each do |column|
- name = column.name
- added_columns[name] = ensure_raw_column(name, column)
- end
- else
- message = "merge target must be Hash or Arrow::Table: " +
- "<#{other.inspect}>: #{inspect}"
- raise ArgumentError, message
- end
-
- new_columns = []
- columns.each do |column|
- column_name = column.name
- new_column = added_columns.delete(column_name)
- if new_column
- new_columns << new_column
- next
- end
- next if removed_columns.key?(column_name)
- new_columns << ensure_raw_column(column_name, column)
- end
- added_columns.each do |name, new_column|
- new_columns << new_column
- end
- new_fields = []
- new_arrays = []
- new_columns.each do |new_column|
- new_fields << new_column[:field]
- new_arrays << new_column[:data]
- end
- table = self.class.new(new_fields, new_arrays)
- share_input(table)
- table
- end
-
alias_method :remove_column_raw, :remove_column
def remove_column(name_or_index)
case name_or_index