This is an automated email from the ASF dual-hosted git repository.

kou pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new ecfb807  ARROW-5155: [GLib][Ruby] Add support for building union arrays from data type
ecfb807 is described below

commit ecfb807458bfe909ecc8940bd840fc9c6169dd51
Author: Kenta Murata <m...@mrkn.jp>
AuthorDate: Thu Apr 25 15:36:07 2019 +0900

    ARROW-5155: [GLib][Ruby] Add support for building union arrays from data type
    
    This is separated from #3723.
    This should be merged after #3723.
    
    Author: Kenta Murata <m...@mrkn.jp>
    Author: Kouhei Sutou <k...@clear-code.com>
    
    Closes #4127 from mrkn/glib_ruby_make_union_array_with_field_names and squashes the following commits:
    
    e6255567 <Kouhei Sutou> Fix test data
    f82ac3d1 <Kenta Murata>  Fix test cases
    d550dc97 <Kenta Murata>  Fix comment
    f1bfa07b <Kenta Murata>  Stop copying a type_code vector
    606a04c1 <Kenta Murata>  Use new constructors of union arrays
    5ad55722 <Kenta Murata>  Add garrow_dense_union_array_new_data_type
    c8793d5c <Kenta Murata>  Add garrow_sparse_union_array_new_data_type
---
 c_glib/arrow-glib/composite-array.cpp              | 97 ++++++++++++++++++++++
 c_glib/arrow-glib/composite-array.h                | 11 +++
 c_glib/test/test-dense-union-array.rb              | 90 ++++++++++++++------
 c_glib/test/test-sparse-union-array.rb             | 87 +++++++++++++------
 .../record-batch/test-dense-union-array.rb         |  8 +-
 .../record-batch/test-sparse-union-array.rb        |  7 +-
 6 files changed, 238 insertions(+), 62 deletions(-)

diff --git a/c_glib/arrow-glib/composite-array.cpp b/c_glib/arrow-glib/composite-array.cpp
index b202fb4..4fba813 100644
--- a/c_glib/arrow-glib/composite-array.cpp
+++ b/c_glib/arrow-glib/composite-array.cpp
@@ -366,6 +366,53 @@ garrow_sparse_union_array_new(GArrowInt8Array *type_ids,
   }
 }
 
+/**
+ * garrow_sparse_union_array_new_data_type:
+ * @data_type: The data type for the sparse union array.
+ * @type_ids: The field type IDs for each value as #GArrowInt8Array.
+ * @fields: (element-type GArrowArray): The arrays for each field
+ *   as #GList of #GArrowArray.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * Returns: (nullable): A newly created #GArrowSparseUnionArray
+ *   or %NULL on error.
+ *
+ * Since: 0.14.0
+ */
+GArrowSparseUnionArray *
+garrow_sparse_union_array_new_data_type(GArrowSparseUnionDataType *data_type,
+                                        GArrowInt8Array *type_ids,
+                                        GList *fields,
+                                        GError **error)
+{
+  auto arrow_data_type = garrow_data_type_get_raw(GARROW_DATA_TYPE(data_type));
+  auto arrow_union_data_type =
+    std::static_pointer_cast<arrow::UnionType>(arrow_data_type);
+  std::vector<std::string> arrow_field_names;
+  for (const auto &arrow_field : arrow_union_data_type->children()) {
+    arrow_field_names.push_back(arrow_field->name());
+  }
+  auto arrow_type_ids = garrow_array_get_raw(GARROW_ARRAY(type_ids));
+  std::vector<std::shared_ptr<arrow::Array>> arrow_fields;
+  for (auto node = fields; node; node = node->next) {
+    auto *field = GARROW_ARRAY(node->data);
+    arrow_fields.push_back(garrow_array_get_raw(field));
+  }
+  std::shared_ptr<arrow::Array> arrow_union_array;
+  auto status = arrow::UnionArray::MakeSparse(*arrow_type_ids,
+                                              arrow_fields,
+                                              arrow_field_names,
+                                              arrow_union_data_type->type_codes(),
+                                              &arrow_union_array);
+  if (garrow_error_check(error,
+                         status,
+                         "[sparse-union-array][new][data-type]")) {
+    return GARROW_SPARSE_UNION_ARRAY(garrow_array_new_raw(&arrow_union_array));
+  } else {
+    return NULL;
+  }
+}
+
 
 G_DEFINE_TYPE(GArrowDenseUnionArray,
               garrow_dense_union_array,
@@ -420,6 +467,56 @@ garrow_dense_union_array_new(GArrowInt8Array *type_ids,
   }
 }
 
+/**
+ * garrow_dense_union_array_new_data_type:
+ * @data_type: The data type for the dense union array.
+ * @type_ids: The field type IDs for each value as #GArrowInt8Array.
+ * @value_offsets: The value offsets for each value as #GArrowInt32Array.
+ *   Each offset is counted for each type.
+ * @fields: (element-type GArrowArray): The arrays for each field
+ *   as #GList of #GArrowArray.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * Returns: (nullable): A newly created #GArrowDenseUnionArray
+ *   or %NULL on error.
+ *
+ * Since: 0.14.0
+ */
+GArrowDenseUnionArray *
+garrow_dense_union_array_new_data_type(GArrowDenseUnionDataType *data_type,
+                                       GArrowInt8Array *type_ids,
+                                       GArrowInt32Array *value_offsets,
+                                       GList *fields,
+                                       GError **error)
+{
+  auto arrow_data_type = garrow_data_type_get_raw(GARROW_DATA_TYPE(data_type));
+  auto arrow_union_data_type =
+    std::static_pointer_cast<arrow::UnionType>(arrow_data_type);
+  std::vector<std::string> arrow_field_names;
+  for (const auto &arrow_field : arrow_union_data_type->children()) {
+    arrow_field_names.push_back(arrow_field->name());
+  }
+  auto arrow_type_ids = garrow_array_get_raw(GARROW_ARRAY(type_ids));
+  auto arrow_value_offsets = garrow_array_get_raw(GARROW_ARRAY(value_offsets));
+  std::vector<std::shared_ptr<arrow::Array>> arrow_fields;
+  for (auto node = fields; node; node = node->next) {
+    auto *field = GARROW_ARRAY(node->data);
+    arrow_fields.push_back(garrow_array_get_raw(field));
+  }
+  std::shared_ptr<arrow::Array> arrow_union_array;
+  auto status = arrow::UnionArray::MakeDense(*arrow_type_ids,
+                                             *arrow_value_offsets,
+                                             arrow_fields,
+                                             arrow_field_names,
+                                             arrow_union_data_type->type_codes(),
+                                             &arrow_union_array);
+  if (garrow_error_check(error, status, "[dense-union-array][new][data-type]")) {
+    return GARROW_DENSE_UNION_ARRAY(garrow_array_new_raw(&arrow_union_array));
+  } else {
+    return NULL;
+  }
+}
+
 
 G_DEFINE_TYPE(GArrowDictionaryArray,
               garrow_dictionary_array,
diff --git a/c_glib/arrow-glib/composite-array.h b/c_glib/arrow-glib/composite-array.h
index a181ffc..c54c2f8 100644
--- a/c_glib/arrow-glib/composite-array.h
+++ b/c_glib/arrow-glib/composite-array.h
@@ -108,6 +108,11 @@ GArrowSparseUnionArray *
 garrow_sparse_union_array_new(GArrowInt8Array *type_ids,
                               GList *fields,
                               GError **error);
+GArrowSparseUnionArray *
+garrow_sparse_union_array_new_data_type(GArrowSparseUnionDataType *data_type,
+                                        GArrowInt8Array *type_ids,
+                                        GList *fields,
+                                        GError **error);
 
 
 #define GARROW_TYPE_DENSE_UNION_ARRAY (garrow_dense_union_array_get_type())
@@ -126,6 +131,12 @@ garrow_dense_union_array_new(GArrowInt8Array *type_ids,
                              GArrowInt32Array *value_offsets,
                              GList *fields,
                              GError **error);
+GArrowDenseUnionArray *
+garrow_dense_union_array_new_data_type(GArrowDenseUnionDataType *data_type,
+                                       GArrowInt8Array *type_ids,
+                                       GArrowInt32Array *value_offsets,
+                                       GList *fields,
+                                       GError **error);
 
 
 #define GARROW_TYPE_DICTIONARY_ARRAY (garrow_dictionary_array_get_type())
diff --git a/c_glib/test/test-dense-union-array.rb b/c_glib/test/test-dense-union-array.rb
index fa73f8d..aec12b4 100644
--- a/c_glib/test/test-dense-union-array.rb
+++ b/c_glib/test/test-dense-union-array.rb
@@ -18,33 +18,71 @@
 class TestDenseUnionArray < Test::Unit::TestCase
   include Helper::Buildable
 
-  def setup
-    type_ids = build_int8_array([0, 1, nil, 1, 1])
-    value_offsets = build_int32_array([0, 0, 0, 1, 2])
-    fields = [
-      build_int16_array([1]),
-      build_string_array(["a", "b", "c"]),
-    ]
-    @array = Arrow::DenseUnionArray.new(type_ids, value_offsets, fields)
-  end
+  sub_test_case(".new") do
+    sub_test_case("default") do
+      def setup
+        type_ids = build_int8_array([0, 1, nil, 1, 1])
+        value_offsets = build_int32_array([0, 0, 0, 1, 2])
+        fields = [
+          build_int16_array([1]),
+          build_string_array(["a", "b", "c"]),
+        ]
+        @array = Arrow::DenseUnionArray.new(type_ids, value_offsets, fields)
+      end
 
-  def test_value_data_type
-    fields = [
-      Arrow::Field.new("0", Arrow::Int16DataType.new),
-      Arrow::Field.new("1", Arrow::StringDataType.new),
-    ]
-    assert_equal(Arrow::DenseUnionDataType.new(fields, [0, 1]),
-                 @array.value_data_type)
-  end
+      def test_value_data_type
+        fields = [
+          Arrow::Field.new("0", Arrow::Int16DataType.new),
+          Arrow::Field.new("1", Arrow::StringDataType.new),
+        ]
+        assert_equal(Arrow::DenseUnionDataType.new(fields, [0, 1]),
+                     @array.value_data_type)
+      end
+
+      def test_field
+        assert_equal([
+                       build_int16_array([1]),
+                       build_string_array(["a", "b", "c"]),
+                     ],
+                     [
+                       @array.get_field(0),
+                       @array.get_field(1),
+                     ])
+      end
+    end
+
+    sub_test_case("DataType") do
+      def setup
+        data_type_fields = [
+          Arrow::Field.new("number", Arrow::Int16DataType.new),
+          Arrow::Field.new("text", Arrow::StringDataType.new),
+        ]
+        type_codes = [11, 13]
+        @data_type = Arrow::DenseUnionDataType.new(data_type_fields, type_codes)
+        type_ids = build_int8_array([11, 13, nil, 13, 13])
+        value_offsets = build_int32_array([0, 0, 0, 1, 2])
+        fields = [
+          build_int16_array([1]),
+          build_string_array(["a", "b", "c"])
+        ]
+        @array = Arrow::DenseUnionArray.new(@data_type, type_ids, value_offsets, fields)
+      end
+
+      def test_value_data_type
+        assert_equal(@data_type,
+                     @array.value_data_type)
+      end
 
-  def test_field
-    assert_equal([
-                   build_int16_array([1]),
-                   build_string_array(["a", "b", "c"]),
-                 ],
-                 [
-                   @array.get_field(0),
-                   @array.get_field(1),
-                 ])
+      def test_field
+        assert_equal([
+                       build_int16_array([1]),
+                       build_string_array(["a", "b", "c"]),
+                     ],
+                     [
+                       @array.get_field(0),
+                       @array.get_field(1),
+                     ])
+      end
+    end
   end
 end
diff --git a/c_glib/test/test-sparse-union-array.rb b/c_glib/test/test-sparse-union-array.rb
index 721f95c..62b7b3d 100644
--- a/c_glib/test/test-sparse-union-array.rb
+++ b/c_glib/test/test-sparse-union-array.rb
@@ -18,32 +18,69 @@
 class TestSparseUnionArray < Test::Unit::TestCase
   include Helper::Buildable
 
-  def setup
-    type_ids = build_int8_array([0, 1, nil, 1, 0])
-    fields = [
-      build_int16_array([1, nil, nil, nil, 5]),
-      build_string_array([nil, "b", nil, "d", nil]),
-    ]
-    @array = Arrow::SparseUnionArray.new(type_ids, fields)
-  end
+  sub_test_case(".new") do
+    sub_test_case("default") do
+      def setup
+        type_ids = build_int8_array([0, 1, nil, 1, 0])
+        fields = [
+          build_int16_array([1, nil, nil, nil, 5]),
+          build_string_array([nil, "b", nil, "d", nil]),
+        ]
+        @array = Arrow::SparseUnionArray.new(type_ids, fields)
+      end
 
-  def test_value_data_type
-    fields = [
-      Arrow::Field.new("0", Arrow::Int16DataType.new),
-      Arrow::Field.new("1", Arrow::StringDataType.new),
-    ]
-    assert_equal(Arrow::SparseUnionDataType.new(fields, [0, 1]),
-                 @array.value_data_type)
-  end
+      def test_value_data_type
+        fields = [
+          Arrow::Field.new("0", Arrow::Int16DataType.new),
+          Arrow::Field.new("1", Arrow::StringDataType.new),
+        ]
+        assert_equal(Arrow::SparseUnionDataType.new(fields, [0, 1]),
+                     @array.value_data_type)
+      end
+
+      def test_field
+        assert_equal([
+                       build_int16_array([1, nil, nil, nil, 5]),
+                       build_string_array([nil, "b", nil, "d", nil]),
+                     ],
+                     [
+                       @array.get_field(0),
+                       @array.get_field(1),
+                     ])
+      end
+    end
+
+    sub_test_case("DataType") do
+      def setup
+        data_type_fields = [
+          Arrow::Field.new("number", Arrow::Int16DataType.new),
+          Arrow::Field.new("text", Arrow::StringDataType.new),
+        ]
+        type_codes = [11, 13]
+        @data_type = Arrow::SparseUnionDataType.new(data_type_fields, type_codes)
+        type_ids = build_int8_array([11, 13, nil, 13, 11])
+        fields = [
+          build_int16_array([1, nil, nil, nil, 5]),
+          build_string_array([nil, "b", nil, "d", nil]),
+        ]
+        @array = Arrow::SparseUnionArray.new(@data_type, type_ids, fields)
+      end
+
+      def test_value_data_type
+        assert_equal(@data_type,
+                     @array.value_data_type)
+      end
 
-  def test_field
-    assert_equal([
-                   build_int16_array([1, nil, nil, nil, 5]),
-                   build_string_array([nil, "b", nil, "d", nil]),
-                 ],
-                 [
-                   @array.get_field(0),
-                   @array.get_field(1),
-                 ])
+      def test_field
+        assert_equal([
+                       build_int16_array([1, nil, nil, nil, 5]),
+                       build_string_array([nil, "b", nil, "d", nil]),
+                     ],
+                     [
+                       @array.get_field(0),
+                       @array.get_field(1),
+                     ])
+      end
+    end
   end
 end
diff --git a/ruby/red-arrow/test/raw-records/record-batch/test-dense-union-array.rb b/ruby/red-arrow/test/raw-records/record-batch/test-dense-union-array.rb
index 3520eba..91477fb 100644
--- a/ruby/red-arrow/test/raw-records/record-batch/test-dense-union-array.rb
+++ b/ruby/red-arrow/test/raw-records/record-batch/test-dense-union-array.rb
@@ -69,12 +69,8 @@ class RawRecordsRecordBatchDenseUnionArrayTest < Test::Unit::TestCase
         offsets << (type_ids.count(type_id) - 1)
       end
     end
-    # TODO
-    # union_array = Arrow::DenseUnionArray.new(schema.fields[0].data_type,
-    #                                          Arrow::Int8Array.new(type_ids),
-    #                                          Arrow::Int32Array.new(offsets),
-    #                                          arrays)
-    union_array = Arrow::DenseUnionArray.new(Arrow::Int8Array.new(type_ids),
+    union_array = Arrow::DenseUnionArray.new(schema.fields[0].data_type,
+                                             Arrow::Int8Array.new(type_ids),
                                              Arrow::Int32Array.new(offsets),
                                              arrays)
     schema = Arrow::Schema.new(column: union_array.value_data_type)
diff --git a/ruby/red-arrow/test/raw-records/record-batch/test-sparse-union-array.rb b/ruby/red-arrow/test/raw-records/record-batch/test-sparse-union-array.rb
index f963494..c1947b8 100644
--- a/ruby/red-arrow/test/raw-records/record-batch/test-sparse-union-array.rb
+++ b/ruby/red-arrow/test/raw-records/record-batch/test-sparse-union-array.rb
@@ -59,11 +59,8 @@ class RawRecordsRecordBatchSparseUnionArrayTest < Test::Unit::TestCase
         type_ids << type_codes[1]
       end
     end
-    # TODO
-    # union_array = Arrow::SparseUnionArray.new(schema.fields[0].data_type,
-    #                                           Arrow::Int8Array.new(type_ids),
-    #                                           arrays)
-    union_array = Arrow::SparseUnionArray.new(Arrow::Int8Array.new(type_ids),
+    union_array = Arrow::SparseUnionArray.new(schema.fields[0].data_type,
+                                              Arrow::Int8Array.new(type_ids),
                                               arrays)
     schema = Arrow::Schema.new(column: union_array.value_data_type)
     Arrow::RecordBatch.new(schema,

Reply via email to