This is an automated email from the ASF dual-hosted git repository. kou pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push: new ecfb807 ARROW-5155: [GLib][Ruby] Add support for building union arrays from data type ecfb807 is described below commit ecfb807458bfe909ecc8940bd840fc9c6169dd51 Author: Kenta Murata <m...@mrkn.jp> AuthorDate: Thu Apr 25 15:36:07 2019 +0900 ARROW-5155: [GLib][Ruby] Add support for building union arrays from data type This is separated from #3723. This should be merged after #3723. Author: Kenta Murata <m...@mrkn.jp> Author: Kouhei Sutou <k...@clear-code.com> Closes #4127 from mrkn/glib_ruby_make_union_array_with_field_names and squashes the following commits: e6255567 <Kouhei Sutou> Fix test data f82ac3d1 <Kenta Murata> Fix test cases d550dc97 <Kenta Murata> Fix comment f1bfa07b <Kenta Murata> Stop copying a type_code vector 606a04c1 <Kenta Murata> Use new constructors of union arrays 5ad55722 <Kenta Murata> Add garrow_dense_union_array_new_data_type c8793d5c <Kenta Murata> Add garrow_sparse_union_array_new_data_type --- c_glib/arrow-glib/composite-array.cpp | 97 ++++++++++++++++++++++ c_glib/arrow-glib/composite-array.h | 11 +++ c_glib/test/test-dense-union-array.rb | 90 ++++++++++++++------ c_glib/test/test-sparse-union-array.rb | 87 +++++++++++++------ .../record-batch/test-dense-union-array.rb | 8 +- .../record-batch/test-sparse-union-array.rb | 7 +- 6 files changed, 238 insertions(+), 62 deletions(-) diff --git a/c_glib/arrow-glib/composite-array.cpp b/c_glib/arrow-glib/composite-array.cpp index b202fb4..4fba813 100644 --- a/c_glib/arrow-glib/composite-array.cpp +++ b/c_glib/arrow-glib/composite-array.cpp @@ -366,6 +366,53 @@ garrow_sparse_union_array_new(GArrowInt8Array *type_ids, } } +/** + * garrow_sparse_union_array_new_data_type: + * @data_type: The data type for the sparse array. + * @type_ids: The field type IDs for each value as #GArrowInt8Array. + * @fields: (element-type GArrowArray): The arrays for each field + * as #GList of #GArrowArray. 
+ * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: (nullable): A newly created #GArrowSparseUnionArray + * or %NULL on error. + * + * Since: 0.14.0 + */ +GArrowSparseUnionArray * +garrow_sparse_union_array_new_data_type(GArrowSparseUnionDataType *data_type, + GArrowInt8Array *type_ids, + GList *fields, + GError **error) +{ + auto arrow_data_type = garrow_data_type_get_raw(GARROW_DATA_TYPE(data_type)); + auto arrow_union_data_type = + std::static_pointer_cast<arrow::UnionType>(arrow_data_type); + std::vector<std::string> arrow_field_names; + for (const auto &arrow_field : arrow_union_data_type->children()) { + arrow_field_names.push_back(arrow_field->name()); + } + auto arrow_type_ids = garrow_array_get_raw(GARROW_ARRAY(type_ids)); + std::vector<std::shared_ptr<arrow::Array>> arrow_fields; + for (auto node = fields; node; node = node->next) { + auto *field = GARROW_ARRAY(node->data); + arrow_fields.push_back(garrow_array_get_raw(field)); + } + std::shared_ptr<arrow::Array> arrow_union_array; + auto status = arrow::UnionArray::MakeSparse(*arrow_type_ids, + arrow_fields, + arrow_field_names, + arrow_union_data_type->type_codes(), + &arrow_union_array); + if (garrow_error_check(error, + status, + "[sparse-union-array][new][data-type]")) { + return GARROW_SPARSE_UNION_ARRAY(garrow_array_new_raw(&arrow_union_array)); + } else { + return NULL; + } +} + G_DEFINE_TYPE(GArrowDenseUnionArray, garrow_dense_union_array, @@ -420,6 +467,56 @@ garrow_dense_union_array_new(GArrowInt8Array *type_ids, } } +/** + * garrow_dense_union_array_new_data_type: + * @data_type: The data type for the dense array. + * @type_ids: The field type IDs for each value as #GArrowInt8Array. + * @value_offsets: The value offsets for each value as #GArrowInt32Array. + * Each offset is counted for each type. + * @fields: (element-type GArrowArray): The arrays for each field + * as #GList of #GArrowArray. + * @error: (nullable): Return location for a #GError or %NULL. 
+ * + * Returns: (nullable): A newly created #GArrowDenseUnionArray + * or %NULL on error. + * + * Since: 0.14.0 + */ +GArrowDenseUnionArray * +garrow_dense_union_array_new_data_type(GArrowDenseUnionDataType *data_type, + GArrowInt8Array *type_ids, + GArrowInt32Array *value_offsets, + GList *fields, + GError **error) +{ + auto arrow_data_type = garrow_data_type_get_raw(GARROW_DATA_TYPE(data_type)); + auto arrow_union_data_type = + std::static_pointer_cast<arrow::UnionType>(arrow_data_type); + std::vector<std::string> arrow_field_names; + for (const auto &arrow_field : arrow_union_data_type->children()) { + arrow_field_names.push_back(arrow_field->name()); + } + auto arrow_type_ids = garrow_array_get_raw(GARROW_ARRAY(type_ids)); + auto arrow_value_offsets = garrow_array_get_raw(GARROW_ARRAY(value_offsets)); + std::vector<std::shared_ptr<arrow::Array>> arrow_fields; + for (auto node = fields; node; node = node->next) { + auto *field = GARROW_ARRAY(node->data); + arrow_fields.push_back(garrow_array_get_raw(field)); + } + std::shared_ptr<arrow::Array> arrow_union_array; + auto status = arrow::UnionArray::MakeDense(*arrow_type_ids, + *arrow_value_offsets, + arrow_fields, + arrow_field_names, + arrow_union_data_type->type_codes(), + &arrow_union_array); + if (garrow_error_check(error, status, "[dense-union-array][new][data-type]")) { + return GARROW_DENSE_UNION_ARRAY(garrow_array_new_raw(&arrow_union_array)); + } else { + return NULL; + } +} + G_DEFINE_TYPE(GArrowDictionaryArray, garrow_dictionary_array, diff --git a/c_glib/arrow-glib/composite-array.h b/c_glib/arrow-glib/composite-array.h index a181ffc..c54c2f8 100644 --- a/c_glib/arrow-glib/composite-array.h +++ b/c_glib/arrow-glib/composite-array.h @@ -108,6 +108,11 @@ GArrowSparseUnionArray * garrow_sparse_union_array_new(GArrowInt8Array *type_ids, GList *fields, GError **error); +GArrowSparseUnionArray * +garrow_sparse_union_array_new_data_type(GArrowSparseUnionDataType *data_type, + GArrowInt8Array *type_ids, + 
GList *fields, + GError **error); #define GARROW_TYPE_DENSE_UNION_ARRAY (garrow_dense_union_array_get_type()) @@ -126,6 +131,12 @@ garrow_dense_union_array_new(GArrowInt8Array *type_ids, GArrowInt32Array *value_offsets, GList *fields, GError **error); +GArrowDenseUnionArray * +garrow_dense_union_array_new_data_type(GArrowDenseUnionDataType *data_type, + GArrowInt8Array *type_ids, + GArrowInt32Array *value_offsets, + GList *fields, + GError **error); #define GARROW_TYPE_DICTIONARY_ARRAY (garrow_dictionary_array_get_type()) diff --git a/c_glib/test/test-dense-union-array.rb b/c_glib/test/test-dense-union-array.rb index fa73f8d..aec12b4 100644 --- a/c_glib/test/test-dense-union-array.rb +++ b/c_glib/test/test-dense-union-array.rb @@ -18,33 +18,71 @@ class TestDenseUnionArray < Test::Unit::TestCase include Helper::Buildable - def setup - type_ids = build_int8_array([0, 1, nil, 1, 1]) - value_offsets = build_int32_array([0, 0, 0, 1, 2]) - fields = [ - build_int16_array([1]), - build_string_array(["a", "b", "c"]), - ] - @array = Arrow::DenseUnionArray.new(type_ids, value_offsets, fields) - end + sub_test_case(".new") do + sub_test_case("default") do + def setup + type_ids = build_int8_array([0, 1, nil, 1, 1]) + value_offsets = build_int32_array([0, 0, 0, 1, 2]) + fields = [ + build_int16_array([1]), + build_string_array(["a", "b", "c"]), + ] + @array = Arrow::DenseUnionArray.new(type_ids, value_offsets, fields) + end - def test_value_data_type - fields = [ - Arrow::Field.new("0", Arrow::Int16DataType.new), - Arrow::Field.new("1", Arrow::StringDataType.new), - ] - assert_equal(Arrow::DenseUnionDataType.new(fields, [0, 1]), - @array.value_data_type) - end + def test_value_data_type + fields = [ + Arrow::Field.new("0", Arrow::Int16DataType.new), + Arrow::Field.new("1", Arrow::StringDataType.new), + ] + assert_equal(Arrow::DenseUnionDataType.new(fields, [0, 1]), + @array.value_data_type) + end + + def test_field + assert_equal([ + build_int16_array([1]), + 
build_string_array(["a", "b", "c"]), + ], + [ + @array.get_field(0), + @array.get_field(1), + ]) + end + end + + sub_test_case("DataType") do + def setup + data_type_fields = [ + Arrow::Field.new("number", Arrow::Int16DataType.new), + Arrow::Field.new("text", Arrow::StringDataType.new), + ] + type_codes = [11, 13] + @data_type = Arrow::DenseUnionDataType.new(data_type_fields, type_codes) + type_ids = build_int8_array([11, 13, nil, 13, 13]) + value_offsets = build_int32_array([0, 0, 0, 1, 2]) + fields = [ + build_int16_array([1]), + build_string_array(["a", "b", "c"]) + ] + @array = Arrow::DenseUnionArray.new(@data_type, type_ids, value_offsets, fields) + end + + def test_value_data_type + assert_equal(@data_type, + @array.value_data_type) + end - def test_field - assert_equal([ - build_int16_array([1]), - build_string_array(["a", "b", "c"]), - ], - [ - @array.get_field(0), - @array.get_field(1), - ]) + def test_field + assert_equal([ + build_int16_array([1]), + build_string_array(["a", "b", "c"]), + ], + [ + @array.get_field(0), + @array.get_field(1), + ]) + end + end end end diff --git a/c_glib/test/test-sparse-union-array.rb b/c_glib/test/test-sparse-union-array.rb index 721f95c..62b7b3d 100644 --- a/c_glib/test/test-sparse-union-array.rb +++ b/c_glib/test/test-sparse-union-array.rb @@ -18,32 +18,69 @@ class TestSparseUnionArray < Test::Unit::TestCase include Helper::Buildable - def setup - type_ids = build_int8_array([0, 1, nil, 1, 0]) - fields = [ - build_int16_array([1, nil, nil, nil, 5]), - build_string_array([nil, "b", nil, "d", nil]), - ] - @array = Arrow::SparseUnionArray.new(type_ids, fields) - end + sub_test_case(".new") do + sub_test_case("default") do + def setup + type_ids = build_int8_array([0, 1, nil, 1, 0]) + fields = [ + build_int16_array([1, nil, nil, nil, 5]), + build_string_array([nil, "b", nil, "d", nil]), + ] + @array = Arrow::SparseUnionArray.new(type_ids, fields) + end - def test_value_data_type - fields = [ - Arrow::Field.new("0", 
Arrow::Int16DataType.new), - Arrow::Field.new("1", Arrow::StringDataType.new), - ] - assert_equal(Arrow::SparseUnionDataType.new(fields, [0, 1]), - @array.value_data_type) - end + def test_value_data_type + fields = [ + Arrow::Field.new("0", Arrow::Int16DataType.new), + Arrow::Field.new("1", Arrow::StringDataType.new), + ] + assert_equal(Arrow::SparseUnionDataType.new(fields, [0, 1]), + @array.value_data_type) + end + + def test_field + assert_equal([ + build_int16_array([1, nil, nil, nil, 5]), + build_string_array([nil, "b", nil, "d", nil]), + ], + [ + @array.get_field(0), + @array.get_field(1), + ]) + end + end + + sub_test_case("DataType") do + def setup + data_type_fields = [ + Arrow::Field.new("number", Arrow::Int16DataType.new), + Arrow::Field.new("text", Arrow::StringDataType.new), + ] + type_codes = [11, 13] + @data_type = Arrow::SparseUnionDataType.new(data_type_fields, type_codes) + type_ids = build_int8_array([11, 13, nil, 13, 11]) + fields = [ + build_int16_array([1, nil, nil, nil, 5]), + build_string_array([nil, "b", nil, "d", nil]), + ] + @array = Arrow::SparseUnionArray.new(@data_type, type_ids, fields) + end + + def test_value_data_type + assert_equal(@data_type, + @array.value_data_type) + end - def test_field - assert_equal([ - build_int16_array([1, nil, nil, nil, 5]), - build_string_array([nil, "b", nil, "d", nil]), - ], - [ - @array.get_field(0), - @array.get_field(1), - ]) + def test_field + assert_equal([ + build_int16_array([1, nil, nil, nil, 5]), + build_string_array([nil, "b", nil, "d", nil]), + ], + [ + @array.get_field(0), + @array.get_field(1), + ]) + end + end end end diff --git a/ruby/red-arrow/test/raw-records/record-batch/test-dense-union-array.rb b/ruby/red-arrow/test/raw-records/record-batch/test-dense-union-array.rb index 3520eba..91477fb 100644 --- a/ruby/red-arrow/test/raw-records/record-batch/test-dense-union-array.rb +++ b/ruby/red-arrow/test/raw-records/record-batch/test-dense-union-array.rb @@ -69,12 +69,8 @@ class 
RawRecordsRecordBatchDenseUnionArrayTest < Test::Unit::TestCase offsets << (type_ids.count(type_id) - 1) end end - # TODO - # union_array = Arrow::DenseUnionArray.new(schema.fields[0].data_type, - # Arrow::Int8Array.new(type_ids), - # Arrow::Int32Array.new(offsets), - # arrays) - union_array = Arrow::DenseUnionArray.new(Arrow::Int8Array.new(type_ids), + union_array = Arrow::DenseUnionArray.new(schema.fields[0].data_type, + Arrow::Int8Array.new(type_ids), Arrow::Int32Array.new(offsets), arrays) schema = Arrow::Schema.new(column: union_array.value_data_type) diff --git a/ruby/red-arrow/test/raw-records/record-batch/test-sparse-union-array.rb b/ruby/red-arrow/test/raw-records/record-batch/test-sparse-union-array.rb index f963494..c1947b8 100644 --- a/ruby/red-arrow/test/raw-records/record-batch/test-sparse-union-array.rb +++ b/ruby/red-arrow/test/raw-records/record-batch/test-sparse-union-array.rb @@ -59,11 +59,8 @@ class RawRecordsRecordBatchSparseUnionArrayTest < Test::Unit::TestCase type_ids << type_codes[1] end end - # TODO - # union_array = Arrow::SparseUnionArray.new(schema.fields[0].data_type, - # Arrow::Int8Array.new(type_ids), - # arrays) - union_array = Arrow::SparseUnionArray.new(Arrow::Int8Array.new(type_ids), + union_array = Arrow::SparseUnionArray.new(schema.fields[0].data_type, + Arrow::Int8Array.new(type_ids), arrays) schema = Arrow::Schema.new(column: union_array.value_data_type) Arrow::RecordBatch.new(schema,