This is an automated email from the ASF dual-hosted git repository. emkornfield pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push: new 19b9ccc ARROW-1279: [Integration] Enable MapType integration tests 19b9ccc is described below commit 19b9ccc3add079768e98e84737eefd7357764150 Author: Bryan Cutler <cutl...@gmail.com> AuthorDate: Thu Jun 27 00:26:59 2019 -0700 ARROW-1279: [Integration] Enable MapType integration tests This PR enables MapType integration tests for C++ and Java, and fixes remaining issues to pass. Previously, C++ created a MapType with a StructType field that is nullable, which is required to be non-nullable from the spec. The struct field has the name "entries" and the struct data pairs are named "key" and "value." Author: Bryan Cutler <cutl...@gmail.com> Closes #4684 from BryanCutler/map-type-integration-tests-ARROW-1279 and squashes the following commits: 6259a4574 <Bryan Cutler> rename MapType fields for internal test 30777804f <Bryan Cutler> rename MapType struct field to 'entries' 3185af9fa <Bryan Cutler> Fix MapType to create a non-nullable struct field --- cpp/src/arrow/ipc/json-internal.cc | 4 +-- cpp/src/arrow/ipc/json-test.cc | 2 +- cpp/src/arrow/ipc/metadata-internal.cc | 4 +-- cpp/src/arrow/ipc/test-common.cc | 2 +- cpp/src/arrow/type.cc | 7 +++-- integration/integration_test.py | 12 ++++----- .../org/apache/arrow/vector/complex/MapVector.java | 31 ++++++++++++++++++++-- 7 files changed, 44 insertions(+), 18 deletions(-) diff --git a/cpp/src/arrow/ipc/json-internal.cc b/cpp/src/arrow/ipc/json-internal.cc index 42663c0..1352965 100644 --- a/cpp/src/arrow/ipc/json-internal.cc +++ b/cpp/src/arrow/ipc/json-internal.cc @@ -1274,8 +1274,8 @@ class ArrayReader { Status Visit(const MapType& type) { auto list_type = std::make_shared<ListType>(field( - "item", - struct_({field("key", type.key_type(), false), field("item", type.item_type())}), + "entries", + struct_({field("key", type.key_type(), false), field("value", type.item_type())}), false)); std::shared_ptr<Array> list_array; RETURN_NOT_OK(CreateList(list_type, &list_array)); diff --git a/cpp/src/arrow/ipc/json-test.cc b/cpp/src/arrow/ipc/json-test.cc index fb57fa7..338552d 100644 --- a/cpp/src/arrow/ipc/json-test.cc +++ b/cpp/src/arrow/ipc/json-test.cc @@ -204,7 +204,7 @@ TEST(TestJsonArrayWriter, NestedTypes) { TestArrayRoundTrip(list_array); - // List + // Map auto map_type = map(utf8(), int32()); auto keys_array = ArrayFromJSON(utf8(), R"(["a", "b", "c", "d", "a", "b", "c"])"); diff --git a/cpp/src/arrow/ipc/metadata-internal.cc b/cpp/src/arrow/ipc/metadata-internal.cc index 4b349cb..4e1a157 100644 --- a/cpp/src/arrow/ipc/metadata-internal.cc +++ b/cpp/src/arrow/ipc/metadata-internal.cc @@ -320,9 +320,7 @@ Status ConcreteTypeFromFlatbuffer(flatbuf::Type type, const void* type_data, if (children.size() != 1) { return Status::Invalid("Map must have exactly 1 child field"); } - if ( // FIXME(bkietz) temporarily disabled: this field is sometimes read nullable - // children[0]->nullable() || - children[0]->type()->id() != Type::STRUCT || + if (children[0]->nullable() || children[0]->type()->id() != Type::STRUCT || children[0]->type()->num_children() != 2) { return Status::Invalid("Map's key-item pairs must be non-nullable structs"); } diff --git a/cpp/src/arrow/ipc/test-common.cc b/cpp/src/arrow/ipc/test-common.cc index 12adebc..47c3076 100644 --- a/cpp/src/arrow/ipc/test-common.cc +++ b/cpp/src/arrow/ipc/test-common.cc @@ -120,7 +120,7 @@ Status MakeRandomMapArray(const std::shared_ptr<Array>& key_array, bool include_nulls, MemoryPool* pool, std::shared_ptr<Array>* out) { auto pair_type = struct_( - {field("key", key_array->type(), false), field("item", item_array->type())}); + {field("key", key_array->type(), false), field("value", item_array->type())}); auto pair_array = std::make_shared<StructArray>(pair_type, num_maps, ArrayVector{key_array, item_array}); diff --git a/cpp/src/arrow/type.cc b/cpp/src/arrow/type.cc index b215331..b3fcb0c 100644 --- a/cpp/src/arrow/type.cc +++ b/cpp/src/arrow/type.cc @@ -153,8 +153,11 @@ std::string ListType::ToString() const { MapType::MapType(const std::shared_ptr<DataType>& key_type, const std::shared_ptr<DataType>& item_type, bool keys_sorted) - : ListType(struct_({std::make_shared<Field>("key", key_type, false), - std::make_shared<Field>("item", item_type)})), + : ListType(std::make_shared<Field>( + "entries", + struct_({std::make_shared<Field>("key", key_type, false), + std::make_shared<Field>("value", item_type)}), + false)), keys_sorted_(keys_sorted) { id_ = type_id; } diff --git a/integration/integration_test.py b/integration/integration_test.py index aca0574..dbc03c7 100644 --- a/integration/integration_test.py +++ b/integration/integration_test.py @@ -714,7 +714,7 @@ class MapType(DataType): assert not key_type.nullable self.key_type = key_type self.item_type = item_type - self.pair_type = StructType('item', [key_type, item_type], False) + self.pair_type = StructType('entries', [key_type, item_type], False) self.keysSorted = keysSorted def _get_type(self): @@ -1060,10 +1060,10 @@ def generate_interval_case(): def generate_map_case(): # TODO(bkietz): separated from nested_case so it can be - # independently skipped, consolidate after Java supports map + # independently skipped, consolidate after JS supports map fields = [ MapType('map_nullable', get_field('key', 'utf8', False), - get_field('item', 'int32')), + get_field('value', 'int32')), ] batch_sizes = [7, 10] @@ -1220,12 +1220,10 @@ class IntegrationRunner(object): file_id = guid()[:8] - if (('JS' in (producer.name, consumer.name) or - 'Java' in (producer.name, consumer.name)) and + if ('JS' in (producer.name, consumer.name) and "map" in test_case.name): print('TODO(ARROW-1279): Enable map tests ' + - ' for Java and JS once Java supports them and JS\'' + - ' are unbroken') + ' for JS once they are unbroken') continue if ('JS' in (producer.name, consumer.name) and diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/MapVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/MapVector.java index 340fb2a..a367150 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/MapVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/MapVector.java @@ -25,14 +25,17 @@ import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.vector.AddOrGetResult; import org.apache.arrow.vector.FieldVector; import org.apache.arrow.vector.ValueVector; +import org.apache.arrow.vector.ZeroVector; import org.apache.arrow.vector.complex.impl.UnionMapReader; import org.apache.arrow.vector.complex.impl.UnionMapWriter; import org.apache.arrow.vector.types.Types; import org.apache.arrow.vector.types.Types.MinorType; +import org.apache.arrow.vector.types.pojo.ArrowType.ArrowTypeID; import org.apache.arrow.vector.types.pojo.ArrowType.Map; import org.apache.arrow.vector.types.pojo.Field; import org.apache.arrow.vector.types.pojo.FieldType; import org.apache.arrow.vector.util.CallBack; +import org.apache.arrow.vector.util.SchemaChangeRuntimeException; /** * A MapVector is used to store entries of key/value pairs. It is a container vector that is @@ -46,6 +49,9 @@ public class MapVector extends ListVector { public static final String KEY_NAME = "key"; public static final String VALUE_NAME = "value"; + // TODO: this is only used for addOrGetVector because ListVector declares it private + protected CallBack callBack; + /** * Construct an empty MapVector with no data. Child vectors must be added subsequently. * @@ -68,6 +74,7 @@ public class MapVector extends ListVector { */ public MapVector(String name, BufferAllocator allocator, FieldType fieldType, CallBack callBack) { super(name, allocator, fieldType, callBack); + this.callBack = callBack; reader = new UnionMapReader(this); } @@ -121,9 +128,29 @@ public class MapVector extends ListVector { */ @Override public <T extends ValueVector> AddOrGetResult<T> addOrGetVector(FieldType fieldType) { - AddOrGetResult<T> result = super.addOrGetVector(fieldType); + + // TODO: can call super method once DATA_VECTOR_NAME is configurable + boolean created = false; + if (vector instanceof ZeroVector) { + vector = fieldType.createNewSingleVector("entries", allocator, callBack); + // returned vector must have the same field + created = true; + if (callBack != null && + // not a schema change if changing from ZeroVector to ZeroVector + (fieldType.getType().getTypeID() != ArrowTypeID.Null)) { + callBack.doWork(); + } + } + + if (vector.getField().getType().getTypeID() != fieldType.getType().getTypeID()) { + final String msg = String.format("Inner vector type mismatch. Requested type: [%s], actual type: [%s]", + fieldType.getType().getTypeID(), vector.getField().getType().getTypeID()); + throw new SchemaChangeRuntimeException(msg); + } + reader = new UnionMapReader(this); - return result; + + return new AddOrGetResult<>((T) vector, created); } /**