This is an automated email from the ASF dual-hosted git repository.

emkornfield pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new 19b9ccc  ARROW-1279: [Integration] Enable MapType integration tests
19b9ccc is described below

commit 19b9ccc3add079768e98e84737eefd7357764150
Author: Bryan Cutler <cutl...@gmail.com>
AuthorDate: Thu Jun 27 00:26:59 2019 -0700

    ARROW-1279: [Integration] Enable MapType integration tests
    
    This PR enables MapType integration tests for C++ and Java, and fixes 
remaining issues to pass. Previously, C++ created a MapType with a StructType 
field that is nullable, which is required to be non-nullable from the spec. The 
struct field has the name "entries" and the struct data pairs are named "key" 
and "value."
    
    Author: Bryan Cutler <cutl...@gmail.com>
    
    Closes #4684 from BryanCutler/map-type-integration-tests-ARROW-1279 and 
squashes the following commits:
    
    6259a4574 <Bryan Cutler> rename MapType fields for internal test
    30777804f <Bryan Cutler> rename MapType struct field to 'entries'
    3185af9fa <Bryan Cutler> Fix MapType to create a non-nullable struct field
---
 cpp/src/arrow/ipc/json-internal.cc                 |  4 +--
 cpp/src/arrow/ipc/json-test.cc                     |  2 +-
 cpp/src/arrow/ipc/metadata-internal.cc             |  4 +--
 cpp/src/arrow/ipc/test-common.cc                   |  2 +-
 cpp/src/arrow/type.cc                              |  7 +++--
 integration/integration_test.py                    | 12 ++++-----
 .../org/apache/arrow/vector/complex/MapVector.java | 31 ++++++++++++++++++++--
 7 files changed, 44 insertions(+), 18 deletions(-)

diff --git a/cpp/src/arrow/ipc/json-internal.cc 
b/cpp/src/arrow/ipc/json-internal.cc
index 42663c0..1352965 100644
--- a/cpp/src/arrow/ipc/json-internal.cc
+++ b/cpp/src/arrow/ipc/json-internal.cc
@@ -1274,8 +1274,8 @@ class ArrayReader {
 
   Status Visit(const MapType& type) {
     auto list_type = std::make_shared<ListType>(field(
-        "item",
-        struct_({field("key", type.key_type(), false), field("item", 
type.item_type())}),
+        "entries",
+        struct_({field("key", type.key_type(), false), field("value", 
type.item_type())}),
         false));
     std::shared_ptr<Array> list_array;
     RETURN_NOT_OK(CreateList(list_type, &list_array));
diff --git a/cpp/src/arrow/ipc/json-test.cc b/cpp/src/arrow/ipc/json-test.cc
index fb57fa7..338552d 100644
--- a/cpp/src/arrow/ipc/json-test.cc
+++ b/cpp/src/arrow/ipc/json-test.cc
@@ -204,7 +204,7 @@ TEST(TestJsonArrayWriter, NestedTypes) {
 
   TestArrayRoundTrip(list_array);
 
-  // List
+  // Map
   auto map_type = map(utf8(), int32());
   auto keys_array = ArrayFromJSON(utf8(), R"(["a", "b", "c", "d", "a", "b", 
"c"])");
 
diff --git a/cpp/src/arrow/ipc/metadata-internal.cc 
b/cpp/src/arrow/ipc/metadata-internal.cc
index 4b349cb..4e1a157 100644
--- a/cpp/src/arrow/ipc/metadata-internal.cc
+++ b/cpp/src/arrow/ipc/metadata-internal.cc
@@ -320,9 +320,7 @@ Status ConcreteTypeFromFlatbuffer(flatbuf::Type type, const 
void* type_data,
       if (children.size() != 1) {
         return Status::Invalid("Map must have exactly 1 child field");
       }
-      if (  // FIXME(bkietz) temporarily disabled: this field is sometimes 
read nullable
-            // children[0]->nullable() ||
-          children[0]->type()->id() != Type::STRUCT ||
+      if (children[0]->nullable() || children[0]->type()->id() != Type::STRUCT 
||
           children[0]->type()->num_children() != 2) {
         return Status::Invalid("Map's key-item pairs must be non-nullable 
structs");
       }
diff --git a/cpp/src/arrow/ipc/test-common.cc b/cpp/src/arrow/ipc/test-common.cc
index 12adebc..47c3076 100644
--- a/cpp/src/arrow/ipc/test-common.cc
+++ b/cpp/src/arrow/ipc/test-common.cc
@@ -120,7 +120,7 @@ Status MakeRandomMapArray(const std::shared_ptr<Array>& 
key_array,
                           bool include_nulls, MemoryPool* pool,
                           std::shared_ptr<Array>* out) {
   auto pair_type = struct_(
-      {field("key", key_array->type(), false), field("item", 
item_array->type())});
+      {field("key", key_array->type(), false), field("value", 
item_array->type())});
 
   auto pair_array = std::make_shared<StructArray>(pair_type, num_maps,
                                                   ArrayVector{key_array, 
item_array});
diff --git a/cpp/src/arrow/type.cc b/cpp/src/arrow/type.cc
index b215331..b3fcb0c 100644
--- a/cpp/src/arrow/type.cc
+++ b/cpp/src/arrow/type.cc
@@ -153,8 +153,11 @@ std::string ListType::ToString() const {
 
 MapType::MapType(const std::shared_ptr<DataType>& key_type,
                  const std::shared_ptr<DataType>& item_type, bool keys_sorted)
-    : ListType(struct_({std::make_shared<Field>("key", key_type, false),
-                        std::make_shared<Field>("item", item_type)})),
+    : ListType(std::make_shared<Field>(
+          "entries",
+          struct_({std::make_shared<Field>("key", key_type, false),
+                   std::make_shared<Field>("value", item_type)}),
+          false)),
       keys_sorted_(keys_sorted) {
   id_ = type_id;
 }
diff --git a/integration/integration_test.py b/integration/integration_test.py
index aca0574..dbc03c7 100644
--- a/integration/integration_test.py
+++ b/integration/integration_test.py
@@ -714,7 +714,7 @@ class MapType(DataType):
         assert not key_type.nullable
         self.key_type = key_type
         self.item_type = item_type
-        self.pair_type = StructType('item', [key_type, item_type], False)
+        self.pair_type = StructType('entries', [key_type, item_type], False)
         self.keysSorted = keysSorted
 
     def _get_type(self):
@@ -1060,10 +1060,10 @@ def generate_interval_case():
 
 def generate_map_case():
     # TODO(bkietz): separated from nested_case so it can be
-    # independently skipped, consolidate after Java supports map
+    # independently skipped, consolidate after JS supports map
     fields = [
         MapType('map_nullable', get_field('key', 'utf8', False),
-                get_field('item', 'int32')),
+                get_field('value', 'int32')),
     ]
 
     batch_sizes = [7, 10]
@@ -1220,12 +1220,10 @@ class IntegrationRunner(object):
 
             file_id = guid()[:8]
 
-            if (('JS' in (producer.name, consumer.name) or
-                    'Java' in (producer.name, consumer.name)) and
+            if ('JS' in (producer.name, consumer.name) and
                     "map" in test_case.name):
                 print('TODO(ARROW-1279): Enable map tests ' +
-                      ' for Java and JS once Java supports them and JS\'' +
-                      ' are unbroken')
+                      ' for JS once they are unbroken')
                 continue
 
             if ('JS' in (producer.name, consumer.name) and
diff --git 
a/java/vector/src/main/java/org/apache/arrow/vector/complex/MapVector.java 
b/java/vector/src/main/java/org/apache/arrow/vector/complex/MapVector.java
index 340fb2a..a367150 100644
--- a/java/vector/src/main/java/org/apache/arrow/vector/complex/MapVector.java
+++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/MapVector.java
@@ -25,14 +25,17 @@ import org.apache.arrow.memory.BufferAllocator;
 import org.apache.arrow.vector.AddOrGetResult;
 import org.apache.arrow.vector.FieldVector;
 import org.apache.arrow.vector.ValueVector;
+import org.apache.arrow.vector.ZeroVector;
 import org.apache.arrow.vector.complex.impl.UnionMapReader;
 import org.apache.arrow.vector.complex.impl.UnionMapWriter;
 import org.apache.arrow.vector.types.Types;
 import org.apache.arrow.vector.types.Types.MinorType;
+import org.apache.arrow.vector.types.pojo.ArrowType.ArrowTypeID;
 import org.apache.arrow.vector.types.pojo.ArrowType.Map;
 import org.apache.arrow.vector.types.pojo.Field;
 import org.apache.arrow.vector.types.pojo.FieldType;
 import org.apache.arrow.vector.util.CallBack;
+import org.apache.arrow.vector.util.SchemaChangeRuntimeException;
 
 /**
  * A MapVector is used to store entries of key/value pairs. It is a container 
vector that is
@@ -46,6 +49,9 @@ public class MapVector extends ListVector {
   public static final String KEY_NAME = "key";
   public static final String VALUE_NAME = "value";
 
+  // TODO: this is only used for addOrGetVector because ListVector declares it 
private
+  protected CallBack callBack;
+
   /**
    * Construct an empty MapVector with no data. Child vectors must be added 
subsequently.
    *
@@ -68,6 +74,7 @@ public class MapVector extends ListVector {
    */
   public MapVector(String name, BufferAllocator allocator, FieldType 
fieldType, CallBack callBack) {
     super(name, allocator, fieldType, callBack);
+    this.callBack = callBack;
     reader = new UnionMapReader(this);
   }
 
@@ -121,9 +128,29 @@ public class MapVector extends ListVector {
    */
   @Override
   public <T extends ValueVector> AddOrGetResult<T> addOrGetVector(FieldType 
fieldType) {
-    AddOrGetResult<T> result = super.addOrGetVector(fieldType);
+
+    // TODO: can call super method once DATA_VECTOR_NAME is configurable
+    boolean created = false;
+    if (vector instanceof ZeroVector) {
+      vector = fieldType.createNewSingleVector("entries", allocator, callBack);
+      // returned vector must have the same field
+      created = true;
+      if (callBack != null &&
+              // not a schema change if changing from ZeroVector to ZeroVector
+              (fieldType.getType().getTypeID() != ArrowTypeID.Null)) {
+        callBack.doWork();
+      }
+    }
+
+    if (vector.getField().getType().getTypeID() != 
fieldType.getType().getTypeID()) {
+      final String msg = String.format("Inner vector type mismatch. Requested 
type: [%s], actual type: [%s]",
+              fieldType.getType().getTypeID(), 
vector.getField().getType().getTypeID());
+      throw new SchemaChangeRuntimeException(msg);
+    }
+
     reader = new UnionMapReader(this);
-    return result;
+
+    return new AddOrGetResult<>((T) vector, created);
   }
 
   /**

Reply via email to