This is an automated email from the ASF dual-hosted git repository.

arina pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/drill.git


The following commit(s) were added to refs/heads/master by this push:
     new f1ccdc2  DRILL-7665: Add UNION to schema parser
f1ccdc2 is described below

commit f1ccdc2091a26c7680d4e0999f2acbd607386c09
Author: Arina Ielchiieva <[email protected]>
AuthorDate: Wed Mar 25 19:31:23 2020 +0200

    DRILL-7665: Add UNION to schema parser
---
 .../record/metadata/schema/parser/SchemaLexer.g4   |  1 +
 .../record/metadata/schema/parser/SchemaParser.g4  |  8 ++++-
 .../metadata/schema/parser/SchemaVisitor.java      | 20 +++++++++++++
 .../metadata/schema/parser/TestSchemaParser.java   | 34 ++++++++++++++++++++--
 4 files changed, 60 insertions(+), 3 deletions(-)

diff --git a/exec/vector/src/main/antlr4/org/apache/drill/exec/record/metadata/schema/parser/SchemaLexer.g4 b/exec/vector/src/main/antlr4/org/apache/drill/exec/record/metadata/schema/parser/SchemaLexer.g4
index c531356..c5d1fe1 100644
--- a/exec/vector/src/main/antlr4/org/apache/drill/exec/record/metadata/schema/parser/SchemaLexer.g4
+++ b/exec/vector/src/main/antlr4/org/apache/drill/exec/record/metadata/schema/parser/SchemaLexer.g4
@@ -57,6 +57,7 @@ SECOND: 'SECOND';
 MAP: 'MAP';
 STRUCT: 'STRUCT';
 ARRAY: 'ARRAY';
+UNION: 'UNION';
 
 // additional data types, primary used for Parquet
 UINT1: 'UINT1';
diff --git a/exec/vector/src/main/antlr4/org/apache/drill/exec/record/metadata/schema/parser/SchemaParser.g4 b/exec/vector/src/main/antlr4/org/apache/drill/exec/record/metadata/schema/parser/SchemaParser.g4
index 1ff90ec..5196986 100644
--- a/exec/vector/src/main/antlr4/org/apache/drill/exec/record/metadata/schema/parser/SchemaParser.g4
+++ b/exec/vector/src/main/antlr4/org/apache/drill/exec/record/metadata/schema/parser/SchemaParser.g4
@@ -31,7 +31,7 @@ columns: column_def (COMMA column_def)*;
 
 column_def: column property_values?;
 
-column: (primitive_column | struct_column | map_column | simple_array_column | 
complex_array_column);
+column: (primitive_column | struct_column | map_column | simple_array_column | 
complex_array_column | union_column);
 
 primitive_column: column_id simple_type nullability? format_value? 
default_value?;
 
@@ -43,6 +43,8 @@ map_column: column_id map_type nullability?;
 
 complex_array_column: column_id complex_array_type nullability?;
 
+union_column: column_id union_type nullability?;
+
 column_id
 : ID # id
 | QUOTED_ID # quoted_id
@@ -71,6 +73,8 @@ simple_type
 | SMALLINT # smallint
 ;
 
+union_type: UNION;
+
 array_type: (simple_array_type | complex_array_type);
 
 simple_array_type: ARRAY LEFT_ANGLE_BRACKET simple_array_value_type 
RIGHT_ANGLE_BRACKET;
@@ -79,6 +83,7 @@ simple_array_value_type
 : simple_type # array_simple_type_def
 | struct_type # array_struct_type_def
 | map_type # array_map_type_def
+| union_type # array_union_type_def
 ;
 
 complex_array_type: ARRAY LEFT_ANGLE_BRACKET array_type RIGHT_ANGLE_BRACKET;
@@ -100,6 +105,7 @@ map_value_type
 | struct_type # map_value_struct_type_def
 | map_type # map_value_map_type_def
 | array_type # map_value_array_type_def
+| union_type # map_value_union_type_def
 ;
 
 nullability: NOT NULL;
diff --git a/exec/vector/src/main/java/org/apache/drill/exec/record/metadata/schema/parser/SchemaVisitor.java b/exec/vector/src/main/java/org/apache/drill/exec/record/metadata/schema/parser/SchemaVisitor.java
index 18ed651..9db9dd0 100644
--- a/exec/vector/src/main/java/org/apache/drill/exec/record/metadata/schema/parser/SchemaVisitor.java
+++ b/exec/vector/src/main/java/org/apache/drill/exec/record/metadata/schema/parser/SchemaVisitor.java
@@ -28,6 +28,7 @@ import org.apache.drill.exec.record.metadata.MetadataUtils;
 import org.apache.drill.exec.record.metadata.RepeatedListBuilder;
 import org.apache.drill.exec.record.metadata.TupleMetadata;
 import org.apache.drill.exec.record.metadata.TupleSchema;
+import org.apache.drill.exec.record.metadata.VariantColumnMetadata;
 import org.apache.drill.exec.vector.complex.DictVector;
 import org.apache.drill.shaded.guava.com.google.common.base.Preconditions;
 
@@ -127,6 +128,13 @@ public class SchemaVisitor extends 
SchemaParserBaseVisitor<TupleMetadata> {
       builder.addColumn(child);
       return builder.buildColumn();
     }
+
+    @Override
+    public ColumnMetadata visitUnion_column(SchemaParser.Union_columnContext 
ctx) {
+      String name = ctx.column_id().accept(new IdVisitor());
+      // nullability for UNION types is ignored, since they can hold any value
+      return VariantColumnMetadata.union(name);
+    }
   }
 
   /**
@@ -387,6 +395,11 @@ public class SchemaVisitor extends 
SchemaParserBaseVisitor<TupleMetadata> {
     public ColumnMetadata 
visitMap_value_array_type_def(SchemaParser.Map_value_array_type_defContext ctx) 
{
       return ctx.array_type().accept(new 
ArrayTypeVisitor(DictVector.FIELD_VALUE_NAME));
     }
+
+    @Override
+    public ColumnMetadata 
visitMap_value_union_type_def(SchemaParser.Map_value_union_type_defContext ctx) 
{
+      return VariantColumnMetadata.union(DictVector.FIELD_VALUE_NAME);
+    }
   }
 
   /**
@@ -421,9 +434,11 @@ public class SchemaVisitor extends 
SchemaParserBaseVisitor<TupleMetadata> {
    */
   private static class SimpleArrayValueTypeVisitor extends 
SchemaParserBaseVisitor<ColumnMetadata> {
 
+    private final String name;
     private final TypeVisitor typeVisitor;
 
     SimpleArrayValueTypeVisitor(String name) {
+      this.name = name;
       this.typeVisitor = new TypeVisitor(name, TypeProtos.DataMode.REPEATED);
     }
 
@@ -441,6 +456,11 @@ public class SchemaVisitor extends 
SchemaParserBaseVisitor<TupleMetadata> {
     public ColumnMetadata 
visitArray_map_type_def(SchemaParser.Array_map_type_defContext ctx) {
       return ctx.map_type().accept(typeVisitor);
     }
+
+    @Override
+    public ColumnMetadata 
visitArray_union_type_def(SchemaParser.Array_union_type_defContext ctx) {
+      return VariantColumnMetadata.list(name);
+    }
   }
 
   /**
diff --git a/exec/vector/src/test/java/org/apache/drill/exec/record/metadata/schema/parser/TestSchemaParser.java b/exec/vector/src/test/java/org/apache/drill/exec/record/metadata/schema/parser/TestSchemaParser.java
index 201a2dc..42b604e 100644
--- a/exec/vector/src/test/java/org/apache/drill/exec/record/metadata/schema/parser/TestSchemaParser.java
+++ b/exec/vector/src/test/java/org/apache/drill/exec/record/metadata/schema/parser/TestSchemaParser.java
@@ -225,6 +225,10 @@ public class TestSchemaParser extends BaseTest {
           .nullableValue(TypeProtos.MinorType.INT)
         .resumeList()
       .resumeSchema()
+      .addList("union_array")
+        .addType(TypeProtos.MinorType.BIGINT)
+        .addType(TypeProtos.MinorType.DATE)
+      .resumeSchema()
       .buildSchema();
 
     checkSchema("simple_array array<int>"
@@ -232,7 +236,8 @@ public class TestSchemaParser extends BaseTest {
         + ", struct_array array<struct<s1 int, s2 varchar>>"
         + ", nested_array_struct array<array<struct<ns1 int, ns2 varchar>>>"
         + ", map_array array<map<varchar, int>>"
-        + ", nested_map_array array<array<map<varchar, int>>>",
+        + ", nested_map_array array<array<map<varchar, int>>>"
+        + ", union_array array<union>",
       schema);
   }
 
@@ -249,6 +254,9 @@ public class TestSchemaParser extends BaseTest {
         .addDict("map_col", TypeProtos.MinorType.VARCHAR)
           .nullableValue(TypeProtos.MinorType.INT)
         .resumeMap()
+        .addUnion("union_col")
+          .addType(TypeProtos.MinorType.INT)
+        .resumeMap()
       .resumeSchema()
       .buildSchema();
 
@@ -256,6 +264,7 @@ public class TestSchemaParser extends BaseTest {
       + ", array_col array<int>"
       + ", nested_struct struct<s1 int, s2 varchar>"
       + ", map_col map<varchar, int>"
+      + ", union_col union"
       + ">", schema);
   }
 
@@ -295,17 +304,38 @@ public class TestSchemaParser extends BaseTest {
             .nullableValue(TypeProtos.MinorType.FLOAT8)
           .resumeDict()
         .resumeSchema()
+        .addDict("dict_col_union", TypeProtos.MinorType.BIGINT)
+          .unionValue()
+            .addType(TypeProtos.MinorType.INT)
+         .resumeDict()
+        .resumeSchema()
       .buildSchema();
 
     checkSchema("dict_col_simple map<varchar, int>"
       + ", dict_col_simple_ps map<varchar(50), decimal(10, 2) not null>"
       + ", dict_col_struct map<int, struct<sb boolean not null, si int>>"
       + ", dict_col_dict map<varchar, map<int, boolean>>"
-      + ", dict_col_array map<bigint, array<map<date, double>>>",
+      + ", dict_col_array map<bigint, array<map<date, double>>>"
+      + ", dict_col_union map<bigint, union>",
       schema);
   }
 
   @Test
+  public void testUnion() throws Exception {
+    TupleMetadata schema = new SchemaBuilder()
+      .addUnion("col_union_not_null")
+        .addType(TypeProtos.MinorType.INT)
+        .addType(TypeProtos.MinorType.VARCHAR)
+      .resumeSchema()
+      .addUnion("col_union_null")
+        .addType(TypeProtos.MinorType.INT)
+      .resumeSchema()
+      .buildSchema();
+
+    checkSchema("col_union_not_null union not null, col_union_null union", 
schema);
+  }
+
+  @Test
   public void testModeForSimpleType() throws Exception {
     TupleMetadata schema = SchemaExprParser.parseSchema("id int not null, name 
varchar");
     assertFalse(schema.metadata("id").isNullable());

Reply via email to