This is an automated email from the ASF dual-hosted git repository.
arina pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/drill.git
The following commit(s) were added to refs/heads/master by this push:
new f1ccdc2 DRILL-7665: Add UNION to schema parser
f1ccdc2 is described below
commit f1ccdc2091a26c7680d4e0999f2acbd607386c09
Author: Arina Ielchiieva <[email protected]>
AuthorDate: Wed Mar 25 19:31:23 2020 +0200
DRILL-7665: Add UNION to schema parser
---
.../record/metadata/schema/parser/SchemaLexer.g4 | 1 +
.../record/metadata/schema/parser/SchemaParser.g4 | 8 ++++-
.../metadata/schema/parser/SchemaVisitor.java | 20 +++++++++++++
.../metadata/schema/parser/TestSchemaParser.java | 34 ++++++++++++++++++++--
4 files changed, 60 insertions(+), 3 deletions(-)
diff --git
a/exec/vector/src/main/antlr4/org/apache/drill/exec/record/metadata/schema/parser/SchemaLexer.g4
b/exec/vector/src/main/antlr4/org/apache/drill/exec/record/metadata/schema/parser/SchemaLexer.g4
index c531356..c5d1fe1 100644
---
a/exec/vector/src/main/antlr4/org/apache/drill/exec/record/metadata/schema/parser/SchemaLexer.g4
+++
b/exec/vector/src/main/antlr4/org/apache/drill/exec/record/metadata/schema/parser/SchemaLexer.g4
@@ -57,6 +57,7 @@ SECOND: 'SECOND';
MAP: 'MAP';
STRUCT: 'STRUCT';
ARRAY: 'ARRAY';
+UNION: 'UNION';
// additional data types, primary used for Parquet
UINT1: 'UINT1';
diff --git
a/exec/vector/src/main/antlr4/org/apache/drill/exec/record/metadata/schema/parser/SchemaParser.g4
b/exec/vector/src/main/antlr4/org/apache/drill/exec/record/metadata/schema/parser/SchemaParser.g4
index 1ff90ec..5196986 100644
---
a/exec/vector/src/main/antlr4/org/apache/drill/exec/record/metadata/schema/parser/SchemaParser.g4
+++
b/exec/vector/src/main/antlr4/org/apache/drill/exec/record/metadata/schema/parser/SchemaParser.g4
@@ -31,7 +31,7 @@ columns: column_def (COMMA column_def)*;
column_def: column property_values?;
-column: (primitive_column | struct_column | map_column | simple_array_column |
complex_array_column);
+column: (primitive_column | struct_column | map_column | simple_array_column |
complex_array_column | union_column);
primitive_column: column_id simple_type nullability? format_value?
default_value?;
@@ -43,6 +43,8 @@ map_column: column_id map_type nullability?;
complex_array_column: column_id complex_array_type nullability?;
+union_column: column_id union_type nullability?;
+
column_id
: ID # id
| QUOTED_ID # quoted_id
@@ -71,6 +73,8 @@ simple_type
| SMALLINT # smallint
;
+union_type: UNION;
+
array_type: (simple_array_type | complex_array_type);
simple_array_type: ARRAY LEFT_ANGLE_BRACKET simple_array_value_type
RIGHT_ANGLE_BRACKET;
@@ -79,6 +83,7 @@ simple_array_value_type
: simple_type # array_simple_type_def
| struct_type # array_struct_type_def
| map_type # array_map_type_def
+| union_type # array_union_type_def
;
complex_array_type: ARRAY LEFT_ANGLE_BRACKET array_type RIGHT_ANGLE_BRACKET;
@@ -100,6 +105,7 @@ map_value_type
| struct_type # map_value_struct_type_def
| map_type # map_value_map_type_def
| array_type # map_value_array_type_def
+| union_type # map_value_union_type_def
;
nullability: NOT NULL;
diff --git
a/exec/vector/src/main/java/org/apache/drill/exec/record/metadata/schema/parser/SchemaVisitor.java
b/exec/vector/src/main/java/org/apache/drill/exec/record/metadata/schema/parser/SchemaVisitor.java
index 18ed651..9db9dd0 100644
---
a/exec/vector/src/main/java/org/apache/drill/exec/record/metadata/schema/parser/SchemaVisitor.java
+++
b/exec/vector/src/main/java/org/apache/drill/exec/record/metadata/schema/parser/SchemaVisitor.java
@@ -28,6 +28,7 @@ import org.apache.drill.exec.record.metadata.MetadataUtils;
import org.apache.drill.exec.record.metadata.RepeatedListBuilder;
import org.apache.drill.exec.record.metadata.TupleMetadata;
import org.apache.drill.exec.record.metadata.TupleSchema;
+import org.apache.drill.exec.record.metadata.VariantColumnMetadata;
import org.apache.drill.exec.vector.complex.DictVector;
import org.apache.drill.shaded.guava.com.google.common.base.Preconditions;
@@ -127,6 +128,13 @@ public class SchemaVisitor extends
SchemaParserBaseVisitor<TupleMetadata> {
builder.addColumn(child);
return builder.buildColumn();
}
+
+ @Override
+ public ColumnMetadata visitUnion_column(SchemaParser.Union_columnContext
ctx) {
+ String name = ctx.column_id().accept(new IdVisitor());
+ // nullability for UNION types is ignored, since they can hold any value
+ return VariantColumnMetadata.union(name);
+ }
}
/**
@@ -387,6 +395,11 @@ public class SchemaVisitor extends
SchemaParserBaseVisitor<TupleMetadata> {
public ColumnMetadata
visitMap_value_array_type_def(SchemaParser.Map_value_array_type_defContext ctx)
{
return ctx.array_type().accept(new
ArrayTypeVisitor(DictVector.FIELD_VALUE_NAME));
}
+
+ @Override
+ public ColumnMetadata
visitMap_value_union_type_def(SchemaParser.Map_value_union_type_defContext ctx)
{
+ return VariantColumnMetadata.union(DictVector.FIELD_VALUE_NAME);
+ }
}
/**
@@ -421,9 +434,11 @@ public class SchemaVisitor extends
SchemaParserBaseVisitor<TupleMetadata> {
*/
private static class SimpleArrayValueTypeVisitor extends
SchemaParserBaseVisitor<ColumnMetadata> {
+ private final String name;
private final TypeVisitor typeVisitor;
SimpleArrayValueTypeVisitor(String name) {
+ this.name = name;
this.typeVisitor = new TypeVisitor(name, TypeProtos.DataMode.REPEATED);
}
@@ -441,6 +456,11 @@ public class SchemaVisitor extends
SchemaParserBaseVisitor<TupleMetadata> {
public ColumnMetadata
visitArray_map_type_def(SchemaParser.Array_map_type_defContext ctx) {
return ctx.map_type().accept(typeVisitor);
}
+
+ @Override
+ public ColumnMetadata
visitArray_union_type_def(SchemaParser.Array_union_type_defContext ctx) {
+ return VariantColumnMetadata.list(name);
+ }
}
/**
diff --git
a/exec/vector/src/test/java/org/apache/drill/exec/record/metadata/schema/parser/TestSchemaParser.java
b/exec/vector/src/test/java/org/apache/drill/exec/record/metadata/schema/parser/TestSchemaParser.java
index 201a2dc..42b604e 100644
---
a/exec/vector/src/test/java/org/apache/drill/exec/record/metadata/schema/parser/TestSchemaParser.java
+++
b/exec/vector/src/test/java/org/apache/drill/exec/record/metadata/schema/parser/TestSchemaParser.java
@@ -225,6 +225,10 @@ public class TestSchemaParser extends BaseTest {
.nullableValue(TypeProtos.MinorType.INT)
.resumeList()
.resumeSchema()
+ .addList("union_array")
+ .addType(TypeProtos.MinorType.BIGINT)
+ .addType(TypeProtos.MinorType.DATE)
+ .resumeSchema()
.buildSchema();
checkSchema("simple_array array<int>"
@@ -232,7 +236,8 @@ public class TestSchemaParser extends BaseTest {
+ ", struct_array array<struct<s1 int, s2 varchar>>"
+ ", nested_array_struct array<array<struct<ns1 int, ns2 varchar>>>"
+ ", map_array array<map<varchar, int>>"
- + ", nested_map_array array<array<map<varchar, int>>>",
+ + ", nested_map_array array<array<map<varchar, int>>>"
+ + ", union_array array<union>",
schema);
}
@@ -249,6 +254,9 @@ public class TestSchemaParser extends BaseTest {
.addDict("map_col", TypeProtos.MinorType.VARCHAR)
.nullableValue(TypeProtos.MinorType.INT)
.resumeMap()
+ .addUnion("union_col")
+ .addType(TypeProtos.MinorType.INT)
+ .resumeMap()
.resumeSchema()
.buildSchema();
@@ -256,6 +264,7 @@ public class TestSchemaParser extends BaseTest {
+ ", array_col array<int>"
+ ", nested_struct struct<s1 int, s2 varchar>"
+ ", map_col map<varchar, int>"
+ + ", union_col union"
+ ">", schema);
}
@@ -295,17 +304,38 @@ public class TestSchemaParser extends BaseTest {
.nullableValue(TypeProtos.MinorType.FLOAT8)
.resumeDict()
.resumeSchema()
+ .addDict("dict_col_union", TypeProtos.MinorType.BIGINT)
+ .unionValue()
+ .addType(TypeProtos.MinorType.INT)
+ .resumeDict()
+ .resumeSchema()
.buildSchema();
checkSchema("dict_col_simple map<varchar, int>"
+ ", dict_col_simple_ps map<varchar(50), decimal(10, 2) not null>"
+ ", dict_col_struct map<int, struct<sb boolean not null, si int>>"
+ ", dict_col_dict map<varchar, map<int, boolean>>"
- + ", dict_col_array map<bigint, array<map<date, double>>>",
+ + ", dict_col_array map<bigint, array<map<date, double>>>"
+ + ", dict_col_union map<bigint, union>",
schema);
}
@Test
+ public void testUnion() throws Exception {
+ TupleMetadata schema = new SchemaBuilder()
+ .addUnion("col_union_not_null")
+ .addType(TypeProtos.MinorType.INT)
+ .addType(TypeProtos.MinorType.VARCHAR)
+ .resumeSchema()
+ .addUnion("col_union_null")
+ .addType(TypeProtos.MinorType.INT)
+ .resumeSchema()
+ .buildSchema();
+
+ checkSchema("col_union_not_null union not null, col_union_null union",
schema);
+ }
+
+ @Test
public void testModeForSimpleType() throws Exception {
TupleMetadata schema = SchemaExprParser.parseSchema("id int not null, name
varchar");
assertFalse(schema.metadata("id").isNullable());