This is an automated email from the ASF dual-hosted git repository. ihuzenko pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/drill.git
commit 4b857856b975c2fba87c13417f4ee6fed872cddc Author: Arina Ielchiieva <[email protected]> AuthorDate: Tue Dec 31 15:31:35 2019 +0200 DRILL-7361: Support MAP (DICT) type in schema file closes #1967 --- .../java/org/apache/drill/TestSchemaCommands.java | 11 +- .../record/metadata/schema/parser/SchemaParser.g4 | 33 ++++- .../record/metadata/AbstractMapColumnMetadata.java | 38 ++---- .../exec/record/metadata/DictColumnMetadata.java | 30 ++++- .../exec/record/metadata/MapColumnMetadata.java | 10 +- .../metadata/schema/parser/SchemaVisitor.java | 118 +++++++++++++++-- .../schema/parser/TestParserErrorHandling.java | 25 +++- .../metadata/schema/parser/TestSchemaParser.java | 141 +++++++++++++++++---- pom.xml | 2 +- 9 files changed, 333 insertions(+), 75 deletions(-) diff --git a/exec/java-exec/src/test/java/org/apache/drill/TestSchemaCommands.java b/exec/java-exec/src/test/java/org/apache/drill/TestSchemaCommands.java index b6b9618..748a8fc 100644 --- a/exec/java-exec/src/test/java/org/apache/drill/TestSchemaCommands.java +++ b/exec/java-exec/src/test/java/org/apache/drill/TestSchemaCommands.java @@ -708,8 +708,15 @@ public class TestSchemaCommands extends ClusterTest { String statement = "CREATE OR REPLACE SCHEMA \n" + "(\n" - + "`col1` DATE FORMAT 'yyyy-MM-dd' DEFAULT '-1', \n" - + "`col2` INT NOT NULL FORMAT 'yyyy-MM-dd' PROPERTIES { 'drill.strict' = 'true', 'some_column_prop' = 'some_column_val' }\n" + + "`col_date` DATE FORMAT 'yyyy-MM-dd' DEFAULT '-1', \n" + + "`col_int` INT NOT NULL FORMAT 'yyyy-MM-dd' PROPERTIES { 'drill.strict' = 'true', 'some_column_prop' = 'some_column_val' }, \n" + + "`col_array_int` ARRAY<INT>, \n" + + "`col_nested_array_int` ARRAY<ARRAY<INT>>, \n" + + "`col_map_required` MAP<INT, VARCHAR NOT NULL>, \n" + + "`col_map_optional` MAP<INT, VARCHAR>, \n" + + "`col_map_array` ARRAY<MAP<INT, VARCHAR>>, \n" + + "`col_struct` STRUCT<`s1` INT, `s2` VARCHAR NOT NULL>, \n" + + "`col_struct_array` ARRAY<STRUCT<`s1` INT, `s2` VARCHAR NOT NULL>>\n" + ") \n" + "FOR TABLE dfs.tmp.`table_describe_statement` \n" + "PROPERTIES (\n" diff --git a/exec/vector/src/main/antlr4/org/apache/drill/exec/record/metadata/schema/parser/SchemaParser.g4 b/exec/vector/src/main/antlr4/org/apache/drill/exec/record/metadata/schema/parser/SchemaParser.g4 index 3be11ca..5bd55a7 100644 --- a/exec/vector/src/main/antlr4/org/apache/drill/exec/record/metadata/schema/parser/SchemaParser.g4 +++ b/exec/vector/src/main/antlr4/org/apache/drill/exec/record/metadata/schema/parser/SchemaParser.g4 @@ -31,7 +31,7 @@ columns: column_def (COMMA column_def)*; column_def: column property_values?; -column: (primitive_column | struct_column | simple_array_column | complex_array_column); +column: (primitive_column | struct_column | map_column | simple_array_column | complex_array_column); primitive_column: column_id simple_type nullability? format_value? default_value?; @@ -39,6 +39,8 @@ simple_array_column: column_id simple_array_type nullability?; struct_column: column_id struct_type nullability?; +map_column: column_id map_type nullability?; + complex_array_column: column_id complex_array_type nullability?; column_id @@ -63,14 +65,37 @@ simple_type | INTERVAL # interval ; -complex_type: (simple_array_type | complex_array_type); +array_type: (simple_array_type | complex_array_type); + +simple_array_type: ARRAY LEFT_ANGLE_BRACKET simple_array_value_type RIGHT_ANGLE_BRACKET; -simple_array_type: ARRAY LEFT_ANGLE_BRACKET (simple_type | struct_type) RIGHT_ANGLE_BRACKET; +simple_array_value_type +: simple_type # array_simple_type_def +| struct_type # array_struct_type_def +| map_type # array_map_type_def +; -complex_array_type: ARRAY LEFT_ANGLE_BRACKET complex_type RIGHT_ANGLE_BRACKET; +complex_array_type: ARRAY LEFT_ANGLE_BRACKET array_type RIGHT_ANGLE_BRACKET; struct_type: STRUCT LEFT_ANGLE_BRACKET columns RIGHT_ANGLE_BRACKET; +map_type: MAP LEFT_ANGLE_BRACKET map_key_type_def COMMA map_value_type_def RIGHT_ANGLE_BRACKET; + +map_key_type_def: map_key_type nullability?; + +map_key_type +: simple_type # map_key_simple_type_def +; + +map_value_type_def: map_value_type nullability?; + +map_value_type +: simple_type # map_value_simple_type_def +| struct_type # map_value_struct_type_def +| map_type # map_value_map_type_def +| array_type # map_value_array_type_def +; + nullability: NOT NULL; format_value: FORMAT string_value; diff --git a/exec/vector/src/main/java/org/apache/drill/exec/record/metadata/AbstractMapColumnMetadata.java b/exec/vector/src/main/java/org/apache/drill/exec/record/metadata/AbstractMapColumnMetadata.java index cecfdd9..0ea2d74 100644 --- a/exec/vector/src/main/java/org/apache/drill/exec/record/metadata/AbstractMapColumnMetadata.java +++ b/exec/vector/src/main/java/org/apache/drill/exec/record/metadata/AbstractMapColumnMetadata.java @@ -22,8 +22,6 @@ import org.apache.drill.common.types.TypeProtos.MajorType; import org.apache.drill.common.types.TypeProtos.MinorType; import org.apache.drill.exec.record.MaterializedField; -import java.util.stream.Collectors; - /** * Describes a base column type for map, dict, repeated map and repeated dict. All are tuples that have a tuple * schema as part of the column definition. @@ -100,43 +98,29 @@ public abstract class AbstractMapColumnMetadata extends AbstractColumnMetadata { @Override public MaterializedField schema() { MaterializedField field = emptySchema(); - for (MaterializedField member : schema.toFieldList()) { - field.addChild(member); - } + schema.toFieldList().forEach(field::addChild); return field; } @Override public MaterializedField emptySchema() { return MaterializedField.create(name, - MajorType.newBuilder() - .setMinorType(type) - .setMode(mode) - .build()); + MajorType.newBuilder() + .setMinorType(type) + .setMode(mode) + .build()); } @Override public String typeString() { - StringBuilder builder = new StringBuilder(); - if (isArray()) { - builder.append("ARRAY<"); - } - builder.append(getStringType()) - .append("<").append( - tupleSchema().toMetadataList().stream() - .map(ColumnMetadata::columnString) - .collect(Collectors.joining(", ")) - ) - .append(">"); - if (isArray()) { - builder.append(">"); - } - return builder.toString(); + String typeString = internalTypeString(); + return isArray() ? "ARRAY<" + typeString + ">" : typeString; } /** - * Returns string representation of type like {@code "STRUCT"} or {@code "MAP"} - * @return column type + * Returns specific type string representation of the type that extends this class. + * + * @return type string representation */ - protected abstract String getStringType(); + protected abstract String internalTypeString(); } diff --git a/exec/vector/src/main/java/org/apache/drill/exec/record/metadata/DictColumnMetadata.java b/exec/vector/src/main/java/org/apache/drill/exec/record/metadata/DictColumnMetadata.java index 66ba63f..09a268a 100644 --- a/exec/vector/src/main/java/org/apache/drill/exec/record/metadata/DictColumnMetadata.java +++ b/exec/vector/src/main/java/org/apache/drill/exec/record/metadata/DictColumnMetadata.java @@ -21,6 +21,8 @@ import org.apache.drill.common.types.TypeProtos; import org.apache.drill.exec.record.MaterializedField; import org.apache.drill.exec.vector.complex.DictVector; +import java.util.stream.Collectors; + public class DictColumnMetadata extends AbstractMapColumnMetadata { /** @@ -79,8 +81,32 @@ public class DictColumnMetadata extends AbstractMapColumnMetadata { } @Override - protected String getStringType() { - return "MAP"; + protected String internalTypeString() { + StringBuilder builder = new StringBuilder() + .append("MAP<"); + + ColumnMetadata key = keyColumnMetadata(); + ColumnMetadata value = valueColumnMetadata(); + + // sometimes dict key and value are added after creating metadata class, + // and if `typeString` method was called prematurely, for example, in case of error + // add whatever was added in a form of columns with key / value names + if (key == null || value == null) { + builder.append(tupleSchema().toMetadataList().stream() + .map(ColumnMetadata::columnString) + .collect(Collectors.joining(", "))); + } else { + builder.append(key.typeString()) + .append(", ") + .append(value.typeString()); + + if (TypeProtos.DataMode.REQUIRED == value.mode()) { + builder.append(" NOT NULL"); + } + } + + builder.append(">"); + return builder.toString(); } @Override diff --git a/exec/vector/src/main/java/org/apache/drill/exec/record/metadata/MapColumnMetadata.java b/exec/vector/src/main/java/org/apache/drill/exec/record/metadata/MapColumnMetadata.java index 1efea91..1883451 100644 --- a/exec/vector/src/main/java/org/apache/drill/exec/record/metadata/MapColumnMetadata.java +++ b/exec/vector/src/main/java/org/apache/drill/exec/record/metadata/MapColumnMetadata.java @@ -21,6 +21,8 @@ import org.apache.drill.common.types.TypeProtos.DataMode; import org.apache.drill.common.types.TypeProtos.MinorType; import org.apache.drill.exec.record.MaterializedField; +import java.util.stream.Collectors; + /** * Describes a map and repeated map. Both are tuples that have a tuple * schema as part of the column definition. @@ -71,7 +73,11 @@ public class MapColumnMetadata extends AbstractMapColumnMetadata { } @Override - protected String getStringType() { - return "STRUCT"; + protected String internalTypeString() { + return "STRUCT<" + + tupleSchema().toMetadataList().stream() + .map(ColumnMetadata::columnString) + .collect(Collectors.joining(", ")) + + ">"; } } diff --git a/exec/vector/src/main/java/org/apache/drill/exec/record/metadata/schema/parser/SchemaVisitor.java b/exec/vector/src/main/java/org/apache/drill/exec/record/metadata/schema/parser/SchemaVisitor.java index 274b6ef..cd5da79 100644 --- a/exec/vector/src/main/java/org/apache/drill/exec/record/metadata/schema/parser/SchemaVisitor.java +++ b/exec/vector/src/main/java/org/apache/drill/exec/record/metadata/schema/parser/SchemaVisitor.java @@ -22,11 +22,13 @@ import org.apache.drill.common.types.TypeProtos; import org.apache.drill.common.types.Types; import org.apache.drill.exec.record.MaterializedField; import org.apache.drill.exec.record.metadata.ColumnMetadata; +import org.apache.drill.exec.record.metadata.DictBuilder; import org.apache.drill.exec.record.metadata.MapBuilder; import org.apache.drill.exec.record.metadata.MetadataUtils; import org.apache.drill.exec.record.metadata.RepeatedListBuilder; import org.apache.drill.exec.record.metadata.TupleMetadata; import org.apache.drill.exec.record.metadata.TupleSchema; +import org.apache.drill.exec.vector.complex.DictVector; import org.apache.drill.shaded.guava.com.google.common.base.Preconditions; import java.util.LinkedHashMap; @@ -74,11 +76,10 @@ public class SchemaVisitor extends SchemaParserBaseVisitor<TupleMetadata> { } return columnMetadata; } - } /** - * Visits various types of columns (primitive, map, array) and stores their metadata + * Visits various types of columns (primitive, struct, map, array) and stores their metadata * into {@link ColumnMetadata} class. */ public static class ColumnVisitor extends SchemaParserBaseVisitor<ColumnMetadata> { @@ -112,14 +113,20 @@ public class SchemaVisitor extends SchemaParserBaseVisitor<TupleMetadata> { } @Override + public ColumnMetadata visitMap_column(SchemaParser.Map_columnContext ctx) { + String name = ctx.column_id().accept(new IdVisitor()); + // Drill does not distinguish between nullable and not null maps, by default they are not null + return ctx.map_type().accept(new TypeVisitor(name, TypeProtos.DataMode.REQUIRED)); + } + + @Override public ColumnMetadata visitComplex_array_column(SchemaParser.Complex_array_columnContext ctx) { String name = ctx.column_id().accept(new IdVisitor()); - ColumnMetadata child = ctx.complex_array_type().complex_type().accept(new ArrayTypeVisitor(name)); + ColumnMetadata child = ctx.complex_array_type().array_type().accept(new ArrayTypeVisitor(name)); RepeatedListBuilder builder = new RepeatedListBuilder(null, name); builder.addColumn(child); return builder.buildColumn(); } - } /** @@ -156,7 +163,7 @@ public class SchemaVisitor extends SchemaParserBaseVisitor<TupleMetadata> { } /** - * Visits simple and map types, storing their metadata into {@link ColumnMetadata} holder. + * Visits simple, struct and map types and stores their metadata into {@link ColumnMetadata} holder. */ private static class TypeVisitor extends SchemaParserBaseVisitor<ColumnMetadata> { @@ -273,8 +280,7 @@ public class SchemaVisitor extends SchemaParserBaseVisitor<TupleMetadata> { @Override public ColumnMetadata visitStruct_type(SchemaParser.Struct_typeContext ctx) { - // internally Drill refers to structs as maps and currently does not have true map notion - // Drill maps will be renamed to structs in future + // internally Drill refers to structs as maps MapBuilder builder = new MapBuilder(null, name, mode); ColumnDefVisitor visitor = new ColumnDefVisitor(); ctx.columns().column_def().forEach( @@ -283,15 +289,78 @@ public class SchemaVisitor extends SchemaParserBaseVisitor<TupleMetadata> { return builder.buildColumn(); } + @Override + public ColumnMetadata visitMap_type(SchemaParser.Map_typeContext ctx) { + // internally Drill refers to maps as dicts + DictBuilder builder = new DictBuilder(null, name, mode); + builder.key(ctx.map_key_type_def().map_key_type().accept(MapKeyTypeVisitor.INSTANCE)); + + SchemaParser.Map_value_type_defContext valueDef = ctx.map_value_type_def(); + TypeProtos.DataMode valueMode = valueDef.nullability() == null ? TypeProtos.DataMode.OPTIONAL : TypeProtos.DataMode.REQUIRED; + builder.addColumn(valueDef.map_value_type().accept(new MapValueTypeVisitor(valueMode))); + return builder.buildColumn(); + } + private ColumnMetadata constructColumn(TypeProtos.MajorType type) { MaterializedField field = MaterializedField.create(name, type); return MetadataUtils.fromField(field); } + } + + /** + * Visits map key type and returns its {@link TypeProtos.MajorType} definition. + */ + private static class MapKeyTypeVisitor extends SchemaParserBaseVisitor<TypeProtos.MajorType> { + + // map key is always required + private static final TypeVisitor KEY_VISITOR = new TypeVisitor(DictVector.FIELD_KEY_NAME, TypeProtos.DataMode.REQUIRED); + + static final MapKeyTypeVisitor INSTANCE = new MapKeyTypeVisitor(); + + @Override + public TypeProtos.MajorType visitMap_key_simple_type_def(SchemaParser.Map_key_simple_type_defContext ctx) { + return ctx.simple_type().accept(KEY_VISITOR).majorType(); + } + } + + /** + * Visits map value type and stores its metadata into {@link ColumnMetadata} holder. + */ + private static class MapValueTypeVisitor extends SchemaParserBaseVisitor<ColumnMetadata> { + + private final TypeProtos.DataMode mode; + + MapValueTypeVisitor (TypeProtos.DataMode mode) { + this.mode = mode; + } + @Override + public ColumnMetadata visitMap_value_simple_type_def(SchemaParser.Map_value_simple_type_defContext ctx) { + return ctx.simple_type().accept(new TypeVisitor(DictVector.FIELD_VALUE_NAME, mode)); + } + + @Override + public ColumnMetadata visitMap_value_struct_type_def(SchemaParser.Map_value_struct_type_defContext ctx) { + // Drill does not distinguish between nullable and not null structs, by default they are not null + TypeProtos.DataMode structMode = TypeProtos.DataMode.REPEATED == mode ? mode : TypeProtos.DataMode.REQUIRED; + return ctx.struct_type().accept(new TypeVisitor(DictVector.FIELD_VALUE_NAME, structMode)); + } + + @Override + public ColumnMetadata visitMap_value_map_type_def(SchemaParser.Map_value_map_type_defContext ctx) { + // Drill does not distinguish between nullable and not null maps, by default they are not null + TypeProtos.DataMode mapMode = TypeProtos.DataMode.REPEATED == mode ? mode : TypeProtos.DataMode.REQUIRED; + return ctx.map_type().accept(new TypeVisitor(DictVector.FIELD_VALUE_NAME, mapMode)); + } + + @Override + public ColumnMetadata visitMap_value_array_type_def(SchemaParser.Map_value_array_type_defContext ctx) { + return ctx.array_type().accept(new ArrayTypeVisitor(DictVector.FIELD_VALUE_NAME)); + } } /** - * Visits array type: simple (which has only on nested element: array<int>) + * Visits array type: simple (which has only one nested element: array<int>) * or complex (which has several nested elements: array<int<int>>). */ private static class ArrayTypeVisitor extends SchemaParserBaseVisitor<ColumnMetadata> { @@ -304,20 +373,47 @@ public class SchemaVisitor extends SchemaParserBaseVisitor<TupleMetadata> { @Override public ColumnMetadata visitSimple_array_type(SchemaParser.Simple_array_typeContext ctx) { - TypeVisitor visitor = new TypeVisitor(name, TypeProtos.DataMode.REPEATED); - return ctx.struct_type() == null ? ctx.simple_type().accept(visitor) : ctx.struct_type().accept(visitor); + SimpleArrayValueTypeVisitor visitor = new SimpleArrayValueTypeVisitor(name); + return ctx.simple_array_value_type().accept(visitor); } @Override public ColumnMetadata visitComplex_array_type(SchemaParser.Complex_array_typeContext ctx) { RepeatedListBuilder childBuilder = new RepeatedListBuilder(null, name); - ColumnMetadata child = ctx.complex_type().accept(new ArrayTypeVisitor(name)); + ColumnMetadata child = ctx.array_type().accept(new ArrayTypeVisitor(name)); childBuilder.addColumn(child); return childBuilder.buildColumn(); } } /** + * Visits simple array value type and stores its metadata into {@link ColumnMetadata} holder. + */ + private static class SimpleArrayValueTypeVisitor extends SchemaParserBaseVisitor<ColumnMetadata> { + + private final TypeVisitor typeVisitor; + + SimpleArrayValueTypeVisitor(String name) { + this.typeVisitor = new TypeVisitor(name, TypeProtos.DataMode.REPEATED); + } + + @Override + public ColumnMetadata visitArray_simple_type_def(SchemaParser.Array_simple_type_defContext ctx) { + return ctx.simple_type().accept(typeVisitor); + } + + @Override + public ColumnMetadata visitArray_struct_type_def(SchemaParser.Array_struct_type_defContext ctx) { + return ctx.struct_type().accept(typeVisitor); + } + + @Override + public ColumnMetadata visitArray_map_type_def(SchemaParser.Array_map_type_defContext ctx) { + return ctx.map_type().accept(typeVisitor); + } + } + + /** * Visits schema or column properties. * Properties must be identified as key values pairs separated by equals sign. * Properties pairs must be separated by comma. diff --git a/exec/vector/src/test/java/org/apache/drill/exec/record/metadata/schema/parser/TestParserErrorHandling.java b/exec/vector/src/test/java/org/apache/drill/exec/record/metadata/schema/parser/TestParserErrorHandling.java index 1d5a84b..e9e7966 100644 --- a/exec/vector/src/test/java/org/apache/drill/exec/record/metadata/schema/parser/TestParserErrorHandling.java +++ b/exec/vector/src/test/java/org/apache/drill/exec/record/metadata/schema/parser/TestParserErrorHandling.java @@ -122,6 +122,30 @@ public class TestParserErrorHandling extends BaseTest { } @Test + public void testIncorrectMapKeyType() throws Exception { + String schema = "col map<array<int>, varchar>"; + thrown.expect(IOException.class); + thrown.expectMessage("mismatched input 'array' expecting {'INT', 'INTEGER',"); + SchemaExprParser.parseSchema(schema); + } + + @Test + public void testMapKeyWithName() throws Exception { + String schema = "col map<`key` int, `value` varchar>"; + thrown.expect(IOException.class); + thrown.expectMessage("extraneous input '`key`' expecting {'INT', 'INTEGER',"); + SchemaExprParser.parseSchema(schema); + } + + @Test + public void testMapMissingComma() throws Exception { + String schema = "col map<int varchar>"; + thrown.expect(IOException.class); + thrown.expectMessage("missing ',' at 'varchar'"); + SchemaExprParser.parseSchema(schema); + } + + @Test public void testMissingNotBeforeNull() throws Exception { String schema = "col int null"; thrown.expect(IOException.class); @@ -152,5 +176,4 @@ public class TestParserErrorHandling extends BaseTest { thrown.expectMessage("extraneous input '2' expecting ')'"); SchemaExprParser.parseSchema(schema); } - } diff --git a/exec/vector/src/test/java/org/apache/drill/exec/record/metadata/schema/parser/TestSchemaParser.java b/exec/vector/src/test/java/org/apache/drill/exec/record/metadata/schema/parser/TestSchemaParser.java index 2e41986..a0acf10 100644 --- a/exec/vector/src/test/java/org/apache/drill/exec/record/metadata/schema/parser/TestSchemaParser.java +++ b/exec/vector/src/test/java/org/apache/drill/exec/record/metadata/schema/parser/TestSchemaParser.java @@ -19,6 +19,7 @@ package org.apache.drill.exec.record.metadata.schema.parser; import org.apache.drill.common.types.TypeProtos; import org.apache.drill.exec.record.metadata.ColumnMetadata; +import org.apache.drill.exec.record.metadata.DictColumnMetadata; import org.apache.drill.exec.record.metadata.SchemaBuilder; import org.apache.drill.exec.record.metadata.TupleMetadata; import org.apache.drill.test.BaseTest; @@ -191,23 +192,33 @@ public class TestSchemaParser extends BaseTest { .addArray(TypeProtos.MinorType.INT) .resumeSchema() .addMapArray("struct_array") - .addNullable("m1", TypeProtos.MinorType.INT) - .addNullable("m2", TypeProtos.MinorType.VARCHAR) + .addNullable("s1", TypeProtos.MinorType.INT) + .addNullable("s2", TypeProtos.MinorType.VARCHAR) .resumeSchema() .addRepeatedList("nested_array_struct") .addMapArray() - .addNullable("nm1", TypeProtos.MinorType.INT) - .addNullable("nm2", TypeProtos.MinorType.VARCHAR) + .addNullable("ns1", TypeProtos.MinorType.INT) + .addNullable("ns2", TypeProtos.MinorType.VARCHAR) + .resumeList() + .resumeSchema() + .addDictArray("map_array", TypeProtos.MinorType.VARCHAR) + .nullableValue(TypeProtos.MinorType.INT) + .resumeSchema() + .addRepeatedList("nested_map_array") + .addDictArray() + .key(TypeProtos.MinorType.VARCHAR) + .nullableValue(TypeProtos.MinorType.INT) .resumeList() .resumeSchema() .buildSchema(); checkSchema("simple_array array<int>" + ", nested_array array<array<int>>" - + ", struct_array array<struct<m1 int, m2 varchar>>" - + ", nested_array_struct array<array<struct<nm1 int, nm2 varchar>>>", + + ", struct_array array<struct<s1 int, s2 varchar>>" + + ", nested_array_struct array<array<struct<ns1 int, ns2 varchar>>>" + + ", map_array array<map<varchar, int>>" + + ", nested_map_array array<array<map<varchar, int>>>", schema); - } @Test @@ -217,13 +228,66 @@ public class TestSchemaParser extends BaseTest { .addNullable("int_col", TypeProtos.MinorType.INT) .addArray("array_col", TypeProtos.MinorType.INT) .addMap("nested_struct") - .addNullable("m1", TypeProtos.MinorType.INT) - .addNullable("m2", TypeProtos.MinorType.VARCHAR) + .addNullable("s1", TypeProtos.MinorType.INT) + .addNullable("s2", TypeProtos.MinorType.VARCHAR) + .resumeMap() + .addDict("map_col", TypeProtos.MinorType.VARCHAR) + .nullableValue(TypeProtos.MinorType.INT) .resumeMap() .resumeSchema() .buildSchema(); - checkSchema("struct_col struct<int_col int, array_col array<int>, nested_struct struct<m1 int, m2 varchar>>", schema); + checkSchema("struct_col struct<int_col int" + + ", array_col array<int>" + + ", nested_struct struct<s1 int, s2 varchar>" + + ", map_col map<varchar, int>" + + ">", schema); + } + + @Test + public void testMap() throws Exception { + TupleMetadata schema = new SchemaBuilder() + .addDict("dict_col_simple", TypeProtos.MinorType.VARCHAR) + .nullableValue(TypeProtos.MinorType.INT) + .resumeSchema() + .addDict("dict_col_simple_ps", TypeProtos.MajorType.newBuilder() + .setMinorType(TypeProtos.MinorType.VARCHAR) + .setPrecision(50) + .setMode(TypeProtos.DataMode.REQUIRED) + .build()) + .value(TypeProtos.MajorType.newBuilder() + .setMinorType(TypeProtos.MinorType.VARDECIMAL) + .setPrecision(10) + .setScale(2) + .setMode(TypeProtos.DataMode.REQUIRED) + .build()) + .resumeSchema() + .addDict("dict_col_struct", TypeProtos.MinorType.INT) + .mapValue() + .add("sb", TypeProtos.MinorType.BIT) + .addNullable("si", TypeProtos.MinorType.INT) + .resumeDict() + .resumeSchema() + .addDict("dict_col_dict", TypeProtos.MinorType.VARCHAR) + .dictValue() + .key(TypeProtos.MinorType.INT) + .nullableValue(TypeProtos.MinorType.BIT) + .resumeDict() + .resumeSchema() + .addDict("dict_col_array", TypeProtos.MinorType.BIGINT) + .dictArrayValue() + .key(TypeProtos.MinorType.DATE) + .nullableValue(TypeProtos.MinorType.FLOAT8) + .resumeDict() + .resumeSchema() + .buildSchema(); + + checkSchema("dict_col_simple map<varchar, int>" + + ", dict_col_simple_ps map<varchar(50), decimal(10, 2) not null>" + + ", dict_col_struct map<int, struct<sb boolean not null, si int>>" + + ", dict_col_dict map<varchar, map<int, boolean>>" + + ", dict_col_array map<bigint, array<map<date, double>>>", + schema); } @Test @@ -235,20 +299,41 @@ public class TestSchemaParser extends BaseTest { @Test public void testModeForStructType() throws Exception { - TupleMetadata schema = SchemaExprParser.parseSchema("m struct<m1 int not null, m2 varchar>"); - ColumnMetadata map = schema.metadata("m"); - assertTrue(map.isMap()); - assertEquals(TypeProtos.DataMode.REQUIRED, map.mode()); - - TupleMetadata mapSchema = map.tupleSchema(); - assertFalse(mapSchema.metadata("m1").isNullable()); - assertTrue(mapSchema.metadata("m2").isNullable()); + TupleMetadata schema = SchemaExprParser.parseSchema("s struct<s1 int not null, s2 varchar>"); + ColumnMetadata struct = schema.metadata("s"); + assertTrue(struct.isMap()); + assertEquals(TypeProtos.DataMode.REQUIRED, struct.mode()); + + TupleMetadata mapSchema = struct.tupleSchema(); + assertFalse(mapSchema.metadata("s1").isNullable()); + assertTrue(mapSchema.metadata("s2").isNullable()); + } + + @Test + public void testModeForMapType() throws Exception { + TupleMetadata schema = SchemaExprParser.parseSchema("m1 map<varchar, int>, m2 map<varchar not null, int not null>"); + + ColumnMetadata mapOptional = schema.metadata("m1"); + assertTrue(mapOptional.isDict()); + assertEquals(TypeProtos.DataMode.REQUIRED, mapOptional.mode()); + DictColumnMetadata dictOptional = (DictColumnMetadata) mapOptional; + assertEquals(TypeProtos.DataMode.REQUIRED, dictOptional.keyColumnMetadata().mode()); + assertEquals(TypeProtos.DataMode.OPTIONAL, dictOptional.valueColumnMetadata().mode()); + + ColumnMetadata mapRequired = schema.metadata("m2"); + assertTrue(mapRequired.isDict()); + assertEquals(TypeProtos.DataMode.REQUIRED, mapRequired.mode()); + DictColumnMetadata dictRequired = (DictColumnMetadata) mapRequired; + assertEquals(TypeProtos.DataMode.REQUIRED, dictRequired.keyColumnMetadata().mode()); + assertEquals(TypeProtos.DataMode.REQUIRED, dictRequired.valueColumnMetadata().mode()); } @Test public void testModeForRepeatedType() throws Exception { - TupleMetadata schema = SchemaExprParser.parseSchema( - "a array<int>, aa array<array<int>>, ma array<struct<m1 int not null, m2 varchar>>"); + TupleMetadata schema = SchemaExprParser.parseSchema("a array<int>" + + ", aa array<array<int>>" + + ", sa array<struct<s1 int not null, s2 varchar>>" + + ", ma array<map<varchar, array<int>>>"); assertTrue(schema.metadata("a").isArray()); @@ -256,12 +341,19 @@ public class TestSchemaParser extends BaseTest { assertTrue(nestedArray.isArray()); assertTrue(nestedArray.childSchema().isArray()); + ColumnMetadata structArray = schema.metadata("sa"); + assertTrue(structArray.isArray()); + assertTrue(structArray.isMap()); + TupleMetadata structSchema = structArray.tupleSchema(); + assertFalse(structSchema.metadata("s1").isNullable()); + assertTrue(structSchema.metadata("s2").isNullable()); + ColumnMetadata mapArray = schema.metadata("ma"); assertTrue(mapArray.isArray()); - assertTrue(mapArray.isMap()); - TupleMetadata mapSchema = mapArray.tupleSchema(); - assertFalse(mapSchema.metadata("m1").isNullable()); - assertTrue(mapSchema.metadata("m2").isNullable()); + assertTrue(mapArray.isDict()); + DictColumnMetadata dictMetadata = (DictColumnMetadata) mapArray; + assertFalse(dictMetadata.keyColumnMetadata().isNullable()); + assertTrue(dictMetadata.valueColumnMetadata().isArray()); } @Test @@ -360,5 +452,4 @@ public class TestSchemaParser extends BaseTest { } ); } - } diff --git a/pom.xml b/pom.xml index 6ac0cb7..f26c218 100644 --- a/pom.xml +++ b/pom.xml @@ -100,7 +100,7 @@ <rat.skip>true</rat.skip> <license.skip>true</license.skip> <docker.repository>apache/drill</docker.repository> - <antlr.version>4.7.2</antlr.version> + <antlr.version>4.8-1</antlr.version> <lowestMavenVersion>3.3.3</lowestMavenVersion> <commons.net.version>3.6</commons.net.version> <commons.validator.version>1.6</commons.validator.version>
