DRILL-6114: Metadata revisions. Support for union vectors, list vectors, repeated list vectors. Refactored metadata classes.
closes #1112 Project: http://git-wip-us.apache.org/repos/asf/drill/repo Commit: http://git-wip-us.apache.org/repos/asf/drill/commit/cf2478f7 Tree: http://git-wip-us.apache.org/repos/asf/drill/tree/cf2478f7 Diff: http://git-wip-us.apache.org/repos/asf/drill/diff/cf2478f7 Branch: refs/heads/master Commit: cf2478f7a48e66cbb0a7f29750c8a4360a271e9b Parents: 69a5f3a Author: Paul Rogers <[email protected]> Authored: Mon Feb 5 20:18:18 2018 -0800 Committer: Arina Ielchiieva <[email protected]> Committed: Sat Mar 3 19:47:55 2018 +0200 ---------------------------------------------------------------------- .../exec/physical/rowSet/ResultSetLoader.java | 2 +- .../exec/physical/rowSet/impl/ColumnState.java | 2 +- .../rowSet/impl/NullableVectorState.java | 2 +- .../physical/rowSet/impl/OptionBuilder.java | 2 +- .../rowSet/impl/RepeatedVectorState.java | 2 +- .../rowSet/impl/ResultSetLoaderImpl.java | 2 +- .../physical/rowSet/impl/RowSetLoaderImpl.java | 2 +- .../physical/rowSet/impl/SingleVectorState.java | 2 +- .../exec/physical/rowSet/impl/TupleState.java | 11 +- .../rowSet/impl/VectorContainerBuilder.java | 6 +- .../physical/rowSet/model/BaseTupleModel.java | 8 +- .../physical/rowSet/model/MetadataProvider.java | 6 +- .../physical/rowSet/model/SchemaInference.java | 15 +- .../exec/physical/rowSet/model/TupleModel.java | 6 +- .../model/single/BuildVectorsFromMetadata.java | 4 +- .../rowSet/model/single/VectorAllocator.java | 4 +- .../apache/drill/exec/record/TupleSchema.java | 534 ------------------- .../record/metadata/AbstractColumnMetadata.java | 206 +++++++ .../exec/record/metadata/MapColumnMetadata.java | 126 +++++ .../exec/record/metadata/MetadataUtils.java | 165 ++++++ .../metadata/PrimitiveColumnMetadata.java | 131 +++++ .../metadata/RepeatedListColumnMetadata.java | 100 ++++ .../drill/exec/record/metadata/TupleSchema.java | 196 +++++++ .../record/metadata/VariantColumnMetadata.java | 136 +++++ .../exec/record/metadata/VariantSchema.java | 210 ++++++++ 
.../physical/impl/xsort/TestExternalSort.java | 20 +- .../impl/xsort/managed/SortTestUtilities.java | 2 +- .../impl/TestResultSetLoaderMapArray.java | 2 +- .../rowSet/impl/TestResultSetLoaderMaps.java | 2 +- .../impl/TestResultSetLoaderOmittedValues.java | 2 +- .../impl/TestResultSetLoaderOverflow.java | 2 +- .../impl/TestResultSetLoaderProjection.java | 4 +- .../impl/TestResultSetLoaderProtocol.java | 57 +- .../rowSet/impl/TestResultSetLoaderTorture.java | 2 +- .../drill/exec/physical/unit/TestMiniPlan.java | 10 +- .../drill/exec/record/TestTupleSchema.java | 264 +++++++-- .../exec/store/easy/text/compliant/TestCsv.java | 3 +- .../org/apache/drill/test/DrillTestWrapper.java | 6 +- .../org/apache/drill/test/OperatorFixture.java | 17 +- .../drill/test/rowSet/AbstractRowSet.java | 2 +- .../drill/test/rowSet/AbstractSingleRowSet.java | 2 +- .../apache/drill/test/rowSet/DirectRowSet.java | 8 +- .../drill/test/rowSet/HyperRowSetImpl.java | 2 +- .../org/apache/drill/test/rowSet/RowSet.java | 2 +- .../apache/drill/test/rowSet/RowSetBuilder.java | 6 +- .../apache/drill/test/rowSet/RowSetPrinter.java | 4 +- .../drill/test/rowSet/RowSetReaderImpl.java | 2 +- .../drill/test/rowSet/RowSetWriterImpl.java | 2 +- .../apache/drill/test/rowSet/SchemaBuilder.java | 11 +- .../drill/test/rowSet/test/DummyWriterTest.java | 2 +- .../drill/test/rowSet/test/PerformanceTool.java | 8 +- .../drill/test/rowSet/test/RowSetTest.java | 2 +- .../drill/test/rowSet/test/TestFillEmpties.java | 2 +- .../drill/exec/record/ColumnMetadata.java | 114 ---- .../apache/drill/exec/record/TupleMetadata.java | 88 --- .../drill/exec/record/TupleNameSpace.java | 89 ---- .../exec/record/metadata/ColumnMetadata.java | 196 +++++++ .../exec/record/metadata/ProjectionType.java | 27 + .../exec/record/metadata/TupleMetadata.java | 90 ++++ .../exec/record/metadata/TupleNameSpace.java | 89 ++++ .../exec/record/metadata/VariantMetadata.java | 177 ++++++ .../exec/vector/accessor/ObjectWriter.java | 2 +- 
.../drill/exec/vector/accessor/TupleReader.java | 2 +- .../drill/exec/vector/accessor/TupleWriter.java | 4 +- .../accessor/reader/AbstractTupleReader.java | 2 +- .../exec/vector/accessor/reader/MapReader.java | 2 +- .../accessor/writer/AbstractArrayWriter.java | 2 +- .../accessor/writer/AbstractObjectWriter.java | 2 +- .../accessor/writer/AbstractScalarWriter.java | 2 +- .../accessor/writer/AbstractTupleWriter.java | 4 +- .../accessor/writer/ColumnWriterFactory.java | 2 +- .../exec/vector/accessor/writer/MapWriter.java | 2 +- .../accessor/writer/NullableScalarWriter.java | 2 +- .../accessor/writer/ScalarArrayWriter.java | 2 +- 74 files changed, 2221 insertions(+), 1007 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/drill/blob/cf2478f7/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/ResultSetLoader.java ---------------------------------------------------------------------- diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/ResultSetLoader.java b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/ResultSetLoader.java index a4b260b..0bfb948 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/ResultSetLoader.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/ResultSetLoader.java @@ -17,8 +17,8 @@ */ package org.apache.drill.exec.physical.rowSet; -import org.apache.drill.exec.record.TupleMetadata; import org.apache.drill.exec.record.VectorContainer; +import org.apache.drill.exec.record.metadata.TupleMetadata; import org.apache.drill.exec.vector.BaseValueVector; import org.apache.drill.exec.vector.complex.impl.VectorContainerWriter; http://git-wip-us.apache.org/repos/asf/drill/blob/cf2478f7/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/impl/ColumnState.java ---------------------------------------------------------------------- diff --git 
a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/impl/ColumnState.java b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/impl/ColumnState.java index f3626d9..33d3ffe 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/impl/ColumnState.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/impl/ColumnState.java @@ -22,7 +22,7 @@ import java.util.ArrayList; import org.apache.drill.common.exceptions.UserException; import org.apache.drill.exec.physical.rowSet.impl.SingleVectorState.OffsetVectorState; import org.apache.drill.exec.physical.rowSet.impl.TupleState.MapState; -import org.apache.drill.exec.record.ColumnMetadata; +import org.apache.drill.exec.record.metadata.ColumnMetadata; import org.apache.drill.exec.vector.UInt4Vector; import org.apache.drill.exec.vector.ValueVector; import org.apache.drill.exec.vector.accessor.impl.HierarchicalFormatter; http://git-wip-us.apache.org/repos/asf/drill/blob/cf2478f7/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/impl/NullableVectorState.java ---------------------------------------------------------------------- diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/impl/NullableVectorState.java b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/impl/NullableVectorState.java index bf91032..7e8080f 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/impl/NullableVectorState.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/impl/NullableVectorState.java @@ -18,7 +18,7 @@ package org.apache.drill.exec.physical.rowSet.impl; import org.apache.drill.exec.physical.rowSet.impl.SingleVectorState.ValuesVectorState; -import org.apache.drill.exec.record.ColumnMetadata; +import org.apache.drill.exec.record.metadata.ColumnMetadata; import org.apache.drill.exec.vector.FixedWidthVector; import org.apache.drill.exec.vector.NullableVector; 
import org.apache.drill.exec.vector.ValueVector; http://git-wip-us.apache.org/repos/asf/drill/blob/cf2478f7/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/impl/OptionBuilder.java ---------------------------------------------------------------------- diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/impl/OptionBuilder.java b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/impl/OptionBuilder.java index a743052..fa62a05 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/impl/OptionBuilder.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/impl/OptionBuilder.java @@ -22,7 +22,7 @@ import java.util.Collection; import org.apache.drill.common.expression.SchemaPath; import org.apache.drill.exec.physical.rowSet.ResultVectorCache; import org.apache.drill.exec.physical.rowSet.impl.ResultSetLoaderImpl.ResultSetOptions; -import org.apache.drill.exec.record.TupleMetadata; +import org.apache.drill.exec.record.metadata.TupleMetadata; import org.apache.drill.exec.vector.BaseValueVector; import org.apache.drill.exec.vector.ValueVector; http://git-wip-us.apache.org/repos/asf/drill/blob/cf2478f7/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/impl/RepeatedVectorState.java ---------------------------------------------------------------------- diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/impl/RepeatedVectorState.java b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/impl/RepeatedVectorState.java index 98b6beb..9bd1ef2 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/impl/RepeatedVectorState.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/impl/RepeatedVectorState.java @@ -19,7 +19,7 @@ package org.apache.drill.exec.physical.rowSet.impl; import org.apache.drill.exec.physical.rowSet.impl.SingleVectorState.OffsetVectorState; import 
org.apache.drill.exec.physical.rowSet.impl.SingleVectorState.ValuesVectorState; -import org.apache.drill.exec.record.ColumnMetadata; +import org.apache.drill.exec.record.metadata.ColumnMetadata; import org.apache.drill.exec.vector.ValueVector; import org.apache.drill.exec.vector.accessor.impl.HierarchicalFormatter; import org.apache.drill.exec.vector.accessor.writer.AbstractArrayWriter; http://git-wip-us.apache.org/repos/asf/drill/blob/cf2478f7/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/impl/ResultSetLoaderImpl.java ---------------------------------------------------------------------- diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/impl/ResultSetLoaderImpl.java b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/impl/ResultSetLoaderImpl.java index b875e7e..cc50729 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/impl/ResultSetLoaderImpl.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/impl/ResultSetLoaderImpl.java @@ -26,8 +26,8 @@ import org.apache.drill.exec.physical.rowSet.ResultSetLoader; import org.apache.drill.exec.physical.rowSet.ResultVectorCache; import org.apache.drill.exec.physical.rowSet.RowSetLoader; import org.apache.drill.exec.physical.rowSet.impl.TupleState.RowState; -import org.apache.drill.exec.record.TupleMetadata; import org.apache.drill.exec.record.VectorContainer; +import org.apache.drill.exec.record.metadata.TupleMetadata; import org.apache.drill.exec.vector.ValueVector; import org.apache.drill.exec.vector.accessor.impl.HierarchicalFormatter; http://git-wip-us.apache.org/repos/asf/drill/blob/cf2478f7/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/impl/RowSetLoaderImpl.java ---------------------------------------------------------------------- diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/impl/RowSetLoaderImpl.java 
b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/impl/RowSetLoaderImpl.java index ec61ae7..dca749c 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/impl/RowSetLoaderImpl.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/impl/RowSetLoaderImpl.java @@ -21,7 +21,7 @@ import java.util.ArrayList; import org.apache.drill.exec.physical.rowSet.ResultSetLoader; import org.apache.drill.exec.physical.rowSet.RowSetLoader; -import org.apache.drill.exec.record.TupleMetadata; +import org.apache.drill.exec.record.metadata.TupleMetadata; import org.apache.drill.exec.vector.accessor.writer.AbstractObjectWriter; import org.apache.drill.exec.vector.accessor.writer.AbstractTupleWriter; http://git-wip-us.apache.org/repos/asf/drill/blob/cf2478f7/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/impl/SingleVectorState.java ---------------------------------------------------------------------- diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/impl/SingleVectorState.java b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/impl/SingleVectorState.java index f6bc5f3..e813a70 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/impl/SingleVectorState.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/impl/SingleVectorState.java @@ -18,7 +18,7 @@ package org.apache.drill.exec.physical.rowSet.impl; import org.apache.drill.exec.expr.TypeHelper; -import org.apache.drill.exec.record.ColumnMetadata; +import org.apache.drill.exec.record.metadata.ColumnMetadata; import org.apache.drill.exec.vector.FixedWidthVector; import org.apache.drill.exec.vector.UInt4Vector; import org.apache.drill.exec.vector.ValueVector; http://git-wip-us.apache.org/repos/asf/drill/blob/cf2478f7/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/impl/TupleState.java 
---------------------------------------------------------------------- diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/impl/TupleState.java b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/impl/TupleState.java index de41ee4..82f0437 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/impl/TupleState.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/impl/TupleState.java @@ -23,11 +23,12 @@ import java.util.List; import org.apache.drill.exec.physical.rowSet.impl.ColumnState.BaseMapColumnState; import org.apache.drill.exec.physical.rowSet.impl.ColumnState.MapArrayColumnState; import org.apache.drill.exec.physical.rowSet.impl.ColumnState.MapColumnState; -import org.apache.drill.exec.record.ColumnMetadata; import org.apache.drill.exec.record.MaterializedField; -import org.apache.drill.exec.record.TupleMetadata; -import org.apache.drill.exec.record.TupleSchema; -import org.apache.drill.exec.record.TupleSchema.AbstractColumnMetadata; +import org.apache.drill.exec.record.metadata.AbstractColumnMetadata; +import org.apache.drill.exec.record.metadata.ColumnMetadata; +import org.apache.drill.exec.record.metadata.MetadataUtils; +import org.apache.drill.exec.record.metadata.TupleMetadata; +import org.apache.drill.exec.record.metadata.TupleSchema; import org.apache.drill.exec.vector.ValueVector; import org.apache.drill.exec.vector.accessor.ObjectType; import org.apache.drill.exec.vector.accessor.ObjectWriter; @@ -182,7 +183,7 @@ public abstract class TupleState implements TupleWriterListener { @Override public ObjectWriter addColumn(TupleWriter tupleWriter, MaterializedField column) { - return addColumn(tupleWriter, TupleSchema.fromField(column)); + return addColumn(tupleWriter, MetadataUtils.fromField(column)); } @Override 
http://git-wip-us.apache.org/repos/asf/drill/blob/cf2478f7/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/impl/VectorContainerBuilder.java ---------------------------------------------------------------------- diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/impl/VectorContainerBuilder.java b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/impl/VectorContainerBuilder.java index faa68cb..7411176 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/impl/VectorContainerBuilder.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/impl/VectorContainerBuilder.java @@ -22,9 +22,9 @@ import java.util.List; import org.apache.drill.exec.memory.BufferAllocator; import org.apache.drill.exec.physical.rowSet.impl.ColumnState.BaseMapColumnState; import org.apache.drill.exec.record.BatchSchema.SelectionVectorMode; -import org.apache.drill.exec.record.ColumnMetadata; -import org.apache.drill.exec.record.TupleMetadata; -import org.apache.drill.exec.record.TupleSchema; +import org.apache.drill.exec.record.metadata.ColumnMetadata; +import org.apache.drill.exec.record.metadata.TupleMetadata; +import org.apache.drill.exec.record.metadata.TupleSchema; import org.apache.drill.exec.record.VectorContainer; import org.apache.drill.exec.vector.UInt4Vector; import org.apache.drill.exec.vector.ValueVector; http://git-wip-us.apache.org/repos/asf/drill/blob/cf2478f7/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/model/BaseTupleModel.java ---------------------------------------------------------------------- diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/model/BaseTupleModel.java b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/model/BaseTupleModel.java index 40da4ec..ff2481f 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/model/BaseTupleModel.java +++ 
b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/model/BaseTupleModel.java @@ -20,10 +20,10 @@ package org.apache.drill.exec.physical.rowSet.model; import java.util.ArrayList; import java.util.List; -import org.apache.drill.exec.record.ColumnMetadata; -import org.apache.drill.exec.record.TupleMetadata; -import org.apache.drill.exec.record.TupleSchema; -import org.apache.drill.exec.record.TupleSchema.AbstractColumnMetadata; +import org.apache.drill.exec.record.metadata.AbstractColumnMetadata; +import org.apache.drill.exec.record.metadata.ColumnMetadata; +import org.apache.drill.exec.record.metadata.TupleMetadata; +import org.apache.drill.exec.record.metadata.TupleSchema; import org.apache.drill.exec.record.VectorContainer; /** http://git-wip-us.apache.org/repos/asf/drill/blob/cf2478f7/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/model/MetadataProvider.java ---------------------------------------------------------------------- diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/model/MetadataProvider.java b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/model/MetadataProvider.java index bb5e18e..27e3c3d 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/model/MetadataProvider.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/model/MetadataProvider.java @@ -17,10 +17,10 @@ */ package org.apache.drill.exec.physical.rowSet.model; -import org.apache.drill.exec.record.ColumnMetadata; import org.apache.drill.exec.record.MaterializedField; -import org.apache.drill.exec.record.TupleMetadata; -import org.apache.drill.exec.record.TupleSchema; +import org.apache.drill.exec.record.metadata.ColumnMetadata; +import org.apache.drill.exec.record.metadata.TupleMetadata; +import org.apache.drill.exec.record.metadata.TupleSchema; /** * Interface for retrieving and/or creating metadata given 
http://git-wip-us.apache.org/repos/asf/drill/blob/cf2478f7/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/model/SchemaInference.java ---------------------------------------------------------------------- diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/model/SchemaInference.java b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/model/SchemaInference.java index 3db01dd..9096ec2 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/model/SchemaInference.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/model/SchemaInference.java @@ -21,11 +21,12 @@ import java.util.ArrayList; import java.util.List; import org.apache.drill.common.types.TypeProtos.MinorType; -import org.apache.drill.exec.record.ColumnMetadata; import org.apache.drill.exec.record.MaterializedField; -import org.apache.drill.exec.record.TupleMetadata; -import org.apache.drill.exec.record.TupleSchema; import org.apache.drill.exec.record.VectorContainer; +import org.apache.drill.exec.record.metadata.ColumnMetadata; +import org.apache.drill.exec.record.metadata.MetadataUtils; +import org.apache.drill.exec.record.metadata.TupleMetadata; +import org.apache.drill.exec.record.metadata.TupleSchema; /** * Produce a metadata schema from a vector container. 
Used when given a @@ -40,14 +41,14 @@ public class SchemaInference { MaterializedField field = container.getValueVector(i).getField(); columns.add(inferVector(field)); } - return TupleSchema.fromColumns(columns); + return MetadataUtils.fromColumns(columns); } private ColumnMetadata inferVector(MaterializedField field) { if (field.getType().getMinorType() == MinorType.MAP) { - return TupleSchema.newMap(field, inferMapSchema(field)); + return MetadataUtils.newMap(field, inferMapSchema(field)); } else { - return TupleSchema.fromField(field); + return MetadataUtils.fromField(field); } } @@ -56,6 +57,6 @@ public class SchemaInference { for (MaterializedField child : field.getChildren()) { columns.add(inferVector(child)); } - return TupleSchema.fromColumns(columns); + return MetadataUtils.fromColumns(columns); } } http://git-wip-us.apache.org/repos/asf/drill/blob/cf2478f7/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/model/TupleModel.java ---------------------------------------------------------------------- diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/model/TupleModel.java b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/model/TupleModel.java index 5fcba73..124119d 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/model/TupleModel.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/model/TupleModel.java @@ -19,11 +19,11 @@ package org.apache.drill.exec.physical.rowSet.model; import javax.sql.RowSet; -import org.apache.drill.exec.record.ColumnMetadata; import org.apache.drill.exec.record.MaterializedField; -import org.apache.drill.exec.record.TupleMetadata; -import org.apache.drill.exec.record.TupleSchema; import org.apache.drill.exec.record.VectorContainer; +import org.apache.drill.exec.record.metadata.ColumnMetadata; +import org.apache.drill.exec.record.metadata.TupleMetadata; +import org.apache.drill.exec.record.metadata.TupleSchema; 
import org.apache.drill.exec.vector.complex.AbstractMapVector; /** http://git-wip-us.apache.org/repos/asf/drill/blob/cf2478f7/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/model/single/BuildVectorsFromMetadata.java ---------------------------------------------------------------------- diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/model/single/BuildVectorsFromMetadata.java b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/model/single/BuildVectorsFromMetadata.java index 30f60b3..50568db 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/model/single/BuildVectorsFromMetadata.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/model/single/BuildVectorsFromMetadata.java @@ -20,9 +20,9 @@ package org.apache.drill.exec.physical.rowSet.model.single; import org.apache.drill.exec.expr.TypeHelper; import org.apache.drill.exec.memory.BufferAllocator; import org.apache.drill.exec.record.BatchSchema.SelectionVectorMode; -import org.apache.drill.exec.record.ColumnMetadata; +import org.apache.drill.exec.record.metadata.ColumnMetadata; +import org.apache.drill.exec.record.metadata.TupleMetadata; import org.apache.drill.exec.record.MaterializedField; -import org.apache.drill.exec.record.TupleMetadata; import org.apache.drill.exec.record.VectorContainer; import org.apache.drill.exec.vector.ValueVector; import org.apache.drill.exec.vector.complex.AbstractMapVector; http://git-wip-us.apache.org/repos/asf/drill/blob/cf2478f7/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/model/single/VectorAllocator.java ---------------------------------------------------------------------- diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/model/single/VectorAllocator.java b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/model/single/VectorAllocator.java index e29a5cb..f4fc5d4 100644 --- 
a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/model/single/VectorAllocator.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/model/single/VectorAllocator.java @@ -23,9 +23,9 @@ import org.apache.drill.common.types.TypeProtos.MinorType; import org.apache.drill.exec.physical.rowSet.model.MetadataProvider; import org.apache.drill.exec.physical.rowSet.model.MetadataProvider.MetadataCreator; import org.apache.drill.exec.physical.rowSet.model.MetadataProvider.MetadataRetrieval; -import org.apache.drill.exec.record.ColumnMetadata; -import org.apache.drill.exec.record.TupleMetadata; import org.apache.drill.exec.record.VectorContainer; +import org.apache.drill.exec.record.metadata.ColumnMetadata; +import org.apache.drill.exec.record.metadata.TupleMetadata; import org.apache.drill.exec.vector.AllocationHelper; import org.apache.drill.exec.vector.ValueVector; import org.apache.drill.exec.vector.complex.AbstractMapVector; http://git-wip-us.apache.org/repos/asf/drill/blob/cf2478f7/exec/java-exec/src/main/java/org/apache/drill/exec/record/TupleSchema.java ---------------------------------------------------------------------- diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/record/TupleSchema.java b/exec/java-exec/src/main/java/org/apache/drill/exec/record/TupleSchema.java deleted file mode 100644 index 27a88f0..0000000 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/record/TupleSchema.java +++ /dev/null @@ -1,534 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.drill.exec.record; - -import java.util.ArrayList; -import java.util.Iterator; -import java.util.List; - -import org.apache.drill.common.types.TypeProtos.DataMode; -import org.apache.drill.common.types.TypeProtos.MajorType; -import org.apache.drill.common.types.TypeProtos.MinorType; -import org.apache.drill.exec.expr.TypeHelper; -import org.apache.drill.exec.record.BatchSchema.SelectionVectorMode; - -/** - * Defines the schema of a tuple: either the top-level row or a nested - * "map" (really structure). A schema is a collection of columns (backed - * by vectors in the loader itself.) Columns are accessible by name or - * index. New columns may be added at any time; the new column takes the - * next available index. - */ - -public class TupleSchema implements TupleMetadata { - - /** - * Abstract definition of column metadata. Allows applications to create - * specialized forms of a column metadata object by extending from this - * abstract class. - * <p> - * Note that, by design, primitive columns do not have a link to their - * tuple parent, or their index within that parent. This allows the same - * metadata to be shared between two views of a tuple, perhaps physical - * and projected views. This restriction does not apply to map columns, - * since maps (and the row itself) will, by definition, differ between - * the two views. 
- */ - - public static abstract class AbstractColumnMetadata implements ColumnMetadata { - - protected MaterializedField schema; - protected boolean projected = true; - - /** - * Predicted number of elements per array entry. Default is - * taken from the often hard-coded value of 10. - */ - - protected int expectedElementCount = 1; - - public AbstractColumnMetadata(MaterializedField schema) { - this.schema = schema; - if (isArray()) { - expectedElementCount = DEFAULT_ARRAY_SIZE; - } - } - - public AbstractColumnMetadata(AbstractColumnMetadata from) { - schema = from.schema; - expectedElementCount = from.expectedElementCount; - } - - protected void bind(TupleSchema parentTuple) { } - - @Override - public MaterializedField schema() { return schema; } - - public void replaceField(MaterializedField field) { - this.schema = field; - } - @Override - public String name() { return schema().getName(); } - - @Override - public MajorType majorType() { return schema().getType(); } - - @Override - public MinorType type() { return schema().getType().getMinorType(); } - - @Override - public DataMode mode() { return schema().getDataMode(); } - - @Override - public boolean isNullable() { return mode() == DataMode.OPTIONAL; } - - @Override - public boolean isArray() { return mode() == DataMode.REPEATED; } - - @Override - public boolean isList() { return false; } - - @Override - public boolean isVariableWidth() { - MinorType type = type(); - return type == MinorType.VARCHAR || type == MinorType.VAR16CHAR || type == MinorType.VARBINARY; - } - - @Override - public boolean isEquivalent(ColumnMetadata other) { - return schema().isEquivalent(other.schema()); - } - - @Override - public int expectedWidth() { return 0; } - - @Override - public void setExpectedWidth(int width) { } - - @Override - public void setExpectedElementCount(int childCount) { - // The allocation utilities don't like an array size of zero, so set to - // 1 as the minimum. 
Adjusted to avoid trivial errors if the caller - // makes an error. - - if (isArray()) { - expectedElementCount = Math.max(1, childCount); - } - } - - @Override - public int expectedElementCount() { return expectedElementCount; } - - @Override - public void setProjected(boolean projected) { - this.projected = projected; - } - - @Override - public boolean isProjected() { return projected; } - - @Override - public String toString() { - StringBuilder buf = new StringBuilder() - .append("[") - .append(getClass().getSimpleName()) - .append(" ") - .append(schema().toString()) - .append(",") - .append(projected ? "" : "not ") - .append("projected"); - if (isArray()) { - buf.append(", cardinality: ") - .append(expectedElementCount); - } - return buf - .append("]") - .toString(); - } - - public abstract AbstractColumnMetadata copy(); - } - - /** - * Primitive (non-map) column. Describes non-nullable, nullable and - * array types (which differ only in mode, but not in metadata structure.) - */ - - public static class PrimitiveColumnMetadata extends AbstractColumnMetadata { - - protected int expectedWidth; - - public PrimitiveColumnMetadata(MaterializedField schema) { - super(schema); - expectedWidth = TypeHelper.getSize(majorType()); - if (isVariableWidth()) { - - // The above getSize() method uses the deprecated getWidth() - // method to get the expected VarChar size. If zero (which - // it will be), try the revised precision field. 
- - int precision = majorType().getPrecision(); - if (precision > 0) { - expectedWidth = precision; - } else { - // TypeHelper includes the offset vector width - - expectedWidth = expectedWidth - 4; - } - } - } - - public PrimitiveColumnMetadata(PrimitiveColumnMetadata from) { - super(from); - expectedWidth = from.expectedWidth; - } - - @Override - public AbstractColumnMetadata copy() { - return new PrimitiveColumnMetadata(this); - } - - @Override - public ColumnMetadata.StructureType structureType() { return ColumnMetadata.StructureType.PRIMITIVE; } - - @Override - public TupleMetadata mapSchema() { return null; } - - @Override - public boolean isMap() { return false; } - - @Override - public int expectedWidth() { return expectedWidth; } - - @Override - public void setExpectedWidth(int width) { - // The allocation utilities don't like a width of zero, so set to - // 1 as the minimum. Adjusted to avoid trivial errors if the caller - // makes an error. - - if (isVariableWidth()) { - expectedWidth = Math.max(1, width); - } - } - - @Override - public ColumnMetadata cloneEmpty() { - return new PrimitiveColumnMetadata(this); - } - } - - /** - * Describes a map and repeated map. Both are tuples that have a tuple - * schema as part of the column definition. - */ - - public static class MapColumnMetadata extends AbstractColumnMetadata { - private TupleMetadata parentTuple; - private final TupleSchema mapSchema; - - /** - * Build a new map column from the field provided - * - * @param schema materialized field description of the map - */ - - public MapColumnMetadata(MaterializedField schema) { - this(schema, null); - } - - /** - * Build a map column metadata by cloning the type information (but not - * the children) of the materialized field provided. Use the hints - * provided. 
- * - * @param schema the schema to use - * @param hints metadata hints for this column - */ - - private MapColumnMetadata(MaterializedField schema, TupleSchema mapSchema) { - super(schema); - if (mapSchema == null) { - this.mapSchema = new TupleSchema(); - } else { - this.mapSchema = mapSchema; - } - this.mapSchema.bind(this); - } - - @Override - public AbstractColumnMetadata copy() { - return new MapColumnMetadata(schema, (TupleSchema) mapSchema.copy()); - } - - @Override - protected void bind(TupleSchema parentTuple) { - this.parentTuple = parentTuple; - } - - @Override - public ColumnMetadata.StructureType structureType() { return ColumnMetadata.StructureType.TUPLE; } - - @Override - public TupleMetadata mapSchema() { return mapSchema; } - - @Override - public int expectedWidth() { return 0; } - - @Override - public boolean isMap() { return true; } - - public TupleMetadata parentTuple() { return parentTuple; } - - public TupleSchema mapSchemaImpl() { return mapSchema; } - - @Override - public ColumnMetadata cloneEmpty() { - return new MapColumnMetadata(schema().cloneEmpty(), null); - } - } - - private MapColumnMetadata parentMap; - private final TupleNameSpace<ColumnMetadata> nameSpace = new TupleNameSpace<>(); - - public void bind(MapColumnMetadata parentMap) { - this.parentMap = parentMap; - } - - public static TupleSchema fromFields(Iterable<MaterializedField> fields) { - TupleSchema tuple = new TupleSchema(); - for (MaterializedField field : fields) { - tuple.add(field); - } - return tuple; - } - - public TupleMetadata copy() { - TupleMetadata tuple = new TupleSchema(); - for (ColumnMetadata md : this) { - tuple.addColumn(((AbstractColumnMetadata) md).copy()); - } - return tuple; - } - - /** - * Create a column metadata object that holds the given - * {@link MaterializedField}. The type of the object will be either a - * primitive or map column, depending on the field's type. 
- * - * @param field the materialized field to wrap - * @return the column metadata that wraps the field - */ - - public static AbstractColumnMetadata fromField(MaterializedField field) { - if (field.getType().getMinorType() == MinorType.MAP) { - return newMap(field); - } else { - return new PrimitiveColumnMetadata(field); - } - } - - public static AbstractColumnMetadata fromView(MaterializedField field) { - if (field.getType().getMinorType() == MinorType.MAP) { - return new MapColumnMetadata(field, null); - } else { - return new PrimitiveColumnMetadata(field); - } - } - - /** - * Create a tuple given the list of columns that make up the tuple. - * Creates nested maps as needed. - * - * @param columns list of columns that make up the tuple - * @return a tuple metadata object that contains the columns - */ - - public static TupleSchema fromColumns(List<ColumnMetadata> columns) { - TupleSchema tuple = new TupleSchema(); - for (ColumnMetadata column : columns) { - tuple.add((AbstractColumnMetadata) column); - } - return tuple; - } - - /** - * Create a column metadata object for a map column, given the - * {@link MaterializedField} that describes the column, and a list - * of column metadata objects that describe the columns in the map. 
- * - * @param field the materialized field that describes the map column - * @param schema metadata that describes the tuple of columns in - * the map - * @return a map column metadata for the map - */ - - public static MapColumnMetadata newMap(MaterializedField field, TupleSchema schema) { - return new MapColumnMetadata(field, schema); - } - - public static MapColumnMetadata newMap(MaterializedField field) { - return new MapColumnMetadata(field, fromFields(field.getChildren())); - } - - @Override - public ColumnMetadata add(MaterializedField field) { - AbstractColumnMetadata md = fromField(field); - add(md); - return md; - } - - public ColumnMetadata addView(MaterializedField field) { - AbstractColumnMetadata md = fromView(field); - add(md); - return md; - } - - /** - * Add a column metadata column created by the caller. Used for specialized - * cases beyond those handled by {@link #add(MaterializedField)}. - * - * @param md the custom column metadata which must have the correct - * index set (from {@link #size()} - */ - - public void add(AbstractColumnMetadata md) { - md.bind(this); - nameSpace.add(md.name(), md); - if (parentMap != null) { - parentMap.schema.addChild(md.schema()); - } - } - - @Override - public int addColumn(ColumnMetadata column) { - add((AbstractColumnMetadata) column); - return size() - 1; - } - - @Override - public MaterializedField column(String name) { - ColumnMetadata md = metadata(name); - return md == null ? 
null : md.schema(); - } - - @Override - public ColumnMetadata metadata(String name) { - return nameSpace.get(name); - } - - @Override - public int index(String name) { - return nameSpace.indexOf(name); - } - - @Override - public MaterializedField column(int index) { - return metadata(index).schema(); - } - - @Override - public ColumnMetadata metadata(int index) { - return nameSpace.get(index); - } - - @Override - public MapColumnMetadata parent() { return parentMap; } - - @Override - public int size() { return nameSpace.count(); } - - @Override - public boolean isEmpty() { return nameSpace.count( ) == 0; } - - @Override - public Iterator<ColumnMetadata> iterator() { - return nameSpace.iterator(); - } - - @Override - public boolean isEquivalent(TupleMetadata other) { - TupleSchema otherSchema = (TupleSchema) other; - if (nameSpace.count() != otherSchema.nameSpace.count()) { - return false; - } - for (int i = 0; i < nameSpace.count(); i++) { - if (! nameSpace.get(i).isEquivalent(otherSchema.nameSpace.get(i))) { - return false; - } - } - return true; - } - - @Override - public List<MaterializedField> toFieldList() { - List<MaterializedField> cols = new ArrayList<>(); - for (ColumnMetadata md : nameSpace) { - cols.add(md.schema()); - } - return cols; - } - - public BatchSchema toBatchSchema(SelectionVectorMode svMode) { - return new BatchSchema(svMode, toFieldList()); - } - - @Override - public String fullName(int index) { - return fullName(metadata(index)); - } - - @Override - public String fullName(ColumnMetadata column) { - String quotedName = column.name(); - if (quotedName.contains(".")) { - quotedName = "`" + quotedName + "`"; - } - if (isRoot()) { - return column.name(); - } else { - return fullName() + "." 
+ quotedName; - } - } - - public String fullName() { - if (isRoot()) { - return "<root>"; - } else { - return parentMap.parentTuple().fullName(parentMap); - } - } - - public boolean isRoot() { return parentMap == null; } - - @Override - public String toString() { - StringBuilder buf = new StringBuilder() - .append("[") - .append(getClass().getSimpleName()) - .append(" "); - boolean first = true; - for (ColumnMetadata md : nameSpace) { - if (! first) { - buf.append(", "); - } - buf.append(md.toString()); - } - buf.append("]"); - return buf.toString(); - } -} http://git-wip-us.apache.org/repos/asf/drill/blob/cf2478f7/exec/java-exec/src/main/java/org/apache/drill/exec/record/metadata/AbstractColumnMetadata.java ---------------------------------------------------------------------- diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/record/metadata/AbstractColumnMetadata.java b/exec/java-exec/src/main/java/org/apache/drill/exec/record/metadata/AbstractColumnMetadata.java new file mode 100644 index 0000000..2073c80 --- /dev/null +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/record/metadata/AbstractColumnMetadata.java @@ -0,0 +1,206 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.drill.exec.record.metadata; + +import org.apache.drill.common.types.TypeProtos.DataMode; +import org.apache.drill.common.types.TypeProtos.MajorType; +import org.apache.drill.common.types.TypeProtos.MinorType; +import org.apache.drill.exec.record.MaterializedField; + +/** + * Abstract definition of column metadata. Allows applications to create + * specialized forms of a column metadata object by extending from this + * abstract class. + * <p> + * Note that, by design, primitive columns do not have a link to their + * tuple parent, or their index within that parent. This allows the same + * metadata to be shared between two views of a tuple, perhaps physical + * and projected views. This restriction does not apply to map columns, + * since maps (and the row itself) will, by definition, differ between + * the two views. + */ + +public abstract class AbstractColumnMetadata implements ColumnMetadata { + + // Capture the key schema information. We cannot use the MaterializedField + // or MajorType because then encode child information that we encode here + // as a child schema. Keeping the two in sync is nearly impossible. + + protected final String name; + protected final MinorType type; + protected final DataMode mode; + protected final int precision; + protected final int scale; + protected boolean projected = true; + + /** + * Predicted number of elements per array entry. Default is + * taken from the often hard-coded value of 10. 
+ */ + + protected int expectedElementCount = 1; + + public AbstractColumnMetadata(MaterializedField schema) { + name = schema.getName(); + MajorType majorType = schema.getType(); + type = majorType.getMinorType(); + mode = majorType.getMode(); + precision = majorType.getPrecision(); + scale = majorType.getScale(); + if (isArray()) { + expectedElementCount = DEFAULT_ARRAY_SIZE; + } + } + + public AbstractColumnMetadata(String name, MinorType type, DataMode mode) { + this.name = name; + this.type = type; + this.mode = mode; + precision = 0; + scale = 0; + if (isArray()) { + expectedElementCount = DEFAULT_ARRAY_SIZE; + } + } + + public AbstractColumnMetadata(AbstractColumnMetadata from) { + name = from.name; + type = from.type; + mode = from.mode; + precision = from.precision; + scale = from.scale; + expectedElementCount = from.expectedElementCount; + } + + protected void bind(TupleSchema parentTuple) { } + + @Override + public String name() { return name; } + + @Override + public MinorType type() { return type; } + + @Override + public MajorType majorType() { + return MajorType.newBuilder() + .setMinorType(type()) + .setMode(mode()) + .build(); + } + + @Override + public DataMode mode() { return mode; } + + @Override + public boolean isNullable() { return mode() == DataMode.OPTIONAL; } + + @Override + public boolean isArray() { return mode() == DataMode.REPEATED; } + + @Override + public int dimensions() { return isArray() ? 
1 : 0; } + + @Override + public boolean isMap() { return false; } + + @Override + public boolean isVariant() { return false; } + + @Override + public TupleMetadata mapSchema() { return null; } + + @Override + public VariantMetadata variantSchema() { return null; } + + @Override + public ColumnMetadata childSchema() { return null; } + + @Override + public boolean isVariableWidth() { + MinorType type = type(); + return type == MinorType.VARCHAR || type == MinorType.VAR16CHAR || type == MinorType.VARBINARY; + } + + @Override + public boolean isEquivalent(ColumnMetadata other) { + return schema().isEquivalent(other.schema()); + } + + @Override + public int expectedWidth() { return 0; } + + @Override + public void setExpectedWidth(int width) { } + + @Override + public int precision() { return 0; } + + @Override + public int scale() { return 0; } + + @Override + public void setExpectedElementCount(int childCount) { + // The allocation utilities don't like an array size of zero, so set to + // 1 as the minimum. Adjusted to avoid trivial errors if the caller + // makes an error. + + if (isArray()) { + expectedElementCount = Math.max(1, childCount); + } + } + + @Override + public int expectedElementCount() { return expectedElementCount; } + + @Override + public void setProjected(boolean projected) { + this.projected = projected; + } + + @Override + public boolean isProjected() { return projected; } + + @Override + public String toString() { + StringBuilder buf = new StringBuilder() + .append("[") + .append(getClass().getSimpleName()) + .append(" ") + .append(schema().toString()) + .append(", ") + .append(projected ? 
"" : "not ") + .append("projected"); + if (isArray()) { + buf.append(", cardinality: ") + .append(expectedElementCount); + } + if (variantSchema() != null) { + buf.append(", variant: ") + .append(variantSchema().toString()); + } + if (mapSchema() != null) { + buf.append(", schema: ") + .append(mapSchema().toString()); + } + return buf + .append("]") + .toString(); + } + + public abstract AbstractColumnMetadata copy(); +} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/drill/blob/cf2478f7/exec/java-exec/src/main/java/org/apache/drill/exec/record/metadata/MapColumnMetadata.java ---------------------------------------------------------------------- diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/record/metadata/MapColumnMetadata.java b/exec/java-exec/src/main/java/org/apache/drill/exec/record/metadata/MapColumnMetadata.java new file mode 100644 index 0000000..795fd7f --- /dev/null +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/record/metadata/MapColumnMetadata.java @@ -0,0 +1,126 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.drill.exec.record.metadata; + +import org.apache.drill.common.types.TypeProtos.DataMode; +import org.apache.drill.common.types.TypeProtos.MajorType; +import org.apache.drill.common.types.TypeProtos.MinorType; +import org.apache.drill.exec.record.MaterializedField; + +/** + * Describes a map and repeated map. Both are tuples that have a tuple + * schema as part of the column definition. + */ + +public class MapColumnMetadata extends AbstractColumnMetadata { + private TupleMetadata parentTuple; + private final TupleSchema mapSchema; + + /** + * Build a new map column from the field provided + * + * @param schema materialized field description of the map + */ + + public MapColumnMetadata(MaterializedField schema) { + this(schema, null); + } + + /** + * Build a map column metadata by cloning the type information (but not + * the children) of the materialized field provided. Use the hints + * provided. + * + * @param schema the schema to use + * @param hints metadata hints for this column + */ + + MapColumnMetadata(MaterializedField schema, TupleSchema mapSchema) { + super(schema); + if (mapSchema == null) { + this.mapSchema = new TupleSchema(); + } else { + this.mapSchema = mapSchema; + } + this.mapSchema.bind(this); + } + + public MapColumnMetadata(MapColumnMetadata from) { + super(from); + mapSchema = (TupleSchema) from.mapSchema.copy(); + } + + public MapColumnMetadata(String name, DataMode mode, + TupleSchema mapSchema) { + super(name, MinorType.MAP, mode); + if (mapSchema == null) { + this.mapSchema = new TupleSchema(); + } else { + this.mapSchema = mapSchema; + } + } + + @Override + public AbstractColumnMetadata copy() { + return new MapColumnMetadata(this); + } + + @Override + protected void bind(TupleSchema parentTuple) { + this.parentTuple = parentTuple; + } + + @Override + public ColumnMetadata.StructureType structureType() { return ColumnMetadata.StructureType.TUPLE; } + + @Override + public TupleMetadata mapSchema() { return 
mapSchema; } + + @Override + public int expectedWidth() { return 0; } + + @Override + public boolean isMap() { return true; } + + public TupleMetadata parentTuple() { return parentTuple; } + + public TupleSchema mapSchemaImpl() { return mapSchema; } + + @Override + public ColumnMetadata cloneEmpty() { + return new MapColumnMetadata(name, mode, new TupleSchema()); + } + + @Override + public MaterializedField schema() { + MaterializedField field = emptySchema(); + for (MaterializedField member : mapSchema.toFieldList()) { + field.addChild(member); + } + return field; + } + + @Override + public MaterializedField emptySchema() { + return MaterializedField.create(name, + MajorType.newBuilder() + .setMinorType(type) + .setMode(mode) + .build()); + } +} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/drill/blob/cf2478f7/exec/java-exec/src/main/java/org/apache/drill/exec/record/metadata/MetadataUtils.java ---------------------------------------------------------------------- diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/record/metadata/MetadataUtils.java b/exec/java-exec/src/main/java/org/apache/drill/exec/record/metadata/MetadataUtils.java new file mode 100644 index 0000000..8c8dea7 --- /dev/null +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/record/metadata/MetadataUtils.java @@ -0,0 +1,165 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.drill.exec.record.metadata; + +import java.util.List; + +import org.apache.drill.common.types.TypeProtos.DataMode; +import org.apache.drill.common.types.TypeProtos.MinorType; +import org.apache.drill.exec.record.BatchSchema; +import org.apache.drill.exec.record.MaterializedField; + +public class MetadataUtils { + + public static TupleSchema fromFields(Iterable<MaterializedField> fields) { + TupleSchema tuple = new TupleSchema(); + for (MaterializedField field : fields) { + tuple.add(field); + } + return tuple; + } + + /** + * Create a column metadata object that holds the given + * {@link MaterializedField}. The type of the object will be either a + * primitive or map column, depending on the field's type. The logic + * here mimics the code as written, which is very messy in some places. + * + * @param field the materialized field to wrap + * @return the column metadata that wraps the field + */ + + public static AbstractColumnMetadata fromField(MaterializedField field) { + MinorType type = field.getType().getMinorType(); + switch (type) { + case MAP: + return MetadataUtils.newMap(field); + case UNION: + if (field.getType().getMode() != DataMode.OPTIONAL) { + throw new UnsupportedOperationException(type.name() + " type must be nullable"); + } + return new VariantColumnMetadata(field); + case LIST: + switch (field.getType().getMode()) { + case OPTIONAL: + return new VariantColumnMetadata(field); + case REPEATED: + + // Not a list at all, but rather the second (or third...) + // dimension on a repeated type. 
+ + return new RepeatedListColumnMetadata(field); + default: + + // List of unions (or a degenerate union of a single type.) + // Not supported in Drill. + + throw new UnsupportedOperationException( + String.format("Unsupported mode %s for type %s", + field.getType().getMode().name(), + type.name())); + } + default: + return new PrimitiveColumnMetadata(field); + } + } + + public static AbstractColumnMetadata fromView(MaterializedField field) { + if (field.getType().getMinorType() == MinorType.MAP) { + return new MapColumnMetadata(field, null); + } else { + return new PrimitiveColumnMetadata(field); + } + } + + /** + * Create a tuple given the list of columns that make up the tuple. + * Creates nested maps as needed. + * + * @param columns list of columns that make up the tuple + * @return a tuple metadata object that contains the columns + */ + + public static TupleSchema fromColumns(List<ColumnMetadata> columns) { + TupleSchema tuple = new TupleSchema(); + for (ColumnMetadata column : columns) { + tuple.add((AbstractColumnMetadata) column); + } + return tuple; + } + + public static TupleMetadata fromBatchSchema(BatchSchema batchSchema) { + TupleSchema tuple = new TupleSchema(); + for (MaterializedField field : batchSchema) { + tuple.add(fromView(field)); + } + return tuple; + } + + /** + * Create a column metadata object for a map column, given the + * {@link MaterializedField} that describes the column, and a list + * of column metadata objects that describe the columns in the map. 
+ * + * @param field the materialized field that describes the map column + * @param schema metadata that describes the tuple of columns in + * the map + * @return a map column metadata for the map + */ + + public static MapColumnMetadata newMap(MaterializedField field, TupleSchema schema) { + return new MapColumnMetadata(field, schema); + } + + public static MapColumnMetadata newMap(MaterializedField field) { + return new MapColumnMetadata(field, fromFields(field.getChildren())); + } + + public static MapColumnMetadata newMap(String name, TupleMetadata schema) { + return new MapColumnMetadata(name, DataMode.REQUIRED, (TupleSchema) schema); + } + + public static VariantColumnMetadata newVariant(MaterializedField field, VariantSchema schema) { + return new VariantColumnMetadata(field, schema); + } + + public static VariantColumnMetadata newVariant(String name, DataMode cardinality) { + switch (cardinality) { + case OPTIONAL: + return new VariantColumnMetadata(name, MinorType.UNION, new VariantSchema()); + case REPEATED: + return new VariantColumnMetadata(name, MinorType.LIST, new VariantSchema()); + default: + throw new IllegalArgumentException(); + } + } + + public static RepeatedListColumnMetadata newRepeatedList(String name, AbstractColumnMetadata child) { + return new RepeatedListColumnMetadata(name, child); + } + + public static AbstractColumnMetadata newMapArray(String name, TupleMetadata schema) { + return new MapColumnMetadata(name, DataMode.REPEATED, (TupleSchema) schema); + } + + public static PrimitiveColumnMetadata newScalar(String name, MinorType type, + DataMode mode) { + assert type != MinorType.MAP && type != MinorType.UNION && type != MinorType.LIST; + return new PrimitiveColumnMetadata(name, type, mode); + } +} http://git-wip-us.apache.org/repos/asf/drill/blob/cf2478f7/exec/java-exec/src/main/java/org/apache/drill/exec/record/metadata/PrimitiveColumnMetadata.java ---------------------------------------------------------------------- diff --git 
a/exec/java-exec/src/main/java/org/apache/drill/exec/record/metadata/PrimitiveColumnMetadata.java b/exec/java-exec/src/main/java/org/apache/drill/exec/record/metadata/PrimitiveColumnMetadata.java new file mode 100644 index 0000000..f0c25eb --- /dev/null +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/record/metadata/PrimitiveColumnMetadata.java @@ -0,0 +1,131 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.drill.exec.record.metadata; + +import org.apache.drill.common.types.TypeProtos.DataMode; +import org.apache.drill.common.types.TypeProtos.MajorType; +import org.apache.drill.common.types.TypeProtos.MinorType; +import org.apache.drill.common.types.Types; +import org.apache.drill.exec.expr.TypeHelper; +import org.apache.drill.exec.record.MaterializedField; + +/** + * Primitive (non-map) column. Describes non-nullable, nullable and + * array types (which differ only in mode, but not in metadata structure.) 
+ */ + +public class PrimitiveColumnMetadata extends AbstractColumnMetadata { + + protected int expectedWidth; + + public PrimitiveColumnMetadata(MaterializedField schema) { + super(schema); + expectedWidth = estimateWidth(schema.getType()); + } + + public PrimitiveColumnMetadata(String name, MinorType type, DataMode mode) { + super(name, type, mode); + expectedWidth = estimateWidth(Types.withMode(type, mode)); + } + + private int estimateWidth(MajorType majorType) { + if (type() == MinorType.NULL || type() == MinorType.LATE) { + return 0; + } else if (isVariableWidth()) { + + // The above getSize() method uses the deprecated getWidth() + // method to get the expected VarChar size. If zero (which + // it will be), try the revised precision field. + + int precision = majorType.getPrecision(); + if (precision > 0) { + return precision; + } else { + // TypeHelper includes the offset vector width + + return TypeHelper.getSize(majorType) - 4; + } + } else { + return TypeHelper.getSize(majorType); + } + } + + public PrimitiveColumnMetadata(PrimitiveColumnMetadata from) { + super(from); + expectedWidth = from.expectedWidth; + } + + @Override + public AbstractColumnMetadata copy() { + return new PrimitiveColumnMetadata(this); + } + + @Override + public ColumnMetadata.StructureType structureType() { return ColumnMetadata.StructureType.PRIMITIVE; } + + @Override + public int expectedWidth() { return expectedWidth; } + + @Override + public int precision() { return precision; } + + @Override + public int scale() { return scale; } + + @Override + public void setExpectedWidth(int width) { + // The allocation utilities don't like a width of zero, so set to + // 1 as the minimum. Adjusted to avoid trivial errors if the caller + // makes an error. 
+ + if (isVariableWidth()) { + expectedWidth = Math.max(1, width); + } + } + + @Override + public ColumnMetadata cloneEmpty() { + return new PrimitiveColumnMetadata(this); + } + + public ColumnMetadata mergeWith(MaterializedField field) { + PrimitiveColumnMetadata merged = new PrimitiveColumnMetadata(field); + merged.setExpectedElementCount(expectedElementCount); + merged.setExpectedWidth(Math.max(expectedWidth, field.getPrecision())); + merged.setProjected(projected); + return merged; + } + + @Override + public MajorType majorType() { + return MajorType.newBuilder() + .setMinorType(type) + .setMode(mode) + .setPrecision(precision) + .setScale(scale) + .build(); + } + + @Override + public MaterializedField schema() { + return MaterializedField.create(name, majorType()); + } + + @Override + public MaterializedField emptySchema() { return schema(); } +} http://git-wip-us.apache.org/repos/asf/drill/blob/cf2478f7/exec/java-exec/src/main/java/org/apache/drill/exec/record/metadata/RepeatedListColumnMetadata.java ---------------------------------------------------------------------- diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/record/metadata/RepeatedListColumnMetadata.java b/exec/java-exec/src/main/java/org/apache/drill/exec/record/metadata/RepeatedListColumnMetadata.java new file mode 100644 index 0000000..a126965 --- /dev/null +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/record/metadata/RepeatedListColumnMetadata.java @@ -0,0 +1,100 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */
package org.apache.drill.exec.record.metadata;

import org.apache.drill.common.types.TypeProtos.DataMode;
import org.apache.drill.common.types.TypeProtos.MinorType;
import org.apache.drill.exec.record.MaterializedField;

import com.google.common.base.Preconditions;

/**
 * Column metadata for a repeated list: a column whose minor type is
 * {@code LIST} and whose mode is {@code REPEATED}. It wraps at most one
 * child column, which may be attached later via
 * {@link #childSchema(ColumnMetadata)}.
 */
public class RepeatedListColumnMetadata extends AbstractColumnMetadata {

  /**
   * Value returned by {@link #dimensions()} while no child is attached.
   */
  public static final int UNKNOWN_DIMENSIONS = -1;

  private AbstractColumnMetadata childSchema;

  /**
   * Builds the metadata from a materialized field.
   *
   * @param field a field of type {@code LIST}, mode {@code REPEATED},
   * with zero or one child; when a child is present its metadata must
   * report itself as an array
   * @throws IllegalArgumentException if any of those conditions fails
   */
  public RepeatedListColumnMetadata(MaterializedField field) {
    super(field);
    Preconditions.checkArgument(field.getType().getMinorType() == MinorType.LIST);
    Preconditions.checkArgument(field.getType().getMode() == DataMode.REPEATED);
    Preconditions.checkArgument(field.getChildren().size() <= 1);

    // No child yet: leave childSchema unset.
    if (field.getChildren().isEmpty()) {
      return;
    }
    childSchema = MetadataUtils.fromField(field.getChildren().iterator().next());
    Preconditions.checkArgument(childSchema.isArray());
  }

  /**
   * Builds the metadata from a name and an optional child.
   *
   * @param name column name
   * @param childSchema child column metadata, or {@code null} if the
   * child is not yet known; a non-null child must be an array
   * @throws IllegalArgumentException if a non-null child is not an array
   */
  public RepeatedListColumnMetadata(String name, AbstractColumnMetadata childSchema) {
    super(name, MinorType.LIST, DataMode.REPEATED);
    Preconditions.checkArgument(childSchema == null || childSchema.isArray());
    this.childSchema = childSchema;
  }

  /**
   * Attaches the child column. May be done only once.
   *
   * @param childMetadata child metadata; its mode must be {@code REPEATED}
   * @throws IllegalStateException if a child is already set
   * @throws IllegalArgumentException if the child is not {@code REPEATED}
   */
  public void childSchema(ColumnMetadata childMetadata) {
    Preconditions.checkState(this.childSchema == null);
    Preconditions.checkArgument(DataMode.REPEATED == childMetadata.mode());
    this.childSchema = (AbstractColumnMetadata) childMetadata;
  }

  @Override
  public StructureType structureType() {
    return StructureType.MULTI_ARRAY;
  }

  /**
   * @return a newly created field for this column with the child's
   * schema added when a child is present
   */
  @Override
  public MaterializedField schema() {
    final MaterializedField result = emptySchema();
    if (childSchema == null) {
      return result;
    }
    result.addChild(childSchema.schema());
    return result;
  }

  /**
   * @return a newly created field for this column without any child
   */
  @Override
  public MaterializedField emptySchema() {
    return MaterializedField.create(name(), majorType());
  }

  /**
   * @return a new repeated list column of the same name with no child
   */
  @Override
  public ColumnMetadata cloneEmpty() {
    return new RepeatedListColumnMetadata(name, null);
  }

  /**
   * @return a new repeated list column of the same name that refers to
   * the same child metadata object as this one
   */
  @Override
  public AbstractColumnMetadata copy() {
    return new RepeatedListColumnMetadata(name, childSchema);
  }

  @Override
  public ColumnMetadata childSchema() {
    return childSchema;
  }

  /**
   * @return {@link #UNKNOWN_DIMENSIONS} when there is no child, else
   * one more than the child's dimension count
   */
  @Override
  public int dimensions() {
    if (childSchema == null) {
      return UNKNOWN_DIMENSIONS;
    }
    return childSchema.dimensions() + 1;
  }
}
http://git-wip-us.apache.org/repos/asf/drill/blob/cf2478f7/exec/java-exec/src/main/java/org/apache/drill/exec/record/metadata/TupleSchema.java ---------------------------------------------------------------------- diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/record/metadata/TupleSchema.java b/exec/java-exec/src/main/java/org/apache/drill/exec/record/metadata/TupleSchema.java new file mode 100644 index 0000000..0c69dbc --- /dev/null +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/record/metadata/TupleSchema.java @@ -0,0 +1,196 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.drill.exec.record.metadata; + +import java.util.ArrayList; +import java.util.Iterator; +import java.util.List; + +import org.apache.drill.exec.record.BatchSchema; +import org.apache.drill.exec.record.MaterializedField; +import org.apache.drill.exec.record.BatchSchema.SelectionVectorMode; + +/** + * Defines the schema of a tuple: either the top-level row or a nested + * "map" (really structure). A schema is a collection of columns (backed + * by vectors in the loader itself.) Columns are accessible by name or + * index. 
New columns may be added at any time; the new column takes the + * next available index. + */

public class TupleSchema implements TupleMetadata {

  // Map column that owns this tuple; null for the top-level row.
  private MapColumnMetadata parentMap;
  private final TupleNameSpace<ColumnMetadata> nameSpace = new TupleNameSpace<>();

  /**
   * Records the map column that owns this tuple.
   *
   * @param parentMap the owning map column
   */
  public void bind(MapColumnMetadata parentMap) {
    this.parentMap = parentMap;
  }

  /**
   * Creates a new tuple schema holding a copy of each column of this
   * one, in the same order. Every column must be an
   * {@link AbstractColumnMetadata}.
   *
   * @return the new tuple
   */
  public TupleMetadata copy() {
    TupleMetadata tuple = new TupleSchema();
    for (ColumnMetadata md : this) {
      tuple.addColumn(((AbstractColumnMetadata) md).copy());
    }
    return tuple;
  }

  /**
   * Creates column metadata from a field and appends it to this tuple.
   *
   * @param field the field to add
   * @return the metadata created for the field
   */
  @Override
  public ColumnMetadata add(MaterializedField field) {
    AbstractColumnMetadata md = MetadataUtils.fromField(field);
    add(md);
    return md;
  }

  /**
   * Same as {@link #add(MaterializedField)} but builds the metadata
   * with {@code MetadataUtils.fromView()}.
   *
   * @param field the field to add
   * @return the metadata created for the field
   */
  public ColumnMetadata addView(MaterializedField field) {
    AbstractColumnMetadata md = MetadataUtils.fromView(field);
    add(md);
    return md;
  }

  /**
   * Add a column metadata column created by the caller. Used for specialized
   * cases beyond those handled by {@link #add(MaterializedField)}.
   *
   * @param md the custom column metadata which must have the correct
   * index set (from {@link #size()})
   */

  public void add(AbstractColumnMetadata md) {
    md.bind(this);
    nameSpace.add(md.name(), md);
  }

  /**
   * Appends a column, which must be an {@link AbstractColumnMetadata}.
   *
   * @param column the column to add
   * @return {@code size() - 1} as measured after the column was added
   */
  @Override
  public int addColumn(ColumnMetadata column) {
    add((AbstractColumnMetadata) column);
    return size() - 1;
  }

  /**
   * @param name column name
   * @return the column's schema, or {@code null} when
   * {@link #metadata(String)} returns {@code null}
   */
  @Override
  public MaterializedField column(String name) {
    ColumnMetadata md = metadata(name);
    return md == null ? null : md.schema();
  }

  @Override
  public ColumnMetadata metadata(String name) {
    return nameSpace.get(name);
  }

  @Override
  public int index(String name) {
    return nameSpace.indexOf(name);
  }

  @Override
  public MaterializedField column(int index) {
    return metadata(index).schema();
  }

  @Override
  public ColumnMetadata metadata(int index) {
    return nameSpace.get(index);
  }

  @Override
  public MapColumnMetadata parent() { return parentMap; }

  @Override
  public int size() { return nameSpace.count(); }

  @Override
  public boolean isEmpty() { return nameSpace.count() == 0; }

  @Override
  public Iterator<ColumnMetadata> iterator() {
    return nameSpace.iterator();
  }

  /**
   * Compares this tuple with another column by column, in index order.
   *
   * @param other the tuple to compare; it is cast to
   * {@code TupleSchema}, so any other implementation fails the cast
   * @return true if both have the same number of columns and each
   * pair of columns at the same index is equivalent
   */
  @Override
  public boolean isEquivalent(TupleMetadata other) {
    TupleSchema otherSchema = (TupleSchema) other;
    if (nameSpace.count() != otherSchema.nameSpace.count()) {
      return false;
    }
    for (int i = 0; i < nameSpace.count(); i++) {
      if (! nameSpace.get(i).isEquivalent(otherSchema.nameSpace.get(i))) {
        return false;
      }
    }
    return true;
  }

  /**
   * @return a new list holding the schema of each column, in order
   */
  @Override
  public List<MaterializedField> toFieldList() {
    List<MaterializedField> cols = new ArrayList<>();
    for (ColumnMetadata md : nameSpace) {
      cols.add(md.schema());
    }
    return cols;
  }

  /**
   * @param svMode selection vector mode for the batch schema
   * @return a batch schema built from {@link #toFieldList()}
   */
  public BatchSchema toBatchSchema(SelectionVectorMode svMode) {
    return new BatchSchema(svMode, toFieldList());
  }

  @Override
  public String fullName(int index) {
    return fullName(metadata(index));
  }

  /**
   * Builds the dotted name of a column. For a nested tuple the result
   * is this tuple's full name, a dot, and the column name, with the
   * column name back-quoted when it contains a dot. For the root
   * tuple the column's name is returned as-is, without quoting.
   *
   * @param column the column to name
   * @return the full name
   */
  @Override
  public String fullName(ColumnMetadata column) {
    String quotedName = column.name();
    if (quotedName.contains(".")) {
      quotedName = "`" + quotedName + "`";
    }
    if (isRoot()) {
      return column.name();
    } else {
      return fullName() + "." + quotedName;
    }
  }

  /**
   * @return {@code "<root>"} for the root tuple; otherwise the full
   * name of the owning map column as computed by its parent tuple
   */
  public String fullName() {
    if (isRoot()) {
      return "<root>";
    } else {
      return parentMap.parentTuple().fullName(parentMap);
    }
  }

  /**
   * @return true if no parent map has been bound
   */
  public boolean isRoot() { return parentMap == null; }

  /**
   * @return the simple class name followed by the comma-separated
   * string form of each column, enclosed in square brackets
   */
  @Override
  public String toString() {
    StringBuilder buf = new StringBuilder()
        .append("[")
        .append(getClass().getSimpleName())
        .append(" ");
    boolean first = true;
    for (ColumnMetadata md : nameSpace) {
      if (! first) {
        buf.append(", ");
      }

      // Clear the flag after the first column; it was never reset
      // before, so the separator was never written.
      first = false;
      buf.append(md.toString());
    }
    buf.append("]");
    return buf.toString();
  }
}
http://git-wip-us.apache.org/repos/asf/drill/blob/cf2478f7/exec/java-exec/src/main/java/org/apache/drill/exec/record/metadata/VariantColumnMetadata.java ---------------------------------------------------------------------- diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/record/metadata/VariantColumnMetadata.java b/exec/java-exec/src/main/java/org/apache/drill/exec/record/metadata/VariantColumnMetadata.java new file mode 100644 index 0000000..b417107 --- /dev/null +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/record/metadata/VariantColumnMetadata.java @@ -0,0 +1,136 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
package org.apache.drill.exec.record.metadata;

import java.util.List;

import org.apache.drill.common.types.TypeProtos.DataMode;
import org.apache.drill.common.types.TypeProtos.MajorType;
import org.apache.drill.common.types.TypeProtos.MinorType;
import org.apache.drill.exec.record.MaterializedField;

/**
 * Column metadata for a variant column: one of minor type
 * {@code UNION} or {@code LIST} whose member types are tracked in a
 * {@link VariantSchema}.
 */
public class VariantColumnMetadata extends AbstractColumnMetadata {

  private final VariantSchema variantSchema;

  /**
   * Builds the metadata, and a fresh variant schema bound to it, from
   * a materialized field.
   * <ul>
   * <li>UNION: one member is added per subtype of the field's type.</li>
   * <li>LIST with no child, or a child of type LATE: no members.</li>
   * <li>LIST with a UNION child: one member per subtype of the child.</li>
   * <li>LIST with any other child: that child is the only member.</li>
   * </ul>
   *
   * @param schema the field that describes the column
   */
  public VariantColumnMetadata(MaterializedField schema) {
    super(schema);
    variantSchema = new VariantSchema();
    variantSchema.bind(this);
    List<MinorType> types = null;
    if (type() == MinorType.UNION) {
      types = schema.getType().getSubTypeList();
    } else {
      assert type() == MinorType.LIST;
      MaterializedField child;
      MinorType childType;
      if (schema.getChildren().isEmpty()) {

        // No child: handled below like a child of type LATE.

        child = null;
        childType = MinorType.LATE;
      } else {
        child = schema.getChildren().iterator().next();
        childType = child.getType().getMinorType();
      }
      switch (childType) {
      case UNION:

        // List contains a union.

        types = child.getType().getSubTypeList();
        break;

      case LATE:

        // List has no type.

        return;

      default:

        // List contains a single non-null type.

        variantSchema.addType(MetadataUtils.fromField(child));
        return;
      }
    }
    if (types == null) {
      return;
    }
    for (MinorType subType : types) {
      variantSchema.addType(subType);
    }
  }

  /**
   * Builds the metadata around an existing variant schema.
   * <p>
   * NOTE(review): unlike the other two constructors this one does not
   * call {@code variantSchema.bind(this)} and does not replace a null
   * schema; confirm with callers whether that is intended.
   *
   * @param schema the field that describes the column
   * @param variantSchema the member types, stored as given
   */
  public VariantColumnMetadata(MaterializedField schema, VariantSchema variantSchema) {
    super(schema);
    this.variantSchema = variantSchema;
  }

  /**
   * Builds an {@code OPTIONAL} variant column and binds the variant
   * schema to it.
   *
   * @param name column name
   * @param type column minor type
   * @param variantSchema the member types, or {@code null} to start
   * with a new, empty variant schema
   */
  public VariantColumnMetadata(String name, MinorType type, VariantSchema variantSchema) {
    super(name, type, DataMode.OPTIONAL);
    this.variantSchema = variantSchema == null ? new VariantSchema() : variantSchema;
    this.variantSchema.bind(this);
  }

  @Override
  public StructureType structureType() {
    return StructureType.VARIANT;
  }

  @Override
  public boolean isVariant() { return true; }

  /**
   * @return true if this column's type is {@code LIST}
   */
  @Override
  public boolean isArray() { return type() == MinorType.LIST; }

  /**
   * @return a new variant column with the same name and type, holding
   * the result of {@code cloneEmpty()} on this column's variant schema
   */
  @Override
  public ColumnMetadata cloneEmpty() {
    return new VariantColumnMetadata(name, type, variantSchema.cloneEmpty());
  }

  /**
   * Not implemented for variant columns.
   *
   * @throws UnsupportedOperationException always
   */
  @Override
  public AbstractColumnMetadata copy() {

    // This was an "assert false; return null" stub. With assertions
    // disabled it handed null back to the caller, which only failed
    // later and far from the cause. Fail here, in every mode, instead.

    throw new UnsupportedOperationException(
        "copy() is not yet implemented for variant column: " + name);
  }

  @Override
  public VariantMetadata variantSchema() {
    return variantSchema;
  }

  /**
   * @return a newly created {@code OPTIONAL} field of this column's
   * name and type whose major type lists the variant's member types
   * as subtypes
   */
  @Override
  public MaterializedField schema() {
    return MaterializedField.create(name,
        MajorType.newBuilder()
          .setMinorType(type)
          .setMode(DataMode.OPTIONAL)
          .addAllSubType(variantSchema.types())
          .build());
  }

  /**
   * @return a newly created {@code OPTIONAL} field of this column's
   * name and type with no subtypes listed
   */
  @Override
  public MaterializedField emptySchema() {
    return MaterializedField.create(name,
        MajorType.newBuilder()
          .setMinorType(type)
          .setMode(DataMode.OPTIONAL)
          .build());
  }
}
http://git-wip-us.apache.org/repos/asf/drill/blob/cf2478f7/exec/java-exec/src/main/java/org/apache/drill/exec/record/metadata/VariantSchema.java ---------------------------------------------------------------------- diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/record/metadata/VariantSchema.java b/exec/java-exec/src/main/java/org/apache/drill/exec/record/metadata/VariantSchema.java new file mode 100644 index 0000000..cfa8cee --- /dev/null +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/record/metadata/VariantSchema.java @@ -0,0 +1,210 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.drill.exec.record.metadata; + +import java.util.Collection; +import java.util.HashMap; +import java.util.Map; + +import org.apache.drill.common.types.Types; +import org.apache.drill.common.types.TypeProtos.DataMode; +import org.apache.drill.common.types.TypeProtos.MinorType; +import org.apache.drill.exec.record.MaterializedField; + +import com.google.common.base.Preconditions; + +public class VariantSchema implements VariantMetadata { + + private final Map<MinorType, ColumnMetadata> types = new HashMap<>(); + private VariantColumnMetadata parent; + private boolean isSimple; + + protected void bind(VariantColumnMetadata parent) { + this.parent = parent; + } + + public static AbstractColumnMetadata memberMetadata(MinorType type) { + String name = Types.typeKey(type); + switch (type) { + case LIST: + return new VariantColumnMetadata(name, type, null); + case MAP: + // Although maps do not have a bits vector, when used in a + // union the map must be marked as optional since the union as a + // whole can be null, implying that the map is null by implication. + // (In fact, the readers have a special mechanism to work out the + // null state in this case. 
+ + return new MapColumnMetadata(name, DataMode.OPTIONAL, null); + case UNION: + throw new IllegalArgumentException("Cannot add a union to a union"); + default: + return new PrimitiveColumnMetadata( + MaterializedField.create( + name, + Types.optional(type))); + } + } + + @Override + public ColumnMetadata addType(MinorType type) { + checkType(type); + AbstractColumnMetadata dummyCol = memberMetadata(type); + types.put(type, dummyCol); + return dummyCol; + } + + @Override + public void addType(ColumnMetadata col) { + checkType(col.type()); + Preconditions.checkArgument(col.name().equals(Types.typeKey(col.type()))); + switch (col.type()) { + case UNION: + throw new IllegalArgumentException("Cannot add a union to a union"); + case LIST: + if (col.mode() == DataMode.REQUIRED) { + throw new IllegalArgumentException("List type column must be OPTIONAL or REPEATED"); + } + break; + default: + if (col.mode() != DataMode.OPTIONAL) { + throw new IllegalArgumentException("Type column must be OPTIONAL"); + } + break; + } + types.put(col.type(), col); + } + + private void checkType(MinorType type) { + if (types.containsKey(type)) { + throw new IllegalArgumentException("Variant already contains type: " + type); + } + } + + @Override + public int size() { return types.size(); } + + @Override + public boolean hasType(MinorType type) { + return types.containsKey(type); + } + + @Override + public ColumnMetadata member(MinorType type) { + return types.get(type); + } + + @Override + public ColumnMetadata parent() { return parent; } + + @Override + public Collection<MinorType> types() { + return types.keySet(); + } + + @Override + public Collection<ColumnMetadata> members() { + return types.values(); + } + + public void addMap(MapColumnMetadata mapCol) { + Preconditions.checkArgument(! mapCol.isArray()); + Preconditions.checkState(! 
isSimple); + checkType(MinorType.MAP); + types.put(MinorType.MAP, mapCol); + } + + public void addList(VariantColumnMetadata listCol) { + Preconditions.checkArgument(listCol.isArray()); + Preconditions.checkState(! isSimple); + checkType(MinorType.LIST); + types.put(MinorType.LIST, listCol); + } + + public ColumnMetadata addType(MaterializedField field) { + Preconditions.checkState(! isSimple); + MinorType type = field.getType().getMinorType(); + checkType(type); + AbstractColumnMetadata col; + switch (type) { + case LIST: + col = new VariantColumnMetadata(field); + break; + case MAP: + col = new MapColumnMetadata(field); + break; + case UNION: + throw new IllegalArgumentException("Cannot add a union to a union"); + default: + col = new PrimitiveColumnMetadata(field); + break; + } + types.put(type, col); + return col; + } + + @Override + public boolean isSingleType() { + return types.size() == 1; + } + + @Override + public ColumnMetadata listSubtype() { + if (isSingleType()) { + return types.values().iterator().next(); + } + + // At the metadata level, a list always holds a union. But, at the + // implementation layer, a union of a single type is collapsed out + // to leave just a list of that single type. + // + // Make up a synthetic union column to be used when building + // a reader. 
+ + return new VariantColumnMetadata("$data", MinorType.UNION, this); + } + + @Override + public void becomeSimple() { + Preconditions.checkState(types.size() == 1); + isSimple = true; + } + + @Override + public boolean isSimple() { + return isSimple; + } + + @Override + public String toString() { + return new StringBuilder() + .append("[") + .append(getClass().getSimpleName()) + .append(types.toString()) + .append(", simple: ") + .append(isSimple) + .append("]") + .toString(); + } + + public VariantSchema cloneEmpty() { + VariantSchema copy = new VariantSchema(); + copy.isSimple = isSimple; + return copy; + } +} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/drill/blob/cf2478f7/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/xsort/TestExternalSort.java ---------------------------------------------------------------------- diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/xsort/TestExternalSort.java b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/xsort/TestExternalSort.java index a79ecf5..9ef1976 100644 --- a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/xsort/TestExternalSort.java +++ b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/xsort/TestExternalSort.java @@ -17,14 +17,17 @@ */ package org.apache.drill.exec.physical.impl.xsort; +import java.io.File; +import java.nio.file.Paths; + +import org.apache.drill.categories.OperatorTest; +import org.apache.drill.categories.SlowTest; import org.apache.drill.common.types.TypeProtos; import org.apache.drill.common.types.Types; +import org.apache.drill.exec.ExecConstants; import org.apache.drill.exec.record.BatchSchema; import org.apache.drill.test.BaseTestQuery; import org.apache.drill.test.TestBuilder; -import org.apache.drill.categories.OperatorTest; -import org.apache.drill.categories.SlowTest; -import org.apache.drill.exec.ExecConstants; import org.apache.drill.test.rowSet.RowSet; import 
org.apache.drill.test.rowSet.RowSetBuilder; import org.apache.drill.test.rowSet.SchemaBuilder; @@ -33,9 +36,6 @@ import org.junit.Ignore; import org.junit.Test; import org.junit.experimental.categories.Category; -import java.io.File; -import java.nio.file.Paths; - @Category({SlowTest.class, OperatorTest.class}) public class TestExternalSort extends BaseTestQuery { @@ -206,8 +206,8 @@ public class TestExternalSort extends BaseTestQuery { { final BatchSchema schema = new SchemaBuilder() - .add("a", Types.required(TypeProtos.MinorType.INT)) - .add("b", Types.required(TypeProtos.MinorType.INT)) + .add("a", TypeProtos.MinorType.INT) + .add("b", TypeProtos.MinorType.INT) .build(); final RowSetBuilder rowSetBuilder = new RowSetBuilder(allocator, schema); @@ -223,8 +223,8 @@ public class TestExternalSort extends BaseTestQuery { { final BatchSchema schema = new SchemaBuilder() - .add("a", Types.required(TypeProtos.MinorType.INT)) - .add("c", Types.required(TypeProtos.MinorType.INT)) + .add("a", TypeProtos.MinorType.INT) + .add("c", TypeProtos.MinorType.INT) .build(); final RowSetBuilder rowSetBuilder = new RowSetBuilder(allocator, schema);
