[jira] [Commented] (DRILL-5323) Provide test tools to create, populate and compare row sets

ASF GitHub Bot (JIRA) Wed, 05 Apr 2017 17:18:12 -0700

    [ 
https://issues.apache.org/jira/browse/DRILL-5323?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15958084#comment-15958084
 ]


ASF GitHub Bot commented on DRILL-5323:
---------------------------------------

Github user paul-rogers commented on a diff in the pull request:

    https://github.com/apache/drill/pull/785#discussion_r109563707
  
    --- Diff: 
exec/java-exec/src/test/java/org/apache/drill/test/rowSet/AbstractSingleRowSet.java
 ---
    @@ -0,0 +1,158 @@
    +/*
    + * Licensed to the Apache Software Foundation (ASF) under one
    + * or more contributor license agreements.  See the NOTICE file
    + * distributed with this work for additional information
    + * regarding copyright ownership.  The ASF licenses this file
    + * to you under the Apache License, Version 2.0 (the
    + * "License"); you may not use this file except in compliance
    + * with the License.  You may obtain a copy of the License at
    + *
    + * http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS,
    + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + * See the License for the specific language governing permissions and
    + * limitations under the License.
    + */
    +package org.apache.drill.test.rowSet;
    +
    +import org.apache.drill.common.types.TypeProtos.MajorType;
    +import org.apache.drill.common.types.TypeProtos.MinorType;
    +import org.apache.drill.exec.expr.TypeHelper;
    +import org.apache.drill.exec.memory.BufferAllocator;
    +import org.apache.drill.exec.physical.impl.spill.RecordBatchSizer;
    +import org.apache.drill.exec.record.BatchSchema;
    +import org.apache.drill.exec.record.BatchSchema.SelectionVectorMode;
    +import org.apache.drill.exec.record.VectorContainer;
    +import org.apache.drill.exec.record.VectorWrapper;
    +import org.apache.drill.exec.vector.ValueVector;
    +import org.apache.drill.exec.vector.complex.MapVector;
    +import org.apache.drill.test.rowSet.RowSet.SingleRowSet;
    +import org.apache.drill.test.rowSet.RowSetSchema.LogicalColumn;
    +import org.apache.drill.test.rowSet.RowSetSchema.PhysicalSchema;
    +
    +public abstract class AbstractSingleRowSet extends AbstractRowSet 
implements SingleRowSet {
    +
    +  public abstract static class StructureBuilder {
    +    protected final PhysicalSchema schema;
    +    protected final BufferAllocator allocator;
    +    protected final ValueVector[] valueVectors;
    +    protected final MapVector[] mapVectors;
    +    protected int vectorIndex;
    +    protected int mapIndex;
    +
    +    public StructureBuilder(BufferAllocator allocator, RowSetSchema 
schema) {
    +      this.allocator = allocator;
    +      this.schema = schema.physical();
    +      valueVectors = new ValueVector[schema.access().count()];
    +      if (schema.access().mapCount() == 0) {
    +        mapVectors = null;
    +      } else {
    +        mapVectors = new MapVector[schema.access().mapCount()];
    +      }
    +    }
    +  }
    +
    +  public static class VectorBuilder extends StructureBuilder {
    +
    +    public VectorBuilder(BufferAllocator allocator, RowSetSchema schema) {
    +      super(allocator, schema);
    +    }
    +
    +    public ValueVector[] buildContainer(VectorContainer container) {
    +      for (int i = 0; i < schema.count(); i++) {
    +        LogicalColumn colSchema = schema.column(i);
    +        @SuppressWarnings("resource")
    +        ValueVector v = TypeHelper.getNewVector(colSchema.field, 
allocator, null);
    +        container.add(v);
    +        if (colSchema.field.getType().getMinorType() == MinorType.MAP) {
    +          MapVector mv = (MapVector) v;
    +          mapVectors[mapIndex++] = mv;
    +          buildMap(mv, colSchema.mapSchema);
    +        } else {
    +          valueVectors[vectorIndex++] = v;
    +        }
    +      }
    +      container.buildSchema(SelectionVectorMode.NONE);
    +      return valueVectors;
    +    }
    +
    +    private void buildMap(MapVector mapVector, PhysicalSchema mapSchema) {
    +      for (int i = 0; i < mapSchema.count(); i++) {
    +        LogicalColumn colSchema = mapSchema.column(i);
    +        MajorType type = colSchema.field.getType();
    +        Class<? extends ValueVector> vectorClass = 
TypeHelper.getValueVectorClass(type.getMinorType(), type.getMode());
    +        @SuppressWarnings("resource")
    +        ValueVector v = mapVector.addOrGet(colSchema.field.getName(), 
type, vectorClass);
    +        if (type.getMinorType() == MinorType.MAP) {
    +          MapVector mv = (MapVector) v;
    +          mapVectors[mapIndex++] = mv;
    +          buildMap(mv, colSchema.mapSchema);
    +        } else {
    +          valueVectors[vectorIndex++] = v;
    +        }
    +      }
    +    }
    +  }
    +
    +  public static class VectorMapper extends StructureBuilder {
    +
    +    public VectorMapper(BufferAllocator allocator, RowSetSchema schema) {
    +      super(allocator, schema);
    +    }
    +
    +    public ValueVector[] mapContainer(VectorContainer container) {
    +      for (VectorWrapper<?> w : container) {
    +        @SuppressWarnings("resource")
    +        ValueVector v = w.getValueVector();
    +        if (v.getField().getType().getMinorType() == MinorType.MAP) {
    +          MapVector mv = (MapVector) v;
    +          mapVectors[mapIndex++] = mv;
    +          buildMap(mv);
    +        } else {
    +          valueVectors[vectorIndex++] = v;
    +        }
    +      }
    +      return valueVectors;
    +    }
    +
    +    private void buildMap(MapVector mapVector) {
    +      for (ValueVector v : mapVector) {
    +        if (v.getField().getType().getMinorType() == MinorType.MAP) {
    +          MapVector mv = (MapVector) v;
    +          mapVectors[mapIndex++] = mv;
    +          buildMap(mv);
    +        } else {
    +          valueVectors[vectorIndex++] = v;
    +        }
    +      }
    +    }
    +  }
    +
    +  protected final ValueVector[] valueVectors;
    +
    +  public AbstractSingleRowSet(BufferAllocator allocator, BatchSchema 
schema) {
    +    super(allocator, schema, new VectorContainer());
    +    valueVectors = new VectorBuilder(allocator, 
super.schema).buildContainer(container);
    +  }
    +
    +  public AbstractSingleRowSet(BufferAllocator allocator, VectorContainer 
container) {
    +    super(allocator, container.getSchema(), container);
    +    valueVectors = new VectorMapper(allocator, 
super.schema).mapContainer(container);
    +  }
    +
    +  public AbstractSingleRowSet(AbstractSingleRowSet rowSet) {
    +    super(rowSet.allocator, rowSet.schema.batch(), rowSet.container);
    +    valueVectors = rowSet.valueVectors;
    +  }
    +
    +  @Override
    +  public ValueVector[] vectors() { return valueVectors; }
    +
    +  @Override
    +  public int getSize() {
    +    RecordBatchSizer sizer = new RecordBatchSizer(container);
    +    return sizer.actualSize();
    --- End diff --
    
    Good eyes! This part is a bit tricky. We create the batch (container). 
Then, we need to use a `RowSetWriterImpl` to populate the batch. The `done()` 
method of the writer sets the row count.
    
    Or, if we create a row set from an existing container, the container will 
already carry a row count.
    
    Still, it is a good idea to add a unit test to verify this case, which I 
will do.


> Provide test tools to create, populate and compare row sets
> -----------------------------------------------------------
>
>                 Key: DRILL-5323
>                 URL: https://issues.apache.org/jira/browse/DRILL-5323
>             Project: Apache Drill
>          Issue Type: Sub-task
>          Components: Tools, Build & Test
>    Affects Versions: 1.11.0
>            Reporter: Paul Rogers
>            Assignee: Paul Rogers
>             Fix For: 1.11.0
>
>
> Operators work with individual row sets. A row set is a collection of records 
> stored as column vectors. (Drill uses various terms for this concept. A 
> record batch is a row set with an operator implementation wrapped around it. 
> A vector container is a row set, but with much functionality left as an 
> exercise for the developer. And so on.)
> To simplify tests, we need a {{TestRowSet}} concept that wraps a 
> {{VectorContainer}} and provides easy ways to:
> * Define a schema for the row set.
> * Create a set of vectors that implement the schema.
> * Populate the row set with test data via code.
> * Add an SV2 to the row set.
> * Pass the row set to operator components (such as generated code blocks).
> * Compare the results of the operation with an expected result set.
> * Dispose of the underlying direct memory when work is done.



--
This message was sent by Atlassian JIRA
(v6.3.15#6346)

[jira] [Commented] (DRILL-5323) Provide test tools to create, populate and compare row sets

Reply via email to