[ https://issues.apache.org/jira/browse/DRILL-5657?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16119217#comment-16119217 ]

ASF GitHub Bot commented on DRILL-5657:
---------------------------------------

Github user bitblender commented on a diff in the pull request:

    https://github.com/apache/drill/pull/866#discussion_r131554894
  
    --- Diff: exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/impl/ResultSetLoaderImpl.java ---
    @@ -0,0 +1,412 @@
    +/*
    + * Licensed to the Apache Software Foundation (ASF) under one
    + * or more contributor license agreements.  See the NOTICE file
    + * distributed with this work for additional information
    + * regarding copyright ownership.  The ASF licenses this file
    + * to you under the Apache License, Version 2.0 (the
    + * "License"); you may not use this file except in compliance
    + * with the License.  You may obtain a copy of the License at
    + *
    + * http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS,
    + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + * See the License for the specific language governing permissions and
    + * limitations under the License.
    + */
    +package org.apache.drill.exec.physical.rowSet.impl;
    +
    +import java.util.Collection;
    +
    +import org.apache.drill.common.exceptions.UserException;
    +import org.apache.drill.exec.memory.BufferAllocator;
    +import org.apache.drill.exec.physical.rowSet.ResultSetLoader;
    +import org.apache.drill.exec.physical.rowSet.TupleLoader;
    +import org.apache.drill.exec.record.BatchSchema.SelectionVectorMode;
    +import org.apache.drill.exec.record.VectorContainer;
    +import org.apache.drill.exec.vector.ValueVector;
    +
    +/**
    + * Implementation of the result set loader.
    + * @see {@link ResultSetLoader}
    + */
    +
    +public class ResultSetLoaderImpl implements ResultSetLoader, WriterIndexImpl.WriterIndexListener {
    +
    +  public static class ResultSetOptions {
    +    public final int vectorSizeLimit;
    +    public final int rowCountLimit;
    +    public final boolean caseSensitive;
    +    public final ResultVectorCache inventory;
    +    private final Collection<String> selection;
    +
    +    public ResultSetOptions() {
    +      vectorSizeLimit = ValueVector.MAX_BUFFER_SIZE;
    +      rowCountLimit = ValueVector.MAX_ROW_COUNT;
    +      caseSensitive = false;
    +      selection = null;
    +      inventory = null;
    +    }
    +
    +    public ResultSetOptions(OptionBuilder builder) {
    +      this.vectorSizeLimit = builder.vectorSizeLimit;
    +      this.rowCountLimit = builder.rowCountLimit;
    +      this.caseSensitive = builder.caseSensitive;
    +      this.selection = builder.selection;
    +      this.inventory = builder.inventory;
    +    }
    +  }
    +
    +  public static class OptionBuilder {
    +    private int vectorSizeLimit;
    +    private int rowCountLimit;
    +    private boolean caseSensitive;
    +    private Collection<String> selection;
    +    private ResultVectorCache inventory;
    +
    +    public OptionBuilder() {
    +      ResultSetOptions options = new ResultSetOptions();
    +      vectorSizeLimit = options.vectorSizeLimit;
    +      rowCountLimit = options.rowCountLimit;
    +      caseSensitive = options.caseSensitive;
    +    }
    +
    +    public OptionBuilder setCaseSensitive(boolean flag) {
    +      caseSensitive = flag;
    +      return this;
    +    }
    +
    +    public OptionBuilder setRowCountLimit(int limit) {
    +      rowCountLimit = Math.min(limit, ValueVector.MAX_ROW_COUNT);
    +      return this;
    +    }
    +
    +    public OptionBuilder setSelection(Collection<String> selection) {
    +      this.selection = selection;
    +      return this;
    +    }
    +
    +    public OptionBuilder setVectorCache(ResultVectorCache inventory) {
    +      this.inventory = inventory;
    +      return this;
    +    }
    +
    +    // TODO: No setter for vector length yet: is hard-coded
    +    // at present in the value vector.
    +
    +    public ResultSetOptions build() {
    +      return new ResultSetOptions(this);
    +    }
    +  }
    +
    +  public static class VectorContainerBuilder {
    +    private final ResultSetLoaderImpl rowSetMutator;
    +    private int lastUpdateVersion = -1;
    +    private VectorContainer container;
    +
    +    public VectorContainerBuilder(ResultSetLoaderImpl rowSetMutator) {
    +      this.rowSetMutator = rowSetMutator;
    +      container = new VectorContainer(rowSetMutator.allocator);
    +    }
    +
    +    public void update() {
    +      if (lastUpdateVersion < rowSetMutator.schemaVersion()) {
    +        rowSetMutator.rootTuple.buildContainer(this);
    +        container.buildSchema(SelectionVectorMode.NONE);
    +        lastUpdateVersion = rowSetMutator.schemaVersion();
    +      }
    +    }
    +
    +    public VectorContainer container() { return container; }
    +
    +    public int lastUpdateVersion() { return lastUpdateVersion; }
    +
    +    public void add(ValueVector vector) {
    +      container.add(vector);
    +    }
    +  }
    +
    +  private enum State {
    +    /**
    +     * Before the first batch.
    +     */
    +    START,
    +    /**
    +     * Writing to a batch normally.
    +     */
    +    ACTIVE,
    +    /**
    +     * Batch overflowed a vector while writing. Can continue
    +     * to write to a temporary "overflow" batch until the
    +     * end of the current row.
    +     */
    +    OVERFLOW,
    +    /**
    +     * Batch is full due to reaching the row count limit
    +     * when saving a row.
    +     * No more writes allowed until harvesting the current batch.
    +     */
    +    FULL_BATCH,
    +
    +    /**
    +     * Current batch was harvested: data is gone. A lookahead
    +     * row may exist for the next batch.
    +     */
    +    HARVESTED,
    +    /**
    +     * Mutator is closed: no more operations are allowed.
    +     */
    +    CLOSED
    +  }
    +
    +  private static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(ResultSetLoaderImpl.class);
    +
    +  private final ResultSetOptions options;
    +  private final BufferAllocator allocator;
    +  private final TupleSetImpl rootTuple;
    +  private final TupleLoader rootWriter;
    +  private final WriterIndexImpl writerIndex;
    +  private final ResultVectorCache inventory;
    +  private ResultSetLoaderImpl.State state = State.START;
    +  private int activeSchemaVersion = 0;
    +  private int harvestSchemaVersion = 0;
    --- End diff --
    
    It is not obvious how the schema version fields above are used. Comments explaining their purpose would be helpful.


> Implement size-aware result set loader
> --------------------------------------
>
>                 Key: DRILL-5657
>                 URL: https://issues.apache.org/jira/browse/DRILL-5657
>             Project: Apache Drill
>          Issue Type: Improvement
>    Affects Versions: Future
>            Reporter: Paul Rogers
>            Assignee: Paul Rogers
>             Fix For: Future
>
>
> A recent extension to Drill's set of test tools created a "row set" 
> abstraction to allow us to create, and verify, record batches with very few 
> lines of code. Part of this work involved creating a set of "column 
> accessors" in the vector subsystem. Column readers provide a uniform API to 
> obtain data from columns (vectors), while column writers provide a uniform 
> writing interface.
> DRILL-5211 discusses a set of changes to limit value vectors to 16 MB in size 
> (to avoid memory fragmentation due to Drill's two memory allocators). The 
> column accessors have proven to be so useful that they will be the basis for 
> the new, size-aware writers used by Drill's record readers.
> A step in that direction is to retrofit the column writers to use the 
> size-aware {{setScalar()}} and {{setArray()}} methods introduced in 
> DRILL-5517.
> Since the test framework row set classes are (at present) the only consumer 
> of the accessors, those classes must also be updated with the changes.
> This then allows us to add a new "row mutator" class that handles size-aware 
> vector writing, including the case in which a vector fills in the middle of a 
> row.



--
This message was sent by Atlassian JIRA
(v6.4.14#64029)

Reply via email to