[
https://issues.apache.org/jira/browse/DRILL-5657?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16129851#comment-16129851
]
ASF GitHub Bot commented on DRILL-5657:
---------------------------------------
Github user paul-rogers commented on a diff in the pull request:
https://github.com/apache/drill/pull/866#discussion_r133618125
--- Diff:
exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/impl/ResultSetLoaderImpl.java
---
@@ -0,0 +1,412 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.physical.rowSet.impl;
+
+import java.util.Collection;
+
+import org.apache.drill.common.exceptions.UserException;
+import org.apache.drill.exec.memory.BufferAllocator;
+import org.apache.drill.exec.physical.rowSet.ResultSetLoader;
+import org.apache.drill.exec.physical.rowSet.TupleLoader;
+import org.apache.drill.exec.record.BatchSchema.SelectionVectorMode;
+import org.apache.drill.exec.record.VectorContainer;
+import org.apache.drill.exec.vector.ValueVector;
+
+/**
+ * Implementation of the result set loader.
+ * @see ResultSetLoader
+ */
+
+public class ResultSetLoaderImpl implements ResultSetLoader,
WriterIndexImpl.WriterIndexListener {
+
+ /**
+  * Read-only bundle of the settings used to configure a result set
+  * loader. All fields are final: they are fixed either to the defaults
+  * (no-argument constructor) or to the values held by an
+  * {@link OptionBuilder}.
+  */
+ public static class ResultSetOptions {
+   public final int vectorSizeLimit;
+   public final int rowCountLimit;
+   public final boolean caseSensitive;
+   public final ResultVectorCache inventory;
+   private final Collection<String> selection;
+
+   /**
+    * Creates the default options: vector size and row count limits
+    * taken from {@link ValueVector}, case-insensitive, and neither a
+    * selection list nor a vector cache.
+    */
+   public ResultSetOptions() {
+     this.inventory = null;
+     this.selection = null;
+     this.caseSensitive = false;
+     this.rowCountLimit = ValueVector.MAX_ROW_COUNT;
+     this.vectorSizeLimit = ValueVector.MAX_BUFFER_SIZE;
+   }
+
+   /**
+    * Creates options by copying every setting from a builder.
+    *
+    * @param builder the builder that holds the desired settings
+    */
+   public ResultSetOptions(OptionBuilder builder) {
+     vectorSizeLimit = builder.vectorSizeLimit;
+     rowCountLimit = builder.rowCountLimit;
+     caseSensitive = builder.caseSensitive;
+     inventory = builder.inventory;
+     selection = builder.selection;
+   }
+ }
+
+ /**
+  * Fluent builder for {@link ResultSetOptions}. The builder starts
+  * out with the same vector size limit, row count limit and case
+  * sensitivity as a default-constructed {@link ResultSetOptions};
+  * the selection and vector cache start out as <tt>null</tt>.
+  */
+ public static class OptionBuilder {
+   private int vectorSizeLimit;
+   private int rowCountLimit;
+   private boolean caseSensitive;
+   private Collection<String> selection;
+   private ResultVectorCache inventory;
+
+   /**
+    * Seeds the builder from the default options so that the defaults
+    * are defined in exactly one place.
+    */
+   public OptionBuilder() {
+     ResultSetOptions options = new ResultSetOptions();
+     vectorSizeLimit = options.vectorSizeLimit;
+     rowCountLimit = options.rowCountLimit;
+     caseSensitive = options.caseSensitive;
+   }
+
+   /**
+    * Sets the case sensitivity flag.
+    *
+    * @param flag the value to store in the options
+    * @return this builder
+    */
+   public OptionBuilder setCaseSensitive(boolean flag) {
+     caseSensitive = flag;
+     return this;
+   }
+
+   /**
+    * Sets the maximum number of rows per batch. The value is clamped
+    * to the range 1 .. {@link ValueVector#MAX_ROW_COUNT}: a limit of
+    * zero or less would describe a batch that can never hold a row.
+    *
+    * @param limit the requested row count limit
+    * @return this builder
+    */
+   public OptionBuilder setRowCountLimit(int limit) {
+     // Assumes ValueVector.MAX_ROW_COUNT is at least 1 (TODO confirm).
+     rowCountLimit = Math.max(1, Math.min(limit, ValueVector.MAX_ROW_COUNT));
+     return this;
+   }
+
+   /**
+    * Sets the selection collection that is copied into the options.
+    *
+    * @param selection the selection, or <tt>null</tt> for none
+    * @return this builder
+    */
+   public OptionBuilder setSelection(Collection<String> selection) {
+     this.selection = selection;
+     return this;
+   }
+
+   /**
+    * Sets the vector cache that is copied into the options.
+    *
+    * @param inventory the vector cache, or <tt>null</tt> for none
+    * @return this builder
+    */
+   public OptionBuilder setVectorCache(ResultVectorCache inventory) {
+     this.inventory = inventory;
+     return this;
+   }
+
+   // TODO: No setter for vector length yet: is hard-coded
+   // at present in the value vector.
+
+   /**
+    * Creates the options object from the current builder settings.
+    *
+    * @return a new {@link ResultSetOptions}
+    */
+   public ResultSetOptions build() {
+     return new ResultSetOptions(this);
+   }
+ }
+
+ /**
+  * Maintains a {@link VectorContainer} for a result set loader and
+  * rebuilds its schema whenever the loader reports a schema version
+  * newer than the one last seen by this builder.
+  */
+ public static class VectorContainerBuilder {
+   private final ResultSetLoaderImpl rowSetMutator;
+   private int lastUpdateVersion = -1;
+   private VectorContainer container;
+
+   /**
+    * @param rowSetMutator the loader whose allocator backs the
+    * container and whose schema version drives updates
+    */
+   public VectorContainerBuilder(ResultSetLoaderImpl rowSetMutator) {
+     this.rowSetMutator = rowSetMutator;
+     this.container = new VectorContainer(rowSetMutator.allocator);
+   }
+
+   /**
+    * Brings the container up to date with the loader's schema. Does
+    * nothing when this builder has already seen the loader's current
+    * schema version.
+    */
+   public void update() {
+     if (lastUpdateVersion >= rowSetMutator.schemaVersion()) {
+       return;
+     }
+     rowSetMutator.rootTuple.buildContainer(this);
+     container.buildSchema(SelectionVectorMode.NONE);
+     lastUpdateVersion = rowSetMutator.schemaVersion();
+   }
+
+   /** @return the container managed by this builder */
+   public VectorContainer container() {
+     return container;
+   }
+
+   /**
+    * @return the schema version recorded by the most recent update,
+    * or -1 if no update has been applied yet
+    */
+   public int lastUpdateVersion() {
+     return lastUpdateVersion;
+   }
+
+   /**
+    * Adds a vector to the managed container.
+    *
+    * @param vector the vector to add
+    */
+   public void add(ValueVector vector) {
+     container.add(vector);
+   }
+ }
+
+ /**
+ * Lifecycle states tracked by the loader. The loader's
+ * <tt>state</tt> field is initialized to {@link #START}.
+ */
+ private enum State {
+ /**
+ * Before the first batch.
+ */
+ START,
+ /**
+ * Writing to a batch normally.
+ */
+ ACTIVE,
+ /**
+ * Batch overflowed a vector while writing. Can continue
+ * to write to a temporary "overflow" batch until the
+ * end of the current row.
+ */
+ OVERFLOW,
+ /**
+ * Batch is full due to reaching the row count limit
+ * when saving a row.
+ * No more writes allowed until harvesting the current batch.
+ */
+ FULL_BATCH,
+
+ /**
+ * Current batch was harvested: data is gone. A lookahead
+ * row may exist for the next batch.
+ */
+ HARVESTED,
+ /**
+ * Mutator is closed: no more operations are allowed.
+ */
+ CLOSED
+ }
+
+ private static final org.slf4j.Logger logger =
org.slf4j.LoggerFactory.getLogger(ResultSetLoaderImpl.class);
+
+ private final ResultSetOptions options;
+ private final BufferAllocator allocator;
+ private final TupleSetImpl rootTuple;
+ private final TupleLoader rootWriter;
+ private final WriterIndexImpl writerIndex;
+ private final ResultVectorCache inventory;
+ private ResultSetLoaderImpl.State state = State.START;
+ private int activeSchemaVersion = 0;
+ private int harvestSchemaVersion = 0;
--- End diff --
Added comments. See also the unit tests of this feature.
> Implement size-aware result set loader
> --------------------------------------
>
> Key: DRILL-5657
> URL: https://issues.apache.org/jira/browse/DRILL-5657
> Project: Apache Drill
> Issue Type: Improvement
> Affects Versions: Future
> Reporter: Paul Rogers
> Assignee: Paul Rogers
> Fix For: Future
>
>
> A recent extension to Drill's set of test tools created a "row set"
> abstraction to allow us to create, and verify, record batches with very few
> lines of code. Part of this work involved creating a set of "column
> accessors" in the vector subsystem. Column readers provide a uniform API to
> obtain data from columns (vectors), while column writers provide a uniform
> writing interface.
> DRILL-5211 discusses a set of changes to limit value vectors to 16 MB in size
> (to avoid memory fragmentation due to Drill's two memory allocators). The
> column accessors have proven to be so useful that they will be the basis for
> the new, size-aware writers used by Drill's record readers.
> A step in that direction is to retrofit the column writers to use the
> size-aware {{setScalar()}} and {{setArray()}} methods introduced in
> DRILL-5517.
> Since the test framework row set classes are (at present) the only consumer
> of the accessors, those classes must also be updated with the changes.
> This then allows us to add a new "row mutator" class that handles size-aware
> vector writing, including the case in which a vector fills in the middle of a
> row.
--
This message was sent by Atlassian JIRA
(v6.4.14#64029)