[ https://issues.apache.org/jira/browse/DRILL-5657?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16129852#comment-16129852 ]
ASF GitHub Bot commented on DRILL-5657: --------------------------------------- Github user paul-rogers commented on a diff in the pull request: https://github.com/apache/drill/pull/866#discussion_r133618019 --- Diff: exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/impl/LogicalTupleLoader.java --- @@ -0,0 +1,204 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.drill.exec.physical.rowSet.impl; + +import java.util.ArrayList; +import java.util.Collection; +import java.util.HashSet; +import java.util.List; +import java.util.Set; + +import org.apache.drill.exec.physical.rowSet.ColumnLoader; +import org.apache.drill.exec.physical.rowSet.TupleLoader; +import org.apache.drill.exec.physical.rowSet.TupleSchema; +import org.apache.drill.exec.physical.rowSet.TupleSchema.TupleColumnSchema; +import org.apache.drill.exec.record.BatchSchema; +import org.apache.drill.exec.record.BatchSchema.SelectionVectorMode; +import org.apache.drill.exec.record.MaterializedField; + +/** + * Shim inserted between an actual tuple loader and the client to remove columns + * that are not projected from input to output. 
The underlying loader handles only + * the projected columns in order to improve efficiency. This class presents the + * full table schema, but returns null for the non-projected columns. This allows + * the reader to work with the table schema as defined by the data source, but + * skip those columns which are not projected. Skipping non-projected columns avoids + * creating value vectors which are immediately discarded. It may also save the reader + * from reading unwanted data. + */ +public class LogicalTupleLoader implements TupleLoader { + + public static final int UNMAPPED = -1; + + private static class MappedColumn implements TupleColumnSchema { + + private final MaterializedField schema; + private final int mapping; + + public MappedColumn(MaterializedField schema, int mapping) { + this.schema = schema; + this.mapping = mapping; + } + + @Override + public MaterializedField schema() { return schema; } + + @Override + public boolean isSelected() { return mapping != UNMAPPED; } + + @Override + public int vectorIndex() { return mapping; } + } + + /** + * Implementation of the tuple schema that describes the full data source + * schema. The underlying loader schema is a subset of these columns. Note + * that the columns appear in the same order in both schemas, but the loader + * schema is a subset of the table schema. 
+ */ + + private class LogicalTupleSchema implements TupleSchema { + + private final Set<String> selection = new HashSet<>(); + private final TupleSchema physicalSchema; + + private LogicalTupleSchema(TupleSchema physicalSchema, Collection<String> selection) { + this.physicalSchema = physicalSchema; + this.selection.addAll(selection); + } + + @Override + public int columnCount() { return logicalSchema.count(); } + + @Override + public int columnIndex(String colName) { + return logicalSchema.indexOf(rsLoader.toKey(colName)); + } + + @Override + public TupleColumnSchema metadata(int colIndex) { return logicalSchema.get(colIndex); } + + @Override + public MaterializedField column(int colIndex) { return logicalSchema.get(colIndex).schema(); } + + @Override + public TupleColumnSchema metadata(String colName) { return logicalSchema.get(colName); } + + @Override + public MaterializedField column(String colName) { return logicalSchema.get(colName).schema(); } + + @Override + public int addColumn(MaterializedField columnSchema) { + String key = rsLoader.toKey(columnSchema.getName()); + int pIndex; + if (selection.contains(key)) { --- End diff -- Removed this feature. Now use a case-insensitive map for the name space. > Implement size-aware result set loader > -------------------------------------- > > Key: DRILL-5657 > URL: https://issues.apache.org/jira/browse/DRILL-5657 > Project: Apache Drill > Issue Type: Improvement > Affects Versions: Future > Reporter: Paul Rogers > Assignee: Paul Rogers > Fix For: Future > > > A recent extension to Drill's set of test tools created a "row set" > abstraction to allow us to create, and verify, record batches with very few > lines of code. Part of this work involved creating a set of "column > accessors" in the vector subsystem. Column readers provide a uniform API to > obtain data from columns (vectors), while column writers provide a uniform > writing interface. 
> DRILL-5211 discusses a set of changes to limit value vectors to 16 MB in size > (to avoid memory fragmentation due to Drill's two memory allocators). The > column accessors have proven to be so useful that they will be the basis for > the new, size-aware writers used by Drill's record readers. > A step in that direction is to retrofit the column writers to use the > size-aware {{setScalar()}} and {{setArray()}} methods introduced in > DRILL-5517. > Since the test framework row set classes are (at present) the only consumer > of the accessors, those classes must also be updated with the changes. > This then allows us to add a new "row mutator" class that handles size-aware > vector writing, including the case in which a vector fills in the middle of a > row. -- This message was sent by Atlassian JIRA (v6.4.14#64029)