pavibhai commented on a change in pull request #635:
URL: https://github.com/apache/orc/pull/635#discussion_r577090098
##########
File path:
java/core/src/java/org/apache/orc/impl/reader/tree/StructBatchReader.java
##########
@@ -15,62 +15,80 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
package org.apache.orc.impl.reader.tree;
import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.orc.filter.OrcFilterContext;
import org.apache.orc.impl.TreeReaderFactory;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
import java.io.IOException;
-import java.util.Set;
public class StructBatchReader extends BatchReader {
+ private static final Logger LOG =
LoggerFactory.getLogger(StructBatchReader.class);
// The reader context including row-filtering details
private final TreeReaderFactory.Context context;
+ private final TreeReaderFactory.StructTreeReader structReader;
+ private final OrcFilterContext fc;
- public StructBatchReader(TreeReaderFactory.StructTreeReader rowReader,
TreeReaderFactory.Context context) {
+ public StructBatchReader(TypeReader rowReader, TreeReaderFactory.Context
context) {
super(rowReader);
this.context = context;
+ this.fc = new
OrcFilterContext(context.getSchemaEvolution().getReaderSchema());
+ if (rowReader instanceof TreeReaderFactory.StructTreeReader) {
+ structReader = (TreeReaderFactory.StructTreeReader) rowReader;
+ } else {
+ structReader = (TreeReaderFactory.StructTreeReader)
((LevelTypeReader)rowReader).getReader();
+ }
}
- private void readBatchColumn(VectorizedRowBatch batch, TypeReader[]
children, int batchSize, int index)
- throws IOException {
+ private void readBatchColumn(VectorizedRowBatch batch,
+ TypeReader[] children,
+ int batchSize,
+ int index,
+ ReadLevel readLevel)
+ throws IOException {
ColumnVector colVector = batch.cols[index];
if (colVector != null) {
colVector.reset();
colVector.ensureSize(batchSize, false);
- children[index].nextVector(colVector, null, batchSize, batch);
+ children[index].nextVector(colVector, null, batchSize, batch, readLevel);
}
}
@Override
- public void nextBatch(VectorizedRowBatch batch, int batchSize) throws
IOException {
- TypeReader[] children = ((TreeReaderFactory.StructTreeReader)
rootType).fields;
- // Early expand fields --> apply filter --> expand remaining fields
- Set<Integer> earlyExpandCols = context.getColumnFilterIds();
+ public void nextBatch(VectorizedRowBatch batch, int batchSize, ReadLevel
readLevel)
+ throws IOException {
+ nextBatchLevel(batch, batchSize, readLevel);
- // Clear selected and early expand columns used in Filter
- batch.selectedInUse = false;
- for (int i = 0; i < children.length && !earlyExpandCols.isEmpty() &&
- (vectorColumnCount == -1 || i < vectorColumnCount); ++i) {
- if (earlyExpandCols.contains(children[i].getColumnId())) {
- readBatchColumn(batch, children, batchSize, i);
+ if (readLevel == ReadLevel.LEAD) {
+ // Apply filter callback to reduce number of # rows selected for
decoding in the next
+ // TreeReaders
+ if (this.context.getColumnFilterCallback() != null) {
+ this.context.getColumnFilterCallback().accept(fc.setBatch(batch));
}
}
- // Since we are going to filter rows based on some column values set
batch.size earlier here
- batch.size = batchSize;
+ }
+
+ private void nextBatchLevel(VectorizedRowBatch batch, int batchSize,
ReadLevel readLevel) throws IOException {
+ TypeReader[] children = structReader.fields;
- // Apply filter callback to reduce number of # rows selected for decoding
in the next TreeReaders
- if (!earlyExpandCols.isEmpty() && this.context.getColumnFilterCallback()
!= null) {
- this.context.getColumnFilterCallback().accept(batch);
+ if (readLevel != ReadLevel.FOLLOW) {
+ // In case of FOLLOW we leave the selectedInUse untouched.
+ batch.selectedInUse = false;
Review comment:
The selectedInUse flag comes from the VectorizedRowBatch; it indicates a
partial selection in the batch, which requires using the selected vector to
determine which rows are valid.
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]