Github user parthchandra commented on a diff in the pull request: https://github.com/apache/drill/pull/1060#discussion_r158549859 --- Diff: exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/VLAbstractEntryReader.java --- @@ -0,0 +1,215 @@ +/******************************************************************************* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + ******************************************************************************/ +package org.apache.drill.exec.store.parquet.columnreaders; + +import java.nio.ByteBuffer; + +import org.apache.drill.exec.store.parquet.columnreaders.VLColumnBulkInput.ColumnPrecisionInfo; +import org.apache.drill.exec.store.parquet.columnreaders.VLColumnBulkInput.PageDataInfo; +import org.apache.drill.exec.util.MemoryUtils; + +/** Abstract class for sub-classes implementing several algorithms for loading a Bulk Entry */ +abstract class VLAbstractEntryReader { + + /** byte buffer used for buffering page data */ + protected final ByteBuffer buffer; + /** Page Data Information */ + protected final PageDataInfo pageInfo; + /** expected precision type: fixed or variable length */ + protected final ColumnPrecisionInfo columnPrecInfo; + /** Bulk entry */ + protected final VLColumnBulkEntry entry; + + /** + * CTOR. + * @param _buffer byte buffer for data buffering (within CPU cache) + * @param _pageInfo page being processed information + * @param _columnPrecInfo column precision information + * @param _entry reusable bulk entry object + */ + VLAbstractEntryReader(ByteBuffer _buffer, + PageDataInfo _pageInfo, + ColumnPrecisionInfo _columnPrecInfo, + VLColumnBulkEntry _entry) { + + this.buffer = _buffer; + this.pageInfo = _pageInfo; + this.columnPrecInfo = _columnPrecInfo; + this.entry = _entry; + } + + /** + * @param valuesToRead maximum values to read within the current page + * @return a bulk entry object + */ + abstract VLColumnBulkEntry getEntry(int valsToReadWithinPage); + + /** + * Indicates whether to use bulk processing + */ + protected final boolean bulkProcess() { + return columnPrecInfo.bulkProcess; + } + + /** + * Loads new data into the buffer if empty or the force flag is set. + * + * @param force flag to force loading new data into the buffer + */ + protected final boolean load(boolean force) { + + if (!force && buffer.hasRemaining()) { + return true; // NOOP + } + + // We're here either because the buffer is empty or we want to force a new load operation. + // In the case of force, there might be unprocessed data (still in the buffer) which is fine + // since the caller updates the page data buffer's offset only for the data it has consumed; this + // means unread data will be loaded again but this time will be positioned in the beginning of the + // buffer. This can happen only for the last entry in the buffer when either of its length or value + // is incomplete. + buffer.clear(); + + int remaining = remainingPageData(); + int toCopy = remaining > buffer.capacity() ? buffer.capacity() : remaining; + + if (toCopy == 0) { + return false; + } + + pageInfo.pageData.getBytes(pageInfo.pageDataOff, buffer.array(), buffer.position(), toCopy); --- End diff -- So seriously, this is faster? I would have expected the copy from direct to java heap memory to be a big issue. There are HDFS APIs to read into ByteBuffer (not DirectByteBuffer) that we could leverage and reduce the memory copy across direct memory and Java heap memory.
---