[
https://issues.apache.org/jira/browse/ARROW-1476?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16254002#comment-16254002
]
ASF GitHub Bot commented on ARROW-1476:
---------------------------------------
icexelloss commented on a change in pull request #1316: ARROW-1476: [JAVA]
Implement Final ValueVector Updates
URL: https://github.com/apache/arrow/pull/1316#discussion_r151226732
##########
File path:
java/vector/src/main/java/org/apache/arrow/vector/BaseNullableFixedWidthVector.java
##########
@@ -0,0 +1,848 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ * <p>
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * <p>
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import io.netty.buffer.ArrowBuf;
+
+import java.nio.ByteBuffer;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+
+import org.apache.arrow.memory.OutOfMemoryException;
+import org.apache.arrow.memory.BaseAllocator;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.vector.schema.ArrowFieldNode;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.apache.arrow.vector.util.CallBack;
+import org.apache.arrow.vector.util.OversizedAllocationException;
+import org.apache.arrow.vector.util.TransferPair;
+
+/**
+ * BaseNullableFixedWidthVector provides an abstract interface for
+ * implementing vectors of fixed width values. The vectors are nullable
+ * implying that zero or more elements in the vector could be NULL.
+ */
+public abstract class BaseNullableFixedWidthVector extends BaseValueVector
+ implements FixedWidthVector, FieldVector,
NullableVectorDefinitionSetter {
+ private final byte typeWidth;
+
+ protected int valueAllocationSizeInBytes;
+ protected int validityAllocationSizeInBytes;
+
+ protected final Field field;
+ private int allocationMonitor;
+ protected ArrowBuf validityBuffer;
+ protected ArrowBuf valueBuffer;
+ protected int valueCount;
+
+ public BaseNullableFixedWidthVector(final String name, final BufferAllocator
allocator,
+ FieldType fieldType, final byte
typeWidth) {
+ super(name, allocator);
+ this.typeWidth = typeWidth;
+ field = new Field(name, fieldType, null);
+ valueCount = 0;
+ allocationMonitor = 0;
+ validityBuffer = allocator.getEmpty();
+ valueBuffer = allocator.getEmpty();
+ if (typeWidth > 0) {
+ valueAllocationSizeInBytes = INITIAL_VALUE_ALLOCATION * typeWidth;
+ validityAllocationSizeInBytes =
getValidityBufferSizeFromCount(INITIAL_VALUE_ALLOCATION);
+ } else {
+ /* specialized handling for NullableBitVector */
+ valueAllocationSizeInBytes =
getValidityBufferSizeFromCount(INITIAL_VALUE_ALLOCATION);
+ validityAllocationSizeInBytes = valueAllocationSizeInBytes;
+ }
+ }
+
+
+ /* TODO:
+ *
+ * see if getNullCount() can be made faster -- O(1)
+ */
+
+ /* TODO:
+ * Once the entire hierarchy has been refactored, move common functions
+ * like getNullCount(), splitAndTransferValidityBuffer to top level
+ * base class BaseValueVector.
+ *
+ * Along with this, some class members (validityBuffer) can also be
+ * abstracted out to top level base class.
+ *
+ * Right now BaseValueVector is the top level base class for other
+ * vector types in ValueVector hierarchy (non-nullable) and those
+ * vectors have not yet been refactored/removed so moving things to
+ * the top class as of now is not a good idea.
+ */
+
+
+ @Override
+ @Deprecated
+ public Mutator getMutator() {
+ throw new UnsupportedOperationException("Mutator is not supported for
writing to vector");
+ }
+
+ @Override
+ @Deprecated
+ public Accessor getAccessor() {
+ throw new UnsupportedOperationException("Accessor is not supported for
reading from vector");
+ }
+
+ /**
+ * Get the memory address of buffer that manages the validity
+ * (NULL or NON-NULL nature) of elements in the vector.
+ * @return starting address of the buffer
+ */
+ @Override
+ public long getValidityBufferAddress() {
+ return (validityBuffer.memoryAddress());
+ }
+
+ /**
+ * Get the memory address of buffer that stores the data for elements
+ * in the vector.
+ * @return starting address of the buffer
+ */
+ @Override
+ public long getDataBufferAddress() {
+ return (valueBuffer.memoryAddress());
+ }
+
+ /**
+ * Get the memory address of buffer that stores the offsets for elements
+ * in the vector. This operation is not supported for fixed-width vectors.
+ * @return starting address of the buffer
+ * @throws UnsupportedOperationException for fixed width vectors
+ */
+ @Override
+ public long getOffsetBufferAddress() {
+ throw new UnsupportedOperationException("not supported for fixed-width
vectors");
+ }
+
+ /**
+ * Get buffer that manages the validity (NULL or NON-NULL nature) of
+ * elements in the vector. Consider it as a buffer for internal bit vector
+ * data structure.
+ * @return buffer
+ */
+ @Override
+ public ArrowBuf getValidityBuffer() {
+ return validityBuffer;
+ }
+
+ /**
+ * Get the buffer that stores the data for elements in the vector.
+ * @return buffer
+ */
+ @Override
+ public ArrowBuf getDataBuffer() {
+ return valueBuffer;
+ }
+
+ /**
+ * buffer that stores the offsets for elements
+ * in the vector. This operation is not supported for fixed-width vectors.
+ * @return buffer
+ * @throws UnsupportedOperationException for fixed width vectors
+ */
+ @Override
+ public ArrowBuf getOffsetBuffer() {
+ throw new UnsupportedOperationException("not supported for fixed-width
vectors");
+ }
+
+ /**
+ * Sets the desired value capacity for the vector. This function doesn't
+ * allocate any memory for the vector.
+ * @param valueCount desired number of elements in the vector
+ */
+ @Override
+ public void setInitialCapacity(int valueCount) {
+ final long size = (long) valueCount * typeWidth;
+ if (size > MAX_ALLOCATION_SIZE) {
+ throw new OversizedAllocationException("Requested amount of memory is
more than max allowed");
+ }
+ valueAllocationSizeInBytes = (int) size;
+ validityAllocationSizeInBytes = getValidityBufferSizeFromCount(valueCount);
+ }
+
+ /**
+ * Get the current value capacity for the vector
+ * @return number of elements that vector can hold.
+ */
+ @Override
+ public int getValueCapacity() {
+ return Math.min(getValueBufferValueCapacity(),
getValidityBufferValueCapacity());
+ }
+
+ private int getValueBufferValueCapacity() {
+ return (int) ((valueBuffer.capacity() * 1.0) / typeWidth);
+ }
+
+ private int getValidityBufferValueCapacity() {
+ return (int) (validityBuffer.capacity() * 8L);
+ }
+
+ /**
+ * zero out the vector and the data in associated buffers.
+ */
+ @Override
+ public void zeroVector() {
+ initValidityBuffer();
+ initValueBuffer();
+ }
+
+ /* zero out the validity buffer */
+ private void initValidityBuffer() {
+ validityBuffer.setZero(0, validityBuffer.capacity());
+ }
+
+ /* zero out the data buffer */
+ private void initValueBuffer() {
+ valueBuffer.setZero(0, valueBuffer.capacity());
+ }
+
+ /**
+ * Reset the vector to initial state. Same as {@link #zeroVector()}.
+ * Note that this method doesn't release any memory.
+ */
+ public void reset() {
+ zeroVector();
+ }
+
+ /**
+ * Close the vector and release the associated buffers.
+ */
+ @Override
+ public void close() {
+ clear();
+ }
+
+ /**
+ * Same as {@link #close()}
+ */
+ @Override
+ public void clear() {
+ valueCount = 0;
+ validityBuffer = releaseBuffer(validityBuffer);
+ valueBuffer = releaseBuffer(valueBuffer);
+ }
+
+ /* used to step down the memory allocation */
+ protected void incrementAllocationMonitor() {
+ if (allocationMonitor < 0) {
+ allocationMonitor = 0;
+ }
+ allocationMonitor++;
+ }
+
+ /* used to step up the memory allocation */
+ protected void decrementAllocationMonitor() {
+ if (allocationMonitor > 0) {
+ allocationMonitor = 0;
+ }
+ allocationMonitor--;
+ }
+
+ /**
+ * Same as {@link #allocateNewSafe()}, but throws OutOfMemoryException on failure.
+ */
+ @Override
+ public void allocateNew() {
+ if (!allocateNewSafe()) {
+ throw new OutOfMemoryException("Failure while allocating memory.");
+ }
+ }
+
+ /**
+ * Allocate memory for the vector. We internally use a default value count
+ * of 4096 to allocate memory for at least these many elements in the
+ * vector. See {@link #allocateNew(int)} for allocating memory for specific
+ * number of elements in the vector.
+ *
+ * @return false if memory allocation fails, true otherwise.
+ */
+ @Override
+ public boolean allocateNewSafe() {
+ long curAllocationSizeValue = valueAllocationSizeInBytes;
+ long curAllocationSizeValidity = validityAllocationSizeInBytes;
+
+ if (curAllocationSizeValue > MAX_ALLOCATION_SIZE) {
+ throw new OversizedAllocationException("Requested amount of memory
exceeds limit");
+ }
+
+ /* we are doing a new allocation -- release the current buffers */
+ clear();
+
+ try {
+ allocateBytes(curAllocationSizeValue, curAllocationSizeValidity);
+ } catch (Exception e) {
+ e.printStackTrace();
+ clear();
+ return false;
+ }
+
+ return true;
+ }
+
+ /**
+ * Allocate memory for the vector to support storing at least the provided
number of
+ * elements in the vector. This method must be called prior to using the
ValueVector.
+ *
+ * @param valueCount the desired number of elements in the vector
+ * @throws org.apache.arrow.memory.OutOfMemoryException
+ */
+ public void allocateNew(int valueCount) {
+ long valueBufferSize = valueCount * typeWidth;
+ long validityBufferSize = getValidityBufferSizeFromCount(valueCount);
+ if (typeWidth == 0) {
+ /* specialized handling for NullableBitVector */
+ valueBufferSize = validityBufferSize;
+ }
+
+ if (valueBufferSize > MAX_ALLOCATION_SIZE) {
+ throw new OversizedAllocationException("Requested amount of memory is
more than max allowed");
+ }
+
+ /* we are doing a new allocation -- release the current buffers */
+ clear();
+
+ try {
+ allocateBytes(valueBufferSize, validityBufferSize);
+ } catch (Exception e) {
+ e.printStackTrace();
+ clear();
+ throw e;
+ }
+ }
+
+ /**
+ * Actual memory allocation is done by this function. All the calculations
+ * and knowledge about what size to allocate is up to the callers of this
+ * method.
+ * Callers appropriately handle errors if memory allocation fails here.
+ * Callers should also take care of determining that desired size is
+ * within the bounds of max allocation allowed and any other error
+ * conditions.
+ */
+ private void allocateBytes(final long valueBufferSize, final long
validityBufferSize) {
+ /* allocate data buffer */
+ int curSize = (int) valueBufferSize;
+ valueBuffer = allocator.buffer(curSize);
+ valueBuffer.readerIndex(0);
+ valueAllocationSizeInBytes = curSize;
+ /* allocate validity buffer */
+ allocateValidityBuffer((int) validityBufferSize);
+ zeroVector();
+ }
+
+ /**
+ * During splitAndTransfer, if we are splitting from a random position within a
byte,
+ * we can't just slice the source buffer so we have to explicitly allocate
the
+ * validityBuffer of the target vector. This is unlike the databuffer which
we can
+ * always slice for the target vector.
+ */
+ private void allocateValidityBuffer(final int validityBufferSize) {
+ validityBuffer = allocator.buffer(validityBufferSize);
+ validityBuffer.readerIndex(0);
+ validityAllocationSizeInBytes = validityBufferSize;
+ }
+
+ /**
+ * Get the potential buffer size for a particular number of records.
+ * @param count desired number of elements in the vector
+ * @return estimated size of underlying buffers if the vector holds
+ * a given number of elements
+ */
+ @Override
+ public int getBufferSizeFor(final int count) {
+ if (count == 0) {
+ return 0;
+ }
+ return (count * typeWidth) + getValidityBufferSizeFromCount(count);
+ }
+
+ /**
+ * Get the size (number of bytes) of underlying buffers used by this
+ * vector
+ * @return size of underlying buffers.
+ */
+ @Override
+ public int getBufferSize() {
+ if (valueCount == 0) {
+ return 0;
+ }
+ return (valueCount * typeWidth) +
getValidityBufferSizeFromCount(valueCount);
+ }
+
+ /**
+ * Get information about how this field is materialized.
+ * @return the field corresponding to this vector
+ */
+ @Override
+ public Field getField() {
+ return field;
+ }
+
+ /**
+ * Return the underlying buffers associated with this vector. Note that this
doesn't
+ * impact the reference counts for this buffer so it only should be used for
in-context
+ * access. Also note that this buffer changes regularly thus
+ * external classes shouldn't hold a reference to it (unless they change it).
+ *
+ * @param clear Whether to clear vector before returning; the buffers will
still be refcounted
+ * but the returned array will be the only reference to them
+ * @return The underlying {@link io.netty.buffer.ArrowBuf buffers} that is
used by this
+ * vector instance.
+ */
+ @Override
+ public ArrowBuf[] getBuffers(boolean clear) {
Review comment:
My previous comments were squashed because of the code change.
Why do we need both
```
public ArrowBuf[] getBuffers(boolean clear)
```
and
```
public List<ArrowBuf> getFieldBuffers()
```
as public APIs? They look very similar to me and might confuse users.
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
users@infra.apache.org
> [JAVA] Implement final ValueVector updates
> ------------------------------------------
>
> Key: ARROW-1476
> URL: https://issues.apache.org/jira/browse/ARROW-1476
> Project: Apache Arrow
> Issue Type: Sub-task
> Reporter: Jacques Nadeau
> Assignee: Siddharth Teotia
> Labels: pull-request-available
>
--
This message was sent by Atlassian JIRA
(v6.4.14#64029)