Github user davies commented on a diff in the pull request:

    https://github.com/apache/spark/pull/10628#discussion_r49045624
  
    --- Diff: 
sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/OnHeapColumnVector.java
 ---
    @@ -0,0 +1,165 @@
    +package org.apache.spark.sql.execution.vectorized;
    +
    +import org.apache.spark.sql.types.DataType;
    +import org.apache.spark.sql.types.DoubleType;
    +import org.apache.spark.sql.types.IntegerType;
    +import org.apache.spark.unsafe.Platform;
    +
    +import java.nio.ByteBuffer;
    +import java.nio.DoubleBuffer;
    +
    +/**
    + * A column backed by an in memory JVM array. This stores the NULLs as a 
byte per value
    + * and a java array for the values.
    + */
    +public final class OnHeapColumnVector extends ColumnVector {
    +  // The data stored in these arrays need to maintain binary compatible. 
We can
    +  // directly pass this buffer to external components.
    +
    +  // This is faster than a boolean array and we optimize this over memory 
footprint.
    +  private byte[] nulls;
    +
    +  // Array for each type. Only 1 is populated for any type.
    +  private int[] intData;
    +  private double[] doubleData;
    +
    +  protected OnHeapColumnVector(int capacity, DataType type) {
    +    super(capacity, type);
    +    if (type instanceof IntegerType) {
    +      this.intData = new int[capacity];
    +    } else if (type instanceof DoubleType) {
    +      this.doubleData = new double[capacity];
    +    } else {
    +      throw new RuntimeException("Unhandled " + type);
    +    }
    +    this.nulls = new byte[capacity];
    +    reset();
    +  }
    +
    +  @Override
    +  public final long valuesNativeAddress() {
    +    throw new RuntimeException("Cannot get native address for on heap 
column");
    +  }
    +  @Override
    +  public final long nullsNativeAddress() {
    +    throw new RuntimeException("Cannot get native address for on heap 
column");
    +  }
    +
    +  @Override
    +  public final void close() {
    +    nulls = null;
    +    intData = null;
    +    doubleData = null;
    +  }
    +
    +
    +  //
    +  // APIs dealing with nulls
    +  //
    +
    +  @Override
    +  public final void putNotNull(int rowId) {
    +    nulls[rowId] = (byte)0;
    +  }
    +
    +  @Override
    +  public final void putNull(int rowId) {
    +    nulls[rowId] = (byte)1;
    +    ++numNulls;
    +    anyNullsSet = true;
    +  }
    +
    +  @Override
    +  public final void putNulls(int rowId, int count) {
    +    for (int i = 0; i < count; ++i) {
    +      nulls[rowId + i] = (byte)1;
    +    }
    +    anyNullsSet = true;
    +    numNulls += count;
    +  }
    +
    +  @Override
    +  public final void putNotNulls(int rowId, int count) {
    +    for (int i = 0; i < count; ++i) {
    +      nulls[rowId + i] = (byte)0;
    +    }
    +  }
    +
    +  @Override
    +  public final boolean getIsNull(int rowId) {
    +    return nulls[rowId] == 1;
    +  }
    +
    +  //
    +  // APIs dealing with Ints
    +  //
    +
    +  @Override
    +  public final void putInt(int rowId, int value) {
    +    intData[rowId] = value;
    +  }
    +
    +  @Override
    +  public final void putInts(int rowId, int count, int value) {
    +    for (int i = 0; i < count; ++i) {
    +      intData[i + rowId] = value;
    +    }
    +  }
    +
    +  @Override
    +  public final void putInts(int rowId, int count, int[] src, int srcIndex) 
{
    +    System.arraycopy(src, srcIndex, intData, rowId, count);
    +  }
    +
    +  @Override
    +  public final void putIntsLittleEndian(int rowId, int count, byte[] src, 
int srcIndex) {
    +    for (int i = 0; i < count; ++i) {
    +      // TODO: is this the fastest way?
    --- End diff --
    
    We could still use Platform.copyMemory() here


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and would like it, or if the feature is enabled but not working, please
contact infrastructure at [email protected] or file a JIRA ticket
with INFRA.
---

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to