jiangjiguang0719 created PARQUET-2190:
-----------------------------------------

             Summary: byte array has better performance than ByteBuffer
                 Key: PARQUET-2190
                 URL: https://issues.apache.org/jira/browse/PARQUET-2190
             Project: Parquet
          Issue Type: Improvement
            Reporter: jiangjiguang0719


BytePacker should add the following abstract method:
// Proposed addition: an abstract byte[] overload, so that each BytePacker
// implementation reads the array directly instead of going through a ByteBuffer.
public abstract void unpack8Values(final byte[] input, final int inPos, final 
int[] output, final int outPos);

to replace the existing deprecated method:
// Existing method: wraps the input array in a new ByteBuffer on every call and
// delegates to the ByteBuffer overload.
@Deprecated
public void unpack8Values(final byte[] input, final int inPos, final int[] 
output, final int outPos) {
    unpack8Values(ByteBuffer.wrap(input), inPos, output, outPos);
}

 

In my testing, the byte array version performs better than the ByteBuffer version.

The test result is:

[Unpack8ValuesByteArray spent time] 80 ms
[Unpack8ValuesByteBuffer spent time] 133 ms

 

The test code is:

package org.apache.parquet.column.values.bitpacking;

import java.nio.ByteBuffer;

public class ByteBufferTest {
  private static final BytePacker bytePacker = 
Packer.LITTLE_ENDIAN.newBytePacker(7);

  private static final int COUNT = 100000;

  public static void main(String[] args) {
    byte  [] in  = new byte[1008];
    int [] out = new int[1152];
    int [] out1 = new int[1152];
    int [] out2 = new int[1152];

    int res = 0;

    for(int i = 0; i < in.length; i++) {
      in[i] = (byte) i;
    }

    for(int i = 0; i < COUNT; i++) {
      res += unpack8ValuesBytes(in, out, i % out.length);
    }

    res = 0;
    long t1 = System.currentTimeMillis();
    for(int i = 0; i < COUNT; i++) {
      res += unpack8ValuesBytes(in, out1, i % out.length);
    }
    long t2 = System.currentTimeMillis();
    System.out.println("[Unpack8ValuesByteArray spent time] " + (t2-t1) + " 
ms");

    ByteBuffer byteBuffer = ByteBuffer.wrap(in);

    for(int i = 0; i < COUNT; i++) {
      res += unpack8ValuesByteBuffer(byteBuffer, out, i % out.length);
    }

    res = 0;
    long t3 = System.currentTimeMillis();
    for(int i = 0; i < COUNT; i++) {
      res += unpack8ValuesByteBuffer(byteBuffer, out2, i % out.length);
    }
    long t4 = System.currentTimeMillis();
    System.out.println("[Unpack8ValuesByteBuffer spent time] " + (t4-t3) + " 
ms");

    for (int i=0; i<out1.length; i++) {
      if(out1[i] != out2[i]) {
        System.out.println("diff: " + out1[i] + " " + out2[i]);
      }
    }
  }

  private static int unpack8ValuesBytes(byte [] in, int [] out, int ctr) {
    for(int i = 0, j = 0; i < in.length; i+=7, j+=8) {
      bytePacker.unpack8Values(in, i, out, j);
    }
    return out[ctr];
  }
  private static int unpack8ValuesByteBuffer(ByteBuffer in, int [] out, int 
ctr) {
    for(int i = 0, j = 0; i < in.capacity(); i+=7, j+=8) {
      bytePacker.unpack8Values(in, i, out, j);
    }
    return out[ctr];
  }
}



--
This message was sent by Atlassian Jira
(v8.20.10#820010)

Reply via email to