[ https://issues.apache.org/jira/browse/PARQUET-2190?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17696658#comment-17696658 ]
ASF GitHub Bot commented on PARQUET-2190: ----------------------------------------- jiangjiguang closed pull request #1006: PARQUET-2190 byte array has better performance than ByteBuffer URL: https://github.com/apache/parquet-mr/pull/1006 > byte array has better performance than ByteBuffer > -------------------------------------------------- > > Key: PARQUET-2190 > URL: https://issues.apache.org/jira/browse/PARQUET-2190 > Project: Parquet > Issue Type: Improvement > Reporter: jiangjiguang0719 > Priority: Major > > The BytePacker should add the following method > public abstract void unpack8Values(final byte[] input, final int inPos, final > int[] output, final int outPos); > to replace the method > @Deprecated > public void unpack8Values(final byte[] input, final int inPos, final int[] > output, final int outPos) { > unpack8Values(ByteBuffer.wrap(input), inPos, output, outPos); > } > > In my tests, a byte array has better performance than a ByteBuffer. > The test result is: > [Unpack8ValuesByteArray spent time] 80 ms > [Unpack8ValuesByteBuffer spent time] 133 ms > > The test code is: > package org.apache.parquet.column.values.bitpacking; > import java.nio.ByteBuffer; > public class ByteBufferTest { > private static final BytePacker bytePacker = > Packer.LITTLE_ENDIAN.newBytePacker(7); > private static final int COUNT = 100000; > public static void main(String[] args) { > byte [] in = new byte[1008]; > int [] out = new int[1152]; > int [] out1 = new int[1152]; > int [] out2 = new int[1152]; > int res = 0; > for(int i = 0; i < in.length; i++) { > in[i] = (byte) i; > } > for(int i = 0; i < COUNT; i++) { > res += unpack8ValuesBytes(in, out, i % out.length); > } > res = 0; > long t1 = System.currentTimeMillis(); > for(int i = 0; i < COUNT; i++) { > res += unpack8ValuesBytes(in, out1, i % out.length); > } > long t2 = System.currentTimeMillis(); > System.out.println("[Unpack8ValuesByteArray spent time] " + (t2-t1) + " > ms"); > ByteBuffer byteBuffer = ByteBuffer.wrap(in); > 
for(int i = 0; i < COUNT; i++) { > res += unpack8ValuesByteBuffer(byteBuffer, out, i % out.length); > } > res = 0; > long t3 = System.currentTimeMillis(); > for(int i = 0; i < COUNT; i++) { > res += unpack8ValuesByteBuffer(byteBuffer, out2, i % out.length); > } > long t4 = System.currentTimeMillis(); > System.out.println("[Unpack8ValuesByteBuffer spent time] " + (t4-t3) + " > ms"); > for (int i=0; i<out1.length; i++) { > if(out1[i] != out2[i]) { > System.out.println("diff: " + out1[i] + " " + out2[i]); > } > } > } > private static int unpack8ValuesBytes(byte [] in, int [] out, int ctr) { > for(int i = 0, j = 0; i < in.length; i+=7, j+=8) { > bytePacker.unpack8Values(in, i, out, j); > } > return out[ctr]; > } > private static int unpack8ValuesByteBuffer(ByteBuffer in, int [] out, int > ctr) { > for(int i = 0, j = 0; i < in.capacity(); i+=7, j+=8) { > bytePacker.unpack8Values(in, i, out, j); > } > return out[ctr]; > } > } -- This message was sent by Atlassian Jira (v8.20.10#820010)