This is an automated email from the ASF dual-hosted git repository.

gabor pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/parquet-mr.git


The following commit(s) were added to refs/heads/master by this push:
     new 06bb358  PARQUET-2106: Refactoring lexicographic `BinaryComparator` to 
avoid `ByteBuffer.wrap` in the hot-path (#940)
06bb358 is described below

commit 06bb358bcf8a0855c54f20122a57a88d9fde16c1
Author: Alexey Kudinkin <[email protected]>
AuthorDate: Thu Dec 9 01:29:59 2021 -0800

    PARQUET-2106: Refactoring lexicographic `BinaryComparator` to avoid 
`ByteBuffer.wrap` in the hot-path (#940)
---
 .../java/org/apache/parquet/io/api/Binary.java     | 129 ++++++++++++++++++++-
 .../apache/parquet/schema/PrimitiveComparator.java |  31 ++---
 2 files changed, 134 insertions(+), 26 deletions(-)

diff --git a/parquet-column/src/main/java/org/apache/parquet/io/api/Binary.java 
b/parquet-column/src/main/java/org/apache/parquet/io/api/Binary.java
index 8afc8f2..3380bf6 100644
--- a/parquet-column/src/main/java/org/apache/parquet/io/api/Binary.java
+++ b/parquet-column/src/main/java/org/apache/parquet/io/api/Binary.java
@@ -1,4 +1,4 @@
-/* 
+/*
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
@@ -6,9 +6,9 @@
  * to you under the Apache License, Version 2.0 (the
  * "License"); you may not use this file except in compliance
  * with the License.  You may obtain a copy of the License at
- * 
+ *
  *   http://www.apache.org/licenses/LICENSE-2.0
- * 
+ *
  * Unless required by applicable law or agreed to in writing,
  * software distributed under the License is distributed on an
  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
@@ -77,6 +77,12 @@ abstract public class Binary implements Comparable<Binary>, 
Serializable {
   @Deprecated
   abstract public int compareTo(Binary other);
 
+  abstract int lexicographicCompare(Binary other);
+
+  abstract int lexicographicCompare(byte[] other, int otherOffset, int 
otherLength);
+
+  abstract int lexicographicCompare(ByteBuffer other, int otherOffset, int 
otherLength);
+
   abstract public ByteBuffer toByteBuffer();
 
   @Override
@@ -192,6 +198,22 @@ abstract public class Binary implements 
Comparable<Binary>, Serializable {
     }
 
     @Override
+    int lexicographicCompare(Binary other) {
+      // NOTE: We have to flip the sign, since we swap operands sides
+      return -other.lexicographicCompare(value, offset, length);
+    }
+
+    @Override
+    int lexicographicCompare(byte[] other, int otherOffset, int otherLength) {
+      return Binary.lexicographicCompare(value, offset, length, other, 
otherOffset, otherLength);
+    }
+
+    @Override
+    int lexicographicCompare(ByteBuffer other, int otherOffset, int 
otherLength) {
+      return Binary.lexicographicCompare(value, offset, length, other, 
otherOffset, otherLength);
+    }
+
+    @Override
     public ByteBuffer toByteBuffer() {
       return ByteBuffer.wrap(value, offset, length);
     }
@@ -328,7 +350,23 @@ abstract public class Binary implements 
Comparable<Binary>, Serializable {
       return 
PrimitiveComparator.UNSIGNED_LEXICOGRAPHICAL_BINARY_COMPARATOR.compare(this, 
other);
     }
 
-   @Override
+    @Override
+    int lexicographicCompare(Binary other) {
+      // NOTE: We have to flip the sign, since we swap operands sides
+      return -other.lexicographicCompare(value, 0, value.length);
+    }
+
+    @Override
+    int lexicographicCompare(byte[] other, int otherOffset, int otherLength) {
+      return Binary.lexicographicCompare(this.value, 0, value.length, other, 
otherOffset, otherLength);
+    }
+
+    @Override
+    int lexicographicCompare(ByteBuffer other, int otherOffset, int 
otherLength) {
+      return Binary.lexicographicCompare(this.value, 0, value.length, other, 
otherOffset, otherLength);
+    }
+
+    @Override
     public ByteBuffer toByteBuffer() {
       return ByteBuffer.wrap(value);
     }
@@ -476,6 +514,32 @@ abstract public class Binary implements 
Comparable<Binary>, Serializable {
     }
 
     @Override
+    int lexicographicCompare(Binary other) {
+      if (value.hasArray()) {
+        // NOTE: We have to flip the sign, since we swap operands sides
+        return -other.lexicographicCompare(value.array(), value.arrayOffset() 
+ offset, length);
+      } else {
+        // NOTE: We have to flip the sign, since we swap operands sides
+        return -other.lexicographicCompare(value, offset, length);
+      }
+    }
+
+    @Override
+    int lexicographicCompare(byte[] other, int otherOffset, int otherLength) {
+      if (value.hasArray()) {
+        return Binary.lexicographicCompare(value.array(), value.arrayOffset() 
+ offset, length, other, otherOffset, otherLength);
+      } else {
+        // NOTE: We have to flip the sign, since we swap operands sides
+        return -Binary.lexicographicCompare(other, otherOffset, otherLength, 
value, offset, length);
+      }
+    }
+
+    @Override
+    int lexicographicCompare(ByteBuffer other, int otherOffset, int 
otherLength) {
+      return Binary.lexicographicCompare(value, offset, length, other, 
otherOffset, otherLength);
+    }
+
+    @Override
     public ByteBuffer toByteBuffer() {
       ByteBuffer ret = value.duplicate();
       ret.position(offset);
@@ -542,6 +606,10 @@ abstract public class Binary implements 
Comparable<Binary>, Serializable {
     return new FromCharSequenceBinary(value);
   }
 
+  public static int lexicographicCompare(Binary one, Binary other) {
+    return one.lexicographicCompare(other);
+  }
+
   /**
    * @see {@link Arrays#hashCode(byte[])}
    * @param array
@@ -613,4 +681,57 @@ abstract public class Binary implements 
Comparable<Binary>, Serializable {
     }
     return true;
   }
+
+  private static final int lexicographicCompare(byte[] array1, int offset1, 
int length1, byte[] array2, int offset2, int length2) {
+    if (array1 == null && array2 == null) return 0;
+    if (array1 == null || array2 == null) return array1 != null ? 1 : -1;
+
+    int minLen = Math.min(length1, length2);
+    for (int i = 0; i < minLen; i++) {
+      int res = unsignedCompare(array1[i + offset1], array2[i + offset2]);
+      if (res != 0) {
+        return res;
+      }
+    }
+
+    return length1 - length2;
+  }
+
+  private static final int lexicographicCompare(byte[] array, int offset1, int 
length1, ByteBuffer buffer, int offset2, int length2) {
+    if (array == null && buffer == null) return 0;
+    if (array == null || buffer == null) return array != null ? 1 : -1;
+
+    int minLen = Math.min(length1, length2);
+    for (int i = 0; i < minLen; i++) {
+      int res = unsignedCompare(array[i + offset1], buffer.get(i + offset2));
+      if (res != 0) {
+        return res;
+      }
+    }
+
+    return length1 - length2;
+  }
+
+  private static final int lexicographicCompare(ByteBuffer buffer1, int 
offset1, int length1, ByteBuffer buffer2, int offset2, int length2) {
+    if (buffer1 == null && buffer2 == null) return 0;
+    if (buffer1 == null || buffer2 == null) return buffer1 != null ? 1 : -1;
+
+    int minLen = Math.min(length1, length2);
+    for (int i = 0; i < minLen; i++) {
+      int res = unsignedCompare(buffer1.get(i + offset1), buffer2.get(i + 
offset2));
+      if (res != 0) {
+        return res;
+      }
+    }
+
+    return length1 - length2;
+  }
+
+  private static int unsignedCompare(byte b1, byte b2) {
+    return toUnsigned(b1) - toUnsigned(b2);
+  }
+
+  private static final int toUnsigned(byte b) {
+    return b & 0xFF;
+  }
 }
diff --git 
a/parquet-column/src/main/java/org/apache/parquet/schema/PrimitiveComparator.java
 
b/parquet-column/src/main/java/org/apache/parquet/schema/PrimitiveComparator.java
index d343b0e..a762e95 100644
--- 
a/parquet-column/src/main/java/org/apache/parquet/schema/PrimitiveComparator.java
+++ 
b/parquet-column/src/main/java/org/apache/parquet/schema/PrimitiveComparator.java
@@ -183,10 +183,10 @@ public abstract class PrimitiveComparator<T> implements 
Comparator<T>, Serializa
   private static abstract class BinaryComparator extends 
PrimitiveComparator<Binary> {
     @Override
     int compareNotNulls(Binary o1, Binary o2) {
-      return compare(o1.toByteBuffer(), o2.toByteBuffer());
+      return compareBinary(o1, o2);
     }
 
-    abstract int compare(ByteBuffer b1, ByteBuffer b2);
+    abstract int compareBinary(Binary b1, Binary b2);
 
     final int toUnsigned(byte b) {
       return b & 0xFF;
@@ -195,25 +195,8 @@ public abstract class PrimitiveComparator<T> implements 
Comparator<T>, Serializa
 
   public static final PrimitiveComparator<Binary> 
UNSIGNED_LEXICOGRAPHICAL_BINARY_COMPARATOR = new BinaryComparator() {
     @Override
-    int compare(ByteBuffer b1, ByteBuffer b2) {
-      int l1 = b1.remaining();
-      int l2 = b2.remaining();
-      int p1 = b1.position();
-      int p2 = b2.position();
-      int minL = Math.min(l1, l2);
-
-      for (int i = 0; i < minL; ++i) {
-        int result = unsignedCompare(b1.get(p1 + i), b2.get(p2 + i));
-        if (result != 0) {
-          return result;
-        }
-      }
-
-      return l1 - l2;
-    }
-
-    private int unsignedCompare(byte b1, byte b2) {
-      return toUnsigned(b1) - toUnsigned(b2);
+    int compareBinary(Binary b1, Binary b2) {
+      return Binary.lexicographicCompare(b1, b2);
     }
 
     @Override
@@ -232,7 +215,11 @@ public abstract class PrimitiveComparator<T> implements 
Comparator<T>, Serializa
     private static final int POSITIVE_PADDING = 0;
 
     @Override
-    int compare(ByteBuffer b1, ByteBuffer b2) {
+    int compareBinary(Binary b1, Binary b2) {
+      return compare(b1.toByteBuffer(), b2.toByteBuffer());
+    }
+
+    private int compare(ByteBuffer b1, ByteBuffer b2) {
       int l1 = b1.remaining();
       int l2 = b2.remaining();
       int p1 = b1.position();

Reply via email to