This is an automated email from the ASF dual-hosted git repository.
gabor pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/parquet-mr.git
The following commit(s) were added to refs/heads/master by this push:
new 06bb358 PARQUET-2106: Refactoring lexicographic `BinaryComparator` to
avoid `ByteBuffer.wrap` in the hot-path (#940)
06bb358 is described below
commit 06bb358bcf8a0855c54f20122a57a88d9fde16c1
Author: Alexey Kudinkin <[email protected]>
AuthorDate: Thu Dec 9 01:29:59 2021 -0800
PARQUET-2106: Refactoring lexicographic `BinaryComparator` to avoid
`ByteBuffer.wrap` in the hot-path (#940)
---
.../java/org/apache/parquet/io/api/Binary.java | 129 ++++++++++++++++++++-
.../apache/parquet/schema/PrimitiveComparator.java | 31 ++---
2 files changed, 134 insertions(+), 26 deletions(-)
diff --git a/parquet-column/src/main/java/org/apache/parquet/io/api/Binary.java
b/parquet-column/src/main/java/org/apache/parquet/io/api/Binary.java
index 8afc8f2..3380bf6 100644
--- a/parquet-column/src/main/java/org/apache/parquet/io/api/Binary.java
+++ b/parquet-column/src/main/java/org/apache/parquet/io/api/Binary.java
@@ -1,4 +1,4 @@
-/*
+/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
@@ -6,9 +6,9 @@
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
- *
+ *
* http://www.apache.org/licenses/LICENSE-2.0
- *
+ *
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
@@ -77,6 +77,12 @@ abstract public class Binary implements Comparable<Binary>,
Serializable {
@Deprecated
abstract public int compareTo(Binary other);
+ abstract int lexicographicCompare(Binary other);
+
+ abstract int lexicographicCompare(byte[] other, int otherOffset, int
otherLength);
+
+ abstract int lexicographicCompare(ByteBuffer other, int otherOffset, int
otherLength);
+
abstract public ByteBuffer toByteBuffer();
@Override
@@ -192,6 +198,22 @@ abstract public class Binary implements
Comparable<Binary>, Serializable {
}
@Override
+ int lexicographicCompare(Binary other) {
+ // NOTE: We have to flip the sign, since we swap operands sides
+ return -other.lexicographicCompare(value, offset, length);
+ }
+
+ @Override
+ int lexicographicCompare(byte[] other, int otherOffset, int otherLength) {
+ return Binary.lexicographicCompare(value, offset, length, other,
otherOffset, otherLength);
+ }
+
+ @Override
+ int lexicographicCompare(ByteBuffer other, int otherOffset, int
otherLength) {
+ return Binary.lexicographicCompare(value, offset, length, other,
otherOffset, otherLength);
+ }
+
+ @Override
public ByteBuffer toByteBuffer() {
return ByteBuffer.wrap(value, offset, length);
}
@@ -328,7 +350,23 @@ abstract public class Binary implements
Comparable<Binary>, Serializable {
return
PrimitiveComparator.UNSIGNED_LEXICOGRAPHICAL_BINARY_COMPARATOR.compare(this,
other);
}
- @Override
+ @Override
+ int lexicographicCompare(Binary other) {
+ // NOTE: We have to flip the sign, since we swap operands sides
+ return -other.lexicographicCompare(value, 0, value.length);
+ }
+
+ @Override
+ int lexicographicCompare(byte[] other, int otherOffset, int otherLength) {
+ return Binary.lexicographicCompare(this.value, 0, value.length, other,
otherOffset, otherLength);
+ }
+
+ @Override
+ int lexicographicCompare(ByteBuffer other, int otherOffset, int
otherLength) {
+ return Binary.lexicographicCompare(this.value, 0, value.length, other,
otherOffset, otherLength);
+ }
+
+ @Override
public ByteBuffer toByteBuffer() {
return ByteBuffer.wrap(value);
}
@@ -476,6 +514,32 @@ abstract public class Binary implements
Comparable<Binary>, Serializable {
}
@Override
+ int lexicographicCompare(Binary other) {
+ if (value.hasArray()) {
+ // NOTE: We have to flip the sign, since we swap operands sides
+ return -other.lexicographicCompare(value.array(), value.arrayOffset()
+ offset, length);
+ } else {
+ // NOTE: We have to flip the sign, since we swap operands sides
+ return -other.lexicographicCompare(value, offset, length);
+ }
+ }
+
+ @Override
+ int lexicographicCompare(byte[] other, int otherOffset, int otherLength) {
+ if (value.hasArray()) {
+ return Binary.lexicographicCompare(value.array(), value.arrayOffset()
+ offset, length, other, otherOffset, otherLength);
+ } else {
+ // NOTE: We have to flip the sign, since we swap operands sides
+ return -Binary.lexicographicCompare(other, otherOffset, otherLength,
value, offset, length);
+ }
+ }
+
+ @Override
+ int lexicographicCompare(ByteBuffer other, int otherOffset, int
otherLength) {
+ return Binary.lexicographicCompare(value, offset, length, other,
otherOffset, otherLength);
+ }
+
+ @Override
public ByteBuffer toByteBuffer() {
ByteBuffer ret = value.duplicate();
ret.position(offset);
@@ -542,6 +606,10 @@ abstract public class Binary implements
Comparable<Binary>, Serializable {
return new FromCharSequenceBinary(value);
}
+ public static int lexicographicCompare(Binary one, Binary other) {
+ return one.lexicographicCompare(other);
+ }
+
/**
* @see {@link Arrays#hashCode(byte[])}
* @param array
@@ -613,4 +681,57 @@ abstract public class Binary implements
Comparable<Binary>, Serializable {
}
return true;
}
+
+ private static final int lexicographicCompare(byte[] array1, int offset1,
int length1, byte[] array2, int offset2, int length2) {
+ if (array1 == null && array2 == null) return 0;
+ if (array1 == null || array2 == null) return array1 != null ? 1 : -1;
+
+ int minLen = Math.min(length1, length2);
+ for (int i = 0; i < minLen; i++) {
+ int res = unsignedCompare(array1[i + offset1], array2[i + offset2]);
+ if (res != 0) {
+ return res;
+ }
+ }
+
+ return length1 - length2;
+ }
+
+ private static final int lexicographicCompare(byte[] array, int offset1, int
length1, ByteBuffer buffer, int offset2, int length2) {
+ if (array == null && buffer == null) return 0;
+ if (array == null || buffer == null) return array != null ? 1 : -1;
+
+ int minLen = Math.min(length1, length2);
+ for (int i = 0; i < minLen; i++) {
+ int res = unsignedCompare(array[i + offset1], buffer.get(i + offset2));
+ if (res != 0) {
+ return res;
+ }
+ }
+
+ return length1 - length2;
+ }
+
+ private static final int lexicographicCompare(ByteBuffer buffer1, int
offset1, int length1, ByteBuffer buffer2, int offset2, int length2) {
+ if (buffer1 == null && buffer2 == null) return 0;
+ if (buffer1 == null || buffer2 == null) return buffer1 != null ? 1 : -1;
+
+ int minLen = Math.min(length1, length2);
+ for (int i = 0; i < minLen; i++) {
+ int res = unsignedCompare(buffer1.get(i + offset1), buffer2.get(i +
offset2));
+ if (res != 0) {
+ return res;
+ }
+ }
+
+ return length1 - length2;
+ }
+
+ private static int unsignedCompare(byte b1, byte b2) {
+ return toUnsigned(b1) - toUnsigned(b2);
+ }
+
+ private static final int toUnsigned(byte b) {
+ return b & 0xFF;
+ }
}
diff --git
a/parquet-column/src/main/java/org/apache/parquet/schema/PrimitiveComparator.java
b/parquet-column/src/main/java/org/apache/parquet/schema/PrimitiveComparator.java
index d343b0e..a762e95 100644
---
a/parquet-column/src/main/java/org/apache/parquet/schema/PrimitiveComparator.java
+++
b/parquet-column/src/main/java/org/apache/parquet/schema/PrimitiveComparator.java
@@ -183,10 +183,10 @@ public abstract class PrimitiveComparator<T> implements
Comparator<T>, Serializa
private static abstract class BinaryComparator extends
PrimitiveComparator<Binary> {
@Override
int compareNotNulls(Binary o1, Binary o2) {
- return compare(o1.toByteBuffer(), o2.toByteBuffer());
+ return compareBinary(o1, o2);
}
- abstract int compare(ByteBuffer b1, ByteBuffer b2);
+ abstract int compareBinary(Binary b1, Binary b2);
final int toUnsigned(byte b) {
return b & 0xFF;
@@ -195,25 +195,8 @@ public abstract class PrimitiveComparator<T> implements
Comparator<T>, Serializa
public static final PrimitiveComparator<Binary>
UNSIGNED_LEXICOGRAPHICAL_BINARY_COMPARATOR = new BinaryComparator() {
@Override
- int compare(ByteBuffer b1, ByteBuffer b2) {
- int l1 = b1.remaining();
- int l2 = b2.remaining();
- int p1 = b1.position();
- int p2 = b2.position();
- int minL = Math.min(l1, l2);
-
- for (int i = 0; i < minL; ++i) {
- int result = unsignedCompare(b1.get(p1 + i), b2.get(p2 + i));
- if (result != 0) {
- return result;
- }
- }
-
- return l1 - l2;
- }
-
- private int unsignedCompare(byte b1, byte b2) {
- return toUnsigned(b1) - toUnsigned(b2);
+ int compareBinary(Binary b1, Binary b2) {
+ return Binary.lexicographicCompare(b1, b2);
}
@Override
@@ -232,7 +215,11 @@ public abstract class PrimitiveComparator<T> implements
Comparator<T>, Serializa
private static final int POSITIVE_PADDING = 0;
@Override
- int compare(ByteBuffer b1, ByteBuffer b2) {
+ int compareBinary(Binary b1, Binary b2) {
+ return compare(b1.toByteBuffer(), b2.toByteBuffer());
+ }
+
+ private int compare(ByteBuffer b1, ByteBuffer b2) {
int l1 = b1.remaining();
int l2 = b2.remaining();
int p1 = b1.position();