spark git commit: [SPARK-25114][CORE] Fix RecordBinaryComparator when subtraction between two words is divisible by Integer.MAX_VALUE.
Repository: spark Updated Branches: refs/heads/branch-2.3 9702bb637 -> 8bde46781 [SPARK-25114][CORE] Fix RecordBinaryComparator when subtraction between two words is divisible by Integer.MAX_VALUE. https://github.com/apache/spark/pull/22079#discussion_r209705612 It is possible for two objects to be unequal and yet we consider them as equal with this code, if the long values are separated by Int.MaxValue. This PR fixes the issue. Add new test cases in `RecordBinaryComparatorSuite`. Closes #22101 from jiangxb1987/fix-rbc. Authored-by: Xingbo Jiang Signed-off-by: Xiao Li (cherry picked from commit 4fb96e5105cec4a3eb19a2b7997600b086bac32f) Signed-off-by: Xiao Li Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/8bde4678 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/8bde4678 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/8bde4678 Branch: refs/heads/branch-2.3 Commit: 8bde4678166f5f01837919d4f8d742b89f5e76b8 Parents: 9702bb6 Author: Xingbo Jiang Authored: Mon Aug 20 23:13:31 2018 -0700 Committer: Xiao Li Committed: Mon Aug 20 23:18:17 2018 -0700 -- .../sql/execution/RecordBinaryComparator.java | 26 +- .../sort/RecordBinaryComparatorSuite.java | 322 +++ 2 files changed, 337 insertions(+), 11 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/8bde4678/sql/catalyst/src/main/java/org/apache/spark/sql/execution/RecordBinaryComparator.java -- diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/execution/RecordBinaryComparator.java b/sql/catalyst/src/main/java/org/apache/spark/sql/execution/RecordBinaryComparator.java index bb77b5b..40c2cc8 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/execution/RecordBinaryComparator.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/execution/RecordBinaryComparator.java @@ -22,12 +22,10 @@ import org.apache.spark.util.collection.unsafe.sort.RecordComparator; public final class RecordBinaryComparator extends RecordComparator { - // TODO(jiangxb) Add test suite for this. @Override public int compare( Object leftObj, long leftOff, int leftLen, Object rightObj, long rightOff, int rightLen) { int i = 0; -int res = 0; // If the arrays have different length, the longer one is larger. if (leftLen != rightLen) { @@ -40,27 +38,33 @@ public final class RecordBinaryComparator extends RecordComparator { // check if stars align and we can get both offsets to be aligned if ((leftOff % 8) == (rightOff % 8)) { while ((leftOff + i) % 8 != 0 && i < leftLen) { -res = (Platform.getByte(leftObj, leftOff + i) & 0xff) - -(Platform.getByte(rightObj, rightOff + i) & 0xff); -if (res != 0) return res; +final int v1 = Platform.getByte(leftObj, leftOff + i) & 0xff; +final int v2 = Platform.getByte(rightObj, rightOff + i) & 0xff; +if (v1 != v2) { + return v1 > v2 ? 1 : -1; +} i += 1; } } // for architectures that support unaligned accesses, chew it up 8 bytes at a time if (Platform.unaligned() || (((leftOff + i) % 8 == 0) && ((rightOff + i) % 8 == 0))) { while (i <= leftLen - 8) { -res = (int) ((Platform.getLong(leftObj, leftOff + i) - -Platform.getLong(rightObj, rightOff + i)) % Integer.MAX_VALUE); -if (res != 0) return res; +final long v1 = Platform.getLong(leftObj, leftOff + i); +final long v2 = Platform.getLong(rightObj, rightOff + i); +if (v1 != v2) { + return v1 > v2 ? 1 : -1; +} i += 8; } } // this will finish off the unaligned comparisons, or do the entire aligned comparison // whichever is needed. while (i < leftLen) { - res = (Platform.getByte(leftObj, leftOff + i) & 0xff) - - (Platform.getByte(rightObj, rightOff + i) & 0xff); - if (res != 0) return res; + final int v1 = Platform.getByte(leftObj, leftOff + i) & 0xff; + final int v2 = Platform.getByte(rightObj, rightOff + i) & 0xff; + if (v1 != v2) { +return v1 > v2 ? 1 : -1; + } i += 1; } http://git-wip-us.apache.org/repos/asf/spark/blob/8bde4678/sql/core/src/test/java/test/org/apache/spark/sql/execution/sort/RecordBinaryComparatorSuite.java -- diff --git a/sql/core/src/test/java/test/org/apache/spark/sql/execution/sort/RecordBinaryComparatorSuite.java b/sql/core/src/test/java/test/org/apache/spark/sql/execution/sort/RecordBinaryComparatorSuite.java new file mode 100644 index 000..97f3dc5 --- /dev/null +++ b/sql/core/src/test/java/test/org/apache/spar
spark git commit: [SPARK-25114][CORE] Fix RecordBinaryComparator when subtraction between two words is divisible by Integer.MAX_VALUE.
Repository: spark Updated Branches: refs/heads/master f984ec75e -> 4fb96e510 [SPARK-25114][CORE] Fix RecordBinaryComparator when subtraction between two words is divisible by Integer.MAX_VALUE. ## What changes were proposed in this pull request? https://github.com/apache/spark/pull/22079#discussion_r209705612 It is possible for two objects to be unequal and yet we consider them as equal with this code, if the long values are separated by Int.MaxValue. This PR fixes the issue. ## How was this patch tested? Add new test cases in `RecordBinaryComparatorSuite`. Closes #22101 from jiangxb1987/fix-rbc. Authored-by: Xingbo Jiang Signed-off-by: Xiao Li Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/4fb96e51 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/4fb96e51 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/4fb96e51 Branch: refs/heads/master Commit: 4fb96e5105cec4a3eb19a2b7997600b086bac32f Parents: f984ec7 Author: Xingbo Jiang Authored: Mon Aug 20 23:13:31 2018 -0700 Committer: Xiao Li Committed: Mon Aug 20 23:13:31 2018 -0700 -- .../sql/execution/RecordBinaryComparator.java | 26 .../sort/RecordBinaryComparatorSuite.java | 66 2 files changed, 81 insertions(+), 11 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/4fb96e51/sql/catalyst/src/main/java/org/apache/spark/sql/execution/RecordBinaryComparator.java -- diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/execution/RecordBinaryComparator.java b/sql/catalyst/src/main/java/org/apache/spark/sql/execution/RecordBinaryComparator.java index bb77b5b..40c2cc8 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/execution/RecordBinaryComparator.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/execution/RecordBinaryComparator.java @@ -22,12 +22,10 @@ import org.apache.spark.util.collection.unsafe.sort.RecordComparator; public final class RecordBinaryComparator extends RecordComparator { - // TODO(jiangxb) Add test suite for this. @Override public int compare( Object leftObj, long leftOff, int leftLen, Object rightObj, long rightOff, int rightLen) { int i = 0; -int res = 0; // If the arrays have different length, the longer one is larger. if (leftLen != rightLen) { @@ -40,27 +38,33 @@ public final class RecordBinaryComparator extends RecordComparator { // check if stars align and we can get both offsets to be aligned if ((leftOff % 8) == (rightOff % 8)) { while ((leftOff + i) % 8 != 0 && i < leftLen) { -res = (Platform.getByte(leftObj, leftOff + i) & 0xff) - -(Platform.getByte(rightObj, rightOff + i) & 0xff); -if (res != 0) return res; +final int v1 = Platform.getByte(leftObj, leftOff + i) & 0xff; +final int v2 = Platform.getByte(rightObj, rightOff + i) & 0xff; +if (v1 != v2) { + return v1 > v2 ? 1 : -1; +} i += 1; } } // for architectures that support unaligned accesses, chew it up 8 bytes at a time if (Platform.unaligned() || (((leftOff + i) % 8 == 0) && ((rightOff + i) % 8 == 0))) { while (i <= leftLen - 8) { -res = (int) ((Platform.getLong(leftObj, leftOff + i) - -Platform.getLong(rightObj, rightOff + i)) % Integer.MAX_VALUE); -if (res != 0) return res; +final long v1 = Platform.getLong(leftObj, leftOff + i); +final long v2 = Platform.getLong(rightObj, rightOff + i); +if (v1 != v2) { + return v1 > v2 ? 1 : -1; +} i += 8; } } // this will finish off the unaligned comparisons, or do the entire aligned comparison // whichever is needed. while (i < leftLen) { - res = (Platform.getByte(leftObj, leftOff + i) & 0xff) - - (Platform.getByte(rightObj, rightOff + i) & 0xff); - if (res != 0) return res; + final int v1 = Platform.getByte(leftObj, leftOff + i) & 0xff; + final int v2 = Platform.getByte(rightObj, rightOff + i) & 0xff; + if (v1 != v2) { +return v1 > v2 ? 1 : -1; + } i += 1; } http://git-wip-us.apache.org/repos/asf/spark/blob/4fb96e51/sql/core/src/test/java/test/org/apache/spark/sql/execution/sort/RecordBinaryComparatorSuite.java -- diff --git a/sql/core/src/test/java/test/org/apache/spark/sql/execution/sort/RecordBinaryComparatorSuite.java b/sql/core/src/test/java/test/org/apache/spark/sql/execution/sort/RecordBinaryComparatorSuite.java index a19ddbd..97f3dc5 100644 --- a/sql/core/src/test/java/test/org/apache/spark/sql/execution/sort/RecordBinaryComparato