This is an automated email from the ASF dual-hosted git repository.
rskraba pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/avro.git
The following commit(s) were added to refs/heads/main by this push:
new 6a72c2b0a8 AVRO-4210: Fix BinaryData.compareBytes to treat bytes as
unsigned (#3537)
6a72c2b0a8 is described below
commit 6a72c2b0a8c2bc2050e3862f65f55026dde30355
Author: Jason <[email protected]>
AuthorDate: Sun Nov 23 17:11:17 2025 +0200
AVRO-4210: Fix BinaryData.compareBytes to treat bytes as unsigned (#3537)
* Fixed BinaryData.compareBytes to treat bytes as unsigned
This is a regression fix introudeced in 1.12.1
added unit test
* Re-use the DirectBytes.compareBytes method to ensure consistency
This returns the comparision to be unsigned like it was in 1.12.0
This reverts commit 005ee8069dd69169e5c1842f0849d90373910b18.
---
.../java/org/apache/avro/generic/GenericData.java | 2 +-
.../main/java/org/apache/avro/io/BinaryData.java | 10 +++----
.../java/org/apache/avro/reflect/ReflectData.java | 3 +-
.../org/apache/avro/specific/SpecificFixed.java | 3 +-
.../src/main/java/org/apache/avro/util/Utf8.java | 3 +-
.../java/org/apache/avro/io/TestBinaryData.java | 35 ++++++++++++++++++++--
6 files changed, 44 insertions(+), 12 deletions(-)
diff --git
a/lang/java/avro/src/main/java/org/apache/avro/generic/GenericData.java
b/lang/java/avro/src/main/java/org/apache/avro/generic/GenericData.java
index f327851e5d..0098d8da5e 100644
--- a/lang/java/avro/src/main/java/org/apache/avro/generic/GenericData.java
+++ b/lang/java/avro/src/main/java/org/apache/avro/generic/GenericData.java
@@ -546,7 +546,7 @@ public class GenericData {
@Override
public int compareTo(Fixed that) {
- return Arrays.compare(this.bytes, 0, this.bytes.length, that.bytes, 0,
that.bytes.length);
+ return BinaryData.compareBytes(this.bytes, 0, this.bytes.length,
that.bytes, 0, that.bytes.length);
}
}
diff --git a/lang/java/avro/src/main/java/org/apache/avro/io/BinaryData.java
b/lang/java/avro/src/main/java/org/apache/avro/io/BinaryData.java
index 9ce68e9f4c..0df469d495 100644
--- a/lang/java/avro/src/main/java/org/apache/avro/io/BinaryData.java
+++ b/lang/java/avro/src/main/java/org/apache/avro/io/BinaryData.java
@@ -156,8 +156,7 @@ public class BinaryData {
}
case FIXED: {
int size = schema.getFixedSize();
- int c = Arrays.compare(d.d1.getBuf(), d.d1.getPos(), d.d1.getPos() +
size, d.d2.getBuf(), d.d2.getPos(),
- d.d2.getPos() + size);
+ int c = compareBytes(d.d1.getBuf(), d.d1.getPos(), size, d.d2.getBuf(),
d.d2.getPos(), size);
d.d1.skipFixed(size);
d.d2.skipFixed(size);
return c;
@@ -166,8 +165,7 @@ public class BinaryData {
case BYTES: {
int l1 = d1.readInt();
int l2 = d2.readInt();
- int c = Arrays.compare(d.d1.getBuf(), d.d1.getPos(), d.d1.getPos() + l1,
d.d2.getBuf(), d.d2.getPos(),
- d.d2.getPos() + l2);
+ int c = compareBytes(d.d1.getBuf(), d.d1.getPos(), l1, d.d2.getBuf(),
d.d2.getPos(), l2);
d.d1.skipFixed(l1);
d.d2.skipFixed(l2);
return c;
@@ -181,10 +179,10 @@ public class BinaryData {
/**
* Lexicographically compare bytes. If equal, return zero. If greater-than,
- * return a positive value, if less than return a negative value.
+ * return a positive value, if less than, return a negative value.
*/
public static int compareBytes(byte[] b1, int s1, int l1, byte[] b2, int s2,
int l2) {
- return Arrays.compare(b1, s1, s1 + l1, b2, s2, s2 + l2);
+ return Arrays.compareUnsigned(b1, s1, s1 + l1, b2, s2, s2 + l2);
}
private static class HashData {
diff --git
a/lang/java/avro/src/main/java/org/apache/avro/reflect/ReflectData.java
b/lang/java/avro/src/main/java/org/apache/avro/reflect/ReflectData.java
index 916c15efbf..1810b27e06 100644
--- a/lang/java/avro/src/main/java/org/apache/avro/reflect/ReflectData.java
+++ b/lang/java/avro/src/main/java/org/apache/avro/reflect/ReflectData.java
@@ -30,6 +30,7 @@ import org.apache.avro.generic.GenericContainer;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericFixed;
import org.apache.avro.generic.IndexedRecord;
+import org.apache.avro.io.BinaryData;
import org.apache.avro.io.DatumReader;
import org.apache.avro.io.DatumWriter;
import org.apache.avro.specific.FixedSize;
@@ -995,7 +996,7 @@ public class ReflectData extends SpecificData {
break;
byte[] b1 = (byte[]) o1;
byte[] b2 = (byte[]) o2;
- return Arrays.compare(b1, 0, b1.length, b2, 0, b2.length);
+ return BinaryData.compareBytes(b1, 0, b1.length, b2, 0, b2.length);
}
return super.compare(o1, o2, s, equals);
}
diff --git
a/lang/java/avro/src/main/java/org/apache/avro/specific/SpecificFixed.java
b/lang/java/avro/src/main/java/org/apache/avro/specific/SpecificFixed.java
index 98a553eecb..9984acb568 100644
--- a/lang/java/avro/src/main/java/org/apache/avro/specific/SpecificFixed.java
+++ b/lang/java/avro/src/main/java/org/apache/avro/specific/SpecificFixed.java
@@ -24,6 +24,7 @@ import java.io.IOException;
import java.util.Arrays;
import org.apache.avro.Schema;
import org.apache.avro.generic.GenericFixed;
+import org.apache.avro.io.BinaryData;
/** Base class for generated fixed-sized data classes. */
public abstract class SpecificFixed implements GenericFixed,
Comparable<SpecificFixed>, Externalizable {
@@ -69,7 +70,7 @@ public abstract class SpecificFixed implements GenericFixed,
Comparable<Specific
@Override
public int compareTo(SpecificFixed that) {
- return Arrays.compare(this.bytes, 0, this.bytes.length, that.bytes, 0,
that.bytes.length);
+ return BinaryData.compareBytes(this.bytes, 0, this.bytes.length,
that.bytes, 0, that.bytes.length);
}
@Override
diff --git a/lang/java/avro/src/main/java/org/apache/avro/util/Utf8.java
b/lang/java/avro/src/main/java/org/apache/avro/util/Utf8.java
index 5e5af1a111..a5c4ece29d 100644
--- a/lang/java/avro/src/main/java/org/apache/avro/util/Utf8.java
+++ b/lang/java/avro/src/main/java/org/apache/avro/util/Utf8.java
@@ -25,6 +25,7 @@ import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import org.apache.avro.SystemLimitException;
+import org.apache.avro.io.BinaryData;
/**
* A Utf8 string. Unlike {@link String}, instances are mutable. This is more
@@ -195,7 +196,7 @@ public class Utf8 implements Comparable<Utf8>,
CharSequence, Externalizable {
@Override
public int compareTo(Utf8 that) {
- return Arrays.compare(this.bytes, 0, this.length, that.bytes, 0,
that.length);
+ return BinaryData.compareBytes(this.bytes, 0, this.length, that.bytes, 0,
that.length);
}
// CharSequence implementation
diff --git
a/lang/java/avro/src/test/java/org/apache/avro/io/TestBinaryData.java
b/lang/java/avro/src/test/java/org/apache/avro/io/TestBinaryData.java
index 167cd72463..595f8c3128 100644
--- a/lang/java/avro/src/test/java/org/apache/avro/io/TestBinaryData.java
+++ b/lang/java/avro/src/test/java/org/apache/avro/io/TestBinaryData.java
@@ -18,10 +18,11 @@
package org.apache.avro.io;
+import org.junit.jupiter.api.Test;
+
import static org.junit.jupiter.api.Assertions.assertArrayEquals;
import static org.junit.jupiter.api.Assertions.assertEquals;
-
-import org.junit.jupiter.api.Test;
+import static org.junit.jupiter.api.Assertions.assertTrue;
public class TestBinaryData {
@@ -60,4 +61,34 @@ public class TestBinaryData {
BinaryData.encodeLong(Integer.MIN_VALUE, longResult, 0);
assertArrayEquals(intResult, longResult);
}
+
+ @Test
+ void testCompareBytesUnsigned() {
+ // Test case: byte value 0xFF (-1 as signed, 255 as unsigned)
+ // should be greater than 0x7F (127)
+ byte[] b1 = new byte[] { (byte) 0xFF };
+ byte[] b2 = new byte[] { (byte) 0x7F };
+ int result = BinaryData.compareBytes(b1, 0, 1, b2, 0, 1);
+ assertTrue(result > 0, "0xFF (255 unsigned) should be greater than 0x7F
(127)");
+ result = BinaryData.compareBytes(b2, 0, 1, b1, 0, 1);
+ assertTrue(result < 0, "0x7F (127) should be less than 0xFF (255
unsigned)");
+ result = BinaryData.compareBytes(b1, 0, 1, b1, 0, 1);
+ assertEquals(0, result, "Equal byte arrays should return 0");
+
+ // Test with multiple bytes: {0x00, 0xFF} vs {0x00, 0x7F}
+ byte[] b3 = new byte[] { 0x00, (byte) 0xFF };
+ byte[] b4 = new byte[] { 0x00, (byte) 0x7F };
+ byte[] b5 = new byte[] { (byte) 0xFF, 0x00 };
+ byte[] b6 = new byte[] { (byte) 0x7F, 0x00 };
+ result = BinaryData.compareBytes(b3, 0, 2, b4, 0, 2);
+ assertTrue(result > 1, "{0x00, 0xFF} should be greater than {0x00, 0x7F}");
+ result = BinaryData.compareBytes(b5, 0, 2, b6, 0, 2);
+ assertTrue(result > 1, "{0xFF, 0x00} should be greater than {0x7F, 0x00}");
+
+ // Test with negative byte values: -1 (0xFF) should be greater than -128
(0x80)
+ byte[] b7 = new byte[] { (byte) -1 };
+ byte[] b8 = new byte[] { (byte) -128 };
+ result = BinaryData.compareBytes(b7, 0, 1, b8, 0, 1);
+ assertTrue(result > 0, "-1 (0xFF=255 unsigned) should be greater than -128
(0x80=128 unsigned)");
+ }
}