This is an automated email from the ASF dual-hosted git repository.
dmollitor pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/avro.git
The following commit(s) were added to refs/heads/main by this push:
new 1a2d200d3 AVRO-4061: Use Default Value of 1 For UTF8 Hash (#3177)
1a2d200d3 is described below
commit 1a2d200d37236ed520f9e5e7fb03fd9b20bb51f8
Author: belugabehr <[email protected]>
AuthorDate: Fri Sep 27 13:33:13 2024 -0400
AVRO-4061: Use Default Value of 1 For UTF8 Hash (#3177)
---
.../main/java/org/apache/avro/io/BinaryData.java | 2 +-
.../src/main/java/org/apache/avro/util/Utf8.java | 4 ++-
.../test/java/org/apache/avro/util/TestUtf8.java | 37 +++++++++++-----------
3 files changed, 23 insertions(+), 20 deletions(-)
diff --git a/lang/java/avro/src/main/java/org/apache/avro/io/BinaryData.java b/lang/java/avro/src/main/java/org/apache/avro/io/BinaryData.java
index e6fc7086e..b6126ec23 100644
--- a/lang/java/avro/src/main/java/org/apache/avro/io/BinaryData.java
+++ b/lang/java/avro/src/main/java/org/apache/avro/io/BinaryData.java
@@ -257,7 +257,7 @@ public class BinaryData {
case FIXED:
return hashBytes(1, data, schema.getFixedSize(), false);
case STRING:
- return hashBytes(0, data, decoder.readInt(), false);
+ return hashBytes(1, data, decoder.readInt(), false);
case BYTES:
return hashBytes(1, data, decoder.readInt(), true);
case NULL:
diff --git a/lang/java/avro/src/main/java/org/apache/avro/util/Utf8.java b/lang/java/avro/src/main/java/org/apache/avro/util/Utf8.java
index b38d237f2..ae4df8e5c 100644
--- a/lang/java/avro/src/main/java/org/apache/avro/util/Utf8.java
+++ b/lang/java/avro/src/main/java/org/apache/avro/util/Utf8.java
@@ -41,7 +41,8 @@ public class Utf8 implements Comparable<Utf8>, CharSequence, Externalizable {
private String string;
public Utf8() {
- bytes = EMPTY;
+ this.bytes = EMPTY;
+ this.hash = 1;
}
public Utf8(String string) {
@@ -174,6 +175,7 @@ public class Utf8 implements Comparable<Utf8>, CharSequence, Externalizable {
if (h == 0) {
byte[] bytes = this.bytes;
int length = this.length;
+ h = 1;
for (int i = 0; i < length; i++) {
h = h * 31 + bytes[i];
}
diff --git a/lang/java/avro/src/test/java/org/apache/avro/util/TestUtf8.java b/lang/java/avro/src/test/java/org/apache/avro/util/TestUtf8.java
index e0977ff9f..91618ca5e 100644
--- a/lang/java/avro/src/test/java/org/apache/avro/util/TestUtf8.java
+++ b/lang/java/avro/src/test/java/org/apache/avro/util/TestUtf8.java
@@ -59,43 +59,44 @@ public class TestUtf8 {
@Test
void hashCodeReused() {
- assertEquals(97, new Utf8("a").hashCode());
- assertEquals(3904, new Utf8("zz").hashCode());
- assertEquals(122, new Utf8("z").hashCode());
- assertEquals(99162322, new Utf8("hello").hashCode());
- assertEquals(3198781, new Utf8("hell").hashCode());
+ assertEquals(1, new Utf8().hashCode());
+ assertEquals(128, new Utf8("a").hashCode());
+ assertEquals(4865, new Utf8("zz").hashCode());
+ assertEquals(153, new Utf8("z").hashCode());
+ assertEquals(127791473, new Utf8("hello").hashCode());
+ assertEquals(4122302, new Utf8("hell").hashCode());
Utf8 u = new Utf8("a");
- assertEquals(97, u.hashCode());
- assertEquals(97, u.hashCode());
+ assertEquals(128, u.hashCode());
+ assertEquals(128, u.hashCode());
u.set("a");
- assertEquals(97, u.hashCode());
+ assertEquals(128, u.hashCode());
u.setByteLength(1);
- assertEquals(97, u.hashCode());
+ assertEquals(128, u.hashCode());
u.setByteLength(2);
- assertNotEquals(97, u.hashCode());
+ assertNotEquals(128, u.hashCode());
u.set("zz");
- assertEquals(3904, u.hashCode());
+ assertEquals(4865, u.hashCode());
u.setByteLength(1);
- assertEquals(122, u.hashCode());
+ assertEquals(153, u.hashCode());
u.set("hello");
- assertEquals(99162322, u.hashCode());
+ assertEquals(127791473, u.hashCode());
u.setByteLength(4);
- assertEquals(3198781, u.hashCode());
+ assertEquals(4122302, u.hashCode());
u.set(new Utf8("zz"));
- assertEquals(3904, u.hashCode());
+ assertEquals(4865, u.hashCode());
u.setByteLength(1);
- assertEquals(122, u.hashCode());
+ assertEquals(153, u.hashCode());
u.set(new Utf8("hello"));
- assertEquals(99162322, u.hashCode());
+ assertEquals(127791473, u.hashCode());
u.setByteLength(4);
- assertEquals(3198781, u.hashCode());
+ assertEquals(4122302, u.hashCode());
}
@Test