This is an automated email from the ASF dual-hosted git repository.

dmollitor pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/avro.git


The following commit(s) were added to refs/heads/main by this push:
     new 1a2d200d3 AVRO-4061: Use Default Value of 1 For UTF8 Hash (#3177)
1a2d200d3 is described below

commit 1a2d200d37236ed520f9e5e7fb03fd9b20bb51f8
Author: belugabehr <[email protected]>
AuthorDate: Fri Sep 27 13:33:13 2024 -0400

    AVRO-4061: Use Default Value of 1 For UTF8 Hash (#3177)
---
 .../main/java/org/apache/avro/io/BinaryData.java   |  2 +-
 .../src/main/java/org/apache/avro/util/Utf8.java   |  4 ++-
 .../test/java/org/apache/avro/util/TestUtf8.java   | 37 +++++++++++-----------
 3 files changed, 23 insertions(+), 20 deletions(-)

diff --git a/lang/java/avro/src/main/java/org/apache/avro/io/BinaryData.java b/lang/java/avro/src/main/java/org/apache/avro/io/BinaryData.java
index e6fc7086e..b6126ec23 100644
--- a/lang/java/avro/src/main/java/org/apache/avro/io/BinaryData.java
+++ b/lang/java/avro/src/main/java/org/apache/avro/io/BinaryData.java
@@ -257,7 +257,7 @@ public class BinaryData {
     case FIXED:
       return hashBytes(1, data, schema.getFixedSize(), false);
     case STRING:
-      return hashBytes(0, data, decoder.readInt(), false);
+      return hashBytes(1, data, decoder.readInt(), false);
     case BYTES:
       return hashBytes(1, data, decoder.readInt(), true);
     case NULL:
diff --git a/lang/java/avro/src/main/java/org/apache/avro/util/Utf8.java b/lang/java/avro/src/main/java/org/apache/avro/util/Utf8.java
index b38d237f2..ae4df8e5c 100644
--- a/lang/java/avro/src/main/java/org/apache/avro/util/Utf8.java
+++ b/lang/java/avro/src/main/java/org/apache/avro/util/Utf8.java
@@ -41,7 +41,8 @@ public class Utf8 implements Comparable<Utf8>, CharSequence, Externalizable {
   private String string;
 
   public Utf8() {
-    bytes = EMPTY;
+    this.bytes = EMPTY;
+    this.hash = 1;
   }
 
   public Utf8(String string) {
@@ -174,6 +175,7 @@ public class Utf8 implements Comparable<Utf8>, CharSequence, Externalizable {
     if (h == 0) {
       byte[] bytes = this.bytes;
       int length = this.length;
+      h = 1;
       for (int i = 0; i < length; i++) {
         h = h * 31 + bytes[i];
       }
diff --git a/lang/java/avro/src/test/java/org/apache/avro/util/TestUtf8.java b/lang/java/avro/src/test/java/org/apache/avro/util/TestUtf8.java
index e0977ff9f..91618ca5e 100644
--- a/lang/java/avro/src/test/java/org/apache/avro/util/TestUtf8.java
+++ b/lang/java/avro/src/test/java/org/apache/avro/util/TestUtf8.java
@@ -59,43 +59,44 @@ public class TestUtf8 {
 
   @Test
   void hashCodeReused() {
-    assertEquals(97, new Utf8("a").hashCode());
-    assertEquals(3904, new Utf8("zz").hashCode());
-    assertEquals(122, new Utf8("z").hashCode());
-    assertEquals(99162322, new Utf8("hello").hashCode());
-    assertEquals(3198781, new Utf8("hell").hashCode());
+    assertEquals(1, new Utf8().hashCode());
+    assertEquals(128, new Utf8("a").hashCode());
+    assertEquals(4865, new Utf8("zz").hashCode());
+    assertEquals(153, new Utf8("z").hashCode());
+    assertEquals(127791473, new Utf8("hello").hashCode());
+    assertEquals(4122302, new Utf8("hell").hashCode());
 
     Utf8 u = new Utf8("a");
-    assertEquals(97, u.hashCode());
-    assertEquals(97, u.hashCode());
+    assertEquals(128, u.hashCode());
+    assertEquals(128, u.hashCode());
 
     u.set("a");
-    assertEquals(97, u.hashCode());
+    assertEquals(128, u.hashCode());
 
     u.setByteLength(1);
-    assertEquals(97, u.hashCode());
+    assertEquals(128, u.hashCode());
     u.setByteLength(2);
-    assertNotEquals(97, u.hashCode());
+    assertNotEquals(128, u.hashCode());
 
     u.set("zz");
-    assertEquals(3904, u.hashCode());
+    assertEquals(4865, u.hashCode());
     u.setByteLength(1);
-    assertEquals(122, u.hashCode());
+    assertEquals(153, u.hashCode());
 
     u.set("hello");
-    assertEquals(99162322, u.hashCode());
+    assertEquals(127791473, u.hashCode());
     u.setByteLength(4);
-    assertEquals(3198781, u.hashCode());
+    assertEquals(4122302, u.hashCode());
 
     u.set(new Utf8("zz"));
-    assertEquals(3904, u.hashCode());
+    assertEquals(4865, u.hashCode());
     u.setByteLength(1);
-    assertEquals(122, u.hashCode());
+    assertEquals(153, u.hashCode());
 
     u.set(new Utf8("hello"));
-    assertEquals(99162322, u.hashCode());
+    assertEquals(127791473, u.hashCode());
     u.setByteLength(4);
-    assertEquals(3198781, u.hashCode());
+    assertEquals(4122302, u.hashCode());
   }
 
   @Test

Reply via email to