This is an automated email from the ASF dual-hosted git repository.

hansva pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/hop.git


The following commit(s) were added to refs/heads/main by this push:
     new 175ed4c6c2 Fix & speed up ByteArrayHashIndex #5972 (#6053)
175ed4c6c2 is described below

commit 175ed4c6c266719d784cfb7ce5164181b6df138e
Author: Matteo <[email protected]>
AuthorDate: Thu Nov 27 12:46:17 2025 +0100

    Fix & speed up ByteArrayHashIndex #5972 (#6053)
---
 .../apache/hop/core/hash/ByteArrayHashIndex.java   | 86 +++++++++++-----------
 .../hop/core/hash/ByteArrayHashIndexTest.java      |  3 +
 2 files changed, 46 insertions(+), 43 deletions(-)

diff --git a/core/src/main/java/org/apache/hop/core/hash/ByteArrayHashIndex.java b/core/src/main/java/org/apache/hop/core/hash/ByteArrayHashIndex.java
index 5bb6c70a49..a8811043f0 100644
--- a/core/src/main/java/org/apache/hop/core/hash/ByteArrayHashIndex.java
+++ b/core/src/main/java/org/apache/hop/core/hash/ByteArrayHashIndex.java
@@ -28,7 +28,7 @@ public class ByteArrayHashIndex {
 
   private IRowMeta keyRowMeta;
   private ByteArrayHashIndexEntry[] index;
-  private int size;
+  private int count;
   private int resizeThresHold;
 
   /**
@@ -45,7 +45,7 @@ public class ByteArrayHashIndex {
       factor2Size <<= 1; // Multiply by 2
     }
 
-    this.size = factor2Size;
+    this.count = 0;
     this.resizeThresHold = (int) (factor2Size * STANDARD_LOAD_FACTOR);
 
     index = new ByteArrayHashIndexEntry[factor2Size];
@@ -56,11 +56,15 @@ public class ByteArrayHashIndex {
   }
 
   public int getSize() {
-    return size;
+    return index.length;
+  }
+
+  public int getCount() {
+    return count;
   }
 
   public boolean isEmpty() {
-    return size == 0;
+    return count == 0;
   }
 
   public byte[] get(byte[] key) throws HopValueException {
@@ -70,7 +74,7 @@ public class ByteArrayHashIndex {
     ByteArrayHashIndexEntry check = index[indexPointer];
 
     while (check != null) {
-      if (check.hashCode == hashCode && check.equalsKey(key)) {
+      if (check.hashCode == hashCode && equalsByteArray(check.key, key)) {
         return check.value;
       }
       check = check.nextEntry;
@@ -78,41 +82,54 @@ public class ByteArrayHashIndex {
     return null;
   }
 
+  public static final boolean equalsByteArray(byte[] value, byte[] cmpValue) {
+    if (value.length != cmpValue.length) {
+      return false;
+    }
+    for (int i = value.length - 1; i >= 0; i--) {
+      if (value[i] != cmpValue[i]) {
+        return false;
+      }
+    }
+    return true;
+  }
+
   public void put(byte[] key, byte[] value) throws HopValueException {
     int hashCode = generateHashCode(key, keyRowMeta);
     int indexPointer = hashCode & (index.length - 1);
 
-    // First see if there is an entry on that pointer...
+    // If home is empty, place entry there and done
     //
-    boolean searchEmptySpot = false;
-
     ByteArrayHashIndexEntry check = index[indexPointer];
-    ByteArrayHashIndexEntry previousCheck = null;
-
-    while (check != null) {
-      searchEmptySpot = true;
+    if (check == null) {
+      index[indexPointer] = new ByteArrayHashIndexEntry(hashCode, key, value, index[indexPointer]);
+      return;
+    }
 
+    ByteArrayHashIndexEntry previousCheck = null;
+    do {
       // If there is an identical entry in there, we replace the value.
       // And then we just return...
       //
-      if (check.hashCode == hashCode && check.equalsKey(key)) {
+      if (check.hashCode == hashCode && equalsByteArray(check.key, key)) {
         check.value = value;
         return;
       }
       previousCheck = check;
       check = check.nextEntry;
-    }
+    } while (check != null);
 
     // If we are still here, that means that we are ready to put the value down...
     // Where do we need to search for an empty spot in the index?
     //
-    while (searchEmptySpot) {
+    int len = index.length;
+    while (true) {
       indexPointer++;
-      if (indexPointer >= size) {
+      if (indexPointer >= len) {
         indexPointer = 0;
       }
       if (index[indexPointer] == null) {
-        searchEmptySpot = false;
+        break;
       }
     }
 
@@ -134,11 +151,11 @@ public class ByteArrayHashIndex {
   private final void resize() {
     // Increase the size of the index...
     //
-    size++;
+    count++;
 
     // See if we've reached our resize threshold...
     //
-    if (size >= resizeThresHold) {
+    if (count >= resizeThresHold) {
 
       ByteArrayHashIndexEntry[] oldIndex = index;
 
@@ -148,6 +165,7 @@ public class ByteArrayHashIndex {
       int newSize = 2 * index.length;
 
       ByteArrayHashIndexEntry[] newIndex = new ByteArrayHashIndexEntry[newSize];
+      int mask = newSize - 1;
 
       // Loop over the old index and re-distribute the entries
       // We want to make sure that the calculation
@@ -156,6 +174,7 @@ public class ByteArrayHashIndex {
       //
       for (int i = 0; i < oldIndex.length; i++) {
         ByteArrayHashIndexEntry entry = oldIndex[i];
+
         if (entry != null) {
           oldIndex[i] = null;
           entry.nextEntry = null; // we assume there is plenty of room in the new index...
@@ -163,7 +182,7 @@ public class ByteArrayHashIndex {
           // Make sure we follow all the linked entries...
           // TODO This is a lot of extra work, see how we can avoid it!
           //
-          int newIndexPointer = entry.hashCode & (newSize - 1);
+          int newIndexPointer = entry.hashCode & mask;
 
           // Make sure on this new index pointer, we have room to put the entry
           //
@@ -176,10 +195,11 @@ public class ByteArrayHashIndex {
             // No, we need to look for a nice spot to put the hash entry...
             //
             ByteArrayHashIndexEntry previousCheck = null;
-            while (check != null) {
+            do {
               previousCheck = check;
               check = check.nextEntry;
-            }
+            } while (check != null);
+
             while (newIndex[newIndexPointer] != null) {
               newIndexPointer++;
               if (newIndexPointer >= newSize) {
@@ -231,10 +251,6 @@ public class ByteArrayHashIndex {
       this.nextEntry = nextEntry;
     }
 
-    public boolean equalsKey(byte[] cmpKey) {
-      return equalsByteArray(key, cmpKey);
-    }
-
     /**
      * The row is the same if the value is the same The data types are the same so no error is made
      * here.
@@ -243,23 +259,7 @@ public class ByteArrayHashIndex {
     public boolean equals(Object obj) {
       ByteArrayHashIndexEntry e = (ByteArrayHashIndexEntry) obj;
 
-      return equalsValue(e.value);
-    }
-
-    public boolean equalsValue(byte[] cmpValue) {
-      return equalsByteArray(value, cmpValue);
-    }
-
-    public static final boolean equalsByteArray(byte[] value, byte[] cmpValue) {
-      if (value.length != cmpValue.length) {
-        return false;
-      }
-      for (int i = value.length - 1; i >= 0; i--) {
-        if (value[i] != cmpValue[i]) {
-          return false;
-        }
-      }
-      return true;
+      return equalsByteArray(e.value, value);
     }
   }
 }
diff --git a/core/src/test/java/org/apache/hop/core/hash/ByteArrayHashIndexTest.java b/core/src/test/java/org/apache/hop/core/hash/ByteArrayHashIndexTest.java
index 71f00f98cc..90d65ad353 100644
--- a/core/src/test/java/org/apache/hop/core/hash/ByteArrayHashIndexTest.java
+++ b/core/src/test/java/org/apache/hop/core/hash/ByteArrayHashIndexTest.java
@@ -44,6 +44,9 @@ public class ByteArrayHashIndexTest {
 
     obj = new ByteArrayHashIndex(new RowMeta(), 99);
     assertEquals(128, obj.getSize());
+
+    obj = new ByteArrayHashIndex(new RowMeta(), 9);
+    assertEquals(0, obj.getCount());
   }
 
   @Test

Reply via email to