This is an automated email from the ASF dual-hosted git repository.

jackylee-ch pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/gluten.git


The following commit(s) were added to refs/heads/main by this push:
     new 54bac26b9c [GLUTEN-12350][CORE] Place ConsistentHash virtual nodes by 
Node.key() instead of toString() (#12351)
54bac26b9c is described below

commit 54bac26b9cdd535fff95df6590189d9da7576766
Author: YangJie <[email protected]>
AuthorDate: Fri Jun 26 16:48:12 2026 +0800

    [GLUTEN-12350][CORE] Place ConsistentHash virtual nodes by Node.key() 
instead of toString() (#12351)
---
 .../org/apache/gluten/hash/ConsistentHash.java     | 62 ++++++++++++++--------
 .../org/apache/gluten/hash/ConsistentHashTest.java | 48 +++++++++++++++++
 2 files changed, 88 insertions(+), 22 deletions(-)

diff --git 
a/gluten-core/src/main/java/org/apache/gluten/hash/ConsistentHash.java 
b/gluten-core/src/main/java/org/apache/gluten/hash/ConsistentHash.java
index e3edb34451..b432049645 100644
--- a/gluten-core/src/main/java/org/apache/gluten/hash/ConsistentHash.java
+++ b/gluten-core/src/main/java/org/apache/gluten/hash/ConsistentHash.java
@@ -199,28 +199,34 @@ public class ConsistentHash<T extends 
ConsistentHash.Node> {
   }
 
   private boolean add(T node) {
-    boolean added = false;
-    if (node != null && !nodes.containsKey(node)) {
-      Set<Partition<T>> partitions =
-          IntStream.range(0, replicate)
-              .mapToObj(idx -> new Partition<T>(node, idx))
-              .collect(Collectors.toSet());
-      nodes.put(node, partitions);
-
-      // allocate slot.
-      for (Partition<T> partition : partitions) {
-        long slot;
-        int seed = 0;
-        do {
-          slot = this.hasher.hash(partition.getPartitionKey(), seed++);
-        } while (ring.containsKey(slot));
-
-        partition.setSlot(slot);
-        ring.put(slot, partition);
-      }
-      added = true;
+    if (node == null) {
+      return false;
+    }
+    // Validate the key before any map lookup: nodes.containsKey() invokes the 
node's
+    // hashCode()/equals(), which may themselves dereference key() and NPE on 
a null key, masking
+    // the intended fail-fast here.
+    Preconditions.checkArgument(node.key() != null, "Node key must not be 
null: %s", node);
+    if (nodes.containsKey(node)) {
+      return false;
     }
-    return added;
+    Set<Partition<T>> partitions =
+        IntStream.range(0, replicate)
+            .mapToObj(idx -> new Partition<T>(node, idx))
+            .collect(Collectors.toSet());
+    nodes.put(node, partitions);
+
+    // allocate slot.
+    for (Partition<T> partition : partitions) {
+      long slot;
+      int seed = 0;
+      do {
+        slot = this.hasher.hash(partition.getPartitionKey(), seed++);
+      } while (ring.containsKey(slot));
+
+      partition.setSlot(slot);
+      ring.put(slot, partition);
+    }
+    return true;
   }
 
   public static class Partition<T extends ConsistentHash.Node> {
@@ -228,15 +234,21 @@ public class ConsistentHash<T extends 
ConsistentHash.Node> {
 
     private final int index;
 
+    // Hash the node by its logical key() so the ring is stable and 
reproducible across JVMs (avoid
+    // node.toString(), which may fall back to the identity hash). Computed 
once: the key is
+    // immutable and re-read on every collision-retry in add().
+    private final String partitionKey;
+
     private long slot;
 
     public Partition(T node, int index) {
       this.node = node;
       this.index = index;
+      this.partitionKey = node.key() + ":" + index;
     }
 
     public String getPartitionKey() {
-      return String.format("%s:%d", node, index);
+      return partitionKey;
     }
 
     public T getNode() {
@@ -254,6 +266,12 @@ public class ConsistentHash<T extends ConsistentHash.Node> 
{
 
   /** Base interface for the node in the ring. */
   public interface Node {
+    /**
+     * A stable, non-null identifier for this node. The ring hashes each 
virtual node by this key,
+     * so it MUST be deterministic across JVMs and process restarts (e.g. 
derived from a host or
+     * executor id) and consistent with {@link Object#equals}: two equal nodes 
must return the same
+     * key. Returning identity- or {@code toString()}-based values breaks ring 
stability.
+     */
     String key();
   }
 
diff --git 
a/gluten-core/src/test/java/org/apache/gluten/hash/ConsistentHashTest.java 
b/gluten-core/src/test/java/org/apache/gluten/hash/ConsistentHashTest.java
index 7b27594202..875fc46082 100644
--- a/gluten-core/src/test/java/org/apache/gluten/hash/ConsistentHashTest.java
+++ b/gluten-core/src/test/java/org/apache/gluten/hash/ConsistentHashTest.java
@@ -20,6 +20,8 @@ import org.junit.Assert;
 import org.junit.Before;
 import org.junit.Test;
 
+import java.util.ArrayList;
+import java.util.List;
 import java.util.Objects;
 import java.util.Set;
 import java.util.stream.Collectors;
@@ -115,6 +117,52 @@ public class ConsistentHashTest {
     }
   }
 
+  @Test
+  public void testRingHashesOnNodeKeyNotToString() {
+    // A node whose key() differs from toString(). The ring must hash 
partitions by key() so the
+    // distribution is stable/reproducible; relying on toString() would 
silently break any Node
+    // that overrides only the contractual key().
+    final List<String> hashedKeys = new ArrayList<>();
+    ConsistentHash.Hasher recordingHasher =
+        (key, seed) -> {
+          hashedKeys.add(key);
+          return key.hashCode() + seed;
+        };
+    ConsistentHash<ConsistentHash.Node> ring = new ConsistentHash<>(REPLICAS, 
recordingHasher);
+
+    ConsistentHash.Node node =
+        new ConsistentHash.Node() {
+          @Override
+          public String key() {
+            return "logical-key";
+          }
+
+          @Override
+          public String toString() {
+            return "DISPLAY-ONLY";
+          }
+        };
+    ring.addNode(node);
+
+    // Every partition key fed to the hasher must derive from key(), never 
from toString().
+    Assert.assertEquals(REPLICAS, hashedKeys.size());
+    Assert.assertTrue(hashedKeys.stream().allMatch(k -> 
k.startsWith("logical-key:")));
+    Assert.assertTrue(hashedKeys.stream().noneMatch(k -> 
k.contains("DISPLAY-ONLY")));
+  }
+
+  @Test(expected = IllegalArgumentException.class)
+  public void testAddNodeRejectsNullKey() {
+    // The ring hashes on key(); a null key would silently collapse distinct 
nodes onto identical
+    // hash inputs, so it must fail fast.
+    consistentHash.addNode(
+        new ConsistentHash.Node() {
+          @Override
+          public String key() {
+            return null;
+          }
+        });
+  }
+
   private static class HostNode implements ConsistentHash.Node {
     private final String host;
 


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to