This is an automated email from the ASF dual-hosted git repository.

xyuanlu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/helix.git


The following commit(s) were added to refs/heads/master by this push:
     new 4738f6dd2 [apache/helix] -- Fixes #2638, Improve Hard Constraint 
Failure Debuggability by adding details in the error message (#2639)
4738f6dd2 is described below

commit 4738f6dd225a8b8609c7cf479c695a050e7b127e
Author: Himanshu Kandwal <[email protected]>
AuthorDate: Fri Oct 20 00:00:50 2023 -0700

    [apache/helix] -- Fixes #2638, Improve Hard Constraint Failure 
Debuggability by adding details in the error message (#2639)
    
    n this PR, we are logging the specific details of the hard constraint 
validation failures, that improvs the issue debuggability.
---
 .../constraints/FaultZoneAwareConstraint.java      | 19 +++++--
 .../waged/constraints/NodeCapacityConstraint.java  |  8 ++-
 .../NodeMaxPartitionLimitConstraint.java           | 27 ++++++++--
 .../constraints/ReplicaActivateConstraint.java     | 16 ++++--
 .../SamePartitionOnInstanceConstraint.java         | 15 +++++-
 .../waged/constraints/ValidGroupTagConstraint.java | 12 ++++-
 .../constraints/TestConstraintBasedAlgorithm.java  | 29 ++++++++++-
 .../constraints/TestFaultZoneAwareConstraint.java  |  5 +-
 ...int.java => TestReplicaActivateConstraint.java} | 59 ++++++++++------------
 9 files changed, 138 insertions(+), 52 deletions(-)

diff --git 
a/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/FaultZoneAwareConstraint.java
 
b/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/FaultZoneAwareConstraint.java
index cf3e32241..e0992b5d4 100644
--- 
a/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/FaultZoneAwareConstraint.java
+++ 
b/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/FaultZoneAwareConstraint.java
@@ -19,21 +19,34 @@ package 
org.apache.helix.controller.rebalancer.waged.constraints;
  * under the License.
  */
 
+import java.util.Set;
 import org.apache.helix.controller.rebalancer.waged.model.AssignableNode;
 import org.apache.helix.controller.rebalancer.waged.model.AssignableReplica;
 import org.apache.helix.controller.rebalancer.waged.model.ClusterContext;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
 
 class FaultZoneAwareConstraint extends HardConstraint {
 
+  private static final Logger LOG = 
LoggerFactory.getLogger(FaultZoneAwareConstraint.class);
+
   @Override
   boolean isAssignmentValid(AssignableNode node, AssignableReplica replica,
       ClusterContext clusterContext) {
     if (!node.hasFaultZone()) {
       return true;
     }
-    return !clusterContext
-        .getPartitionsForResourceAndFaultZone(replica.getResourceName(), 
node.getFaultZone())
-        .contains(replica.getPartitionName());
+
+    Set<String> partitionsForResourceAndFaultZone =
+        
clusterContext.getPartitionsForResourceAndFaultZone(replica.getResourceName(), 
node.getFaultZone());
+
+    if 
(partitionsForResourceAndFaultZone.contains(replica.getPartitionName())) {
+      LOG.debug("A fault zone cannot contain more than 1 replica of same 
partition. Found replica for partition: {}",
+          replica.getPartitionName());
+      return false;
+    }
+    return true;
   }
 
   @Override
diff --git 
a/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/NodeCapacityConstraint.java
 
b/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/NodeCapacityConstraint.java
index f18c2ffa5..056d7f842 100644
--- 
a/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/NodeCapacityConstraint.java
+++ 
b/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/NodeCapacityConstraint.java
@@ -20,13 +20,17 @@ package 
org.apache.helix.controller.rebalancer.waged.constraints;
  */
 
 import java.util.Map;
-
 import org.apache.helix.controller.rebalancer.waged.model.AssignableNode;
 import org.apache.helix.controller.rebalancer.waged.model.AssignableReplica;
 import org.apache.helix.controller.rebalancer.waged.model.ClusterContext;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
 
 class NodeCapacityConstraint extends HardConstraint {
 
+  private static final Logger LOG = 
LoggerFactory.getLogger(NodeCapacityConstraint.class);
+
   @Override
   boolean isAssignmentValid(AssignableNode node, AssignableReplica replica,
       ClusterContext clusterContext) {
@@ -36,6 +40,8 @@ class NodeCapacityConstraint extends HardConstraint {
     for (String key : replicaCapacity.keySet()) {
       if (nodeCapacity.containsKey(key)) {
         if (nodeCapacity.get(key) < replicaCapacity.get(key)) {
+          LOG.debug("Node has insufficient capacity for: {}. Left available: 
{}, Required: {}",
+                  key, nodeCapacity.get(key), replicaCapacity.get(key));
           return false;
         }
       }
diff --git 
a/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/NodeMaxPartitionLimitConstraint.java
 
b/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/NodeMaxPartitionLimitConstraint.java
index a5e29e926..deedf4aa5 100644
--- 
a/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/NodeMaxPartitionLimitConstraint.java
+++ 
b/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/NodeMaxPartitionLimitConstraint.java
@@ -22,18 +22,37 @@ package 
org.apache.helix.controller.rebalancer.waged.constraints;
 import org.apache.helix.controller.rebalancer.waged.model.AssignableNode;
 import org.apache.helix.controller.rebalancer.waged.model.AssignableReplica;
 import org.apache.helix.controller.rebalancer.waged.model.ClusterContext;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
 
 class NodeMaxPartitionLimitConstraint extends HardConstraint {
 
+  private static final Logger LOG = 
LoggerFactory.getLogger(NodeMaxPartitionLimitConstraint.class);
+
   @Override
   boolean isAssignmentValid(AssignableNode node, AssignableReplica replica,
       ClusterContext clusterContext) {
     boolean exceedMaxPartitionLimit =
         node.getMaxPartition() < 0 || node.getAssignedReplicaCount() < 
node.getMaxPartition();
-    boolean exceedResourceMaxPartitionLimit = 
replica.getResourceMaxPartitionsPerInstance() < 0
-        || 
node.getAssignedPartitionsByResource(replica.getResourceName()).size() < replica
-        .getResourceMaxPartitionsPerInstance();
-    return exceedMaxPartitionLimit && exceedResourceMaxPartitionLimit;
+
+    if (!exceedMaxPartitionLimit) {
+      LOG.debug("Cannot exceed the max number of partitions ({}) limitation on 
node. Assigned replica count: {}",
+          node.getMaxPartition(), node.getAssignedReplicaCount());
+      return false;
+    }
+
+    int resourceMaxPartitionsPerInstance = 
replica.getResourceMaxPartitionsPerInstance();
+    int assignedPartitionsByResourceSize = 
node.getAssignedPartitionsByResource(replica.getResourceName()).size();
+    boolean exceedResourceMaxPartitionLimit = resourceMaxPartitionsPerInstance 
< 0
+        || assignedPartitionsByResourceSize < resourceMaxPartitionsPerInstance;
+
+    if (!exceedResourceMaxPartitionLimit) {
+      LOG.debug("Cannot exceed the max number of partitions per resource ({}) 
limitation on node. Assigned replica count: {}",
+          resourceMaxPartitionsPerInstance, assignedPartitionsByResourceSize);
+      return false;
+    }
+    return true;
   }
 
   @Override
diff --git 
a/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/ReplicaActivateConstraint.java
 
b/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/ReplicaActivateConstraint.java
index e06fe5922..13e380a0a 100644
--- 
a/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/ReplicaActivateConstraint.java
+++ 
b/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/ReplicaActivateConstraint.java
@@ -24,14 +24,24 @@ import java.util.List;
 import org.apache.helix.controller.rebalancer.waged.model.AssignableNode;
 import org.apache.helix.controller.rebalancer.waged.model.AssignableReplica;
 import org.apache.helix.controller.rebalancer.waged.model.ClusterContext;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
 
 class ReplicaActivateConstraint extends HardConstraint {
+
+  private static final Logger LOG = 
LoggerFactory.getLogger(ReplicaActivateConstraint.class);
+
   @Override
   boolean isAssignmentValid(AssignableNode node, AssignableReplica replica,
       ClusterContext clusterContext) {
-    List<String> disabledPartitions =
-        node.getDisabledPartitionsMap().get(replica.getResourceName());
-    return disabledPartitions == null || 
!disabledPartitions.contains(replica.getPartitionName());
+    List<String> disabledPartitions = 
node.getDisabledPartitionsMap().get(replica.getResourceName());
+
+    if (disabledPartitions != null && 
disabledPartitions.contains(replica.getPartitionName())) {
+      LOG.debug("Cannot assign the inactive replica: {}", 
replica.getPartitionName());
+      return false;
+    }
+    return true;
   }
 
   @Override
diff --git 
a/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/SamePartitionOnInstanceConstraint.java
 
b/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/SamePartitionOnInstanceConstraint.java
index 8f6b523b4..e6de9989c 100644
--- 
a/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/SamePartitionOnInstanceConstraint.java
+++ 
b/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/SamePartitionOnInstanceConstraint.java
@@ -19,17 +19,28 @@ package 
org.apache.helix.controller.rebalancer.waged.constraints;
  * under the License.
  */
 
+import java.util.Set;
 import org.apache.helix.controller.rebalancer.waged.model.AssignableNode;
 import org.apache.helix.controller.rebalancer.waged.model.AssignableReplica;
 import org.apache.helix.controller.rebalancer.waged.model.ClusterContext;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
 
 class SamePartitionOnInstanceConstraint extends HardConstraint {
 
+  private static final Logger LOG = 
LoggerFactory.getLogger(SamePartitionOnInstanceConstraint.class);
+
   @Override
   boolean isAssignmentValid(AssignableNode node, AssignableReplica replica,
       ClusterContext clusterContext) {
-    return !node.getAssignedPartitionsByResource(replica.getResourceName())
-        .contains(replica.getPartitionName());
+    Set<String> assignedPartitionsByResource = 
node.getAssignedPartitionsByResource(replica.getResourceName());
+
+    if (assignedPartitionsByResource.contains(replica.getPartitionName())) {
+      LOG.debug("Same partition ({}) of different states cannot co-exist in 
one instance", replica.getPartitionName());
+      return false;
+    }
+    return true;
   }
 
   @Override
diff --git 
a/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/ValidGroupTagConstraint.java
 
b/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/ValidGroupTagConstraint.java
index 2fd3743f3..b1e257c87 100644
--- 
a/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/ValidGroupTagConstraint.java
+++ 
b/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/ValidGroupTagConstraint.java
@@ -22,8 +22,14 @@ package 
org.apache.helix.controller.rebalancer.waged.constraints;
 import org.apache.helix.controller.rebalancer.waged.model.AssignableNode;
 import org.apache.helix.controller.rebalancer.waged.model.AssignableReplica;
 import org.apache.helix.controller.rebalancer.waged.model.ClusterContext;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
 
 class ValidGroupTagConstraint extends HardConstraint {
+
+  private static final Logger LOG = 
LoggerFactory.getLogger(SamePartitionOnInstanceConstraint.class);
+
   @Override
   boolean isAssignmentValid(AssignableNode node, AssignableReplica replica,
       ClusterContext clusterContext) {
@@ -31,7 +37,11 @@ class ValidGroupTagConstraint extends HardConstraint {
       return true;
     }
 
-    return 
node.getInstanceTags().contains(replica.getResourceInstanceGroupTag());
+    if 
(!node.getInstanceTags().contains(replica.getResourceInstanceGroupTag())) {
+      LOG.debug("Instance doesn't have the tag of the replica ({})", 
replica.getResourceInstanceGroupTag());
+      return false;
+    }
+    return true;
   }
 
   @Override
diff --git 
a/helix-core/src/test/java/org/apache/helix/controller/rebalancer/waged/constraints/TestConstraintBasedAlgorithm.java
 
b/helix-core/src/test/java/org/apache/helix/controller/rebalancer/waged/constraints/TestConstraintBasedAlgorithm.java
index 3ba92ef57..0a75002f1 100644
--- 
a/helix-core/src/test/java/org/apache/helix/controller/rebalancer/waged/constraints/TestConstraintBasedAlgorithm.java
+++ 
b/helix-core/src/test/java/org/apache/helix/controller/rebalancer/waged/constraints/TestConstraintBasedAlgorithm.java
@@ -19,11 +19,14 @@ package 
org.apache.helix.controller.rebalancer.waged.constraints;
  * under the License.
  */
 
-import java.io.IOException;
-
 import com.google.common.collect.ImmutableList;
 import com.google.common.collect.ImmutableMap;
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Set;
 import org.apache.helix.HelixRebalanceException;
+import org.apache.helix.controller.rebalancer.waged.model.AssignableReplica;
 import org.apache.helix.controller.rebalancer.waged.model.ClusterModel;
 import 
org.apache.helix.controller.rebalancer.waged.model.ClusterModelTestHelper;
 import org.apache.helix.controller.rebalancer.waged.model.OptimalAssignment;
@@ -118,4 +121,26 @@ public class TestConstraintBasedAlgorithm {
           "The cluster does not have enough item1 capacity for all partitions. 
 Failure Type: FAILED_TO_CALCULATE");
     }
   }
+
+  @Test
+  public void testCalculateWithInvalidAssignmentForNodeCapacity() throws 
IOException {
+    HardConstraint nodeCapacityConstraint = new NodeCapacityConstraint();
+    SoftConstraint soft1 = new MaxCapacityUsageInstanceConstraint();
+    SoftConstraint soft2 = new InstancePartitionsCountConstraint();
+    ConstraintBasedAlgorithm algorithm =
+        new ConstraintBasedAlgorithm(ImmutableList.of(nodeCapacityConstraint),
+            ImmutableMap.of(soft1, 1f, soft2, 1f));
+    ClusterModel clusterModel = new 
ClusterModelTestHelper().getMultiNodeClusterModel();
+    // increase the ask capacity of item 3, which will trigger the capacity 
constraint to fail.
+    Map<String, Set<AssignableReplica>> assignableReplicaMap = new 
HashMap<>(clusterModel.getAssignableReplicaMap());
+    Set<AssignableReplica> resourceAssignableReplicas = 
assignableReplicaMap.get("Resource3");
+    AssignableReplica replica = resourceAssignableReplicas.iterator().next();
+    replica.getCapacity().put("item3", 40); // available: 30, requested: 40.
+
+    try {
+      algorithm.calculate(clusterModel);
+    } catch (HelixRebalanceException ex) {
+      Assert.assertEquals(ex.getFailureType(), 
HelixRebalanceException.Type.FAILED_TO_CALCULATE);
+    }
+  }
 }
diff --git 
a/helix-core/src/test/java/org/apache/helix/controller/rebalancer/waged/constraints/TestFaultZoneAwareConstraint.java
 
b/helix-core/src/test/java/org/apache/helix/controller/rebalancer/waged/constraints/TestFaultZoneAwareConstraint.java
index f5b34e3d6..2de5af93a 100644
--- 
a/helix-core/src/test/java/org/apache/helix/controller/rebalancer/waged/constraints/TestFaultZoneAwareConstraint.java
+++ 
b/helix-core/src/test/java/org/apache/helix/controller/rebalancer/waged/constraints/TestFaultZoneAwareConstraint.java
@@ -19,8 +19,7 @@ package 
org.apache.helix.controller.rebalancer.waged.constraints;
  * under the License.
  */
 
-import static org.mockito.Mockito.when;
-
+import com.google.common.collect.ImmutableSet;
 import java.util.Collections;
 
 import org.apache.helix.controller.rebalancer.waged.model.AssignableNode;
@@ -31,7 +30,7 @@ import org.testng.Assert;
 import org.testng.annotations.BeforeMethod;
 import org.testng.annotations.Test;
 
-import com.google.common.collect.ImmutableSet;
+import static org.mockito.Mockito.*;
 
 public class TestFaultZoneAwareConstraint {
   private static final String TEST_PARTITION = "testPartition";
diff --git 
a/helix-core/src/test/java/org/apache/helix/controller/rebalancer/waged/constraints/TestFaultZoneAwareConstraint.java
 
b/helix-core/src/test/java/org/apache/helix/controller/rebalancer/waged/constraints/TestReplicaActivateConstraint.java
similarity index 56%
copy from 
helix-core/src/test/java/org/apache/helix/controller/rebalancer/waged/constraints/TestFaultZoneAwareConstraint.java
copy to 
helix-core/src/test/java/org/apache/helix/controller/rebalancer/waged/constraints/TestReplicaActivateConstraint.java
index f5b34e3d6..359398280 100644
--- 
a/helix-core/src/test/java/org/apache/helix/controller/rebalancer/waged/constraints/TestFaultZoneAwareConstraint.java
+++ 
b/helix-core/src/test/java/org/apache/helix/controller/rebalancer/waged/constraints/TestReplicaActivateConstraint.java
@@ -19,61 +19,54 @@ package 
org.apache.helix.controller.rebalancer.waged.constraints;
  * under the License.
  */
 
-import static org.mockito.Mockito.when;
-
+import java.util.ArrayList;
 import java.util.Collections;
-
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
 import org.apache.helix.controller.rebalancer.waged.model.AssignableNode;
 import org.apache.helix.controller.rebalancer.waged.model.AssignableReplica;
 import org.apache.helix.controller.rebalancer.waged.model.ClusterContext;
 import org.mockito.Mockito;
 import org.testng.Assert;
-import org.testng.annotations.BeforeMethod;
 import org.testng.annotations.Test;
 
-import com.google.common.collect.ImmutableSet;
+import static org.mockito.Mockito.*;
+
+
+public class TestReplicaActivateConstraint {
 
-public class TestFaultZoneAwareConstraint {
-  private static final String TEST_PARTITION = "testPartition";
-  private static final String TEST_ZONE = "testZone";
   private static final String TEST_RESOURCE = "testResource";
+  private static final String TEST_PARTITION = "testPartition";
+
   private final AssignableReplica _testReplica = 
Mockito.mock(AssignableReplica.class);
   private final AssignableNode _testNode = Mockito.mock(AssignableNode.class);
   private final ClusterContext _clusterContext = 
Mockito.mock(ClusterContext.class);
 
-  private final HardConstraint _faultZoneAwareConstraint = new 
FaultZoneAwareConstraint();
+  private final HardConstraint _faultZoneAwareConstraint = new 
ReplicaActivateConstraint();
+
+  @Test
+  public void validWhenEmptyDisabledReplicaMap() {
+    Map<String, List<String>> disabledReplicaMap = new HashMap<>();
+    disabledReplicaMap.put(TEST_RESOURCE, new ArrayList<>());
 
-  @BeforeMethod
-  public void init() {
     when(_testReplica.getResourceName()).thenReturn(TEST_RESOURCE);
     when(_testReplica.getPartitionName()).thenReturn(TEST_PARTITION);
-    when(_testNode.getFaultZone()).thenReturn(TEST_ZONE);
-  }
-
-  @Test
-  public void inValidWhenFaultZoneAlreadyAssigned() {
-    when(_testNode.hasFaultZone()).thenReturn(true);
-    when(_clusterContext.getPartitionsForResourceAndFaultZone(TEST_RESOURCE, 
TEST_ZONE)).thenReturn(
-            ImmutableSet.of(TEST_PARTITION));
+    when(_testNode.getDisabledPartitionsMap()).thenReturn(disabledReplicaMap);
 
-    Assert.assertFalse(
-        _faultZoneAwareConstraint.isAssignmentValid(_testNode, _testReplica, 
_clusterContext));
+    Assert.assertTrue(_faultZoneAwareConstraint.isAssignmentValid(_testNode, 
_testReplica, _clusterContext));
   }
 
   @Test
-  public void validWhenEmptyAssignment() {
-    when(_testNode.hasFaultZone()).thenReturn(true);
-    when(_clusterContext.getPartitionsForResourceAndFaultZone(TEST_RESOURCE, 
TEST_ZONE)).thenReturn(Collections.emptySet());
-
-    Assert.assertTrue(
-        _faultZoneAwareConstraint.isAssignmentValid(_testNode, _testReplica, 
_clusterContext));
-  }
+  public void invalidWhenPartitionIsDisabled() {
+    Map<String, List<String>> disabledReplicaMap = new HashMap<>();
+    disabledReplicaMap.put(TEST_RESOURCE, 
Collections.singletonList(TEST_PARTITION));
 
-  @Test
-  public void validWhenNoFaultZone() {
-    when(_testNode.hasFaultZone()).thenReturn(false);
+    when(_testReplica.getResourceName()).thenReturn(TEST_RESOURCE);
+    when(_testReplica.getPartitionName()).thenReturn(TEST_PARTITION);
+    when(_testNode.getDisabledPartitionsMap()).thenReturn(disabledReplicaMap);
 
-    Assert.assertTrue(
-        _faultZoneAwareConstraint.isAssignmentValid(_testNode, _testReplica, 
_clusterContext));
+    Assert.assertFalse(_faultZoneAwareConstraint.isAssignmentValid(_testNode, 
_testReplica, _clusterContext));
   }
+
 }

Reply via email to