jiajunwang commented on a change in pull request #1037: URL: https://github.com/apache/helix/pull/1037#discussion_r434985817
########## File path: helix-core/src/main/java/org/apache/helix/controller/rebalancer/constraint/ExcessiveTopStateResolver.java ########## @@ -0,0 +1,120 @@ +package org.apache.helix.controller.rebalancer.constraint; + +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import org.apache.helix.api.rebalancer.constraint.AbnormalStateResolver; +import org.apache.helix.controller.stages.CurrentStateOutput; +import org.apache.helix.model.Partition; +import org.apache.helix.model.StateModelDefinition; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * The abnormal state resolver that gracefully fixes the abnormality of excessive top states for + * single-topstate state model. For example, two replicas of a MasterSlave partition are assigned + * with the Master state at the same time. This could be caused by a network partitioning or the + * other unexpected issues. + * + * The resolver checks for the abnormality and computes recovery assignment which triggers the + * rebalancer to eventually reset all the top state replicas for once. After the resets, only one + * replica will be assigned the top state. 
+ * + * Note that without using this resolver, the regular Helix rebalance pipeline also removes the + * excessive top state replicas. However, the default logic does not force resetting ALL the top + * state replicas. Since the multiple top states situation may break application data, the default + * resolution won't be enough to fix the potential problem. + */ +public class ExcessiveTopStateResolver implements AbnormalStateResolver { + private static final Logger LOG = LoggerFactory.getLogger(ExcessiveTopStateResolver.class); + + /** + * The current states are not valid if there are more than one top state replica for a single top + * state state model. + */ + @Override + public boolean checkCurrentStates(final CurrentStateOutput currentStateOutput, + final String resourceName, final Partition partition, StateModelDefinition stateModelDef) { + if (!stateModelDef.isSingleTopStateModel()) { Review comment: This is controlled by the cluster config, if you still remember my previous PR (https://github.com/apache/helix/pull/1028). The assumption is that in a certain cluster, one state model has only one resolver configured. ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: [email protected] --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
