mcvsubbu commented on a change in pull request #4553: Refactor
ControllerLeaderLocator
URL: https://github.com/apache/incubator-pinot/pull/4553#discussion_r324286616
##########
File path:
pinot-core/src/main/java/org/apache/pinot/server/realtime/ControllerLeaderLocator.java
##########
@@ -80,45 +84,111 @@ public static ControllerLeaderLocator getInstance() {
/**
* Locates the controller leader so that we can send LLC segment completion
requests to it.
- * Checks the {@link
ControllerLeaderLocator::_cachedControllerLeaderInvalid} flag and fetches the
leader from helix if cached value is invalid
+ * Checks the {@link
ControllerLeaderLocator::_cachedControllerLeaderInvalid} flag and fetches the
leaders to {@link ControllerLeaderLocator::_cachedControllerLeaderMap} from
helix if cached value is invalid
* @param rawTableName table name without type.
* @return The host-port pair of the current controller leader.
*/
public synchronized Pair<String, Integer> getControllerLeader(String
rawTableName) {
+ int partitionId = LeadControllerUtils.getPartitionIdForTable(rawTableName);
if (!_cachedControllerLeaderInvalid) {
- return _controllerLeaderHostPort;
+ return _cachedControllerLeaderMap.get(partitionId);
}
- Pair<String, Integer> leaderForTable = getLeaderForTable(rawTableName);
- if (leaderForTable == null) {
- LOGGER.warn("Failed to find a leader for Table: {}", rawTableName);
+ // No controller leader cached, fetches a fresh copy of external view and
then gets the leader for the given table.
+ boolean success = refreshControllerLeaderMap();
+ if (success) {
+ _cachedControllerLeaderInvalid = false;
+ LOGGER.info("Refreshed controller leader map successfully.");
+ return _cachedControllerLeaderMap.get(partitionId);
+ } else {
_cachedControllerLeaderInvalid = true;
+ LOGGER.warn("Failed to refresh controller leader map.");
return null;
- } else {
- _controllerLeaderHostPort = leaderForTable;
- _cachedControllerLeaderInvalid = false;
- LOGGER.info("Setting controller leader to be {}:{}",
_controllerLeaderHostPort.getFirst(),
- _controllerLeaderHostPort.getSecond());
- return _controllerLeaderHostPort;
}
}
/**
- * Firstly checks whether lead controller resource has been enabled or not.
- * If yes, use this as the leader for realtime segment completion once
partition leader exists.
- * Otherwise, try to use Helix leader.
- * @param rawTableName table name without type.
- * @return the controller leader id with hostname and port for this table,
e.g. localhost_9000
+ * Checks whether lead controller resource has been enabled or not.
+ * If yes, updates lead controller pairs from the external view of lead
controller resource.
+ * Otherwise, updates lead controller pairs from Helix cluster leader.
+ * @return true if refresh is completed successfully.
*/
- private Pair<String, Integer> getLeaderForTable(String rawTableName) {
+ private boolean refreshControllerLeaderMap() {
// Checks whether lead controller resource has been enabled or not.
if (isLeadControllerResourceEnabled()) {
- // Gets leader from lead controller resource.
- return getLeaderFromLeadControllerResource(rawTableName);
+ return refreshControllerLeaderMapFromLeadControllerResource();
} else {
- // Gets Helix leader to be the leader to this table, otherwise returns
null.
- return getHelixClusterLeader();
+ return refreshControllerLeaderMapFromHelixClusterLeader();
+ }
+ }
+
+ /**
+ * Updates lead controller pairs from the external view of lead controller
resource.
+ * @return true if refresh is completed successfully, false if no leader
found or exception thrown.
+ */
+ private boolean refreshControllerLeaderMapFromLeadControllerResource() {
+ try {
+ ExternalView leadControllerResourceExternalView =
_helixManager.getClusterManagmentTool()
+ .getResourceExternalView(_helixManager.getClusterName(),
CommonConstants.Helix.LEAD_CONTROLLER_RESOURCE_NAME);
+ if (leadControllerResourceExternalView == null) {
+ LOGGER.warn("External view of lead controller resource is null.");
+ return false;
+ }
+ Set<String> partitionNames =
leadControllerResourceExternalView.getPartitionSet();
+ if (partitionNames.isEmpty()) {
+ LOGGER.warn("The partition set of lead controller resource is empty.");
+ return false;
+ }
+ if (partitionNames.size() !=
CommonConstants.Helix.NUMBER_OF_PARTITIONS_IN_LEAD_CONTROLLER_RESOURCE) {
+ LOGGER.warn("The partition size of lead controller resource isn't {}.
Actual size: {}",
+
CommonConstants.Helix.NUMBER_OF_PARTITIONS_IN_LEAD_CONTROLLER_RESOURCE,
partitionNames.size());
+ return false;
+ }
+ for (String partitionName : partitionNames) {
+ int partitionId =
LeadControllerUtils.extractPartitionId(partitionName);
+ Map<String, String> partitionStateMap =
leadControllerResourceExternalView.getStateMap(partitionName);
+ boolean masterFound = false;
+ // Get master host from partition map. Return null if no master found.
+ for (Map.Entry<String, String> entry : partitionStateMap.entrySet()) {
+ if (MasterSlaveSMD.States.MASTER.name().equals(entry.getValue())) {
+ // Found the controller in master state.
+ // Converts participant id (with Prefix "Controller_") to
controller id and assigns it as the leader,
+ // since realtime segment completion protocol doesn't need the
prefix in controller instance id.
+ String participantInstanceId = entry.getKey();
+ String controllerInstanceId =
participantInstanceId.substring(participantInstanceId.indexOf('_') + 1);
+ Pair<String, Integer> leadControllerPair =
convertToHostAndPortPair(controllerInstanceId);
+ masterFound = true;
+ _cachedControllerLeaderMap.put(partitionId, leadControllerPair);
+ }
+ }
+ if (!masterFound) {
+ LOGGER.warn("There is no controller in MASTER state for partition:
{} in lead controller resource",
Review comment:
This condition should be exposed as a metric on the controller. It can be done
in a separate PR, but we need a controller metric that reports the number of
partitions that have no master in the external view. If this metric stays above
0 for some period of time, we should alert.
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
With regards,
Apache Git Services
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]