Github user aweisberg commented on a diff in the pull request: https://github.com/apache/cassandra/pull/191#discussion_r168304560 --- Diff: src/java/org/apache/cassandra/net/MessagingService.java --- @@ -1664,4 +1676,113 @@ public static boolean isEncryptedConnection(InetAddressAndPort address) } return true; } + + public void blockForPeers() + { + // TODO make these yaml props? + int alivePercent = Integer.getInteger(Config.PROPERTY_PREFIX + "blockForPeers.percent", 70); + if (alivePercent < 0) + alivePercent = 0; + else if (alivePercent > 100) + alivePercent = 100; + + int aliveTimeoutSecs = Integer.getInteger(Config.PROPERTY_PREFIX + "blockForPeers.timeout_in_secs", 10); + if (aliveTimeoutSecs < 0) + aliveTimeoutSecs = 1; + else if (aliveTimeoutSecs > 100) + aliveTimeoutSecs = 100; + + if (alivePercent > 0) + blockForPeers(alivePercent, aliveTimeoutSecs); + } + + private void blockForPeers(int targetAlivePercent, int aliveTimeoutSecs) + { + // grab a snapshot of the current cluster from Gossiper. this is completely prone to race conditions, but it's + // good enough for the purposes of blocking until some certain percentage of nodes are considered 'alive'/connected. 
+ Set<Map.Entry<InetAddressAndPort, EndpointState>> peers = new HashSet<>(Gossiper.instance.getEndpointStates()); + + // remove current node from the set + peers = peers.stream() + .filter(entry -> !entry.getKey().equals(FBUtilities.getBroadcastAddressAndPort())) + .collect(Collectors.toSet()); + + final int totalSize = peers.size(); + + // don't block if there's no other nodes in the cluster (or we don't know about them) + if (totalSize <= 1) + return; + + logger.info("choosing to block until {}% of peers are marked alive; max time to wait = {} seconds", targetAlivePercent, aliveTimeoutSecs); + + // first, send out a ping message to open up the non-gossip connections + AtomicInteger connectedCount = sendPingMessages(peers); --- End diff -- Do the connections consume resources if there are no queued messages? Are there any buffers allocated? The resources consumed should be minimal, and even then, if we can theoretically provision these resources, we will get more predictable performance and behavior by provisioning them all up front instead of lazily, where we find out at some arbitrary later time that we didn't have enough. Maybe we are trying to hack this process too much by keeping the transport system unaware of what we are attempting; if we made it possible to say "hey, connect on all things and send this small message on each", it would still be simple. In fact, do we even need to send a message, or is just instructing the transport system to open the connections enough?
--- --------------------------------------------------------------------- To unsubscribe, e-mail: pr-unsubscr...@cassandra.apache.org For additional commands, e-mail: pr-h...@cassandra.apache.org