Github user aweisberg commented on a diff in the pull request:
https://github.com/apache/cassandra/pull/283#discussion_r225327904
--- Diff: src/java/org/apache/cassandra/locator/DynamicEndpointSnitch.java
---
@@ -22,121 +22,194 @@
import java.net.InetAddress;
import java.net.UnknownHostException;
import java.util.*;
-import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ScheduledFuture;
import java.util.concurrent.TimeUnit;
import java.util.stream.Collectors;
-import com.codahale.metrics.ExponentiallyDecayingReservoir;
import javax.management.MBeanServer;
import javax.management.ObjectName;
-import com.codahale.metrics.Snapshot;
+import com.google.common.annotations.VisibleForTesting;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
import org.apache.cassandra.concurrent.ScheduledExecutors;
import org.apache.cassandra.config.DatabaseDescriptor;
import org.apache.cassandra.gms.ApplicationState;
import org.apache.cassandra.gms.EndpointState;
import org.apache.cassandra.gms.Gossiper;
import org.apache.cassandra.gms.VersionedValue;
+import org.apache.cassandra.net.IAsyncCallback;
+import org.apache.cassandra.net.LatencyMeasurementType;
+import org.apache.cassandra.net.MessageIn;
+import org.apache.cassandra.net.MessageOut;
import org.apache.cassandra.net.MessagingService;
+import org.apache.cassandra.net.PingMessage;
import org.apache.cassandra.service.StorageService;
import org.apache.cassandra.utils.FBUtilities;
+import static org.apache.cassandra.net.MessagingService.Verb.PING;
+import static
org.apache.cassandra.net.async.OutboundConnectionIdentifier.ConnectionType.LARGE_MESSAGE;
+import static
org.apache.cassandra.net.async.OutboundConnectionIdentifier.ConnectionType.SMALL_MESSAGE;
+
+
/**
* A dynamic snitch that sorts endpoints by latency with an adapted phi
failure detector
+ * Note that the subclasses (e.g. {@link DynamicEndpointSnitchHistogram})
are responsible for actually measuring
+ * latency and populating the {@link #scores} map.
*/
-public class DynamicEndpointSnitch extends AbstractEndpointSnitch
implements ILatencySubscriber, DynamicEndpointSnitchMBean
+public abstract class DynamicEndpointSnitch extends AbstractEndpointSnitch
implements ILatencySubscriber, DynamicEndpointSnitchMBean
{
- private static final boolean USE_SEVERITY =
!Boolean.getBoolean("cassandra.ignore_dynamic_snitch_severity");
-
- private static final double ALPHA = 0.75; // set to 0.75 to make EDS
more biased to towards the newer values
- private static final int WINDOW_SIZE = 100;
-
- private volatile int dynamicUpdateInterval =
DatabaseDescriptor.getDynamicUpdateInterval();
- private volatile int dynamicResetInterval =
DatabaseDescriptor.getDynamicResetInterval();
- private volatile double dynamicBadnessThreshold =
DatabaseDescriptor.getDynamicBadnessThreshold();
+ private static final Logger logger =
LoggerFactory.getLogger(DynamicEndpointSnitch.class);
+
+ // Subclass specific functionality
+ protected static final boolean USE_SEVERITY =
!Boolean.getBoolean("cassandra.ignore_dynamic_snitch_severity");
+ protected boolean registered = false;
+ // The scores map is updated via copy in updateScores
+ // We keep it in the base class for performance reasons (so it can be
easily aliased)
+ protected volatile Map<InetAddressAndPort, Double> scores = new
HashMap<>();
+
+ // Rate limit how often we generate latency probes
+ protected long nextAllowedProbeGenerationTime;
--- End diff --
As we discussed, I don't think we really need this. We can back off probing
nodes we aren't talking to very frequently, to get back to the "only check a
node every 10 minutes" behavior. That, coupled with not probing nodes we aren't
even considering for queries, should get the messaging down.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]