atris commented on a change in pull request #96:
URL: https://github.com/apache/solr/pull/96#discussion_r639601638



##########
File path: 
solr/core/src/java/org/apache/solr/util/circuitbreaker/CircuitBreaker.java
##########
@@ -66,14 +66,19 @@ protected boolean isEnabled() {
     private final int memCBThreshold;
     private final boolean cpuCBEnabled;
     private final int cpuCBThreshold;
+    private final boolean loadAverageCBEnabled;
+    private final double loadAverageCBThreshold;
 
     public CircuitBreakerConfig(final boolean enabled, final boolean 
memCBEnabled, final int memCBThreshold,
-                                  final boolean cpuCBEnabled, final int 
cpuCBThreshold) {
+                                final boolean cpuCBEnabled, final int 
cpuCBThreshold,

Review comment:
       Rename to highlight the metric (like you did for loadAverageCB renaming)

##########
File path: 
solr/core/src/java/org/apache/solr/util/circuitbreaker/CPUCircuitBreaker.java
##########
@@ -110,6 +113,27 @@ public double getCpuUsageThreshold() {
   }
 
   protected double calculateLiveCPUUsage() {
-    return operatingSystemMXBean.getSystemLoadAverage();
+    Metric metric = this.core
+        .getCoreContainer()
+        .getMetricManager()
+        .registry("solr.jvm")
+        .getMetrics()
+        .get("os.systemCpuLoad");
+
+    if (metric == null) {
+        return -1.0;
+    }
+
+    if (metric instanceof Gauge) {
+      @SuppressWarnings({"rawtypes"})
+          Gauge gauge = (Gauge) metric;
+      // unwrap if needed
+      if (gauge instanceof SolrMetricManager.GaugeWrapper) {
+        gauge = ((SolrMetricManager.GaugeWrapper) gauge).getGauge();
+      }
+      return ((Double) gauge.getValue()).doubleValue();
+    }
+
+    return -1.0;                // Unable to unpack metric

Review comment:
       Please log a warning here -- we should not silently project that the 
system is not under load

##########
File path: solr/core/src/test/org/apache/solr/util/TestCircuitBreaker.java
##########
@@ -335,13 +393,24 @@ protected long calculateLiveMemoryUsage() {
   }
 
   private static class FakeCPUCircuitBreaker extends CPUCircuitBreaker {
-    public FakeCPUCircuitBreaker(CircuitBreakerConfig config) {
-      super(config);
+    public FakeCPUCircuitBreaker(CircuitBreakerConfig config, SolrCore core) {
+      super(config, core);
     }
 
     @Override
     protected double calculateLiveCPUUsage() {
       return 92; // Return a value large enough to trigger the circuit breaker
     }
   }
+
+  private static class FakeLoadAverageCircuitBreaker extends 
LoadAverageCircuitBreaker {
+    public FakeLoadAverageCircuitBreaker(CircuitBreakerConfig config) {

Review comment:
       Again, in this class not an exact copy of FakeCPUCircuitBreaker?

##########
File path: 
solr/core/src/java/org/apache/solr/util/circuitbreaker/CPUCircuitBreaker.java
##########
@@ -110,6 +113,27 @@ public double getCpuUsageThreshold() {
   }
 
   protected double calculateLiveCPUUsage() {
-    return operatingSystemMXBean.getSystemLoadAverage();
+    Metric metric = this.core
+        .getCoreContainer()
+        .getMetricManager()
+        .registry("solr.jvm")
+        .getMetrics()
+        .get("os.systemCpuLoad");

Review comment:
       I dont quite understand this change -- so you are replacing the default 
CPU circuit breaker with a new implementation and moving the earlier 
implementation to a new class?
   
   IMO, both the circuit breakers operate on CPU, using different metrics. Both 
circuit breakers should be named in a way which represents the fact that they 
operate on CPU and highlight which metric they depend on.

##########
File path: solr/core/src/test/org/apache/solr/util/TestCircuitBreaker.java
##########
@@ -199,7 +201,63 @@ public void testFakeCPUCircuitBreaker() {
       PluginInfo pluginInfo = 
h.getCore().getSolrConfig().getPluginInfo(CircuitBreakerManager.class.getName());
 
       CircuitBreaker.CircuitBreakerConfig circuitBreakerConfig = 
CircuitBreakerManager.buildCBConfig(pluginInfo);
-      CircuitBreaker circuitBreaker = new 
FakeCPUCircuitBreaker(circuitBreakerConfig);
+      CircuitBreaker circuitBreaker = new 
FakeCPUCircuitBreaker(circuitBreakerConfig, null);

Review comment:
       Please add a comment representing what the null stands for

##########
File path: 
solr/core/src/java/org/apache/solr/util/circuitbreaker/LoadAverageCircuitBreaker.java
##########
@@ -0,0 +1,115 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.solr.util.circuitbreaker;
+
+import java.lang.invoke.MethodHandles;
+import java.lang.management.ManagementFactory;
+import java.lang.management.OperatingSystemMXBean;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * <p>
+ * Tracks current system load average and triggers if the specified threshold 
is breached.
+ *
+ * This circuit breaker gets the load average (length of the run queue) over 
the last
+ * minute and uses that data to take a decision. We depend on 
OperatingSystemMXBean which does
+ * not allow a configurable interval of collection of data.
+ * //TODO: Use Codahale Meter to calculate the value locally.
+ * </p>
+ *
+ * <p>
+ * The configuration to define which mode to use and the trigger threshold are 
defined in
+ * solrconfig.xml
+ * </p>
+ */
+public class LoadAverageCircuitBreaker extends CircuitBreaker {
+  private static final Logger log = 
LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
+  private static final OperatingSystemMXBean operatingSystemMXBean = 
ManagementFactory.getOperatingSystemMXBean();
+
+  private final boolean enabled;
+  private final double loadAverageThreshold;
+
+  // Assumption -- the value of these parameters will be set correctly before 
invoking getDebugInfo()
+  private static final ThreadLocal<Double> seenLoadAverage = 
ThreadLocal.withInitial(() -> 0.0);
+
+  private static final ThreadLocal<Double> allowedLoadAverage = 
ThreadLocal.withInitial(() -> 0.0);
+
+  public LoadAverageCircuitBreaker(CircuitBreakerConfig config) {
+    super(config);
+
+    this.enabled = config.getLoadAverageCBEnabled();
+    this.loadAverageThreshold = config.getLoadAverageCBThreshold();
+  }
+
+  @Override
+  public boolean isTripped() {
+    if (!isEnabled()) {
+      return false;
+    }
+
+    if (!enabled) {
+      return false;
+    }
+
+    double localAllowedLoadAverage = getLoadAverageThreshold();
+    double localSeenLoadAverage = calculateLiveLoadAverage();
+
+    if (localSeenLoadAverage < 0) {
+      if (log.isWarnEnabled()) {

Review comment:
       There is a fair bit of overlap between the two circuit breakers -- can 
we move the common code and methods to a super class, and define the contract 
which both (and future) CPU circuit breakers need to adhere to?

##########
File path: 
solr/core/src/java/org/apache/solr/util/circuitbreaker/CircuitBreakerManager.java
##########
@@ -40,12 +41,15 @@
  * solution. There will be a follow up with a SIP for a schema API design.
  */
 public class CircuitBreakerManager implements PluginInfoInitialized {
+  private final SolrCore core;

Review comment:
       I am wary of adding a SolrCore dependency here since only one circuit 
breaker really uses it and this pollutes the API a fair bit.

##########
File path: solr/solr-ref-guide/src/circuit-breakers.adoc
##########
@@ -94,16 +114,45 @@ Configuration for CPU utilization based circuit breaker:
 Note that this configuration will be overridden by the global circuit breaker 
flag -- if circuit breakers are disabled, this flag
 will not help you.
 
-The triggering threshold is defined in units of CPU utilization. The 
configuration to control this is as below:
+The triggering threshold is defined in percent CPU usage. A value of "0" maps 
to 0% usage 
+and a value of "100" maps to 100% usage.  This circuit breaker will trip when 
the CPU usage is
+equal to or greater than 75%:
 
 [source,xml]
 ----
 <str name="cpuThreshold">75</str>
 ----
 
+=== System Load Average Circuit Breaker
+This circuit breaker tracks system load average and triggers if the recent 
load average exceeds a configurable threshold.
+
+This is tracked with the JMX metric 
`OperatingSystemMXBean.getSystemLoadAverage()`. That measures the
+recent load average for the whole system. A "load average" is the number of 
processes using or waiting for a CPU,
+usually averaged over one minute. Some systems include processes waiting on IO 
in the load average. Check the
+documentation for your system and JVM to understand this metric. For more 
information, see the
+https://en.wikipedia.org/wiki/Load_(computing)[Wikipedia page for Load],
+
+Configuration for load average circuit breaker:
+
+[source,xml]
+----
+<str name="loadAverageEnabled">true</str>
+----
+
+Note that this configuration will be overridden by the global circuit breaker 
flag -- if circuit breakers are disabled, this flag
+will not help you.
+
+The triggering threshold is a floating point number matching load average. 
This circuit breaker will trip when the

Review comment:
       Not reviewing these changes -- will depend on Cassandra's review at some 
point

##########
File path: solr/core/src/test/org/apache/solr/util/TestCircuitBreaker.java
##########
@@ -199,7 +201,63 @@ public void testFakeCPUCircuitBreaker() {
       PluginInfo pluginInfo = 
h.getCore().getSolrConfig().getPluginInfo(CircuitBreakerManager.class.getName());
 
       CircuitBreaker.CircuitBreakerConfig circuitBreakerConfig = 
CircuitBreakerManager.buildCBConfig(pluginInfo);
-      CircuitBreaker circuitBreaker = new 
FakeCPUCircuitBreaker(circuitBreakerConfig);
+      CircuitBreaker circuitBreaker = new 
FakeCPUCircuitBreaker(circuitBreakerConfig, null);
+
+      h.getCore().getCircuitBreakerManager().register(circuitBreaker);
+
+      List<Future<?>> futures = new ArrayList<>();
+
+      for (int i = 0; i < 5; i++) {
+        Future<?> future = executor.submit(() -> {
+          try {
+            h.query(req("name:\"john smith\""));
+          } catch (SolrException e) {
+            assertThat(e.getMessage(), containsString("Circuit Breakers 
tripped"));
+            failureCount.incrementAndGet();
+          } catch (Exception e) {
+            throw new RuntimeException(e.getMessage());
+          }
+        });
+
+        futures.add(future);
+      }
+
+      for  (Future<?> future : futures) {
+        try {
+          future.get();
+        } catch (Exception e) {
+          throw new RuntimeException(e.getMessage());
+        }
+      }
+
+      executor.shutdown();
+      try {
+        executor.awaitTermination(Long.MAX_VALUE, TimeUnit.NANOSECONDS);
+      } catch (InterruptedException e) {
+        Thread.currentThread().interrupt();
+        throw new RuntimeException(e.getMessage());
+      }
+
+      assertEquals("Number of failed queries is not correct",5, 
failureCount.get());
+    } finally {
+      if (!executor.isShutdown()) {
+        executor.shutdown();
+      }
+    }
+  }
+
+  public void testFakeLoadAverageCircuitBreaker() {
+    AtomicInteger failureCount = new AtomicInteger();
+
+    ExecutorService executor = ExecutorUtil.newMDCAwareCachedThreadPool(
+        new SolrNamedThreadFactory("TestCircuitBreaker"));
+    try {
+      removeAllExistingCircuitBreakers();
+
+      PluginInfo pluginInfo = 
h.getCore().getSolrConfig().getPluginInfo(CircuitBreakerManager.class.getName());

Review comment:
       Isnt this test doing the exact same thing as the original CPU circuit 
breaker test, only using a different class?




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
[email protected]



---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to