Author: ghenzler Date: Thu Mar 23 06:41:05 2017 New Revision: 1788179 URL: http://svn.apache.org/viewvc?rev=1788179&view=rev Log: SLING-6698 thread usage HC initial commit
Added: sling/trunk/bundles/extensions/healthcheck/support/src/main/java/org/apache/sling/hc/support/impl/ThreadUsageHealthCheck.java (with props) sling/trunk/bundles/extensions/healthcheck/support/src/test/java/org/apache/sling/hc/support/ sling/trunk/bundles/extensions/healthcheck/support/src/test/java/org/apache/sling/hc/support/impl/ sling/trunk/bundles/extensions/healthcheck/support/src/test/java/org/apache/sling/hc/support/impl/ThreadUsageHealthCheckTest.java (with props) Modified: sling/trunk/bundles/extensions/healthcheck/support/pom.xml Modified: sling/trunk/bundles/extensions/healthcheck/support/pom.xml URL: http://svn.apache.org/viewvc/sling/trunk/bundles/extensions/healthcheck/support/pom.xml?rev=1788179&r1=1788178&r2=1788179&view=diff ============================================================================== --- sling/trunk/bundles/extensions/healthcheck/support/pom.xml (original) +++ sling/trunk/bundles/extensions/healthcheck/support/pom.xml Thu Mar 23 06:41:05 2017 @@ -78,6 +78,12 @@ </dependency> <dependency> <groupId>org.apache.sling</groupId> + <artifactId>org.apache.sling.hc.annotations</artifactId> + <version>1.0.4</version> + </dependency> + + <dependency> + <groupId>org.apache.sling</groupId> <artifactId>org.apache.sling.commons.osgi</artifactId> <version>2.2.0</version> <scope>provided</scope> Added: sling/trunk/bundles/extensions/healthcheck/support/src/main/java/org/apache/sling/hc/support/impl/ThreadUsageHealthCheck.java URL: http://svn.apache.org/viewvc/sling/trunk/bundles/extensions/healthcheck/support/src/main/java/org/apache/sling/hc/support/impl/ThreadUsageHealthCheck.java?rev=1788179&view=auto ============================================================================== --- sling/trunk/bundles/extensions/healthcheck/support/src/main/java/org/apache/sling/hc/support/impl/ThreadUsageHealthCheck.java (added) +++ sling/trunk/bundles/extensions/healthcheck/support/src/main/java/org/apache/sling/hc/support/impl/ThreadUsageHealthCheck.java Thu Mar 23 06:41:05 2017 @@ -0,0 +1,192 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The SF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations under the License. + */ +package org.apache.sling.hc.support.impl; + +import java.lang.management.ManagementFactory; +import java.lang.management.ThreadInfo; +import java.lang.management.ThreadMXBean; +import java.util.ArrayList; +import java.util.Collections; +import java.util.Dictionary; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import org.apache.felix.scr.annotations.Activate; +import org.apache.felix.scr.annotations.ConfigurationPolicy; +import org.apache.felix.scr.annotations.Property; +import org.apache.sling.commons.osgi.PropertiesUtil; +import org.apache.sling.hc.annotations.SlingHealthCheck; +import org.apache.sling.hc.api.HealthCheck; +import org.apache.sling.hc.api.Result; +import org.apache.sling.hc.util.FormattingResultLog; +import org.osgi.service.component.ComponentContext; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Uses ThreadMXBean to show (and potentially WARN about) information about thread usage. Use deadlock detection provided by ThreadMXBean to send CRITICAL. + */ +@SlingHealthCheck(name = ThreadUsageHealthCheck.HC_NAME, label = "Apache Sling " + + ThreadUsageHealthCheck.HC_NAME, description = "Checks time threads are using ThreadMXBean", tags = { "resources", + "threads" }, configurationPolicy = ConfigurationPolicy.REQUIRE) +public class ThreadUsageHealthCheck implements HealthCheck { + private static final Logger LOG = LoggerFactory.getLogger(ThreadUsageHealthCheck.class); + + public static final String HC_NAME = "Thread Usage Health Check"; + + private static final long DEFAULT_SAMPLE_PERIOD_IN_MS = 200; + + static final String PROP_SAMPLE_PERIOD_IN_MS = "samplePeriodInMs"; + @Property(name = PROP_SAMPLE_PERIOD_IN_MS, label = "Sample Period", description = "Period this HC uses to measure", longValue = DEFAULT_SAMPLE_PERIOD_IN_MS) + private long samplePeriodInMs; + + static final String PROP_CPU_TIME_THRESHOLD_WARN = "cpuTimeThresholdWarn"; + @Property(name = PROP_CPU_TIME_THRESHOLD_WARN, label = "CPU Time Threshold for WARN in %", description = "Will WARN once this threshold is reached. Has to be configured according to cores of the system (e.g. use 400% for 4 cores)") + private long cpuTimeThresholdWarn; + + private int availableProcessors; + private boolean cpuTimeThresholdWarnIsConfigured; + + @Activate + protected final void activate(final ComponentContext componentContext) { + final Dictionary<String, Object> properties = componentContext.getProperties(); + this.samplePeriodInMs = PropertiesUtil.toLong(properties.get(PROP_SAMPLE_PERIOD_IN_MS), DEFAULT_SAMPLE_PERIOD_IN_MS); + + this.availableProcessors = Runtime.getRuntime().availableProcessors(); + long defaultThresholdWarn = availableProcessors * 90; // 100% is rarely reached, 90% means a very busy system + this.cpuTimeThresholdWarn = PropertiesUtil.toLong(properties.get(PROP_CPU_TIME_THRESHOLD_WARN), defaultThresholdWarn); + this.cpuTimeThresholdWarnIsConfigured = properties.get(PROP_CPU_TIME_THRESHOLD_WARN) !=null; + + } + + @Override + public Result execute() { + FormattingResultLog log = new FormattingResultLog(); + + log.debug("Checking threads for exceeding {}% CPU time within time period of {}ms", cpuTimeThresholdWarn, samplePeriodInMs); + + try { + ThreadMXBean threadMxBean = ManagementFactory.getThreadMXBean(); + + List<ThreadTimeInfo> threadTimeInfos = collectThreadTimeInfos(log, threadMxBean); + + Collections.sort(threadTimeInfos); + + float totalCpuTimePercentage = 0; + for (int i = 0; i < threadTimeInfos.size(); i++) { + + ThreadTimeInfo threadInfo = threadTimeInfos.get(i); + float cpuTimePercentage = ((float) threadInfo.getCpuTime() / ((float) samplePeriodInMs * 1000000)) * 100f; + totalCpuTimePercentage +=cpuTimePercentage; + + String msg = String.format("%4.1f", cpuTimePercentage) + "% used by thread \"" + threadInfo.name + "\""; + if (i<3 || (i<10 && cpuTimePercentage > 15)) { + log.info(msg); + } else { + log.debug(msg); + } + } + log.info(threadTimeInfos.size() + " threads using "+String.format("%4.1f", totalCpuTimePercentage)+"% CPU Time"); + boolean isHighCpuTime = totalCpuTimePercentage > cpuTimeThresholdWarn; + if(isHighCpuTime) { + log.warn("Threads are consuming significant CPU time "+ ( cpuTimeThresholdWarnIsConfigured ? + "(more than configured " + cpuTimeThresholdWarn + "% within " + samplePeriodInMs + "ms)" + : "(more than "+availableProcessors+" cores * 90% = "+cpuTimeThresholdWarn + "%)")); + } + + checkForDeadlock(log, threadMxBean); + + } catch (Exception e) { + LOG.error("Could not analyse thread usage "+e, e); + log.healthCheckError("Could not analyse thread usage", e); + } + + return new Result(log); + + } + + List<ThreadTimeInfo> collectThreadTimeInfos(FormattingResultLog log, ThreadMXBean threadMxBean) { + + + Map<Long, ThreadTimeInfo> threadTimeInfos = new HashMap<Long, ThreadTimeInfo>(); + + long[] allThreadIds = threadMxBean.getAllThreadIds(); + for (long threadId : allThreadIds) { + ThreadTimeInfo threadTimeInfo = new ThreadTimeInfo(); + long threadCpuTimeStart = threadMxBean.getThreadCpuTime(threadId); + threadTimeInfo.start = threadCpuTimeStart; + ThreadInfo threadInfo = threadMxBean.getThreadInfo(threadId); + threadTimeInfo.name = threadInfo!=null?threadInfo.getThreadName():"Thread id "+threadId+" (name not resolvable)"; + threadTimeInfos.put(threadId, threadTimeInfo); + } + + try { + Thread.sleep(samplePeriodInMs); + } catch (InterruptedException e) { + log.warn("Could not sleep configured samplePeriodInMs={} to gather thread load", samplePeriodInMs); + } + + for (long threadId : allThreadIds) { + ThreadTimeInfo threadTimeInfo = threadTimeInfos.get(threadId); + if (threadTimeInfo == null) { + continue; + } + long threadCpuTimeStop = threadMxBean.getThreadCpuTime(threadId); + threadTimeInfo.stop = threadCpuTimeStop; + } + + List<ThreadTimeInfo> threads = new ArrayList<ThreadTimeInfo>(threadTimeInfos.values()); + + return threads; + } + + + void checkForDeadlock(FormattingResultLog log, ThreadMXBean threadMxBean) { + long[] findDeadlockedThreads = threadMxBean.findDeadlockedThreads(); + if (findDeadlockedThreads != null) { + for (long threadId : findDeadlockedThreads) { + log.critical("Thread " + threadMxBean.getThreadInfo(threadId).getThreadName() + " is DEADLOCKED"); + } + } + } + + static class ThreadTimeInfo implements Comparable<ThreadTimeInfo> { + long start; + long stop; + String name; + + long getCpuTime() { + long cpuTime = stop - start; + if(cpuTime < 0) { + cpuTime = 0; + } + return cpuTime; + } + + @Override + public int compareTo(ThreadTimeInfo otherThreadTimeInfo) { + if (otherThreadTimeInfo == null) { + return -1; + } + return (int) (otherThreadTimeInfo.getCpuTime() - this.getCpuTime()); + } + + } + +} \ No newline at end of file Propchange: sling/trunk/bundles/extensions/healthcheck/support/src/main/java/org/apache/sling/hc/support/impl/ThreadUsageHealthCheck.java ------------------------------------------------------------------------------ svn:mime-type = text/plain Added: sling/trunk/bundles/extensions/healthcheck/support/src/test/java/org/apache/sling/hc/support/impl/ThreadUsageHealthCheckTest.java URL: http://svn.apache.org/viewvc/sling/trunk/bundles/extensions/healthcheck/support/src/test/java/org/apache/sling/hc/support/impl/ThreadUsageHealthCheckTest.java?rev=1788179&view=auto ============================================================================== --- sling/trunk/bundles/extensions/healthcheck/support/src/test/java/org/apache/sling/hc/support/impl/ThreadUsageHealthCheckTest.java (added) +++ sling/trunk/bundles/extensions/healthcheck/support/src/test/java/org/apache/sling/hc/support/impl/ThreadUsageHealthCheckTest.java Thu Mar 23 06:41:05 2017 @@ -0,0 +1,119 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The SF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations under the License. + */ +package org.apache.sling.hc.support.impl; + +import static org.junit.Assert.assertEquals; +import static org.mockito.Matchers.any; +import static org.mockito.Mockito.doNothing; +import static org.mockito.Mockito.doReturn; +import static org.mockito.MockitoAnnotations.initMocks; + +import java.lang.management.ThreadMXBean; +import java.util.ArrayList; +import java.util.Date; +import java.util.Dictionary; +import java.util.List; + +import org.apache.sling.hc.api.Result; +import org.apache.sling.hc.util.FormattingResultLog; +import org.junit.Before; +import org.junit.Test; +import org.mockito.Mock; +import org.mockito.Spy; +import org.osgi.service.component.ComponentContext; + +import org.apache.sling.hc.support.impl.ThreadUsageHealthCheck.ThreadTimeInfo; + +public class ThreadUsageHealthCheckTest { + + @Spy + ThreadUsageHealthCheck threadsHealthCheck; + + @Mock + ComponentContext componentContext; + + @Mock + Dictionary<String, Object> componentConfig; + + @Before + public void setup() { + initMocks(this); + + doReturn(componentConfig).when(componentContext).getProperties(); + doNothing().when(threadsHealthCheck).checkForDeadlock(any(FormattingResultLog.class), any(ThreadMXBean.class)); + + } + + @Test + public void testThreadActivityWithinThreshold() { + + long samplePeriod = 200L; + int processorsAvailable = 4; + int busyThreads = 3; + + List<ThreadTimeInfo> resultListOverloaded = getFullLoadThreadInfos(samplePeriod, busyThreads); + + setupExpectations(samplePeriod, processorsAvailable, resultListOverloaded); + + Result result = threadsHealthCheck.execute(); + + assertEquals(Result.Status.OK, result.getStatus()); + + } + + @Test + public void testThreadActivityOverloaded() { + + long samplePeriod = 200L; + int processorsAvailable = 4; + int busyThreads = 5; + + List<ThreadTimeInfo> resultListOverloaded = getFullLoadThreadInfos(samplePeriod, busyThreads); + + setupExpectations(samplePeriod, processorsAvailable, resultListOverloaded); + + Result result = threadsHealthCheck.execute(); + + assertEquals(Result.Status.WARN, result.getStatus()); + + } + + private List<ThreadTimeInfo> getFullLoadThreadInfos(long samplePeriod, int count) { + List<ThreadTimeInfo> resultList = new ArrayList<ThreadTimeInfo>(); + for (int i = 0; i < count; i++) { + resultList.add(getThreadTimeInfo((long) (samplePeriod * .95))); + } + return resultList; + } + + private void setupExpectations(long samplePeriod, int processorsAvailable, List<ThreadTimeInfo> resultListOk) { + doReturn(processorsAvailable * 90).when(componentConfig).get(ThreadUsageHealthCheck.PROP_CPU_TIME_THRESHOLD_WARN); + doReturn(samplePeriod).when(componentConfig).get(ThreadUsageHealthCheck.PROP_SAMPLE_PERIOD_IN_MS); + threadsHealthCheck.activate(componentContext); + doReturn(resultListOk).when(threadsHealthCheck).collectThreadTimeInfos(any(FormattingResultLog.class), any(ThreadMXBean.class)); + } + + private ThreadUsageHealthCheck.ThreadTimeInfo getThreadTimeInfo(long diffInMs) { + ThreadUsageHealthCheck.ThreadTimeInfo threadTimeInfo = new ThreadUsageHealthCheck.ThreadTimeInfo(); + threadTimeInfo.start = new Date().getTime(); + threadTimeInfo.stop = threadTimeInfo.start + (diffInMs * 1000000); + threadTimeInfo.name = "Unit Test Thread"; + return threadTimeInfo; + } + +} Propchange: sling/trunk/bundles/extensions/healthcheck/support/src/test/java/org/apache/sling/hc/support/impl/ThreadUsageHealthCheckTest.java ------------------------------------------------------------------------------ svn:mime-type = text/plain