[ https://issues.apache.org/jira/browse/ARTEMIS-1324?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16117391#comment-16117391 ]
ASF GitHub Bot commented on ARTEMIS-1324: ----------------------------------------- Github user michaelandrepearce commented on a diff in the pull request: https://github.com/apache/activemq-artemis/pull/1443#discussion_r131774052 --- Diff: artemis-commons/src/main/java/org/apache/activemq/artemis/utils/critical/CriticalAnalyzerImpl.java --- @@ -0,0 +1,182 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.activemq.artemis.utils.critical; + +import java.util.ConcurrentModificationException; +import java.util.concurrent.CopyOnWriteArrayList; +import java.util.concurrent.Semaphore; +import java.util.concurrent.TimeUnit; + +import org.apache.activemq.artemis.utils.collections.ConcurrentHashSet; +import org.jboss.logging.Logger; + +public class CriticalAnalyzerImpl implements CriticalAnalyzer { + + private final Logger logger = Logger.getLogger(CriticalAnalyzer.class); + + private volatile long timeout; + + private volatile long checkTime; + + @Override + public void clear() { + actions.clear(); + components.clear(); + } + + private CopyOnWriteArrayList<CriticalAction> actions = new CopyOnWriteArrayList<>(); + + private Thread thread; + + private final Semaphore running = new Semaphore(1); + + private final ConcurrentHashSet<CriticalComponent> components = new ConcurrentHashSet<>(); + + @Override + public void add(CriticalComponent component) { + components.add(component); + } + + @Override + public void remove(CriticalComponent component) { + components.remove(component); + } + + @Override + public CriticalAnalyzer setCheckTime(long timeout) { + this.checkTime = timeout; + return this; + } + + @Override + public long getCheckTime() { + if (checkTime == 0) { + checkTime = getTimeout() / 2; + } + return checkTime; + } + + @Override + public CriticalAnalyzer setTimeout(long timeout) { + this.timeout = timeout; + return this; + } + + @Override + public long getTimeout() { + if (timeout == 0) { + timeout = TimeUnit.MINUTES.toMillis(2); + } + return timeout; + } + + @Override + public CriticalAnalyzer addAction(CriticalAction action) { + this.actions.add(action); + return this; + } + + @Override + public void check() { + boolean retry = true; + while (retry) { + try { + for (CriticalComponent component : components) { + + int pathReturned = component.isExpired(timeout); + if (pathReturned >= 0) { + fireAction(component, pathReturned); + // no need 
to keep running if there's already a component failed + return; + } + } + retry = false; // got to the end of the list, no need to retry + } catch (ConcurrentModificationException dontCare) { + // lets retry on the loop + } + } + } + + private void fireAction(CriticalComponent component, int path) { + for (CriticalAction action: actions) { + try { + action.run(component, path); + } catch (Throwable e) { + logger.warn(e.getMessage(), e); + } + } + } + + @Override + public void start() { + + if (!running.tryAcquire()) { + // already running + return; + } + + // we are not using any Thread Pool or any Scheduled Executors from the ArtemisServer + // as that would defeat the purpose, + // as in any deadlocks the schedulers may be starving for something not responding fast enough + thread = new Thread("Artemis Critical Analyzer") { + @Override + public void run() { + try { + while (true) { + if (running.tryAcquire(getCheckTime(), TimeUnit.MILLISECONDS)) { + running.release(); + // this means that the server has been stopped as we could acquire the semaphore... returning now + break; + } + check(); + } + } catch (InterruptedException interrupted) { + // i will just leave on that case + } + } + }; + + thread.setDaemon(true); + + thread.start(); + } + + @Override + public void stop() { --- End diff -- I'm lost, what do you mean here? I see no locks. > Critical Analysis and deadlock detection on broker > -------------------------------------------------- > > Key: ARTEMIS-1324 > URL: https://issues.apache.org/jira/browse/ARTEMIS-1324 > Project: ActiveMQ Artemis > Issue Type: New Feature > Components: Broker > Reporter: clebert suconic > Assignee: clebert suconic > Priority: Critical > Fix For: 2.3.0 > > -- This message was sent by Atlassian JIRA (v6.4.14#64029)