ACCUMULO-3084 just warn about long timer delays
Project: http://git-wip-us.apache.org/repos/asf/accumulo/repo Commit: http://git-wip-us.apache.org/repos/asf/accumulo/commit/1dc77838 Tree: http://git-wip-us.apache.org/repos/asf/accumulo/tree/1dc77838 Diff: http://git-wip-us.apache.org/repos/asf/accumulo/diff/1dc77838 Branch: refs/heads/master Commit: 1dc77838353f3b253e407cfbb27f7de3cc69c815 Parents: 13d2094 c906e4b Author: Eric C. Newton <[email protected]> Authored: Tue Aug 26 17:47:35 2014 -0400 Committer: Eric C. Newton <[email protected]> Committed: Tue Aug 26 17:47:35 2014 -0400 ---------------------------------------------------------------------- .../accumulo/server/GarbageCollectionLogger.java | 13 +++++++++++++ .../java/org/apache/accumulo/tserver/TabletServer.java | 5 +++-- 2 files changed, 16 insertions(+), 2 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/accumulo/blob/1dc77838/server/tserver/src/main/java/org/apache/accumulo/server/GarbageCollectionLogger.java ---------------------------------------------------------------------- diff --cc server/tserver/src/main/java/org/apache/accumulo/server/GarbageCollectionLogger.java index b66d0f7,0000000..d53f240 mode 100644,000000..100644 --- a/server/tserver/src/main/java/org/apache/accumulo/server/GarbageCollectionLogger.java +++ b/server/tserver/src/main/java/org/apache/accumulo/server/GarbageCollectionLogger.java @@@ -1,103 -1,0 +1,116 @@@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.accumulo.server; + +import java.lang.management.GarbageCollectorMXBean; +import java.lang.management.ManagementFactory; +import java.util.HashMap; +import java.util.List; + +import org.apache.accumulo.core.conf.AccumuloConfiguration; +import org.apache.accumulo.core.conf.Property; +import org.apache.accumulo.server.util.Halt; +import org.apache.log4j.Logger; + +public class GarbageCollectionLogger { + private static final Logger log = Logger.getLogger(GarbageCollectionLogger.class); + + private final HashMap<String,Long> prevGcTime = new HashMap<String,Long>(); + private long lastMemorySize = 0; + private long gcTimeIncreasedCount = 0; ++ private static long lastMemoryCheckTime = 0; + + public GarbageCollectionLogger() { + } + + public synchronized void logGCInfo(AccumuloConfiguration conf) { ++ final long now = System.currentTimeMillis(); ++ + List<GarbageCollectorMXBean> gcmBeans = ManagementFactory.getGarbageCollectorMXBeans(); + Runtime rt = Runtime.getRuntime(); + + StringBuilder sb = new StringBuilder("gc"); + + boolean sawChange = false; + + long maxIncreaseInCollectionTime = 0; + + for (GarbageCollectorMXBean gcBean : gcmBeans) { + Long prevTime = prevGcTime.get(gcBean.getName()); + long pt = 0; + if (prevTime != null) { + pt = prevTime; + } + + long time = gcBean.getCollectionTime(); + + if (time - pt != 0) { + sawChange = true; + } + + long increaseInCollectionTime = time - pt; + sb.append(String.format(" %s=%,.2f(+%,.2f) secs", gcBean.getName(), time / 1000.0, increaseInCollectionTime / 1000.0)); + maxIncreaseInCollectionTime = Math.max(increaseInCollectionTime, maxIncreaseInCollectionTime); + prevGcTime.put(gcBean.getName(), time); + } + + long mem = rt.freeMemory(); + if (maxIncreaseInCollectionTime == 0) { + gcTimeIncreasedCount = 0; + } else { + gcTimeIncreasedCount++; + if (gcTimeIncreasedCount > 3 && mem < rt.maxMemory() * 0.05) { + log.warn("Running low on memory"); + gcTimeIncreasedCount = 0; + } + } + + if (mem > lastMemorySize) { + sawChange = true; + } + + String sign = "+"; + if (mem - lastMemorySize <= 0) { + sign = ""; + } + + sb.append(String.format(" freemem=%,d(%s%,d) totalmem=%,d", mem, sign, (mem - lastMemorySize), rt.totalMemory())); + + if (sawChange) { + log.debug(sb.toString()); + } + + final long keepAliveTimeout = conf.getTimeInMillis(Property.INSTANCE_ZK_TIMEOUT); ++ if (lastMemoryCheckTime > 0 && lastMemoryCheckTime < now) { ++ final long diff = now - lastMemoryCheckTime; ++ if (diff > keepAliveTimeout) { ++ log.warn(String.format("Check for long GC pauses not called in a timely fashion %.1f", diff / 1000.)); ++ } ++ lastMemoryCheckTime = now; ++ return; ++ } ++ + if (maxIncreaseInCollectionTime > keepAliveTimeout) { + Halt.halt("Garbage collection may be interfering with lock keep-alive. Halting.", -1); + } + + lastMemorySize = mem; ++ lastMemoryCheckTime = now; + } + + +} http://git-wip-us.apache.org/repos/asf/accumulo/blob/1dc77838/server/tserver/src/main/java/org/apache/accumulo/tserver/TabletServer.java ---------------------------------------------------------------------- diff --cc server/tserver/src/main/java/org/apache/accumulo/tserver/TabletServer.java index ce84bd3,f1a9f2e..b9505d9 --- a/server/tserver/src/main/java/org/apache/accumulo/tserver/TabletServer.java +++ b/server/tserver/src/main/java/org/apache/accumulo/tserver/TabletServer.java @@@ -236,70 -235,28 +236,71 @@@ import org.apache.zookeeper.KeeperExcep import com.google.common.net.HostAndPort; -enum ScanRunState { - QUEUED, RUNNING, FINISHED -} - public class TabletServer extends AbstractMetricsImpl implements org.apache.accumulo.tserver.metrics.TabletServerMBean { - private static final Logger log = Logger.getLogger(TabletServer.class); - - private static HashMap<String,Long> prevGcTime = new HashMap<String,Long>(); - private static long lastMemorySize = 0; - private static long lastMemoryCheckTime = 0; - private static long gcTimeIncreasedCount; + static enum ScanRunState { + QUEUED, RUNNING, FINISHED + } + private static final Logger log = Logger.getLogger(TabletServer.class); + private static final String METRICS_PREFIX = "tserver"; private static final long MAX_TIME_TO_WAIT_FOR_SCAN_RESULT_MILLIS = 1000; private static final long RECENTLY_SPLIT_MILLIES = 60 * 1000; + private static final long TIME_BETWEEN_GC_CHECKS = 5000; + private static final Set<Column> EMPTY_COLUMNS = Collections.emptySet(); + + private final GarbageCollectionLogger gcLogger = new GarbageCollectionLogger(); + private final TransactionWatcher watcher = new TransactionWatcher(); + private final ZooCache masterLockCache = new ZooCache(); + + private final TabletServerLogger logger; + + private final TabletServerMinCMetrics mincMetrics = new TabletServerMinCMetrics(); + public TabletServerMinCMetrics getMinCMetrics() { + return mincMetrics; + } + + private final ServerConfiguration serverConfig; + private final LogSorter logSorter; + private final TabletStatsKeeper statsKeeper; + private final AtomicInteger logIdGenerator = new AtomicInteger(); - private TabletServerLogger logger; + private final VolumeManager fs; + public Instance getInstance() { + return serverConfig.getInstance(); + } + + private final SortedMap<KeyExtent,Tablet> onlineTablets = Collections.synchronizedSortedMap(new TreeMap<KeyExtent,Tablet>()); + private final SortedSet<KeyExtent> unopenedTablets = Collections.synchronizedSortedSet(new TreeSet<KeyExtent>()); + private final SortedSet<KeyExtent> openingTablets = Collections.synchronizedSortedSet(new TreeSet<KeyExtent>()); + @SuppressWarnings("unchecked") + private final Map<KeyExtent,Long> recentlyUnloadedCache = Collections.synchronizedMap(new LRUMap(1000)); - protected TabletServerMinCMetrics mincMetrics = new TabletServerMinCMetrics(); + private final TabletServerResourceManager resourceManager; + private final SecurityOperation security; - private ServerConfiguration serverConfig; - private LogSorter logSorter = null; + private final BlockingDeque<MasterMessage> masterMessages = new LinkedBlockingDeque<MasterMessage>(); + + private Thread majorCompactorThread; + + private HostAndPort clientAddress; + + private volatile boolean serverStopRequested = false; + private volatile boolean majorCompactorDisabled = false; + private volatile boolean shutdownComplete = false; - ++ + private ZooLock tabletServerLock; + + private TServer server; + + private DistributedWorkQueue bulkFailedCopyQ; + + private String lockID; + + private static ObjectName OBJECT_NAME = null; + + public static final AtomicLong seekCount = new AtomicLong(0); + + private final AtomicLong totalMinorCompactions = new AtomicLong(0); public TabletServer(ServerConfiguration conf, VolumeManager fs) { super(); @@@ -3281,7 -3539,7 +3282,7 @@@ } }; - SimpleTimer.getInstance(aconf).schedule(gcDebugTask, 0, 1000); - SimpleTimer.getInstance().schedule(gcDebugTask, 0, 1000); ++ SimpleTimer.getInstance(aconf).schedule(gcDebugTask, 0, TIME_BETWEEN_GC_CHECKS); Runnable constraintTask = new Runnable() {
