Log when a node is down longer than the hint window. Patch by Vijay; reviewed by Carl Yeksigian and jbellis for CASSANDRA-4554.
Project: http://git-wip-us.apache.org/repos/asf/cassandra/repo Commit: http://git-wip-us.apache.org/repos/asf/cassandra/commit/60acf0d7 Tree: http://git-wip-us.apache.org/repos/asf/cassandra/tree/60acf0d7 Diff: http://git-wip-us.apache.org/repos/asf/cassandra/diff/60acf0d7 Branch: refs/heads/trunk Commit: 60acf0d792cb761da06bf1fc91d9f2b57fcc065c Parents: 29915e8 Author: Vijay Parthasarathy <[email protected]> Authored: Sun Feb 3 14:49:20 2013 -0800 Committer: Vijay Parthasarathy <[email protected]> Committed: Sun Feb 3 14:49:20 2013 -0800 ---------------------------------------------------------------------- .../org/apache/cassandra/config/CFMetaData.java | 5 + .../org/apache/cassandra/config/KSMetaData.java | 1 + .../apache/cassandra/db/HintedHandOffManager.java | 4 + src/java/org/apache/cassandra/db/SystemTable.java | 8 + .../cassandra/metrics/HintedHandoffMetrics.java | 108 +++++++++++++++ .../org/apache/cassandra/service/StorageProxy.java | 6 + .../org/apache/cassandra/db/HintedHandOffTest.java | 20 +++ 7 files changed, 152 insertions(+), 0 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/cassandra/blob/60acf0d7/src/java/org/apache/cassandra/config/CFMetaData.java ---------------------------------------------------------------------- diff --git a/src/java/org/apache/cassandra/config/CFMetaData.java b/src/java/org/apache/cassandra/config/CFMetaData.java index 21ca90a..1738d2d 100644 --- a/src/java/org/apache/cassandra/config/CFMetaData.java +++ b/src/java/org/apache/cassandra/config/CFMetaData.java @@ -174,6 +174,11 @@ public final class CFMetaData + "rack text" + ") WITH COMMENT='known peers in the cluster'"); + public static final CFMetaData PeerEventsCf = compile(12, "CREATE TABLE " + SystemTable.PEER_EVENTS_CF + " (" + + "peer inet PRIMARY KEY," + + "hints_dropped map<uuid, int>" + + ") WITH COMMENT='cf contains events related to peers'"); + public static final CFMetaData LocalCf = 
compile(13, "CREATE TABLE " + SystemTable.LOCAL_CF + " (" + "key text PRIMARY KEY," + "tokens set<varchar>," http://git-wip-us.apache.org/repos/asf/cassandra/blob/60acf0d7/src/java/org/apache/cassandra/config/KSMetaData.java ---------------------------------------------------------------------- diff --git a/src/java/org/apache/cassandra/config/KSMetaData.java b/src/java/org/apache/cassandra/config/KSMetaData.java index cfb4aef..de41f57 100644 --- a/src/java/org/apache/cassandra/config/KSMetaData.java +++ b/src/java/org/apache/cassandra/config/KSMetaData.java @@ -83,6 +83,7 @@ public final class KSMetaData CFMetaData.RangeXfersCf, CFMetaData.LocalCf, CFMetaData.PeersCf, + CFMetaData.PeerEventsCf, CFMetaData.HintsCf, CFMetaData.IndexCf, CFMetaData.CounterIdCf, http://git-wip-us.apache.org/repos/asf/cassandra/blob/60acf0d7/src/java/org/apache/cassandra/db/HintedHandOffManager.java ---------------------------------------------------------------------- diff --git a/src/java/org/apache/cassandra/db/HintedHandOffManager.java b/src/java/org/apache/cassandra/db/HintedHandOffManager.java index 8ca0281..483d8cd 100644 --- a/src/java/org/apache/cassandra/db/HintedHandOffManager.java +++ b/src/java/org/apache/cassandra/db/HintedHandOffManager.java @@ -52,6 +52,7 @@ import org.apache.cassandra.gms.FailureDetector; import org.apache.cassandra.gms.Gossiper; import org.apache.cassandra.io.sstable.Descriptor; import org.apache.cassandra.io.sstable.SSTable; +import org.apache.cassandra.metrics.HintedHandoffMetrics; import org.apache.cassandra.net.IAsyncCallback; import org.apache.cassandra.net.MessageOut; import org.apache.cassandra.net.MessagingService; @@ -94,6 +95,8 @@ public class HintedHandOffManager implements HintedHandOffManagerMBean private static final int PAGE_SIZE = 128; private static final int LARGE_NUMBER = 65536; // 64k nodes ought to be enough for anybody. 
+ public final HintedHandoffMetrics metrics = new HintedHandoffMetrics(); + private volatile boolean hintedHandOffPaused = false; static final CompositeType comparator = CompositeType.getInstance(Arrays.<AbstractType<?>>asList(UUIDType.instance, Int32Type.instance)); @@ -124,6 +127,7 @@ public class HintedHandOffManager implements HintedHandOffManagerMBean public void run() { scheduleAllDeliveries(); + metrics.log(); } }; StorageService.optionalTasks.scheduleWithFixedDelay(runnable, 10, 10, TimeUnit.MINUTES); http://git-wip-us.apache.org/repos/asf/cassandra/blob/60acf0d7/src/java/org/apache/cassandra/db/SystemTable.java ---------------------------------------------------------------------- diff --git a/src/java/org/apache/cassandra/db/SystemTable.java b/src/java/org/apache/cassandra/db/SystemTable.java index 4f6234b..629defe 100644 --- a/src/java/org/apache/cassandra/db/SystemTable.java +++ b/src/java/org/apache/cassandra/db/SystemTable.java @@ -61,6 +61,7 @@ public class SystemTable // see CFMetaData for schema definitions public static final String PEERS_CF = "peers"; + public static final String PEER_EVENTS_CF = "peer_events"; public static final String LOCAL_CF = "local"; public static final String INDEX_CF = "IndexInfo"; public static final String COUNTER_ID_CF = "NodeIdInfo"; @@ -330,6 +331,13 @@ public class SystemTable processInternal(String.format(req, PEERS_CF, columnName, ep.getHostAddress(), value)); } + public static synchronized void updateHintsDropped(InetAddress ep, UUID timePeriod, int value) + { + // with 30 day TTL + String req = "UPDATE system.%s USING TTL 2592000 SET hints_dropped[ %s ] = %s WHERE peer = '%s'"; + processInternal(String.format(req, PEER_EVENTS_CF, timePeriod.toString(), value, ep.getHostAddress())); + } + public static synchronized void updateSchemaVersion(UUID version) { String req = "INSERT INTO system.%s (key, schema_version) VALUES ('%s', %s)"; 
http://git-wip-us.apache.org/repos/asf/cassandra/blob/60acf0d7/src/java/org/apache/cassandra/metrics/HintedHandoffMetrics.java ---------------------------------------------------------------------- diff --git a/src/java/org/apache/cassandra/metrics/HintedHandoffMetrics.java b/src/java/org/apache/cassandra/metrics/HintedHandoffMetrics.java new file mode 100644 index 0000000..a12be19 --- /dev/null +++ b/src/java/org/apache/cassandra/metrics/HintedHandoffMetrics.java @@ -0,0 +1,108 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.cassandra.metrics; + +import java.net.InetAddress; +import java.util.Map.Entry; +import java.util.concurrent.ExecutionException; + +import org.apache.cassandra.db.HintedHandOffManager; +import org.apache.cassandra.db.SystemTable; +import org.apache.cassandra.utils.UUIDGen; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.google.common.cache.CacheBuilder; +import com.google.common.cache.CacheLoader; +import com.google.common.cache.LoadingCache; +import com.yammer.metrics.Metrics; +import com.yammer.metrics.core.Counter; +import com.yammer.metrics.core.MetricName; + +/** + * Metrics for {@link HintedHandOffManager}. 
+ */ +public class HintedHandoffMetrics +{ + private static final Logger logger = LoggerFactory.getLogger(HintedHandoffMetrics.class); + + public static final String GROUP_NAME = "org.apache.cassandra.metrics"; + public static final String TYPE_NAME = "HintedHandOffManager"; + + /** Total number of hints which are not stored, This is not a cache. */ + private final LoadingCache<InetAddress, DifferencingCounter> notStored = CacheBuilder.newBuilder().build(new CacheLoader<InetAddress, DifferencingCounter>() + { + public DifferencingCounter load(InetAddress address) + { + return new DifferencingCounter(address); + } + }); + + public void incrPastWindow(InetAddress address) + { + try + { + notStored.get(address).mark(); + } + catch (ExecutionException e) + { + throw new RuntimeException(e); // this cannot happen + } + } + + public void log() + { + for (Entry<InetAddress, DifferencingCounter> entry : notStored.asMap().entrySet()) + { + long diffrence = entry.getValue().diffrence(); + if (diffrence == 0) + continue; + logger.warn("{} has {} dropped hints, because node is down past configured hint window.", entry.getKey(), diffrence); + SystemTable.updateHintsDropped(entry.getKey(), UUIDGen.getTimeUUID(), (int) diffrence); + } + } + + public class DifferencingCounter + { + private final Counter meter; + private long reported = 0; + + public DifferencingCounter(InetAddress address) + { + this.meter = Metrics.newCounter(new MetricName(GROUP_NAME, TYPE_NAME, "Hints_not_stored-" + address.toString())); + } + + public long diffrence() + { + long current = meter.count(); + long diffrence = current - reported; + this.reported = current; + return diffrence; + } + + public long count() + { + return meter.count(); + } + + public void mark() + { + meter.inc(); + } + } +} http://git-wip-us.apache.org/repos/asf/cassandra/blob/60acf0d7/src/java/org/apache/cassandra/service/StorageProxy.java ---------------------------------------------------------------------- diff --git 
a/src/java/org/apache/cassandra/service/StorageProxy.java b/src/java/org/apache/cassandra/service/StorageProxy.java index c234bd1..272c52b 100644 --- a/src/java/org/apache/cassandra/service/StorageProxy.java +++ b/src/java/org/apache/cassandra/service/StorageProxy.java @@ -1486,11 +1486,17 @@ public class StorageProxy implements StorageProxyMBean public static boolean shouldHint(InetAddress ep) { if (!DatabaseDescriptor.hintedHandoffEnabled()) + { + HintedHandOffManager.instance.metrics.incrPastWindow(ep); return false; + } boolean hintWindowExpired = Gossiper.instance.getEndpointDowntime(ep) > DatabaseDescriptor.getMaxHintWindow(); if (hintWindowExpired) + { + HintedHandOffManager.instance.metrics.incrPastWindow(ep); logger.trace("not hinting {} which has been down {}ms", ep, Gossiper.instance.getEndpointDowntime(ep)); + } return !hintWindowExpired; } http://git-wip-us.apache.org/repos/asf/cassandra/blob/60acf0d7/test/unit/org/apache/cassandra/db/HintedHandOffTest.java ---------------------------------------------------------------------- diff --git a/test/unit/org/apache/cassandra/db/HintedHandOffTest.java b/test/unit/org/apache/cassandra/db/HintedHandOffTest.java index 0b04250..260f1c5 100644 --- a/test/unit/org/apache/cassandra/db/HintedHandOffTest.java +++ b/test/unit/org/apache/cassandra/db/HintedHandOffTest.java @@ -21,19 +21,27 @@ package org.apache.cassandra.db; */ +import java.net.InetAddress; +import java.util.Map; import java.util.UUID; import java.util.concurrent.TimeUnit; import org.junit.Test; import org.apache.cassandra.SchemaLoader; +import org.apache.cassandra.cql3.UntypedResultSet; import org.apache.cassandra.db.compaction.CompactionManager; import org.apache.cassandra.db.compaction.SizeTieredCompactionStrategy; import org.apache.cassandra.db.filter.QueryPath; +import org.apache.cassandra.db.marshal.Int32Type; +import org.apache.cassandra.db.marshal.UUIDType; import org.apache.cassandra.utils.ByteBufferUtil; import 
org.apache.cassandra.utils.FBUtilities; +import com.google.common.collect.Iterators; + import static junit.framework.Assert.assertEquals; +import static org.apache.cassandra.cql3.QueryProcessor.processInternal; public class HintedHandOffTest extends SchemaLoader { @@ -77,4 +85,16 @@ public class HintedHandOffTest extends SchemaLoader // is 10 hours and there are no any tombstones in sstable assertEquals(1, hintStore.getSSTables().size()); } + + @Test + public void testHintsMetrics() throws Exception + { + for (int i = 0; i < 99; i++) + HintedHandOffManager.instance.metrics.incrPastWindow(InetAddress.getLocalHost()); + HintedHandOffManager.instance.metrics.log(); + + UntypedResultSet rows = processInternal("SELECT hints_dropped FROM system." + SystemTable.PEER_EVENTS_CF); + Map<UUID, Integer> returned = rows.one().getMap("hints_dropped", UUIDType.instance, Int32Type.instance); + assertEquals(Iterators.getLast(returned.values().iterator()).intValue(), 99); + } }
