Updated Branches: refs/heads/trunk 3297a96e1 -> 329dd758e
Make inconsistent counter shards converge

patch by scode; reviewed by slebresne for CASSANDRA-3641


Project: http://git-wip-us.apache.org/repos/asf/cassandra/repo
Commit: http://git-wip-us.apache.org/repos/asf/cassandra/commit/329dd758
Tree: http://git-wip-us.apache.org/repos/asf/cassandra/tree/329dd758
Diff: http://git-wip-us.apache.org/repos/asf/cassandra/diff/329dd758

Branch: refs/heads/trunk
Commit: 329dd758e8b07368c4bab5451c42dc04b0bb48ae
Parents: 3297a96
Author: Sylvain Lebresne <[email protected]>
Authored: Mon Jan 2 18:10:20 2012 +0100
Committer: Sylvain Lebresne <[email protected]>
Committed: Mon Jan 2 18:16:36 2012 +0100

----------------------------------------------------------------------
 .../cassandra/db/context/CounterContext.java | 52 +++++++++++++--
 1 files changed, 47 insertions(+), 5 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/cassandra/blob/329dd758/src/java/org/apache/cassandra/db/context/CounterContext.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/db/context/CounterContext.java b/src/java/org/apache/cassandra/db/context/CounterContext.java
index 0ac8e0a..d466caa 100644
--- a/src/java/org/apache/cassandra/db/context/CounterContext.java
+++ b/src/java/org/apache/cassandra/db/context/CounterContext.java
@@ -169,6 +169,8 @@ public class CounterContext implements IContext
             {
                 long leftClock = leftState.getClock();
                 long rightClock = rightState.getClock();
+                long leftCount = leftState.getCount();
+                long rightCount = rightState.getCount();
 
                 // advance
                 leftState.moveToNext();
@@ -177,7 +179,16 @@ public class CounterContext implements IContext
                 // process clock comparisons
                 if (leftClock == rightClock)
                 {
-                    continue;
+                    if (leftCount != rightCount)
+                    {
+                        // Inconsistent shard (see the corresponding code in merge()). We return DISJOINT in this
+                        // case so that it will be treated as a difference, allowing read-repair to work.
+                        return ContextRelationship.DISJOINT;
+                    }
+                    else
+                    {
+                        continue;
+                    }
                 }
                 else if ((leftClock >= 0 && rightClock > 0 && leftClock > rightClock)
                       || (leftClock < 0 && (rightClock > 0 || leftClock < rightClock)))
@@ -356,11 +367,42 @@ public class CounterContext implements IContext
                 {
                     long leftClock = leftState.getClock();
                     long rightClock = rightState.getClock();
-                    if ((leftClock >= 0 && rightClock > 0 && leftClock >= rightClock)
-                     || (leftClock < 0 && (rightClock > 0 || leftClock < rightClock)))
-                        leftState.copyTo(mergedState);
+
+                    if (leftClock == rightClock)
+                    {
+                        // We should never see non-delta shards w/ same id+clock but different counts. However, if we do
+                        // we should "heal" the problem by being deterministic in our selection of shard - and
+                        // log the occurrence so that the operator will know something is wrong.
+                        long leftCount = leftState.getCount();
+                        long rightCount = rightState.getCount();
+
+                        if (leftCount != rightCount)
+                        {
+                            logger.error("invalid counter shard detected; ({}, {}, {}) and ({}, {}, {}) differ only in " +
+                                         "count; will pick highest to self-heal; this indicates a bug or corruption generated a bad counter shard",
+                                         new Object[] {
+                                             leftState.getNodeId(), leftClock, leftCount,
+                                             rightState.getNodeId(), rightClock, rightCount,
+                                         });
+                        }
+
+                        if (leftCount > rightCount)
+                        {
+                            leftState.copyTo(mergedState);
+                        }
+                        else
+                        {
+                            rightState.copyTo(mergedState);
+                        }
+                    }
                     else
-                        rightState.copyTo(mergedState);
+                    {
+                        if ((leftClock >= 0 && rightClock > 0 && leftClock >= rightClock)
+                         || (leftClock < 0 && (rightClock > 0 || leftClock < rightClock)))
+                            leftState.copyTo(mergedState);
+                        else
+                            rightState.copyTo(mergedState);
+                    }
                 }
                 rightState.moveToNext();
                 leftState.moveToNext();
