Fix assertionError for CF with gc_grace = 0 patch by slebresne and jbellis for CASSANDRA-3579
Project: http://git-wip-us.apache.org/repos/asf/cassandra/repo Commit: http://git-wip-us.apache.org/repos/asf/cassandra/commit/89e54091 Tree: http://git-wip-us.apache.org/repos/asf/cassandra/tree/89e54091 Diff: http://git-wip-us.apache.org/repos/asf/cassandra/diff/89e54091 Branch: refs/heads/trunk Commit: 89e54091fd2b55363f3a4917d0883d688c9e8a9c Parents: 621bc0c Author: Sylvain Lebresne <[email protected]> Authored: Mon Jan 9 17:56:00 2012 +0100 Committer: Sylvain Lebresne <[email protected]> Committed: Mon Jan 9 17:56:00 2012 +0100 ---------------------------------------------------------------------- CHANGES.txt | 1 + .../cassandra/db/AbstractColumnContainer.java | 2 +- .../org/apache/cassandra/db/ColumnFamilyStore.java | 13 +++++++------ .../org/apache/cassandra/db/ExpiringColumn.java | 12 ++++++++++++ .../apache/cassandra/db/filter/QueryFilter.java | 2 +- 5 files changed, 22 insertions(+), 8 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/cassandra/blob/89e54091/CHANGES.txt ---------------------------------------------------------------------- diff --git a/CHANGES.txt b/CHANGES.txt index 859401a..cadd3cc 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -17,6 +17,7 @@ * Reset min/max compaction threshold when creating size tiered compaction strategy (CASSANDRA-3666) * Don't ignore IOException during compaction (CASSANDRA-3655) + * Fix assertion error for CF with gc_grace=0 (CASSANDRA-3579) Merged from 0.8: * avoid logging (harmless) exception when GC takes < 1ms (CASSANDRA-3656) * prevent new nodes from thinking down nodes are up forever (CASSANDRA-3626) http://git-wip-us.apache.org/repos/asf/cassandra/blob/89e54091/src/java/org/apache/cassandra/db/AbstractColumnContainer.java ---------------------------------------------------------------------- diff --git a/src/java/org/apache/cassandra/db/AbstractColumnContainer.java b/src/java/org/apache/cassandra/db/AbstractColumnContainer.java index c184f9f..87e75eb 100644 --- a/src/java/org/apache/cassandra/db/AbstractColumnContainer.java +++ b/src/java/org/apache/cassandra/db/AbstractColumnContainer.java @@ -102,7 +102,7 @@ public abstract class AbstractColumnContainer implements IColumnContainer, IIter // Stop if either we don't need to change the deletion info (it's // still MIN_VALUE or not expired yet) or we've succesfully changed it if (current.localDeletionTime == Integer.MIN_VALUE - || current.localDeletionTime > gcBefore + || current.localDeletionTime >= gcBefore || deletionInfo.compareAndSet(current, new DeletionInfo())) { break; http://git-wip-us.apache.org/repos/asf/cassandra/blob/89e54091/src/java/org/apache/cassandra/db/ColumnFamilyStore.java ---------------------------------------------------------------------- diff --git a/src/java/org/apache/cassandra/db/ColumnFamilyStore.java b/src/java/org/apache/cassandra/db/ColumnFamilyStore.java index 983adc9..151564c 100644 --- a/src/java/org/apache/cassandra/db/ColumnFamilyStore.java +++ b/src/java/org/apache/cassandra/db/ColumnFamilyStore.java @@ -823,9 +823,7 @@ public class ColumnFamilyStore implements ColumnFamilyStoreMBean public static ColumnFamily removeDeletedCF(ColumnFamily cf, int gcBefore) { - // in case of a timestamp tie, tombstones get priority over non-tombstones. - // (we want this to be deterministic to avoid confusion.) - if (cf.getColumnCount() == 0 && cf.getLocalDeletionTime() <= gcBefore) + if (cf.getColumnCount() == 0 && cf.getLocalDeletionTime() < gcBefore) return null; cf.maybeResetDeletionTimes(gcBefore); @@ -867,7 +865,10 @@ public class ColumnFamilyStore implements ColumnFamilyStoreMBean // remove columns if // (a) the column itself is tombstoned or // (b) the CF is tombstoned and the column is not newer than it - if ((c.isMarkedForDelete() && c.getLocalDeletionTime() <= gcBefore) + // + // Note that we need the inequality below for case (a) to be strict for expiring columns + // to work correctly -- see the comment in ExpiringColumn.isMarkedForDelete(). + if ((c.isMarkedForDelete() && c.getLocalDeletionTime() < gcBefore) || c.timestamp() <= cf.getMarkedForDeleteAt()) { iter.remove(); @@ -893,12 +894,12 @@ public class ColumnFamilyStore implements ColumnFamilyStoreMBean // (a) the subcolumn itself is tombstoned or // (b) the supercolumn is tombstoned and the subcolumn is not newer than it if (subColumn.timestamp() <= minTimestamp - || (subColumn.isMarkedForDelete() && subColumn.getLocalDeletionTime() <= gcBefore)) + || (subColumn.isMarkedForDelete() && subColumn.getLocalDeletionTime() < gcBefore)) { subIter.remove(); } } - if (c.getSubColumns().isEmpty() && c.getLocalDeletionTime() <= gcBefore) + if (c.getSubColumns().isEmpty() && c.getLocalDeletionTime() < gcBefore) { iter.remove(); } http://git-wip-us.apache.org/repos/asf/cassandra/blob/89e54091/src/java/org/apache/cassandra/db/ExpiringColumn.java ---------------------------------------------------------------------- diff --git a/src/java/org/apache/cassandra/db/ExpiringColumn.java b/src/java/org/apache/cassandra/db/ExpiringColumn.java index cb79590..2340ef0 100644 --- a/src/java/org/apache/cassandra/db/ExpiringColumn.java +++ b/src/java/org/apache/cassandra/db/ExpiringColumn.java @@ -77,6 +77,18 @@ public class ExpiringColumn extends Column @Override public boolean isMarkedForDelete() { + /* + * For compaction, we need to ensure that at all time if + * localExpirationTime < gcbefore, then isMarkedForDelete() == true + * (otherwise LCR may expire columns between it's two phases compaction -- see #3579). + * + * Since during compaction we know that at all time, gcbefore <= now + * (the = is important in case where gc_grace=0), it follows that to + * ensure the propery above we need for isMarkedForDelete to be + * now > localExpirationTime (*not* now >= localExpiration). For the + * same reason, compaction should consider a column tomstoned if + * getLocalDeletionTime() < gcbefore, *not* if getLocalDeletionTime() <= gcbefore. + */ return (int) (System.currentTimeMillis() / 1000 ) > localExpirationTime; } http://git-wip-us.apache.org/repos/asf/cassandra/blob/89e54091/src/java/org/apache/cassandra/db/filter/QueryFilter.java ---------------------------------------------------------------------- diff --git a/src/java/org/apache/cassandra/db/filter/QueryFilter.java b/src/java/org/apache/cassandra/db/filter/QueryFilter.java index c27d636..3405c46 100644 --- a/src/java/org/apache/cassandra/db/filter/QueryFilter.java +++ b/src/java/org/apache/cassandra/db/filter/QueryFilter.java @@ -155,7 +155,7 @@ public class QueryFilter // and if its container is deleted, the column must be changed more recently than the container tombstone (2) // (since otherwise, the only thing repair cares about is the container tombstone) long maxChange = column.mostRecentLiveChangeAt(); - return (!column.isMarkedForDelete() || column.getLocalDeletionTime() > gcBefore || maxChange > column.getMarkedForDeleteAt()) // (1) + return (!column.isMarkedForDelete() || column.getLocalDeletionTime() >= gcBefore || maxChange > column.getMarkedForDeleteAt()) // (1) && (!container.isMarkedForDelete() || maxChange > container.getMarkedForDeleteAt()); // (2) }
