fix race between cleanup and flush on secondary index CFSes patch by yukim and jbellis for CASSANDRA-3712
Project: http://git-wip-us.apache.org/repos/asf/cassandra/repo Commit: http://git-wip-us.apache.org/repos/asf/cassandra/commit/9ca84786 Tree: http://git-wip-us.apache.org/repos/asf/cassandra/tree/9ca84786 Diff: http://git-wip-us.apache.org/repos/asf/cassandra/diff/9ca84786 Branch: refs/heads/cassandra-1.0 Commit: 9ca84786b5be14b0a881268e3649b697f7f893b9 Parents: 4ab6fad Author: Jonathan Ellis <jbel...@apache.org> Authored: Mon Feb 13 16:30:34 2012 -0600 Committer: Jonathan Ellis <jbel...@apache.org> Committed: Mon Feb 13 16:30:34 2012 -0600 ---------------------------------------------------------------------- CHANGES.txt | 1 + src/java/org/apache/cassandra/db/Table.java | 2 +- .../cassandra/db/compaction/CompactionManager.java | 24 ++++++++++----- 3 files changed, 18 insertions(+), 9 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/cassandra/blob/9ca84786/CHANGES.txt ---------------------------------------------------------------------- diff --git a/CHANGES.txt b/CHANGES.txt index 0875da5..500b9fb 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -1,4 +1,5 @@ 1.0.8 + * fix race between cleanup and flush on secondary index CFSes (CASSANDRA-3712) * avoid including non-queried nodes in rangeslice read repair (CASSANDRA-3843) * Only snapshot CF being compacted for snapshot_before_compaction http://git-wip-us.apache.org/repos/asf/cassandra/blob/9ca84786/src/java/org/apache/cassandra/db/Table.java ---------------------------------------------------------------------- diff --git a/src/java/org/apache/cassandra/db/Table.java b/src/java/org/apache/cassandra/db/Table.java index 0168f0c..f954fbc 100644 --- a/src/java/org/apache/cassandra/db/Table.java +++ b/src/java/org/apache/cassandra/db/Table.java @@ -71,7 +71,7 @@ public class Table * * (Enabling fairness in the RRWL is observed to decrease throughput, so we leave it off.) */ - static final ReentrantReadWriteLock switchLock = new ReentrantReadWriteLock(); + public static final ReentrantReadWriteLock switchLock = new ReentrantReadWriteLock(); // It is possible to call Table.open without a running daemon, so it makes sense to ensure // proper directories here as well as in CassandraDaemon. http://git-wip-us.apache.org/repos/asf/cassandra/blob/9ca84786/src/java/org/apache/cassandra/db/compaction/CompactionManager.java ---------------------------------------------------------------------- diff --git a/src/java/org/apache/cassandra/db/compaction/CompactionManager.java b/src/java/org/apache/cassandra/db/compaction/CompactionManager.java index caaf6d2..97e5067 100644 --- a/src/java/org/apache/cassandra/db/compaction/CompactionManager.java +++ b/src/java/org/apache/cassandra/db/compaction/CompactionManager.java @@ -729,14 +729,13 @@ public class CompactionManager implements CompactionManagerMBean } else { - cfs.invalidateCachedRow(row.getKey()); - + if (!indexedColumns.isEmpty() || isCommutative) { if (indexedColumnsInRow != null) indexedColumnsInRow.clear(); - + while (row.hasNext()) { IColumn column = row.next(); @@ -746,13 +745,24 @@ public class CompactionManager implements CompactionManagerMBean { if (indexedColumnsInRow == null) indexedColumnsInRow = new ArrayList<IColumn>(); - + indexedColumnsInRow.add(column); } } - + if (indexedColumnsInRow != null && !indexedColumnsInRow.isEmpty()) - cfs.indexManager.deleteFromIndexes(row.getKey(), indexedColumnsInRow); + { + // acquire memtable lock here because secondary index deletion may cause a race. See CASSANDRA-3712 + Table.switchLock.readLock().lock(); + try + { + cfs.indexManager.deleteFromIndexes(row.getKey(), indexedColumnsInRow); + } + finally + { + Table.switchLock.readLock().unlock(); + } + } } } } @@ -769,7 +779,6 @@ public class CompactionManager implements CompactionManagerMBean { scanner.close(); executor.finishCompaction(ci); - executor.finishCompaction(ci); } List<SSTableReader> results = new ArrayList<SSTableReader>(); @@ -787,7 +796,6 @@ public class CompactionManager implements CompactionManagerMBean // flush to ensure we don't lose the tombstones on a restart, since they are not commitlog'd cfs.indexManager.flushIndexesBlocking(); - cfs.replaceCompactedSSTables(Arrays.asList(sstable), results); }