Merge branch 'cassandra-1.1' into cassandra-1.2 Conflicts: src/java/org/apache/cassandra/db/compaction/CompactionTask.java
Project: http://git-wip-us.apache.org/repos/asf/cassandra/repo Commit: http://git-wip-us.apache.org/repos/asf/cassandra/commit/8d9510ae Tree: http://git-wip-us.apache.org/repos/asf/cassandra/tree/8d9510ae Diff: http://git-wip-us.apache.org/repos/asf/cassandra/diff/8d9510ae Branch: refs/heads/trunk Commit: 8d9510ae40b22b5874fd16259c5c3c8a184ccb8d Parents: 18a1a4b 3cc8656 Author: Yuki Morishita <yu...@apache.org> Authored: Fri Jan 11 12:56:24 2013 -0600 Committer: Yuki Morishita <yu...@apache.org> Committed: Fri Jan 11 12:56:24 2013 -0600 ---------------------------------------------------------------------- CHANGES.txt | 1 + .../org/apache/cassandra/db/ColumnFamilyStore.java | 35 ++++++++++----- .../cassandra/db/compaction/CompactionTask.java | 33 ++++++++------ 3 files changed, 44 insertions(+), 25 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/cassandra/blob/8d9510ae/CHANGES.txt ---------------------------------------------------------------------- diff --cc CHANGES.txt index b34a97c,6c76151..3dfc756 --- a/CHANGES.txt +++ b/CHANGES.txt @@@ -41,158 -9,30 +41,159 @@@ Merged from 1.1 * fix user defined compaction to run against 1.1 data directory (CASSANDRA-5118) * Fix CQL3 BATCH authorization caching (CASSANDRA-5145) * fix get_count returns incorrect value with TTL (CASSANDRA-5099) + * better handling for amid compaction failure (CASSANDRA-5137) -1.1.8 - * reset getRangeSlice filter after finishing a row for get_paged_slice - (CASSANDRA-4919) +1.2.0 + * Disallow counters in collections (CASSANDRA-5082) + * cqlsh: add unit tests (CASSANDRA-3920) + * fix default bloom_filter_fp_chance for LeveledCompactionStrategy (CASSANDRA-5093) +Merged from 1.1: + * add validation for get_range_slices with start_key and end_token (CASSANDRA-5089) + + +1.2.0-rc2 + * fix nodetool ownership display with vnodes (CASSANDRA-5065) + * cqlsh: add DESCRIBE KEYSPACES command (CASSANDRA-5060) + * Fix potential 
infinite loop when reloading CFS (CASSANDRA-5064) + * Fix SimpleAuthorizer example (CASSANDRA-5072) + * cqlsh: force CL.ONE for tracing and system.schema* queries (CASSANDRA-5070) + * Includes cassandra-shuffle in the debian package (CASSANDRA-5058) +Merged from 1.1: + * fix multithreaded compaction deadlock (CASSANDRA-4492) * fix temporarily missing schema after upgrade from pre-1.1.5 (CASSANDRA-5061) + * Fix ALTER TABLE overriding compression options with defaults + (CASSANDRA-4996, 5066) + * fix specifying and altering crc_check_chance (CASSANDRA-5053) + * fix Murmur3Partitioner ownership% calculation (CASSANDRA-5076) + * Don't expire columns sooner than they should in 2ndary indexes (CASSANDRA-5079) + + +1.2-rc1 + * rename rpc_timeout settings to request_timeout (CASSANDRA-5027) + * add BF with 0.1 FP to LCS by default (CASSANDRA-5029) + * Fix preparing insert queries (CASSANDRA-5016) + * Fix preparing queries with counter increment (CASSANDRA-5022) + * Fix preparing updates with collections (CASSANDRA-5017) + * Don't generate UUID based on other node address (CASSANDRA-5002) + * Fix message when trying to alter a clustering key type (CASSANDRA-5012) + * Update IAuthenticator to match the new IAuthorizer (CASSANDRA-5003) + * Fix inserting only a key in CQL3 (CASSANDRA-5040) + * Fix CQL3 token() function when used with strings (CASSANDRA-5050) +Merged from 1.1: * reduce log spam from invalid counter shards (CASSANDRA-5026) * Improve schema propagation performance (CASSANDRA-5025) - * Fall back to old describe_splits if d_s_ex is not available (CASSANDRA-4803) - * Improve error reporting when streaming ranges fail (CASSANDRA-5009) + * Fix for IndexHelper.IndexFor throws OOB Exception (CASSANDRA-5030) + * cqlsh: make it possible to describe thrift CFs (CASSANDRA-4827) * cqlsh: fix timestamp formatting on some platforms (CASSANDRA-5046) - * Fix ALTER TABLE overriding compression options with defaults (CASSANDRA-4996, 5066) - * Avoid error opening data file on 
startup (CASSANDRA-4984) - * Fix wrong index_options in cli 'show schema' (CASSANDRA-5008) - * Allow overriding number of available processor (CASSANDRA-4790) -1.1.7 - * cqlsh: improve COPY FROM performance (CASSANDRA-4921) +1.2-beta3 + * make consistency level configurable in cqlsh (CASSANDRA-4829) + * fix cqlsh rendering of blob fields (CASSANDRA-4970) + * fix cqlsh DESCRIBE command (CASSANDRA-4913) + * save truncation position in system table (CASSANDRA-4906) + * Move CompressionMetadata off-heap (CASSANDRA-4937) + * allow CLI to GET cql3 columnfamily data (CASSANDRA-4924) + * Fix rare race condition in getExpireTimeForEndpoint (CASSANDRA-4402) + * acquire references to overlapping sstables during compaction so bloom filter + doesn't get free'd prematurely (CASSANDRA-4934) + * Don't share slice query filter in CQL3 SelectStatement (CASSANDRA-4928) + * Separate tracing from Log4J (CASSANDRA-4861) + * Exclude gcable tombstones from merkle-tree computation (CASSANDRA-4905) + * Better printing of AbstractBounds for tracing (CASSANDRA-4931) + * Optimize mostRecentTombstone check in CC.collectAllData (CASSANDRA-4883) + * Change stream session ID to UUID to avoid collision from same node (CASSANDRA-4813) + * Use Stats.db when bulk loading if present (CASSANDRA-4957) + * Skip repair on system_trace and keyspaces with RF=1 (CASSANDRA-4956) + * (cql3) Remove arbitrary SELECT limit (CASSANDRA-4918) + * Correctly handle prepared operation on collections (CASSANDRA-4945) + * Fix CQL3 LIMIT (CASSANDRA-4877) + * Fix Stress for CQL3 (CASSANDRA-4979) + * Remove cassandra specific exceptions from JMX interface (CASSANDRA-4893) + * (CQL3) Force using ALLOW FILTERING on potentially inefficient queries (CASSANDRA-4915) + * (cql3) Fix adding column when the table has collections (CASSANDRA-4982) + * (cql3) Fix allowing collections with compact storage (CASSANDRA-4990) + * (cql3) Refuse ttl/writetime function on collections (CASSANDRA-4992) + * Replace IAuthority with new IAuthorizer 
(CASSANDRA-4874) + * cqlsh: fix KEY pseudocolumn escaping when describing Thrift tables + in CQL3 mode (CASSANDRA-4955) * add basic authentication support for Pig CassandraStorage (CASSANDRA-3042) * fix CQL2 ALTER TABLE compaction_strategy_class altering (CASSANDRA-4965) +Merged from 1.1: + * Fall back to old describe_splits if d_s_ex is not available (CASSANDRA-4803) + * Improve error reporting when streaming ranges fail (CASSANDRA-5009) + * Fix cqlsh timestamp formatting of timezone info (CASSANDRA-4746) + * Fix assertion failure with leveled compaction (CASSANDRA-4799) + * Check for null end_token in get_range_slice (CASSANDRA-4804) + * Remove all remnants of removed nodes (CASSANDRA-4840) + * Add auto-reloading of the log4j file in debian package (CASSANDRA-4855) + * Fix estimated row cache entry size (CASSANDRA-4860) + * reset getRangeSlice filter after finishing a row for get_paged_slice + (CASSANDRA-4919) * expunge row cache post-truncate (CASSANDRA-4940) - * remove IAuthority2 (CASSANDRA-4875) + * Allow static CF definition with compact storage (CASSANDRA-4910) + * Fix endless loop/compaction of schema_* CFs due to broken timestamps (CASSANDRA-4880) + * Fix 'wrong class type' assertion in CounterColumn (CASSANDRA-4976) + + +1.2-beta2 + * fp rate of 1.0 disables BF entirely; LCS defaults to 1.0 (CASSANDRA-4876) + * off-heap bloom filters for row keys (CASSANDRA-4865) + * add extension point for sstable components (CASSANDRA-4049) + * improve tracing output (CASSANDRA-4852, 4862) + * make TRACE verb droppable (CASSANDRA-4672) + * fix BulkLoader recognition of CQL3 columnfamilies (CASSANDRA-4755) + * Sort commitlog segments for replay by id instead of mtime (CASSANDRA-4793) + * Make hint delivery asynchronous (CASSANDRA-4761) + * Pluggable Thrift transport factories for CLI and cqlsh (CASSANDRA-4609, 4610) + * cassandra-cli: allow Double value type to be inserted to a column (CASSANDRA-4661) + * Add ability to use custom TServerFactory implementations 
(CASSANDRA-4608) + * optimize batchlog flushing to skip successful batches (CASSANDRA-4667) + * include metadata for system keyspace itself in schema tables (CASSANDRA-4416) + * add check to PropertyFileSnitch to verify presence of location for + local node (CASSANDRA-4728) + * add PBSPredictor consistency modeler (CASSANDRA-4261) + * remove vestiges of Thrift unframed mode (CASSANDRA-4729) + * optimize single-row PK lookups (CASSANDRA-4710) + * adjust blockFor calculation to account for pending ranges due to node + movement (CASSANDRA-833) + * Change CQL version to 3.0.0 and stop accepting 3.0.0-beta1 (CASSANDRA-4649) + * (CQL3) Make prepared statement global instead of per connection + (CASSANDRA-4449) + * Fix scrubbing of CQL3 created tables (CASSANDRA-4685) + * (CQL3) Fix validation when using counter and regular columns in the same + table (CASSANDRA-4706) + * Fix bug starting Cassandra with simple authentication (CASSANDRA-4648) + * Add support for batchlog in CQL3 (CASSANDRA-4545, 4738) + * Add support for multiple column family outputs in CFOF (CASSANDRA-4208) + * Support repairing only the local DC nodes (CASSANDRA-4747) + * Use rpc_address for binary protocol and change default port (CASSANDRA-4751) + * Fix use of collections in prepared statements (CASSANDRA-4739) + * Store more information into peers table (CASSANDRA-4351, 4814) + * Configurable bucket size for size tiered compaction (CASSANDRA-4704) + * Run leveled compaction in parallel (CASSANDRA-4310) + * Fix potential NPE during CFS reload (CASSANDRA-4786) + * Composite indexes may miss results (CASSANDRA-4796) + * Move consistency level to the protocol level (CASSANDRA-4734, 4824) + * Fix Subcolumn slice ends not respected (CASSANDRA-4826) + * Fix Assertion error in cql3 select (CASSANDRA-4783) + * Fix list prepend logic (CQL3) (CASSANDRA-4835) + * Add booleans as literals in CQL3 (CASSANDRA-4776) + * Allow renaming PK columns in CQL3 (CASSANDRA-4822) + * Fix binary protocol NEW_NODE event 
(CASSANDRA-4679) + * Fix potential infinite loop in tombstone compaction (CASSANDRA-4781) + * Remove system tables accounting from schema (CASSANDRA-4850) + * (cql3) Force provided columns in clustering key order in + 'CLUSTERING ORDER BY' (CASSANDRA-4881) + * Fix composite index bug (CASSANDRA-4884) + * Fix short read protection for CQL3 (CASSANDRA-4882) + * Add tracing support to the binary protocol (CASSANDRA-4699) + * (cql3) Don't allow prepared marker inside collections (CASSANDRA-4890) + * Re-allow order by on non-selected columns (CASSANDRA-4645) + * Bug when composite index is created in a table having collections (CASSANDRA-4909) + * log index scan subject in CompositesSearcher (CASSANDRA-4904) +Merged from 1.1: * add get[Row|Key]CacheEntries to CacheServiceMBean (CASSANDRA-4859) * fix get_paged_slice to wrap to next row correctly (CASSANDRA-4816) * fix indexing empty column values (CASSANDRA-4832) http://git-wip-us.apache.org/repos/asf/cassandra/blob/8d9510ae/src/java/org/apache/cassandra/db/ColumnFamilyStore.java ---------------------------------------------------------------------- diff --cc src/java/org/apache/cassandra/db/ColumnFamilyStore.java index 44585ce,2781800..b01545a --- a/src/java/org/apache/cassandra/db/ColumnFamilyStore.java +++ b/src/java/org/apache/cassandra/db/ColumnFamilyStore.java @@@ -261,28 -238,41 +261,41 @@@ public class ColumnFamilyStore implemen if (loadSSTables) { - Directories.SSTableLister sstableFiles = directories.sstableLister().skipCompacted(true).skipTemporary(true); - Collection<SSTableReader> sstables = SSTableReader.batchOpen(sstableFiles.list().entrySet(), savedKeys, data, metadata, this.partitioner); + Directories.SSTableLister sstableFiles = directories.sstableLister().skipTemporary(true); + Collection<SSTableReader> sstables = SSTableReader.batchOpen(sstableFiles.list().entrySet(), metadata, this.partitioner); - // Filter non-compacted sstables, remove compacted ones - Set<Integer> compactedSSTables = new 
HashSet<Integer>(); - for (SSTableReader sstable : sstables) - compactedSSTables.addAll(sstable.getAncestors()); - - Set<SSTableReader> liveSSTables = new HashSet<SSTableReader>(); - for (SSTableReader sstable : sstables) + if (metadata.getDefaultValidator().isCommutative()) + { + // Filter non-compacted sstables, remove compacted ones + Set<Integer> compactedSSTables = new HashSet<Integer>(); + for (SSTableReader sstable : sstables) + compactedSSTables.addAll(sstable.getAncestors()); + + Set<SSTableReader> liveSSTables = new HashSet<SSTableReader>(); + for (SSTableReader sstable : sstables) + { + if (compactedSSTables.contains(sstable.descriptor.generation)) + { + logger.info("{} is already compacted and will be removed.", sstable); + sstable.markCompacted(); // we need to mark as compacted to be deleted + sstable.releaseReference(); // this amount to deleting the sstable + } + else + { + liveSSTables.add(sstable); + } + } + data.addInitialSSTables(liveSSTables); + } + else { - if (compactedSSTables.contains(sstable.descriptor.generation)) - sstable.releaseReference(); // this amount to deleting the sstable - else - liveSSTables.add(sstable); + data.addInitialSSTables(sstables); } - data.addInitialSSTables(liveSSTables); } + if (caching == Caching.ALL || caching == Caching.KEYS_ONLY) + CacheService.instance.keyCache.loadSaved(this); + // compaction strategy should be created after the CFS has been prepared this.compactionStrategy = metadata.createCompactionStrategyInstance(this); http://git-wip-us.apache.org/repos/asf/cassandra/blob/8d9510ae/src/java/org/apache/cassandra/db/compaction/CompactionTask.java ---------------------------------------------------------------------- diff --cc src/java/org/apache/cassandra/db/compaction/CompactionTask.java index 7168280,714e308..bd1d58d --- a/src/java/org/apache/cassandra/db/compaction/CompactionTask.java +++ b/src/java/org/apache/cassandra/db/compaction/CompactionTask.java @@@ -35,12 -31,10 +35,10 @@@ import 
org.slf4j.LoggerFactory import org.apache.cassandra.config.DatabaseDescriptor; import org.apache.cassandra.db.ColumnFamilyStore; import org.apache.cassandra.db.DecoratedKey; +import org.apache.cassandra.db.RowIndexEntry; import org.apache.cassandra.db.compaction.CompactionManager.CompactionExecutorStatsCollector; - import org.apache.cassandra.io.sstable.SSTable; - import org.apache.cassandra.io.sstable.SSTableReader; - import org.apache.cassandra.io.sstable.SSTableWriter; + import org.apache.cassandra.io.sstable.*; import org.apache.cassandra.utils.CloseableIterator; -import org.apache.cassandra.utils.FBUtilities; public class CompactionTask extends AbstractCompactionTask { @@@ -137,7 -125,7 +135,7 @@@ // we can't preheat until the tracker has been set. This doesn't happen until we tell the cfs to // replace the old entries. Track entries to preheat here until then. - Map<SSTableReader, Map<DecoratedKey, RowIndexEntry>> cachedKeyMap = new HashMap<SSTableReader, Map<DecoratedKey, RowIndexEntry>>(); - Map<Descriptor, Map<DecoratedKey, Long>> cachedKeyMap = new HashMap<Descriptor, Map<DecoratedKey, Long>>(); ++ Map<Descriptor, Map<DecoratedKey, RowIndexEntry>> cachedKeyMap = new HashMap<Descriptor, Map<DecoratedKey, RowIndexEntry>>(); Collection<SSTableReader> sstables = new ArrayList<SSTableReader>(); Collection<SSTableWriter> writers = new ArrayList<SSTableWriter>(); @@@ -188,34 -171,34 +186,42 @@@ } } } - if (!nni.hasNext() || newSSTableSegmentThresholdReached(writer)) + + if (newSSTableSegmentThresholdReached(writer)) { - SSTableReader sstable = writer.closeAndOpenReader(getMaxDataAge(toCompact)); - cachedKeyMap.put(sstable, cachedKeys); - sstables.add(sstable); + // tmp = false because later we want to query it with descriptor from SSTableReader + cachedKeyMap.put(writer.descriptor.asTemporary(false), cachedKeys); - if (nni.hasNext()) - { - writer = cfs.createCompactionWriter(keysPerSSTable, compactionFileLocation, toCompact); - writers.add(writer); - cachedKeys 
= new HashMap<DecoratedKey, Long>(); - } + writer = cfs.createCompactionWriter(keysPerSSTable, cfs.directories.getLocationForDisk(dataDirectory), toCompact); + writers.add(writer); + cachedKeys = new HashMap<DecoratedKey, RowIndexEntry>(); } } + if (writer.getFilePointer() > 0) + { - SSTableReader sstable = writer.closeAndOpenReader(getMaxDataAge(toCompact)); - cachedKeyMap.put(sstable, cachedKeys); - sstables.add(sstable); ++ cachedKeyMap.put(writer.descriptor.asTemporary(false), cachedKeys); + } + else + { + writer.abort(); ++ writers.remove(writer); + } ++ + long maxAge = getMaxDataAge(toCompact); + for (SSTableWriter completedWriter : writers) + sstables.add(completedWriter.closeAndOpenReader(maxAge)); } - catch (Exception e) + catch (Throwable t) { for (SSTableWriter writer : writers) writer.abort(); + // also remove already completed SSTables + for (SSTableReader sstable : sstables) + { + sstable.markCompacted(); + sstable.releaseReference(); + } - throw FBUtilities.unchecked(e); + throw Throwables.propagate(t); } finally { @@@ -236,14 -209,12 +242,13 @@@ cfs.replaceCompactedSSTables(toCompact, sstables, compactionType); // TODO: this doesn't belong here, it should be part of the reader to load when the tracker is wired up - for (Map.Entry<SSTableReader, Map<DecoratedKey, RowIndexEntry>> ssTableReaderMapEntry : cachedKeyMap.entrySet()) + for (SSTableReader sstable : sstables) { - SSTableReader key = ssTableReaderMapEntry.getKey(); - for (Map.Entry<DecoratedKey, RowIndexEntry> entry : ssTableReaderMapEntry.getValue().entrySet()) - key.cacheKey(entry.getKey(), entry.getValue()); - for (Map.Entry<DecoratedKey, Long> entry : cachedKeyMap.get(sstable.descriptor).entrySet()) ++ for (Map.Entry<DecoratedKey, RowIndexEntry> entry : cachedKeyMap.get(sstable.descriptor).entrySet()) + sstable.cacheKey(entry.getKey(), entry.getValue()); } + // log a bunch of statistics about the result long dTime = System.currentTimeMillis() - startTime; long startsize = 
SSTable.getTotalBytes(toCompact); long endsize = SSTable.getTotalBytes(sstables);