LCS defaults to AlwaysPresentFilter. Patch by jbellis; reviewed by vijay for CASSANDRA-4876.
Project: http://git-wip-us.apache.org/repos/asf/cassandra/repo Commit: http://git-wip-us.apache.org/repos/asf/cassandra/commit/37cf942c Tree: http://git-wip-us.apache.org/repos/asf/cassandra/tree/37cf942c Diff: http://git-wip-us.apache.org/repos/asf/cassandra/diff/37cf942c Branch: refs/heads/trunk Commit: 37cf942c1128dfff392ce2947cc4c652a957c318 Parents: a15500e Author: Jonathan Ellis <[email protected]> Authored: Sat Oct 27 10:40:44 2012 -0700 Committer: Jonathan Ellis <[email protected]> Committed: Thu Nov 1 11:29:19 2012 -0500 ---------------------------------------------------------------------- CHANGES.txt | 1 + NEWS.txt | 9 ++- .../org/apache/cassandra/config/CFMetaData.java | 9 ++- .../cassandra/cql/CreateColumnFamilyStatement.java | 2 +- src/java/org/apache/cassandra/db/ColumnIndex.java | 3 +- .../apache/cassandra/io/sstable/SSTableReader.java | 2 +- .../apache/cassandra/io/sstable/SSTableWriter.java | 50 ++++++++------- .../apache/cassandra/thrift/CassandraServer.java | 2 - .../cassandra/utils/AlwaysPresentFilter.java | 17 +++++ .../org/apache/cassandra/utils/FilterFactory.java | 7 +-- 10 files changed, 62 insertions(+), 40 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/cassandra/blob/37cf942c/CHANGES.txt ---------------------------------------------------------------------- diff --git a/CHANGES.txt b/CHANGES.txt index 30f68f3..75416d5 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -1,4 +1,5 @@ 1.2-beta2 + * fp rate of 1.0 disables BF entirely; LCS defaults to 1.0 (CASSANDRA-4876) * off-heap bloom filters for row keys (CASSANDRA_4865) * add extension point for sstable components (CASSANDRA-4049) * improve tracing output (CASSANDRA-4852, 4862) http://git-wip-us.apache.org/repos/asf/cassandra/blob/37cf942c/NEWS.txt ---------------------------------------------------------------------- diff --git a/NEWS.txt b/NEWS.txt index 3736a2f..24474cc 100644 --- a/NEWS.txt +++ b/NEWS.txt @@ 
-30,10 +30,11 @@ Upgrading upgrade to prevent overcount during commitlog replay (see CASSANDRA-4782). For non-counter uses, drain is not required but is a good practice to minimize restart time. - - Server clock synchronization is more important in 1.2; replicas - will use a coordinator-provided timestamp to determine when a - request has timed out and is thus not worth proceeding with. - Using a service like NTP is strongly recommended. + - Tables using LeveledCompactionStrategy will default to not + creating a row-level bloom filter. The default in older versions + of Cassandra differs; you should manually set the false positive + rate to 1.0 (to disable) or 0.01 (to enable, if you make many + requests for rows that do not exist). - The hints schema was changed from 1.1 to 1.2. Cassandra automatically snapshots and then truncates the hints column family as part of starting up 1.2 for the first time. Additionally, upgraded nodes http://git-wip-us.apache.org/repos/asf/cassandra/blob/37cf942c/src/java/org/apache/cassandra/config/CFMetaData.java ---------------------------------------------------------------------- diff --git a/src/java/org/apache/cassandra/config/CFMetaData.java b/src/java/org/apache/cassandra/config/CFMetaData.java index 4ea683e..5319fb0 100644 --- a/src/java/org/apache/cassandra/config/CFMetaData.java +++ b/src/java/org/apache/cassandra/config/CFMetaData.java @@ -38,6 +38,8 @@ import org.apache.cassandra.cql3.UntypedResultSet; import org.apache.cassandra.cql3.statements.CreateColumnFamilyStatement; import org.apache.cassandra.db.*; import org.apache.cassandra.db.compaction.AbstractCompactionStrategy; +import org.apache.cassandra.db.compaction.LeveledCompactionStrategy; +import org.apache.cassandra.db.compaction.SizeTieredCompactionStrategy; import org.apache.cassandra.db.index.SecondaryIndex; import org.apache.cassandra.db.marshal.*; import org.apache.cassandra.exceptions.ConfigurationException; @@ -73,7 +75,6 @@ public final class CFMetaData 
public final static String DEFAULT_COMPACTION_STRATEGY_CLASS = "SizeTieredCompactionStrategy"; public final static ByteBuffer DEFAULT_KEY_NAME = ByteBufferUtil.bytes("KEY"); public final static Caching DEFAULT_CACHING_STRATEGY = Caching.KEYS_ONLY; - public final static Double DEFAULT_BF_FP_CHANCE = 0.01; // Note that this is the default only for user created tables public final static String DEFAULT_COMPRESSOR = SnappyCompressor.isAvailable() ? SnappyCompressor.class.getCanonicalName() : null; @@ -550,9 +551,11 @@ public final class CFMetaData return superColumnName == null ? comparator : subcolumnComparator; } - public Double getBloomFilterFpChance() + public double getBloomFilterFpChance() { - return bloomFilterFpChance; + return bloomFilterFpChance == null + ? compactionStrategyClass == LeveledCompactionStrategy.class ? 1.0 : 0.01 + : bloomFilterFpChance; } public Caching getCaching() http://git-wip-us.apache.org/repos/asf/cassandra/blob/37cf942c/src/java/org/apache/cassandra/cql/CreateColumnFamilyStatement.java ---------------------------------------------------------------------- diff --git a/src/java/org/apache/cassandra/cql/CreateColumnFamilyStatement.java b/src/java/org/apache/cassandra/cql/CreateColumnFamilyStatement.java index e0b69a4..fed856f 100644 --- a/src/java/org/apache/cassandra/cql/CreateColumnFamilyStatement.java +++ b/src/java/org/apache/cassandra/cql/CreateColumnFamilyStatement.java @@ -192,7 +192,7 @@ public class CreateColumnFamilyStatement .compactionStrategyOptions(cfProps.compactionStrategyOptions) .compressionParameters(CompressionParameters.create(cfProps.compressionParameters)) .caching(CFMetaData.Caching.fromString(getPropertyString(CFPropDefs.KW_CACHING, CFMetaData.DEFAULT_CACHING_STRATEGY.toString()))) - .bloomFilterFpChance(getPropertyDouble(CFPropDefs.KW_BF_FP_CHANCE, CFMetaData.DEFAULT_BF_FP_CHANCE)); + .bloomFilterFpChance(getPropertyDouble(CFPropDefs.KW_BF_FP_CHANCE, null)); // CQL2 can have null keyAliases if (keyAlias != null) 
http://git-wip-us.apache.org/repos/asf/cassandra/blob/37cf942c/src/java/org/apache/cassandra/db/ColumnIndex.java ---------------------------------------------------------------------- diff --git a/src/java/org/apache/cassandra/db/ColumnIndex.java b/src/java/org/apache/cassandra/db/ColumnIndex.java index 55244f6..761b004 100644 --- a/src/java/org/apache/cassandra/db/ColumnIndex.java +++ b/src/java/org/apache/cassandra/db/ColumnIndex.java @@ -24,6 +24,7 @@ import java.util.*; import org.apache.cassandra.config.DatabaseDescriptor; import org.apache.cassandra.io.sstable.IndexHelper; +import org.apache.cassandra.utils.AlwaysPresentFilter; import org.apache.cassandra.utils.IFilter; import org.apache.cassandra.utils.FilterFactory; @@ -32,7 +33,7 @@ public class ColumnIndex public final List<IndexHelper.IndexInfo> columnsIndex; public final IFilter bloomFilter; - private static final ColumnIndex EMPTY = new ColumnIndex(Collections.<IndexHelper.IndexInfo>emptyList(), FilterFactory.emptyFilter()); + private static final ColumnIndex EMPTY = new ColumnIndex(Collections.<IndexHelper.IndexInfo>emptyList(), new AlwaysPresentFilter()); private ColumnIndex(int estimatedColumnCount) { http://git-wip-us.apache.org/repos/asf/cassandra/blob/37cf942c/src/java/org/apache/cassandra/io/sstable/SSTableReader.java ---------------------------------------------------------------------- diff --git a/src/java/org/apache/cassandra/io/sstable/SSTableReader.java b/src/java/org/apache/cassandra/io/sstable/SSTableReader.java index da0e457..bc37d67 100644 --- a/src/java/org/apache/cassandra/io/sstable/SSTableReader.java +++ b/src/java/org/apache/cassandra/io/sstable/SSTableReader.java @@ -322,7 +322,7 @@ public class SSTableReader extends SSTable { if (!components.contains(Component.FILTER)) { - bf = FilterFactory.emptyFilter(); + bf = new AlwaysPresentFilter(); return; } http://git-wip-us.apache.org/repos/asf/cassandra/blob/37cf942c/src/java/org/apache/cassandra/io/sstable/SSTableWriter.java 
---------------------------------------------------------------------- diff --git a/src/java/org/apache/cassandra/io/sstable/SSTableWriter.java b/src/java/org/apache/cassandra/io/sstable/SSTableWriter.java index ad0421a..3e4656a 100644 --- a/src/java/org/apache/cassandra/io/sstable/SSTableWriter.java +++ b/src/java/org/apache/cassandra/io/sstable/SSTableWriter.java @@ -62,11 +62,13 @@ public class SSTableWriter extends SSTable private static Set<Component> components(CFMetaData metadata) { Set<Component> components = new HashSet<Component>(Arrays.asList(Component.DATA, - Component.FILTER, - Component.PRIMARY_INDEX, - Component.STATS, - Component.SUMMARY, - Component.TOC)); + Component.PRIMARY_INDEX, + Component.STATS, + Component.SUMMARY, + Component.TOC)); + + if (metadata.getBloomFilterFpChance() < 1.0) + components.add(Component.FILTER); if (metadata.compressionParameters().sstableCompressor != null) components.add(Component.COMPRESSION_INFO); @@ -438,15 +440,14 @@ public class SSTableWriter extends SSTable builder = SegmentedFile.getBuilder(DatabaseDescriptor.getIndexAccessMode()); summary = new IndexSummary(keyCount); - Double fpChance = metadata.getBloomFilterFpChance(); - if (fpChance != null && fpChance == 0) + double fpChance = metadata.getBloomFilterFpChance(); + if (fpChance == 0) { // paranoia -- we've had bugs in the thrift <-> avro <-> CfDef dance before, let's not let that break things logger.error("Bloom filter FP chance of zero isn't supposed to happen"); - fpChance = null; + fpChance = 0.01; } - bf = fpChance == null ? 
FilterFactory.getFilter(keyCount, 15, true) - : FilterFactory.getFilter(keyCount, fpChance, true); + bf = FilterFactory.getFilter(keyCount, fpChance, true); } public void append(DecoratedKey key, RowIndexEntry indexEntry) @@ -475,20 +476,23 @@ public class SSTableWriter extends SSTable */ public void close() { - String path = descriptor.filenameFor(SSTable.COMPONENT_FILTER); - try - { - // bloom filter - FileOutputStream fos = new FileOutputStream(path); - DataOutputStream stream = new DataOutputStream(fos); - FilterFactory.serialize(bf, stream, descriptor.version.filterType); - stream.flush(); - fos.getFD().sync(); - stream.close(); - } - catch (IOException e) + if (components.contains(Component.FILTER)) { - throw new FSWriteError(e, path); + String path = descriptor.filenameFor(SSTable.COMPONENT_FILTER); + try + { + // bloom filter + FileOutputStream fos = new FileOutputStream(path); + DataOutputStream stream = new DataOutputStream(fos); + FilterFactory.serialize(bf, stream, descriptor.version.filterType); + stream.flush(); + fos.getFD().sync(); + stream.close(); + } + catch (IOException e) + { + throw new FSWriteError(e, path); + } } // index http://git-wip-us.apache.org/repos/asf/cassandra/blob/37cf942c/src/java/org/apache/cassandra/thrift/CassandraServer.java ---------------------------------------------------------------------- diff --git a/src/java/org/apache/cassandra/thrift/CassandraServer.java b/src/java/org/apache/cassandra/thrift/CassandraServer.java index 980b740..393b5f1 100644 --- a/src/java/org/apache/cassandra/thrift/CassandraServer.java +++ b/src/java/org/apache/cassandra/thrift/CassandraServer.java @@ -1305,8 +1305,6 @@ public class CassandraServer implements Cassandra.Iface state().hasColumnFamilyAccess(cf_def.name, Permission.CREATE); cf_def.unsetId(); // explicitly ignore any id set by client (Hector likes to set zero) CFMetaData cfm = CFMetaData.fromThrift(cf_def); - if (cfm.getBloomFilterFpChance() == null) - 
cfm.bloomFilterFpChance(CFMetaData.DEFAULT_BF_FP_CHANCE); cfm.addDefaultIndexNames(); MigrationManager.announceNewColumnFamily(cfm); return Schema.instance.getVersion().toString(); http://git-wip-us.apache.org/repos/asf/cassandra/blob/37cf942c/src/java/org/apache/cassandra/utils/AlwaysPresentFilter.java ---------------------------------------------------------------------- diff --git a/src/java/org/apache/cassandra/utils/AlwaysPresentFilter.java b/src/java/org/apache/cassandra/utils/AlwaysPresentFilter.java new file mode 100644 index 0000000..723536d --- /dev/null +++ b/src/java/org/apache/cassandra/utils/AlwaysPresentFilter.java @@ -0,0 +1,17 @@ +package org.apache.cassandra.utils; + +import java.nio.ByteBuffer; + +public class AlwaysPresentFilter implements IFilter +{ + public boolean isPresent(ByteBuffer key) + { + return true; + } + + public void add(ByteBuffer key) { } + + public void clear() { } + + public void close() throws IOException { } +} http://git-wip-us.apache.org/repos/asf/cassandra/blob/37cf942c/src/java/org/apache/cassandra/utils/FilterFactory.java ---------------------------------------------------------------------- diff --git a/src/java/org/apache/cassandra/utils/FilterFactory.java b/src/java/org/apache/cassandra/utils/FilterFactory.java index a947a2f..88c8973 100644 --- a/src/java/org/apache/cassandra/utils/FilterFactory.java +++ b/src/java/org/apache/cassandra/utils/FilterFactory.java @@ -130,6 +130,8 @@ public class FilterFactory static IFilter getFilter(long numElements, double maxFalsePosProbability, Type type, boolean offheap) { assert maxFalsePosProbability <= 1.0 : "Invalid probability"; + if (maxFalsePosProbability == 1.0) + return new AlwaysPresentFilter(); int bucketsPerElement = BloomCalculations.maxBucketsPerElement(numElements); BloomCalculations.BloomSpecification spec = BloomCalculations.computeBloomSpec(bucketsPerElement, maxFalsePosProbability); return createFilter(spec.K, numElements, spec.bucketsPerElement, type, offheap); 
@@ -147,9 +149,4 @@ public class FilterFactory return new Murmur3BloomFilter(hash, bitset); } } - - public static BloomFilter emptyFilter() - { - return new Murmur3BloomFilter(0, new OpenBitSet(BITSET_EXCESS)); - } }
