LCS defaults to AlwaysPresentFilter
patch by jbellis; reviewed by vijay for CASSANDRA-4876


Project: http://git-wip-us.apache.org/repos/asf/cassandra/repo
Commit: http://git-wip-us.apache.org/repos/asf/cassandra/commit/37cf942c
Tree: http://git-wip-us.apache.org/repos/asf/cassandra/tree/37cf942c
Diff: http://git-wip-us.apache.org/repos/asf/cassandra/diff/37cf942c

Branch: refs/heads/trunk
Commit: 37cf942c1128dfff392ce2947cc4c652a957c318
Parents: a15500e
Author: Jonathan Ellis <[email protected]>
Authored: Sat Oct 27 10:40:44 2012 -0700
Committer: Jonathan Ellis <[email protected]>
Committed: Thu Nov 1 11:29:19 2012 -0500

----------------------------------------------------------------------
 CHANGES.txt                                        |    1 +
 NEWS.txt                                           |    9 ++-
 .../org/apache/cassandra/config/CFMetaData.java    |    9 ++-
 .../cassandra/cql/CreateColumnFamilyStatement.java |    2 +-
 src/java/org/apache/cassandra/db/ColumnIndex.java  |    3 +-
 .../apache/cassandra/io/sstable/SSTableReader.java |    2 +-
 .../apache/cassandra/io/sstable/SSTableWriter.java |   50 ++++++++-------
 .../apache/cassandra/thrift/CassandraServer.java   |    2 -
 .../cassandra/utils/AlwaysPresentFilter.java       |   17 +++++
 .../org/apache/cassandra/utils/FilterFactory.java  |    7 +--
 10 files changed, 62 insertions(+), 40 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/cassandra/blob/37cf942c/CHANGES.txt
----------------------------------------------------------------------
diff --git a/CHANGES.txt b/CHANGES.txt
index 30f68f3..75416d5 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -1,4 +1,5 @@
 1.2-beta2
+ * fp rate of 1.0 disables BF entirely; LCS defaults to 1.0 (CASSANDRA-4876)
  * off-heap bloom filters for row keys (CASSANDRA_4865)
  * add extension point for sstable components (CASSANDRA-4049)
  * improve tracing output (CASSANDRA-4852, 4862)

http://git-wip-us.apache.org/repos/asf/cassandra/blob/37cf942c/NEWS.txt
----------------------------------------------------------------------
diff --git a/NEWS.txt b/NEWS.txt
index 3736a2f..24474cc 100644
--- a/NEWS.txt
+++ b/NEWS.txt
@@ -30,10 +30,11 @@ Upgrading
       upgrade to prevent overcount during commitlog replay (see
       CASSANDRA-4782).  For non-counter uses, drain is not required
       but is a good practice to minimize restart time.
-    - Server clock synchronization is more important in 1.2; replicas
-      will use a coordinator-provided timestamp to determine when a
-      request has timed out and is thus not worth proceeding with.
-      Using a service like NTP is strongly recommended.
+    - Tables using LeveledCompactionStrategy will default to not
+      creating a row-level bloom filter.  The default in older versions
+      of Cassandra differs; you should manually set the false positive
+      rate to 1.0 (to disable) or 0.01 (to enable, if you make many
+      requests for rows that do not exist).
     - The hints schema was changed from 1.1 to 1.2. Cassandra automatically
       snapshots and then truncates the hints column family as part of
       starting up 1.2 for the first time.  Additionally, upgraded nodes

http://git-wip-us.apache.org/repos/asf/cassandra/blob/37cf942c/src/java/org/apache/cassandra/config/CFMetaData.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/config/CFMetaData.java 
b/src/java/org/apache/cassandra/config/CFMetaData.java
index 4ea683e..5319fb0 100644
--- a/src/java/org/apache/cassandra/config/CFMetaData.java
+++ b/src/java/org/apache/cassandra/config/CFMetaData.java
@@ -38,6 +38,8 @@ import org.apache.cassandra.cql3.UntypedResultSet;
 import org.apache.cassandra.cql3.statements.CreateColumnFamilyStatement;
 import org.apache.cassandra.db.*;
 import org.apache.cassandra.db.compaction.AbstractCompactionStrategy;
+import org.apache.cassandra.db.compaction.LeveledCompactionStrategy;
+import org.apache.cassandra.db.compaction.SizeTieredCompactionStrategy;
 import org.apache.cassandra.db.index.SecondaryIndex;
 import org.apache.cassandra.db.marshal.*;
 import org.apache.cassandra.exceptions.ConfigurationException;
@@ -73,7 +75,6 @@ public final class CFMetaData
     public final static String DEFAULT_COMPACTION_STRATEGY_CLASS = 
"SizeTieredCompactionStrategy";
     public final static ByteBuffer DEFAULT_KEY_NAME = 
ByteBufferUtil.bytes("KEY");
     public final static Caching DEFAULT_CACHING_STRATEGY = Caching.KEYS_ONLY;
-    public final static Double DEFAULT_BF_FP_CHANCE = 0.01;
 
     // Note that this is the default only for user created tables
     public final static String DEFAULT_COMPRESSOR = 
SnappyCompressor.isAvailable() ? SnappyCompressor.class.getCanonicalName() : 
null;
@@ -550,9 +551,11 @@ public final class CFMetaData
         return superColumnName == null ? comparator : subcolumnComparator;
     }
 
-    public Double getBloomFilterFpChance()
+    public double getBloomFilterFpChance()
     {
-        return bloomFilterFpChance;
+        return bloomFilterFpChance == null
+               ? compactionStrategyClass == LeveledCompactionStrategy.class ? 
1.0 : 0.01
+               : bloomFilterFpChance;
     }
 
     public Caching getCaching()

http://git-wip-us.apache.org/repos/asf/cassandra/blob/37cf942c/src/java/org/apache/cassandra/cql/CreateColumnFamilyStatement.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/cql/CreateColumnFamilyStatement.java 
b/src/java/org/apache/cassandra/cql/CreateColumnFamilyStatement.java
index e0b69a4..fed856f 100644
--- a/src/java/org/apache/cassandra/cql/CreateColumnFamilyStatement.java
+++ b/src/java/org/apache/cassandra/cql/CreateColumnFamilyStatement.java
@@ -192,7 +192,7 @@ public class CreateColumnFamilyStatement
                    
.compactionStrategyOptions(cfProps.compactionStrategyOptions)
                    
.compressionParameters(CompressionParameters.create(cfProps.compressionParameters))
                    
.caching(CFMetaData.Caching.fromString(getPropertyString(CFPropDefs.KW_CACHING, 
CFMetaData.DEFAULT_CACHING_STRATEGY.toString())))
-                   
.bloomFilterFpChance(getPropertyDouble(CFPropDefs.KW_BF_FP_CHANCE, 
CFMetaData.DEFAULT_BF_FP_CHANCE));
+                   
.bloomFilterFpChance(getPropertyDouble(CFPropDefs.KW_BF_FP_CHANCE, null));
 
             // CQL2 can have null keyAliases
             if (keyAlias != null)

http://git-wip-us.apache.org/repos/asf/cassandra/blob/37cf942c/src/java/org/apache/cassandra/db/ColumnIndex.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/db/ColumnIndex.java 
b/src/java/org/apache/cassandra/db/ColumnIndex.java
index 55244f6..761b004 100644
--- a/src/java/org/apache/cassandra/db/ColumnIndex.java
+++ b/src/java/org/apache/cassandra/db/ColumnIndex.java
@@ -24,6 +24,7 @@ import java.util.*;
 
 import org.apache.cassandra.config.DatabaseDescriptor;
 import org.apache.cassandra.io.sstable.IndexHelper;
+import org.apache.cassandra.utils.AlwaysPresentFilter;
 import org.apache.cassandra.utils.IFilter;
 import org.apache.cassandra.utils.FilterFactory;
 
@@ -32,7 +33,7 @@ public class ColumnIndex
     public final List<IndexHelper.IndexInfo> columnsIndex;
     public final IFilter bloomFilter;
 
-    private static final ColumnIndex EMPTY = new 
ColumnIndex(Collections.<IndexHelper.IndexInfo>emptyList(), 
FilterFactory.emptyFilter());
+    private static final ColumnIndex EMPTY = new 
ColumnIndex(Collections.<IndexHelper.IndexInfo>emptyList(), new 
AlwaysPresentFilter());
 
     private ColumnIndex(int estimatedColumnCount)
     {

http://git-wip-us.apache.org/repos/asf/cassandra/blob/37cf942c/src/java/org/apache/cassandra/io/sstable/SSTableReader.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/io/sstable/SSTableReader.java 
b/src/java/org/apache/cassandra/io/sstable/SSTableReader.java
index da0e457..bc37d67 100644
--- a/src/java/org/apache/cassandra/io/sstable/SSTableReader.java
+++ b/src/java/org/apache/cassandra/io/sstable/SSTableReader.java
@@ -322,7 +322,7 @@ public class SSTableReader extends SSTable
     {
         if (!components.contains(Component.FILTER))
         {
-            bf = FilterFactory.emptyFilter();
+            bf = new AlwaysPresentFilter();
             return;
         }
 

http://git-wip-us.apache.org/repos/asf/cassandra/blob/37cf942c/src/java/org/apache/cassandra/io/sstable/SSTableWriter.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/io/sstable/SSTableWriter.java 
b/src/java/org/apache/cassandra/io/sstable/SSTableWriter.java
index ad0421a..3e4656a 100644
--- a/src/java/org/apache/cassandra/io/sstable/SSTableWriter.java
+++ b/src/java/org/apache/cassandra/io/sstable/SSTableWriter.java
@@ -62,11 +62,13 @@ public class SSTableWriter extends SSTable
     private static Set<Component> components(CFMetaData metadata)
     {
         Set<Component> components = new 
HashSet<Component>(Arrays.asList(Component.DATA,
-                                                                 
Component.FILTER,
-                                                                 
Component.PRIMARY_INDEX,
-                                                                 
Component.STATS,
-                                                                 
Component.SUMMARY,
-                                                                 
Component.TOC));
+                                                                         
Component.PRIMARY_INDEX,
+                                                                         
Component.STATS,
+                                                                         
Component.SUMMARY,
+                                                                         
Component.TOC));
+
+        if (metadata.getBloomFilterFpChance() < 1.0)
+            components.add(Component.FILTER);
 
         if (metadata.compressionParameters().sstableCompressor != null)
             components.add(Component.COMPRESSION_INFO);
@@ -438,15 +440,14 @@ public class SSTableWriter extends SSTable
             builder = 
SegmentedFile.getBuilder(DatabaseDescriptor.getIndexAccessMode());
             summary = new IndexSummary(keyCount);
 
-            Double fpChance = metadata.getBloomFilterFpChance();
-            if (fpChance != null && fpChance == 0)
+            double fpChance = metadata.getBloomFilterFpChance();
+            if (fpChance == 0)
             {
                 // paranoia -- we've had bugs in the thrift <-> avro <-> CfDef 
dance before, let's not let that break things
                 logger.error("Bloom filter FP chance of zero isn't supposed to 
happen");
-                fpChance = null;
+                fpChance = 0.01;
             }
-            bf = fpChance == null ? FilterFactory.getFilter(keyCount, 15, true)
-                                  : FilterFactory.getFilter(keyCount, 
fpChance, true);
+            bf = FilterFactory.getFilter(keyCount, fpChance, true);
         }
 
         public void append(DecoratedKey key, RowIndexEntry indexEntry)
@@ -475,20 +476,23 @@ public class SSTableWriter extends SSTable
          */
         public void close()
         {
-            String path = descriptor.filenameFor(SSTable.COMPONENT_FILTER);
-            try
-            {
-                // bloom filter
-                FileOutputStream fos = new FileOutputStream(path);
-                DataOutputStream stream = new DataOutputStream(fos);
-                FilterFactory.serialize(bf, stream, 
descriptor.version.filterType);
-                stream.flush();
-                fos.getFD().sync();
-                stream.close();
-            }
-            catch (IOException e)
+            if (components.contains(Component.FILTER))
             {
-                throw new FSWriteError(e, path);
+                String path = descriptor.filenameFor(SSTable.COMPONENT_FILTER);
+                try
+                {
+                    // bloom filter
+                    FileOutputStream fos = new FileOutputStream(path);
+                    DataOutputStream stream = new DataOutputStream(fos);
+                    FilterFactory.serialize(bf, stream, 
descriptor.version.filterType);
+                    stream.flush();
+                    fos.getFD().sync();
+                    stream.close();
+                }
+                catch (IOException e)
+                {
+                    throw new FSWriteError(e, path);
+                }
             }
 
             // index

http://git-wip-us.apache.org/repos/asf/cassandra/blob/37cf942c/src/java/org/apache/cassandra/thrift/CassandraServer.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/thrift/CassandraServer.java 
b/src/java/org/apache/cassandra/thrift/CassandraServer.java
index 980b740..393b5f1 100644
--- a/src/java/org/apache/cassandra/thrift/CassandraServer.java
+++ b/src/java/org/apache/cassandra/thrift/CassandraServer.java
@@ -1305,8 +1305,6 @@ public class CassandraServer implements Cassandra.Iface
             state().hasColumnFamilyAccess(cf_def.name, Permission.CREATE);
             cf_def.unsetId(); // explicitly ignore any id set by client 
(Hector likes to set zero)
             CFMetaData cfm = CFMetaData.fromThrift(cf_def);
-            if (cfm.getBloomFilterFpChance() == null)
-                cfm.bloomFilterFpChance(CFMetaData.DEFAULT_BF_FP_CHANCE);
             cfm.addDefaultIndexNames();
             MigrationManager.announceNewColumnFamily(cfm);
             return Schema.instance.getVersion().toString();

http://git-wip-us.apache.org/repos/asf/cassandra/blob/37cf942c/src/java/org/apache/cassandra/utils/AlwaysPresentFilter.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/utils/AlwaysPresentFilter.java 
b/src/java/org/apache/cassandra/utils/AlwaysPresentFilter.java
new file mode 100644
index 0000000..723536d
--- /dev/null
+++ b/src/java/org/apache/cassandra/utils/AlwaysPresentFilter.java
@@ -0,0 +1,17 @@
+package org.apache.cassandra.utils;
+
+import java.nio.ByteBuffer;
+
+public class AlwaysPresentFilter implements IFilter // stand-in IFilter used where no real bloom filter is built or loaded
+{
+    public boolean isPresent(ByteBuffer key)
+    {
+        return true; // never rules a key out, so every lookup proceeds as if the key may exist
+    }
+
+    public void add(ByteBuffer key) { } // no-op: nothing is recorded because isPresent() is unconditional
+
+    public void clear() { } // no-op: there is no state to reset
+
+    public void close() throws java.io.IOException { } // fully qualified: java.io.IOException is not imported in this file
+}

http://git-wip-us.apache.org/repos/asf/cassandra/blob/37cf942c/src/java/org/apache/cassandra/utils/FilterFactory.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/utils/FilterFactory.java 
b/src/java/org/apache/cassandra/utils/FilterFactory.java
index a947a2f..88c8973 100644
--- a/src/java/org/apache/cassandra/utils/FilterFactory.java
+++ b/src/java/org/apache/cassandra/utils/FilterFactory.java
@@ -130,6 +130,8 @@ public class FilterFactory
     static IFilter getFilter(long numElements, double maxFalsePosProbability, 
Type type, boolean offheap)
     {
         assert maxFalsePosProbability <= 1.0 : "Invalid probability";
+        if (maxFalsePosProbability == 1.0)
+            return new AlwaysPresentFilter();
         int bucketsPerElement = 
BloomCalculations.maxBucketsPerElement(numElements);
         BloomCalculations.BloomSpecification spec = 
BloomCalculations.computeBloomSpec(bucketsPerElement, maxFalsePosProbability);
         return createFilter(spec.K, numElements, spec.bucketsPerElement, type, 
offheap);
@@ -147,9 +149,4 @@ public class FilterFactory
               return new Murmur3BloomFilter(hash, bitset);
         }
     }
-
-    public static BloomFilter emptyFilter()
-    {
-        return new Murmur3BloomFilter(0, new OpenBitSet(BITSET_EXCESS));
-    }
 }

Reply via email to