Author: jbellis
Date: Mon Dec 26 05:21:06 2011
New Revision: 1224679
URL: http://svn.apache.org/viewvc?rev=1224679&view=rev
Log:
Optimize key count estimation when opening sstable on startup
patch by Melvin Wang and jbellis; reviewed by slebresne for CASSANDRA-2988
Modified:
cassandra/trunk/CHANGES.txt
cassandra/trunk/src/java/org/apache/cassandra/io/sstable/SSTable.java
cassandra/trunk/src/java/org/apache/cassandra/io/sstable/SSTableReader.java
cassandra/trunk/src/java/org/apache/cassandra/utils/EstimatedHistogram.java
Modified: cassandra/trunk/CHANGES.txt
URL:
http://svn.apache.org/viewvc/cassandra/trunk/CHANGES.txt?rev=1224679&r1=1224678&r2=1224679&view=diff
==============================================================================
--- cassandra/trunk/CHANGES.txt (original)
+++ cassandra/trunk/CHANGES.txt Mon Dec 26 05:21:06 2011
@@ -1,4 +1,6 @@
1.1-dev
+ * Optimize key count estimation when opening sstable on startup
+ (CASSANDRA-2988)
* multi-dc replication optimization supporting CL > ONE (CASSANDRA-3577)
* add command to stop compactions (CASSANDRA-1740, 3566, 3582)
* multithreaded streaming (CASSANDRA-3494)
Modified: cassandra/trunk/src/java/org/apache/cassandra/io/sstable/SSTable.java
URL:
http://svn.apache.org/viewvc/cassandra/trunk/src/java/org/apache/cassandra/io/sstable/SSTable.java?rev=1224679&r1=1224678&r2=1224679&view=diff
==============================================================================
--- cassandra/trunk/src/java/org/apache/cassandra/io/sstable/SSTable.java (original)
+++ cassandra/trunk/src/java/org/apache/cassandra/io/sstable/SSTable.java Mon Dec 26 05:21:06 2011
@@ -207,25 +207,6 @@ public abstract class SSTable
return components;
}
- /** @return An estimate of the number of keys contained in the given data file. */
- static long estimateRowsFromData(Descriptor desc, RandomAccessReader dfile) throws IOException
- {
- // collect sizes for the first 1000 keys, or first 100 megabytes of data
- final int SAMPLES_CAP = 1000, BYTES_CAP = (int)Math.min(100000000, dfile.length());
- int keys = 0;
- long dataPosition = 0;
- while (dataPosition < BYTES_CAP && keys < SAMPLES_CAP)
- {
- dfile.seek(dataPosition);
- ByteBufferUtil.skipShortLength(dfile);
- long dataSize = SSTableReader.readRowSize(dfile, desc);
- dataPosition = dfile.getFilePointer() + dataSize;
- keys++;
- }
- dfile.seek(0);
- return dfile.length() / (dataPosition / keys);
- }
-
/** @return An estimate of the number of keys contained in the given index file. */
static long estimateRowsFromIndex(RandomAccessReader ifile) throws IOException
{
Modified:
cassandra/trunk/src/java/org/apache/cassandra/io/sstable/SSTableReader.java
URL:
http://svn.apache.org/viewvc/cassandra/trunk/src/java/org/apache/cassandra/io/sstable/SSTableReader.java?rev=1224679&r1=1224678&r2=1224679&view=diff
==============================================================================
--- cassandra/trunk/src/java/org/apache/cassandra/io/sstable/SSTableReader.java (original)
+++ cassandra/trunk/src/java/org/apache/cassandra/io/sstable/SSTableReader.java Mon Dec 26 05:21:06 2011
@@ -335,11 +335,14 @@ public class SSTableReader extends SSTab
try
{
long indexSize = input.length();
- long estimatedKeys = SSTable.estimateRowsFromIndex(input);
+ long histogramCount = sstableMetadata.estimatedRowSize.count();
+ long estimatedKeys = histogramCount > 0 && !sstableMetadata.estimatedRowSize.isOverflowed()
+ ? histogramCount
+ : SSTable.estimateRowsFromIndex(input); // statistics is supposed to be optional
indexSummary = new IndexSummary(estimatedKeys);
if (recreatebloom)
- // estimate key count based on index length
bf = LegacyBloomFilter.getFilter(estimatedKeys, 15);
+
while (true)
{
long indexPosition = input.getFilePointer();
Modified:
cassandra/trunk/src/java/org/apache/cassandra/utils/EstimatedHistogram.java
URL:
http://svn.apache.org/viewvc/cassandra/trunk/src/java/org/apache/cassandra/utils/EstimatedHistogram.java?rev=1224679&r1=1224678&r2=1224679&view=diff
==============================================================================
--- cassandra/trunk/src/java/org/apache/cassandra/utils/EstimatedHistogram.java (original)
+++ cassandra/trunk/src/java/org/apache/cassandra/utils/EstimatedHistogram.java Mon Dec 26 05:21:06 2011
@@ -183,6 +183,17 @@ public class EstimatedHistogram
}
/**
+ * @return the total number of values recorded, i.e. the sum of the counts in all buckets
+ */
+ public long count()
+ {
+ long sum = 0L;
+ for (int i = 0; i < buckets.length(); i++)
+ sum += buckets.get(i);
+ return sum;
+ }
+
+ /**
* @return true if this histogram has overflowed -- that is, a value larger than our largest bucket could bound was added
*/
public boolean isOverflowed()