optimize single-row PK lookups; patch by Daniel Norberg; reviewed by jbellis for CASSANDRA-4710
Project: http://git-wip-us.apache.org/repos/asf/cassandra/repo Commit: http://git-wip-us.apache.org/repos/asf/cassandra/commit/9df0ed97 Tree: http://git-wip-us.apache.org/repos/asf/cassandra/tree/9df0ed97 Diff: http://git-wip-us.apache.org/repos/asf/cassandra/diff/9df0ed97 Branch: refs/heads/trunk Commit: 9df0ed97c64847df8264d6c71ea7986b2275aff8 Parents: 2a0389e Author: Jonathan Ellis <[email protected]> Authored: Tue Sep 25 16:36:15 2012 -0500 Committer: Jonathan Ellis <[email protected]> Committed: Tue Sep 25 16:40:24 2012 -0500 ---------------------------------------------------------------------- CHANGES.txt | 1 + .../apache/cassandra/io/sstable/SSTableReader.java | 48 ++++++++++----- 2 files changed, 34 insertions(+), 15 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/cassandra/blob/9df0ed97/CHANGES.txt ---------------------------------------------------------------------- diff --git a/CHANGES.txt b/CHANGES.txt index c53e556..7314f99 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -1,4 +1,5 @@ 1.2-beta2 + * optimize single-row PK lookups (CASSANDRA-4710) * adjust blockFor calculation to account for pending ranges due to node movement (CASSANDRA-833) * Change CQL version to 3.0.0 and stop accepting 3.0.0-beta1 (CASSANDRA-4649) http://git-wip-us.apache.org/repos/asf/cassandra/blob/9df0ed97/src/java/org/apache/cassandra/io/sstable/SSTableReader.java ---------------------------------------------------------------------- diff --git a/src/java/org/apache/cassandra/io/sstable/SSTableReader.java b/src/java/org/apache/cassandra/io/sstable/SSTableReader.java index ae49928..7b38bce 100644 --- a/src/java/org/apache/cassandra/io/sstable/SSTableReader.java +++ b/src/java/org/apache/cassandra/io/sstable/SSTableReader.java @@ -750,23 +750,46 @@ public class SSTableReader extends SSTable return null; } - // scan the on-disk index, starting at the nearest sampled position + // scan the on-disk index, 
starting at the nearest sampled position. + // The check against IndexInterval is to exit the loop in the EQ case when the key looked for is not present + // (bloom filter false positive). + int i = 0; Iterator<FileDataInput> segments = ifile.iterator(sampledPosition, INDEX_FILE_BUFFER_BYTES); - while (segments.hasNext()) + while (segments.hasNext() && i < DatabaseDescriptor.getIndexInterval()) { FileDataInput input = segments.next(); try { - while (!input.isEOF()) + while (!input.isEOF() && i < DatabaseDescriptor.getIndexInterval()) { - // read key & data position from index entry - DecoratedKey indexDecoratedKey = decodeKey(partitioner, descriptor, ByteBufferUtil.readWithShortLength(input)); - int comparison = indexDecoratedKey.compareTo(key); - int v = op.apply(comparison); - if (v == 0) + i++; + + ByteBuffer indexKey = ByteBufferUtil.readWithShortLength(input); + + boolean opSatisfied; // did we find an appropriate position for the op requested + boolean exactMatch; // is the current position an exact match for the key, suitable for caching + + // Compare raw keys if possible for performance, otherwise compare decorated keys. 
+ if (op == Operator.EQ) + { + opSatisfied = exactMatch = indexKey.equals(((DecoratedKey) key).key); + } + else + { + DecoratedKey indexDecoratedKey = decodeKey(partitioner, descriptor, indexKey); + int comparison = indexDecoratedKey.compareTo(key); + int v = op.apply(comparison); + opSatisfied = (v == 0); + exactMatch = (comparison == 0); + if (v < 0) + return null; + } + + if (opSatisfied) { + // read data position from index entry RowIndexEntry indexEntry = RowIndexEntry.serializer.deserialize(input, descriptor.version); - if (comparison == 0 && keyCache != null && keyCache.getCapacity() > 0 && updateCacheAndStats) + if (exactMatch && keyCache != null && keyCache.getCapacity() > 0 && updateCacheAndStats) { assert key instanceof DecoratedKey; // key can be == to the index key only if it's a true row key DecoratedKey decoratedKey = (DecoratedKey)key; @@ -777,12 +800,7 @@ public class SSTableReader extends SSTable bloomFilterTracker.addTruePositive(); return indexEntry; } - if (v < 0) - { - if (op == Operator.EQ && updateCacheAndStats) - bloomFilterTracker.addFalsePositive(); - return null; - } + RowIndexEntry.serializer.skip(input, descriptor.version); } }
