Revert "Always record row-level tombstones in index component"

This reverts commit 798470e051af794b605cce28031b33b589cfc6d8.


Project: http://git-wip-us.apache.org/repos/asf/cassandra/repo
Commit: http://git-wip-us.apache.org/repos/asf/cassandra/commit/17034c08
Tree: http://git-wip-us.apache.org/repos/asf/cassandra/tree/17034c08
Diff: http://git-wip-us.apache.org/repos/asf/cassandra/diff/17034c08

Branch: refs/heads/trunk
Commit: 17034c0878659ea99e24b665c71e0b454f21809e
Parents: 798470e
Author: Jonathan Ellis <jbel...@apache.org>
Authored: Fri Apr 19 13:26:27 2013 -0500
Committer: Jonathan Ellis <jbel...@apache.org>
Committed: Fri Apr 19 13:27:45 2013 -0500

----------------------------------------------------------------------
 CHANGES.txt                                        |    1 -
 build.xml                                          |    2 +-
 src/java/org/apache/cassandra/db/ColumnIndex.java  |    5 +-
 .../org/apache/cassandra/db/RowIndexEntry.java     |   85 +++++-----
 .../db/columniterator/IndexedSliceReader.java      |   93 +++++-----
 .../db/columniterator/SSTableNamesIterator.java    |  130 ++++++++++-----
 .../db/columniterator/SimpleSliceReader.java       |   15 +-
 .../apache/cassandra/io/sstable/Descriptor.java    |    7 +-
 .../apache/cassandra/io/sstable/SSTableReader.java |    2 +-
 .../cassandra/utils/AlwaysPresentFilter.java       |    4 -
 .../org/apache/cassandra/utils/FilterFactory.java  |    2 +-
 .../apache/cassandra/db/RangeTombstoneTest.java    |    4 +
 12 files changed, 192 insertions(+), 158 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/cassandra/blob/17034c08/CHANGES.txt
----------------------------------------------------------------------
diff --git a/CHANGES.txt b/CHANGES.txt
index 7dbb62a..7fc93f4 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -1,5 +1,4 @@
 1.2.5
- * Fix promoted row-level tombstone writing (CASSANDRA-5486)
  * Include fatal errors in trace events (CASSANDRA-5447)
  * Ensure that PerRowSecondaryIndex is notified of row-level deletes
    (CASSANDRA-5445)

http://git-wip-us.apache.org/repos/asf/cassandra/blob/17034c08/build.xml
----------------------------------------------------------------------
diff --git a/build.xml b/build.xml
index 2ddd43d..3491431 100644
--- a/build.xml
+++ b/build.xml
@@ -519,7 +519,7 @@
       </artifact:pom>
     </target>
 
-    <target name="maven-ant-tasks-retrieve-build" depends="maven-declare-dependencies" unless="without.maven">
+    <target name="maven-ant-tasks-retrieve-build" depends="maven-declare-dependencies">
       <artifact:dependencies pomRefId="build-deps-pom"
                              filesetId="build-dependency-jars" 
                              sourcesFilesetId="build-dependency-sources" 

http://git-wip-us.apache.org/repos/asf/cassandra/blob/17034c08/src/java/org/apache/cassandra/db/ColumnIndex.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/db/ColumnIndex.java b/src/java/org/apache/cassandra/db/ColumnIndex.java
index e2ac3e4..bcd0eef 100644
--- a/src/java/org/apache/cassandra/db/ColumnIndex.java
+++ b/src/java/org/apache/cassandra/db/ColumnIndex.java
@@ -33,7 +33,7 @@ public class ColumnIndex
     public final List<IndexHelper.IndexInfo> columnsIndex;
     public final IFilter bloomFilter;
 
-    private static final ColumnIndex EMPTY = new ColumnIndex(Collections.<IndexHelper.IndexInfo>emptyList(), AlwaysPresentFilter.instance);
+    private static final ColumnIndex EMPTY = new ColumnIndex(Collections.<IndexHelper.IndexInfo>emptyList(), new AlwaysPresentFilter());
 
     private ColumnIndex(int estimatedColumnCount)
     {
@@ -42,9 +42,6 @@ public class ColumnIndex
 
     private ColumnIndex(List<IndexHelper.IndexInfo> columnsIndex, IFilter bloomFilter)
     {
-        assert columnsIndex != null;
-        assert bloomFilter != null;
-
         this.columnsIndex = columnsIndex;
         this.bloomFilter = bloomFilter;
     }

http://git-wip-us.apache.org/repos/asf/cassandra/blob/17034c08/src/java/org/apache/cassandra/db/RowIndexEntry.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/db/RowIndexEntry.java b/src/java/org/apache/cassandra/db/RowIndexEntry.java
index a60bf6d..a831498 100644
--- a/src/java/org/apache/cassandra/db/RowIndexEntry.java
+++ b/src/java/org/apache/cassandra/db/RowIndexEntry.java
@@ -28,7 +28,6 @@ import org.apache.cassandra.cache.IMeasurableMemory;
 import org.apache.cassandra.io.sstable.Descriptor;
 import org.apache.cassandra.io.sstable.IndexHelper;
 import org.apache.cassandra.io.util.FileUtils;
-import org.apache.cassandra.utils.AlwaysPresentFilter;
 import org.apache.cassandra.utils.IFilter;
 import org.apache.cassandra.utils.FilterFactory;
 import org.apache.cassandra.utils.ObjectSizes;
@@ -46,42 +45,35 @@ public class RowIndexEntry implements IMeasurableMemory
 
     public int serializedSize()
     {
-        return TypeSizes.NATIVE.sizeof(position) + promotedSize();
+        return TypeSizes.NATIVE.sizeof(position);
     }
 
-    public int promotedSize()
-    {
-        return 0;
-    }
-
-    // TODO only store DeletionTime
     public static RowIndexEntry create(long position, DeletionInfo deletionInfo, ColumnIndex index)
     {
-        assert deletionInfo != null;
-        assert index != null;
-
-        if (index.columnsIndex.size() > 1 || deletionInfo.getTopLevelDeletion() != DeletionTime.LIVE)
-            return new IndexedEntry(position,
-                                    deletionInfo,
-                                    index.columnsIndex.isEmpty() ? Collections.<IndexHelper.IndexInfo>emptyList() : index.columnsIndex,
-                                    index.columnsIndex.isEmpty() ? AlwaysPresentFilter.instance : index.bloomFilter);
+        if (index != null && index.columnsIndex != null && index.columnsIndex.size() > 1)
+            return new IndexedEntry(position, deletionInfo, index.columnsIndex, index.bloomFilter);
         else
             return new RowIndexEntry(position);
     }
 
+    public boolean isIndexed()
+    {
+        return !columnsIndex().isEmpty();
+    }
+
     public DeletionInfo deletionInfo()
     {
-        return DeletionInfo.LIVE;
+        throw new UnsupportedOperationException();
     }
 
     public List<IndexHelper.IndexInfo> columnsIndex()
     {
-        return Collections.emptyList();
+        return Collections.<IndexHelper.IndexInfo>emptyList();
     }
 
     public IFilter bloomFilter()
     {
-        return AlwaysPresentFilter.instance;
+        throw new UnsupportedOperationException();
     }
 
     public long memorySize()
@@ -95,15 +87,14 @@ public class RowIndexEntry implements IMeasurableMemory
         public void serialize(RowIndexEntry rie, DataOutput dos) throws IOException
         {
             dos.writeLong(rie.position);
-            if (!rie.columnsIndex().isEmpty() || rie.deletionInfo().getTopLevelDeletion() != DeletionTime.LIVE)
+            if (rie.isIndexed())
             {
-                dos.writeInt(rie.promotedSize());
+                dos.writeInt(((IndexedEntry)rie).serializedSize());
                 DeletionInfo.serializer().serializeForSSTable(rie.deletionInfo(), dos);
                 dos.writeInt(rie.columnsIndex().size());
                 for (IndexHelper.IndexInfo info : rie.columnsIndex())
                     info.serialize(dos);
-                if (!rie.columnsIndex().isEmpty())
-                    FilterFactory.serialize(rie.bloomFilter(), dos);
+                FilterFactory.serialize(rie.bloomFilter(), dos);
             }
             else
             {
@@ -111,24 +102,38 @@ public class RowIndexEntry implements IMeasurableMemory
             }
         }
 
-        public RowIndexEntry deserialize(DataInput dis, Descriptor.Version version) throws IOException
+        public RowIndexEntry deserializePositionOnly(DataInput dis, Descriptor.Version version) throws IOException
         {
             long position = dis.readLong();
-            if (!version.hasPromotedIndexes)
-                return new RowIndexEntry(position);
+            if (version.hasPromotedIndexes)
+            {
+                int size = dis.readInt();
+                if (size > 0)
+                    FileUtils.skipBytesFully(dis, size);
+            }
+            return new RowIndexEntry(position);
+        }
 
-            int size = dis.readInt();
-            if (size > 0)
+        public RowIndexEntry deserialize(DataInput dis, Descriptor.Version version) throws IOException
+        {
+            long position = dis.readLong();
+            if (version.hasPromotedIndexes)
             {
-                DeletionInfo delInfo = DeletionInfo.serializer().deserializeFromSSTable(dis, version);
-                int entries = dis.readInt();
-                List<IndexHelper.IndexInfo> columnsIndex = new ArrayList<IndexHelper.IndexInfo>(entries);
-                for (int i = 0; i < entries; i++)
-                    columnsIndex.add(IndexHelper.IndexInfo.deserialize(dis));
-                IFilter bf = entries == 0
-                             ? AlwaysPresentFilter.instance
-                             : FilterFactory.deserialize(dis, version.filterType, false);
-                return new IndexedEntry(position, delInfo, columnsIndex, bf);
+                int size = dis.readInt();
+                if (size > 0)
+                {
+                    DeletionInfo delInfo = DeletionInfo.serializer().deserializeFromSSTable(dis, version);
+                    int entries = dis.readInt();
+                    List<IndexHelper.IndexInfo> columnsIndex = new ArrayList<IndexHelper.IndexInfo>(entries);
+                    for (int i = 0; i < entries; i++)
+                        columnsIndex.add(IndexHelper.IndexInfo.deserialize(dis));
+                    IFilter bf = FilterFactory.deserialize(dis, version.filterType, false);
+                    return new IndexedEntry(position, delInfo, columnsIndex, bf);
+                }
+                else
+                {
+                    return new RowIndexEntry(position);
+                }
             }
             else
             {
@@ -166,7 +171,7 @@ public class RowIndexEntry implements IMeasurableMemory
         {
             super(position);
             assert deletionInfo != null;
-            assert columnsIndex != null;
+            assert columnsIndex != null && columnsIndex.size() > 1;
             this.deletionInfo = deletionInfo;
             this.columnsIndex = columnsIndex;
             this.bloomFilter = bloomFilter;
@@ -191,7 +196,7 @@ public class RowIndexEntry implements IMeasurableMemory
         }
 
         @Override
-        public int promotedSize()
+        public int serializedSize()
         {
             TypeSizes typeSizes = TypeSizes.NATIVE;
             long size = DeletionTime.serializer.serializedSize(deletionInfo.getTopLevelDeletion(), typeSizes);
@@ -199,7 +204,7 @@ public class RowIndexEntry implements IMeasurableMemory
             for (IndexHelper.IndexInfo info : columnsIndex)
                 size += info.serializedSize(typeSizes);
 
-            size += bloomFilter instanceof AlwaysPresentFilter ? 0 : FilterFactory.serializedSize(bloomFilter);
+            size += FilterFactory.serializedSize(bloomFilter);
             assert size <= Integer.MAX_VALUE;
             return (int)size;
         }

http://git-wip-us.apache.org/repos/asf/cassandra/blob/17034c08/src/java/org/apache/cassandra/db/columniterator/IndexedSliceReader.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/db/columniterator/IndexedSliceReader.java b/src/java/org/apache/cassandra/db/columniterator/IndexedSliceReader.java
index 61ae00e..7289ab0 100644
--- a/src/java/org/apache/cassandra/db/columniterator/IndexedSliceReader.java
+++ b/src/java/org/apache/cassandra/db/columniterator/IndexedSliceReader.java
@@ -65,7 +65,7 @@ class IndexedSliceReader extends AbstractIterator<OnDiskAtom> implements OnDiskA
      * finish (reverse start) elements. i.e. forward: [a,b],[d,e],[g,h] reverse: [h,g],[e,d],[b,a]. This reader also
      * assumes that validation has been performed in terms of intervals (no overlapping intervals).
      */
-    public IndexedSliceReader(SSTableReader sstable, RowIndexEntry rowEntry, FileDataInput input, ColumnSlice[] slices, boolean reversed)
+    public IndexedSliceReader(SSTableReader sstable, RowIndexEntry indexEntry, FileDataInput input, ColumnSlice[] slices, boolean reversed)
     {
         this.sstable = sstable;
         this.originalInput = input;
@@ -76,53 +76,34 @@ class IndexedSliceReader extends AbstractIterator<OnDiskAtom> implements OnDiskA
         try
         {
             Descriptor.Version version = sstable.descriptor.version;
-            emptyColumnFamily = ColumnFamily.create(sstable.metadata);
-
-            if (version.hasPromotedRowTombstones && !rowEntry.columnsIndex().isEmpty())
-            {
-                // skip the row header entirely
-                indexes = rowEntry.columnsIndex();
-                emptyColumnFamily.delete(rowEntry.deletionInfo());
-                fetcher = new IndexedBlockFetcher(rowEntry.position);
-                return;
-            }
-
-            // skip up to bloom filter where things get a bit more interesting
-            if (input == null)
-            {
-                file = sstable.getFileDataInput(rowEntry.position);
-            }
-            else
-            {
-                file = input;
-                file.seek(rowEntry.position);
-            }
-            this.sstable.decodeKey(ByteBufferUtil.readWithShortLength(file));
-            SSTableReader.readRowSize(file, this.sstable.descriptor);
-
-            // read the row header up to and including the row-level tombstones
             if (version.hasPromotedIndexes)
             {
-                indexes = rowEntry.columnsIndex();
-                emptyColumnFamily.delete(rowEntry.deletionInfo());
-            }
-            else
-            {
-                IndexHelper.skipSSTableBloomFilter(input, version);
-                indexes = IndexHelper.deserializeIndex(file);
-            }
-            emptyColumnFamily.delete(DeletionInfo.serializer().deserializeFromSSTable(file, version));
-
-            if (indexes.isEmpty())
-            {
-                fetcher = new SimpleBlockFetcher();
+                this.indexes = indexEntry.columnsIndex();
+                if (indexes.isEmpty())
+                {
+                    setToRowStart(sstable, indexEntry, input);
+                    this.emptyColumnFamily = ColumnFamily.create(sstable.metadata);
+                    emptyColumnFamily.delete(DeletionInfo.serializer().deserializeFromSSTable(file, version));
+                    fetcher = new SimpleBlockFetcher();
+                }
+                else
+                {
+                    this.emptyColumnFamily = ColumnFamily.create(sstable.metadata);
+                    emptyColumnFamily.delete(indexEntry.deletionInfo());
+                    fetcher = new IndexedBlockFetcher(indexEntry.position);
+                }
             }
             else
             {
-                // index offsets changed to be based against the row key start in 1.2
-                fetcher = version.hasPromotedIndexes
-                        ? new IndexedBlockFetcher(rowEntry.position)
-                        : new IndexedBlockFetcher(file.getFilePointer() + 4); // +4 to skip the int column count
+                setToRowStart(sstable, indexEntry, input);
+                IndexHelper.skipSSTableBloomFilter(file, version);
+                this.indexes = IndexHelper.deserializeIndex(file);
+                this.emptyColumnFamily = ColumnFamily.create(sstable.metadata);
+                emptyColumnFamily.delete(DeletionInfo.serializer().deserializeFromSSTable(file, version));
+                fetcher = indexes.isEmpty()
+                        ? new SimpleBlockFetcher()
+                        : new IndexedBlockFetcher(file.getFilePointer() + 4); // We still have the column count to
+                                                                              // skip to get the basePosition
             }
         }
         catch (IOException e)
@@ -132,6 +113,24 @@ class IndexedSliceReader extends AbstractIterator<OnDiskAtom> implements OnDiskA
         }
     }
 
+    /**
+     * Sets the seek position to the start of the row for column scanning.
+     */
+    private void setToRowStart(SSTableReader reader, RowIndexEntry indexEntry, FileDataInput input) throws IOException
+    {
+        if (input == null)
+        {
+            this.file = sstable.getFileDataInput(indexEntry.position);
+        }
+        else
+        {
+            this.file = input;
+            input.seek(indexEntry.position);
+        }
+        sstable.decodeKey(ByteBufferUtil.readWithShortLength(file));
+        SSTableReader.readRowSize(file, sstable.descriptor);
+    }
+
     public ColumnFamily getColumnFamily()
     {
         return emptyColumnFamily;
@@ -198,6 +197,8 @@ class IndexedSliceReader extends AbstractIterator<OnDiskAtom> implements OnDiskA
             return reversed ? slices[currentSliceIdx].start : slices[currentSliceIdx].finish;
         }
 
+        protected abstract boolean setNextSlice();
+
         protected abstract boolean fetchMoreData();
 
         protected boolean isColumnBeforeSliceStart(OnDiskAtom column)
@@ -247,7 +248,7 @@ class IndexedSliceReader extends AbstractIterator<OnDiskAtom> implements OnDiskA
             setNextSlice();
         }
 
-        private boolean setNextSlice()
+        protected boolean setNextSlice()
         {
             while (++currentSliceIdx < slices.length)
             {
@@ -349,7 +350,7 @@ class IndexedSliceReader extends AbstractIterator<OnDiskAtom> implements OnDiskA
             /* seek to the correct offset to the data, and calculate the data size */
             long positionToSeek = basePosition + currentIndex.offset;
 
-            // With 1.2 promoted indexes, our first seek in the data file will happen at this point
+            // With new promoted indexes, our first seek in the data file will happen at that point.
             if (file == null)
                 file = originalInput == null ? sstable.getFileDataInput(positionToSeek) : originalInput;
 
@@ -463,7 +464,7 @@ class IndexedSliceReader extends AbstractIterator<OnDiskAtom> implements OnDiskA
             }
         }
 
-        private boolean setNextSlice()
+        protected boolean setNextSlice()
         {
             if (reversed)
             {

http://git-wip-us.apache.org/repos/asf/cassandra/blob/17034c08/src/java/org/apache/cassandra/db/columniterator/SSTableNamesIterator.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/db/columniterator/SSTableNamesIterator.java b/src/java/org/apache/cassandra/db/columniterator/SSTableNamesIterator.java
index 326447f..da4631d 100644
--- a/src/java/org/apache/cassandra/db/columniterator/SSTableNamesIterator.java
+++ b/src/java/org/apache/cassandra/db/columniterator/SSTableNamesIterator.java
@@ -19,22 +19,25 @@ package org.apache.cassandra.db.columniterator;
 
 import java.io.IOException;
 import java.nio.ByteBuffer;
-import java.util.ArrayList;
-import java.util.Iterator;
-import java.util.List;
-import java.util.SortedSet;
+import java.util.*;
 
 import org.apache.cassandra.config.CFMetaData;
-import org.apache.cassandra.db.*;
+import org.apache.cassandra.db.ColumnFamily;
+import org.apache.cassandra.db.ColumnFamilySerializer;
+import org.apache.cassandra.db.DecoratedKey;
+import org.apache.cassandra.db.DeletionInfo;
+import org.apache.cassandra.db.IColumn;
+import org.apache.cassandra.db.RowIndexEntry;
+import org.apache.cassandra.db.OnDiskAtom;
 import org.apache.cassandra.db.marshal.AbstractType;
 import org.apache.cassandra.io.sstable.CorruptSSTableException;
-import org.apache.cassandra.io.sstable.Descriptor;
 import org.apache.cassandra.io.sstable.IndexHelper;
 import org.apache.cassandra.io.sstable.SSTableReader;
 import org.apache.cassandra.io.util.FileDataInput;
 import org.apache.cassandra.io.util.FileMark;
 import org.apache.cassandra.io.util.FileUtils;
 import org.apache.cassandra.utils.ByteBufferUtil;
+import org.apache.cassandra.utils.IFilter;
 
 public class SSTableNamesIterator extends SimpleAbstractColumnIterator implements ISSTableColumnIterator
 {
@@ -52,13 +55,13 @@ public class SSTableNamesIterator extends SimpleAbstractColumnIterator implement
         this.columns = columns;
         this.key = key;
 
-        RowIndexEntry rowEntry = sstable.getPosition(key, SSTableReader.Operator.EQ);
-        if (rowEntry == null)
+        RowIndexEntry indexEntry = sstable.getPosition(key, SSTableReader.Operator.EQ);
+        if (indexEntry == null)
             return;
 
         try
         {
-            read(sstable, null, rowEntry);
+            read(sstable, null, indexEntry);
         }
         catch (IOException e)
         {
@@ -72,7 +75,7 @@ public class SSTableNamesIterator extends SimpleAbstractColumnIterator implement
         }
     }
 
-    public SSTableNamesIterator(SSTableReader sstable, FileDataInput file, DecoratedKey key, SortedSet<ByteBuffer> columns, RowIndexEntry rowEntry)
+    public SSTableNamesIterator(SSTableReader sstable, FileDataInput file, DecoratedKey key, SortedSet<ByteBuffer> columns, RowIndexEntry indexEntry)
     {
         assert columns != null;
         this.sstable = sstable;
@@ -81,7 +84,7 @@ public class SSTableNamesIterator extends SimpleAbstractColumnIterator implement
 
         try
         {
-            read(sstable, file, rowEntry);
+            read(sstable, file, indexEntry);
         }
         catch (IOException e)
         {
@@ -101,66 +104,101 @@ public class SSTableNamesIterator extends SimpleAbstractColumnIterator implement
         return sstable;
     }
 
-    private void read(SSTableReader sstable, FileDataInput file, RowIndexEntry rowEntry)
-            throws IOException
+    private void read(SSTableReader sstable, FileDataInput file, RowIndexEntry indexEntry)
+    throws IOException
     {
+        IFilter bf;
         List<IndexHelper.IndexInfo> indexList;
 
-        Descriptor.Version version = sstable.descriptor.version;
-        cf = ColumnFamily.create(sstable.metadata);
-        List<OnDiskAtom> result = new ArrayList<OnDiskAtom>(columns.size());
-
-        if (version.hasPromotedRowTombstones && !rowEntry.columnsIndex().isEmpty())
+        // If the entry is not indexed or the index is not promoted, read from the row start
+        if (!indexEntry.isIndexed())
         {
-            // skip the row header entirely
-            cf.delete(rowEntry.deletionInfo());
+            if (file == null)
+                file = createFileDataInput(indexEntry.position);
+            else
+                file.seek(indexEntry.position);
 
-            readIndexedColumns(sstable.metadata, file, columns, rowEntry.columnsIndex(), rowEntry.position, result);
-            iter = result.iterator();
-            return;
+            DecoratedKey keyInDisk = SSTableReader.decodeKey(sstable.partitioner,
+                                                             sstable.descriptor,
+                                                             ByteBufferUtil.readWithShortLength(file));
+            assert keyInDisk.equals(key) : String.format("%s != %s in %s", keyInDisk, key, file.getPath());
+            SSTableReader.readRowSize(file, sstable.descriptor);
         }
 
-        if (file == null)
-            file = createFileDataInput(rowEntry.position);
-        else
-            file.seek(rowEntry.position);
-
-        DecoratedKey keyInDisk = SSTableReader.decodeKey(sstable.partitioner,
-                                                         sstable.descriptor,
-                                                         ByteBufferUtil.readWithShortLength(file));
-        assert keyInDisk.equals(key) : String.format("%s != %s in %s", keyInDisk, key, file.getPath());
-        SSTableReader.readRowSize(file, sstable.descriptor);
-
         if (sstable.descriptor.version.hasPromotedIndexes)
         {
-            indexList = rowEntry.columnsIndex();
-            cf.delete(rowEntry.deletionInfo());
+            bf = indexEntry.isIndexed() ? indexEntry.bloomFilter() : null;
+            indexList = indexEntry.columnsIndex();
         }
         else
         {
+            assert file != null;
+            bf = IndexHelper.defreezeBloomFilter(file, sstable.descriptor.version.filterType);
             indexList = IndexHelper.deserializeIndex(file);
         }
 
-        cf.delete(DeletionInfo.serializer().deserializeFromSSTable(file, sstable.descriptor.version));
+        if (!indexEntry.isIndexed())
+        {
+            // we can stop early if bloom filter says none of the columns actually exist -- but,
+            // we can't stop before initializing the cf above, in case there's a relevant tombstone
+            ColumnFamilySerializer serializer = ColumnFamily.serializer;
+            try
+            {
+                cf = ColumnFamily.create(sstable.metadata);
+                cf.delete(DeletionInfo.serializer().deserializeFromSSTable(file, sstable.descriptor.version));
+            }
+            catch (Exception e)
+            {
+                throw new IOException(serializer + " failed to deserialize " + sstable.getColumnFamilyName() + " with " + sstable.metadata + " from " + file, e);
+            }
+        }
+        else
+        {
+            cf = ColumnFamily.create(sstable.metadata);
+            cf.delete(indexEntry.deletionInfo());
+        }
+
+        List<OnDiskAtom> result = new ArrayList<OnDiskAtom>();
+        List<ByteBuffer> filteredColumnNames = new ArrayList<ByteBuffer>(columns.size());
+        for (ByteBuffer name : columns)
+        {
+            if (bf == null || bf.isPresent(name))
+            {
+                filteredColumnNames.add(name);
+            }
+        }
+        if (filteredColumnNames.isEmpty())
+            return;
 
         if (indexList.isEmpty())
         {
-            readSimpleColumns(file, columns, result);
+            readSimpleColumns(file, columns, filteredColumnNames, result);
         }
         else
         {
-            long basePosition = version.hasPromotedIndexes ? rowEntry.position : file.getFilePointer() + 4;
-            readIndexedColumns(sstable.metadata, file, columns, indexList, basePosition, result);
+            long basePosition;
+            if (sstable.descriptor.version.hasPromotedIndexes)
+            {
+                basePosition = indexEntry.position;
+            }
+            else
+            {
+                assert file != null;
+                file.readInt(); // column count
+                basePosition = file.getFilePointer();
+            }
+            readIndexedColumns(sstable.metadata, file, columns, filteredColumnNames, indexList, basePosition, result);
         }
 
         // create an iterator view of the columns we read
         iter = result.iterator();
     }
 
-    private void readSimpleColumns(FileDataInput file, SortedSet<ByteBuffer> columnNames, List<OnDiskAtom> result) throws IOException
+    private void readSimpleColumns(FileDataInput file, SortedSet<ByteBuffer> columnNames, List<ByteBuffer> filteredColumnNames, List<OnDiskAtom> result) throws IOException
     {
         OnDiskAtom.Serializer atomSerializer = cf.getOnDiskSerializer();
         int columns = file.readInt();
+        int n = 0;
         for (int i = 0; i < columns; i++)
         {
             OnDiskAtom column = atomSerializer.deserializeFromSSTable(file, sstable.descriptor.version);
@@ -169,7 +207,7 @@ public class SSTableNamesIterator extends SimpleAbstractColumnIterator implement
                 if (columnNames.contains(column.name()))
                 {
                     result.add(column);
-                    if (result.size() >= columnNames.size())
+                    if (n++ > filteredColumnNames.size())
                         break;
                 }
             }
@@ -183,16 +221,17 @@ public class SSTableNamesIterator extends SimpleAbstractColumnIterator implement
     private void readIndexedColumns(CFMetaData metadata,
                                     FileDataInput file,
                                     SortedSet<ByteBuffer> columnNames,
+                                    List<ByteBuffer> filteredColumnNames,
                                     List<IndexHelper.IndexInfo> indexList,
                                     long basePosition,
                                     List<OnDiskAtom> result)
-            throws IOException
+    throws IOException
     {
         /* get the various column ranges we have to read */
         AbstractType<?> comparator = metadata.comparator;
         List<IndexHelper.IndexInfo> ranges = new ArrayList<IndexHelper.IndexInfo>();
         int lastIndexIdx = -1;
-        for (ByteBuffer name : columnNames)
+        for (ByteBuffer name : filteredColumnNames)
         {
             int index = IndexHelper.indexFor(name, indexList, comparator, false, lastIndexIdx);
             if (index < 0 || index == indexList.size())
@@ -212,7 +251,7 @@ public class SSTableNamesIterator extends SimpleAbstractColumnIterator implement
         {
             long positionToSeek = basePosition + indexInfo.offset;
 
-            // With 1.2 promoted indexes, our first seek in the data file will happen at this point
+            // With new promoted indexes, our first seek in the data file will happen at that point.
             if (file == null)
                 file = createFileDataInput(positionToSeek);
 
@@ -223,6 +262,7 @@ public class SSTableNamesIterator extends SimpleAbstractColumnIterator implement
             while (file.bytesPastMark(mark) < indexInfo.width)
             {
                 OnDiskAtom column = atomSerializer.deserializeFromSSTable(file, sstable.descriptor.version);
+                // we check vs the original Set, not the filtered List, for efficiency
                 if (!(column instanceof IColumn) || columnNames.contains(column.name()))
                     result.add(column);
             }

http://git-wip-us.apache.org/repos/asf/cassandra/blob/17034c08/src/java/org/apache/cassandra/db/columniterator/SimpleSliceReader.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/db/columniterator/SimpleSliceReader.java b/src/java/org/apache/cassandra/db/columniterator/SimpleSliceReader.java
index 0cf9af6..b30d360 100644
--- a/src/java/org/apache/cassandra/db/columniterator/SimpleSliceReader.java
+++ b/src/java/org/apache/cassandra/db/columniterator/SimpleSliceReader.java
@@ -49,7 +49,7 @@ class SimpleSliceReader extends AbstractIterator<OnDiskAtom> implements OnDiskAt
     private FileMark mark;
     private final OnDiskAtom.Serializer atomSerializer;
 
-    public SimpleSliceReader(SSTableReader sstable, RowIndexEntry rowEntry, FileDataInput input, ByteBuffer finishColumn)
+    public SimpleSliceReader(SSTableReader sstable, RowIndexEntry indexEntry, FileDataInput input, ByteBuffer finishColumn)
     {
         this.sstable = sstable;
         this.finishColumn = finishColumn;
@@ -58,13 +58,13 @@ class SimpleSliceReader extends AbstractIterator<OnDiskAtom> implements OnDiskAt
         {
             if (input == null)
             {
-                this.file = sstable.getFileDataInput(rowEntry.position);
+                this.file = sstable.getFileDataInput(indexEntry.position);
                 this.needsClosing = true;
             }
             else
             {
                 this.file = input;
-                input.seek(rowEntry.position);
+                input.seek(indexEntry.position);
                 this.needsClosing = false;
             }
 
@@ -72,19 +72,14 @@ class SimpleSliceReader extends AbstractIterator<OnDiskAtom> implements OnDiskAt
             ByteBufferUtil.skipShortLength(file);
             SSTableReader.readRowSize(file, sstable.descriptor);
 
-            emptyColumnFamily = ColumnFamily.create(sstable.metadata);
-
             Descriptor.Version version = sstable.descriptor.version;
-            if (version.hasPromotedIndexes)
-            {
-                emptyColumnFamily.delete(rowEntry.deletionInfo());
-            }
-            else
+            if (!version.hasPromotedIndexes)
             {
                 IndexHelper.skipSSTableBloomFilter(file, version);
                 IndexHelper.skipIndex(file);
             }
 
+            emptyColumnFamily = ColumnFamily.create(sstable.metadata);
             emptyColumnFamily.delete(DeletionInfo.serializer().deserializeFromSSTable(file, version));
             atomSerializer = emptyColumnFamily.getOnDiskSerializer();
             columns = file.readInt();

http://git-wip-us.apache.org/repos/asf/cassandra/blob/17034c08/src/java/org/apache/cassandra/io/sstable/Descriptor.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/io/sstable/Descriptor.java b/src/java/org/apache/cassandra/io/sstable/Descriptor.java
index 7b916cb..f21a0d5 100644
--- a/src/java/org/apache/cassandra/io/sstable/Descriptor.java
+++ b/src/java/org/apache/cassandra/io/sstable/Descriptor.java
@@ -47,7 +47,7 @@ public class Descriptor
     public static class Version
     {
         // This needs to be at the begining for initialization sake
-        public static final String current_version = "ic";
+        public static final String current_version = "ib";
 
         public static final Version LEGACY = new Version("a"); // "pre-history"
         // b (0.7.0): added version to sstable filenames
@@ -62,11 +62,10 @@ public class Descriptor
         // hd (1.0.10): includes row tombstones in maxtimestamp
         // he (1.1.3): includes ancestors generation in metadata component
         // hf (1.1.6): marker that replay position corresponds to 1.1.5+ millis-based id (see CASSANDRA-4782)
-        // ia (1.2.0): column indexes are promoted to the index file.  (this means index offsets are now against the start of the row key, rather than the start of columns data, since the former allows us to skip the row header)
+        // ia (1.2.0): column indexes are promoted to the index file
         //             records estimated histogram of deletion times in tombstones
         //             bloom filter (keys and columns) upgraded to Murmur3
         // ib (1.2.1): tracks min client timestamp in metadata component
-        // ic (1.2.6): always promotes row-level tombstones into index file; previously this was unreliable
 
         public static final Version CURRENT = new Version(current_version);
 
@@ -84,7 +83,6 @@ public class Descriptor
         public final boolean hasPartitioner;
         public final boolean tracksTombstones;
         public final boolean hasPromotedIndexes;
-        public final boolean hasPromotedRowTombstones;
         public final FilterFactory.Type filterType;
         public final boolean hasAncestors;
         public final boolean hasBloomFilterSizeInHeader;
@@ -104,7 +102,6 @@ public class Descriptor
             metadataIncludesModernReplayPosition = version.compareTo("hf") >= 0;
             tracksTombstones = version.compareTo("ia") >= 0;
             hasPromotedIndexes = version.compareTo("ia") >= 0;
-            hasPromotedRowTombstones = version.compareTo("ic") >= 0;
             isLatestVersion = version.compareTo(current_version) == 0;
             if (version.compareTo("f") < 0)
                 filterType = FilterFactory.Type.SHA;

http://git-wip-us.apache.org/repos/asf/cassandra/blob/17034c08/src/java/org/apache/cassandra/io/sstable/SSTableReader.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/io/sstable/SSTableReader.java b/src/java/org/apache/cassandra/io/sstable/SSTableReader.java
index 61f505d..21a8673 100644
--- a/src/java/org/apache/cassandra/io/sstable/SSTableReader.java
+++ b/src/java/org/apache/cassandra/io/sstable/SSTableReader.java
@@ -323,7 +323,7 @@ public class SSTableReader extends SSTable
     {
         if (!components.contains(Component.FILTER))
         {
-            bf = AlwaysPresentFilter.instance;
+            bf = new AlwaysPresentFilter();
             return;
         }
 

http://git-wip-us.apache.org/repos/asf/cassandra/blob/17034c08/src/java/org/apache/cassandra/utils/AlwaysPresentFilter.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/utils/AlwaysPresentFilter.java b/src/java/org/apache/cassandra/utils/AlwaysPresentFilter.java
index 39b3d5d..67ac111 100644
--- a/src/java/org/apache/cassandra/utils/AlwaysPresentFilter.java
+++ b/src/java/org/apache/cassandra/utils/AlwaysPresentFilter.java
@@ -26,10 +26,6 @@ import java.nio.ByteBuffer;
 
 public class AlwaysPresentFilter implements IFilter
 {
-    public static final AlwaysPresentFilter instance = new AlwaysPresentFilter();
-
-    private AlwaysPresentFilter() { }
-
     public boolean isPresent(ByteBuffer key)
     {
         return true;

http://git-wip-us.apache.org/repos/asf/cassandra/blob/17034c08/src/java/org/apache/cassandra/utils/FilterFactory.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/utils/FilterFactory.java b/src/java/org/apache/cassandra/utils/FilterFactory.java
index 1b9027d..88c8973 100644
--- a/src/java/org/apache/cassandra/utils/FilterFactory.java
+++ b/src/java/org/apache/cassandra/utils/FilterFactory.java
@@ -131,7 +131,7 @@ public class FilterFactory
     {
         assert maxFalsePosProbability <= 1.0 : "Invalid probability";
         if (maxFalsePosProbability == 1.0)
-            return AlwaysPresentFilter.instance;
+            return new AlwaysPresentFilter();
         int bucketsPerElement = BloomCalculations.maxBucketsPerElement(numElements);
         BloomCalculations.BloomSpecification spec = BloomCalculations.computeBloomSpec(bucketsPerElement, maxFalsePosProbability);
         return createFilter(spec.K, numElements, spec.bucketsPerElement, type, offheap);

http://git-wip-us.apache.org/repos/asf/cassandra/blob/17034c08/test/unit/org/apache/cassandra/db/RangeTombstoneTest.java
----------------------------------------------------------------------
diff --git a/test/unit/org/apache/cassandra/db/RangeTombstoneTest.java b/test/unit/org/apache/cassandra/db/RangeTombstoneTest.java
index c531461..1bc846b 100644
--- a/test/unit/org/apache/cassandra/db/RangeTombstoneTest.java
+++ b/test/unit/org/apache/cassandra/db/RangeTombstoneTest.java
@@ -164,6 +164,10 @@ public class RangeTombstoneTest extends SchemaLoader
         return ByteBufferUtil.bytes(i);
     }
 
+    private static void insertData(ColumnFamilyStore cfs, String key) throws Exception
+    {
+    }
+
     private static void add(RowMutation rm, int value, long timestamp)
     {
         rm.add(new QueryPath(CFNAME, null, b(value)), b(value), timestamp);
