IndexHelper.java

jbellis Mon, 02 Aug 2010 08:07:36 -0700

Author: jbellis
Date: Mon Aug  2 15:06:12 2010
New Revision: 981546

URL: http://svn.apache.org/viewvc?rev=981546&view=rev
Log:
avoid writing index for rows that fit within a single index block
patch by jbellis; reviewed by gdusbabek for CASSANDRA-1338



Modified:
    cassandra/trunk/src/java/org/apache/cassandra/db/ColumnIndexer.java
    cassandra/trunk/src/java/org/apache/cassandra/db/columniterator/IndexedSliceReader.java
    cassandra/trunk/src/java/org/apache/cassandra/db/columniterator/SSTableNamesIterator.java
    cassandra/trunk/src/java/org/apache/cassandra/db/columniterator/SSTableSliceIterator.java
    cassandra/trunk/src/java/org/apache/cassandra/io/sstable/IndexHelper.java

Modified: cassandra/trunk/src/java/org/apache/cassandra/db/ColumnIndexer.java
URL: http://svn.apache.org/viewvc/cassandra/trunk/src/java/org/apache/cassandra/db/ColumnIndexer.java?rev=981546&r1=981545&r2=981546&view=diff
==============================================================================
--- cassandra/trunk/src/java/org/apache/cassandra/db/ColumnIndexer.java 
(original)
+++ cassandra/trunk/src/java/org/apache/cassandra/db/ColumnIndexer.java Mon Aug 
 2 15:06:12 2010
@@ -113,12 +113,19 @@ public class ColumnIndexer
         /* Write out the bloom filter. */
         writeBloomFilter(dos, bf);
 
-        // write the index
+        // write the index.  we should always have at least one computed index 
block, but we only write it out if there is more than that.
         assert indexSizeInBytes > 0;
-        dos.writeInt(indexSizeInBytes);
-        for (IndexHelper.IndexInfo cIndexInfo : indexList)
+        if (indexList.size() > 1)
         {
-            cIndexInfo.serialize(dos);
+            dos.writeInt(indexSizeInBytes);
+            for (IndexHelper.IndexInfo cIndexInfo : indexList)
+            {
+                cIndexInfo.serialize(dos);
+            }
+        }
+        else
+        {
+            dos.writeInt(0);
         }
        }
 

Modified: 
cassandra/trunk/src/java/org/apache/cassandra/db/columniterator/IndexedSliceReader.java
URL: http://svn.apache.org/viewvc/cassandra/trunk/src/java/org/apache/cassandra/db/columniterator/IndexedSliceReader.java?rev=981546&r1=981545&r2=981546&view=diff
==============================================================================
--- 
cassandra/trunk/src/java/org/apache/cassandra/db/columniterator/IndexedSliceReader.java
 (original)
+++ 
cassandra/trunk/src/java/org/apache/cassandra/db/columniterator/IndexedSliceReader.java
 Mon Aug  2 15:06:12 2010
@@ -32,9 +32,8 @@ class IndexedSliceReader extends Abstrac
     private final byte[] finishColumn;
     private final boolean reversed;
 
-    private int curRangeIndex;
+    private BlockFetcher fetcher;
     private Deque<IColumn> blockColumns = new ArrayDeque<IColumn>();
-    private final FileMark mark;
     private AbstractType comparator;
 
     public IndexedSliceReader(SSTableReader sstable, FileDataInput input, 
byte[] startColumn, byte[] finishColumn, boolean reversed)
@@ -50,16 +49,12 @@ class IndexedSliceReader extends Abstrac
             indexes = IndexHelper.deserializeIndex(file);
 
             emptyColumnFamily = 
ColumnFamily.serializer().deserializeFromSSTableNoColumns(sstable.makeColumnFamily(),
 file);
-            file.readInt(); // column count
+            fetcher = indexes == null ? new SimpleBlockFetcher() : new 
IndexedBlockFetcher();
         }
         catch (IOException e)
         {
             throw new IOError(e);
         }
-        this.mark = file.mark();
-        curRangeIndex = IndexHelper.indexFor(startColumn, indexes, comparator, 
reversed);
-        if (reversed && curRangeIndex == indexes.size())
-            curRangeIndex--;
     }
 
     public ColumnFamily getColumnFamily()
@@ -99,7 +94,7 @@ class IndexedSliceReader extends Abstrac
                 return column;
             try
             {
-                if (column == null && !getNextBlock())
+                if (column == null && !fetcher.getNextBlock())
                     return endOfData();
             }
             catch (IOException e)
@@ -109,59 +104,105 @@ class IndexedSliceReader extends Abstrac
         }
     }
 
-    public boolean getNextBlock() throws IOException
+    public void close()
     {
-        if (curRangeIndex < 0 || curRangeIndex >= indexes.size())
-            return false;
+    }
+
+    interface BlockFetcher
+    {
+        public boolean getNextBlock() throws IOException;
+    }
 
-        /* seek to the correct offset to the data, and calculate the data size 
*/
-        IndexHelper.IndexInfo curColPosition = indexes.get(curRangeIndex);
+    private class IndexedBlockFetcher implements BlockFetcher
+    {
+        private final FileMark mark;
+        private int curRangeIndex;
 
-        /* see if this read is really necessary. */
-        if (reversed)
+        IndexedBlockFetcher() throws IOException
         {
-            if ((finishColumn.length > 0 && comparator.compare(finishColumn, 
curColPosition.lastName) > 0) ||
-                (startColumn.length > 0 && comparator.compare(startColumn, 
curColPosition.firstName) < 0))
-                return false;
+            file.readInt(); // column count
+            this.mark = file.mark();
+            curRangeIndex = IndexHelper.indexFor(startColumn, indexes, 
comparator, reversed);
+            if (reversed && curRangeIndex == indexes.size())
+                curRangeIndex--;
         }
-        else
+
+        public boolean getNextBlock() throws IOException
         {
-            if ((startColumn.length > 0 && comparator.compare(startColumn, 
curColPosition.lastName) > 0) ||
-                (finishColumn.length > 0 && comparator.compare(finishColumn, 
curColPosition.firstName) < 0))
+            if (curRangeIndex < 0 || curRangeIndex >= indexes.size())
                 return false;
-        }
 
-        boolean outOfBounds = false;
+            /* seek to the correct offset to the data, and calculate the data 
size */
+            IndexHelper.IndexInfo curColPosition = indexes.get(curRangeIndex);
 
-        file.reset(mark);
-        long curOffset = file.skipBytes((int) curColPosition.offset);
-        assert curOffset == curColPosition.offset;
-        while (file.bytesPastMark(mark) < curColPosition.offset + 
curColPosition.width && !outOfBounds)
-        {
-            IColumn column = 
emptyColumnFamily.getColumnSerializer().deserialize(file);
+            /* see if this read is really necessary. */
             if (reversed)
-                blockColumns.addFirst(column);
+            {
+                if ((finishColumn.length > 0 && 
comparator.compare(finishColumn, curColPosition.lastName) > 0) ||
+                    (startColumn.length > 0 && comparator.compare(startColumn, 
curColPosition.firstName) < 0))
+                    return false;
+            }
             else
-                blockColumns.addLast(column);
+            {
+                if ((startColumn.length > 0 && comparator.compare(startColumn, 
curColPosition.lastName) > 0) ||
+                    (finishColumn.length > 0 && 
comparator.compare(finishColumn, curColPosition.firstName) < 0))
+                    return false;
+            }
 
-            /* see if we can stop seeking. */
-            if (!reversed && finishColumn.length > 0)
-                outOfBounds = comparator.compare(column.name(), finishColumn) 
>= 0;
-            else if (reversed && startColumn.length > 0)
-                outOfBounds = comparator.compare(column.name(), startColumn) 
>= 0;
+            boolean outOfBounds = false;
+            file.reset(mark);
+            long curOffset = file.skipBytes((int) curColPosition.offset);
+            assert curOffset == curColPosition.offset;
+            while (file.bytesPastMark(mark) < curColPosition.offset + 
curColPosition.width && !outOfBounds)
+            {
+                IColumn column = 
emptyColumnFamily.getColumnSerializer().deserialize(file);
+                if (reversed)
+                    blockColumns.addFirst(column);
+                else
+                    blockColumns.addLast(column);
+
+                /* see if we can stop seeking. */
+                if (!reversed && finishColumn.length > 0)
+                    outOfBounds = comparator.compare(column.name(), 
finishColumn) >= 0;
+                else if (reversed && startColumn.length > 0)
+                    outOfBounds = comparator.compare(column.name(), 
startColumn) >= 0;
+            }
 
-            if (outOfBounds)
-                break;
+            if (reversed)
+                curRangeIndex--;
+            else
+                curRangeIndex++;
+            return true;
         }
-
-        if (reversed)
-            curRangeIndex--;
-        else
-            curRangeIndex++;
-        return true;
     }
 
-    public void close() throws IOException
+    private class SimpleBlockFetcher implements BlockFetcher
     {
+        private SimpleBlockFetcher() throws IOException
+        {
+            int columns = file.readInt();
+            for (int i = 0; i < columns; i++)
+            {
+                IColumn column = 
emptyColumnFamily.getColumnSerializer().deserialize(file);
+                if (reversed)
+                    blockColumns.addFirst(column);
+                else
+                    blockColumns.addLast(column);
+
+                /* see if we can stop seeking. */
+                boolean outOfBounds = false;
+                if (!reversed && finishColumn.length > 0)
+                    outOfBounds = comparator.compare(column.name(), 
finishColumn) >= 0;
+                else if (reversed && startColumn.length > 0)
+                    outOfBounds = comparator.compare(column.name(), 
startColumn) >= 0;
+                if (outOfBounds)
+                    break;
+            }
+        }
+
+        public boolean getNextBlock() throws IOException
+        {
+            return false;
+        }
     }
 }

Modified: 
cassandra/trunk/src/java/org/apache/cassandra/db/columniterator/SSTableNamesIterator.java
URL: http://svn.apache.org/viewvc/cassandra/trunk/src/java/org/apache/cassandra/db/columniterator/SSTableNamesIterator.java?rev=981546&r1=981545&r2=981546&view=diff
==============================================================================
--- 
cassandra/trunk/src/java/org/apache/cassandra/db/columniterator/SSTableNamesIterator.java
 (original)
+++ 
cassandra/trunk/src/java/org/apache/cassandra/db/columniterator/SSTableNamesIterator.java
 Mon Aug  2 15:06:12 2010
@@ -53,7 +53,7 @@ public class SSTableNamesIterator extend
         this (ssTable, null, key, columnNames);
     }
 
-    public SSTableNamesIterator(SSTableReader ssTable, FileDataInput file, 
DecoratedKey key, SortedSet<byte[]> columnNames)
+    public SSTableNamesIterator(SSTableReader sstable, FileDataInput file, 
DecoratedKey key, SortedSet<byte[]> columnNames)
     {
         boolean closeFileWhenDone = file == null;
         
@@ -67,15 +67,15 @@ public class SSTableNamesIterator extend
             // open the sstable file, if we don't have one passed to use from 
range scan
             if (file == null)
             {
-                file = ssTable.getFileDataInput(decoratedKey, 
DatabaseDescriptor.getIndexedReadBufferSizeInKB() * 1024);
+                file = sstable.getFileDataInput(decoratedKey, 
DatabaseDescriptor.getIndexedReadBufferSizeInKB() * 1024);
                 if (file == null)
                     return;
-                DecoratedKey keyInDisk = 
SSTableReader.decodeKey(ssTable.getPartitioner(),
-                                                                 
ssTable.getDescriptor(),
+                DecoratedKey keyInDisk = 
SSTableReader.decodeKey(sstable.getPartitioner(),
+                                                                 
sstable.getDescriptor(),
                                                                  
FBUtilities.readShortByteArray(file));
                 assert keyInDisk.equals(decoratedKey)
                        : String.format("%s != %s in %s", keyInDisk, 
decoratedKey, file.getPath());
-                SSTableReader.readRowSize(file, ssTable.getDescriptor());
+                SSTableReader.readRowSize(file, sstable.getDescriptor());
             }
 
             // read the requested columns into `cf`
@@ -85,7 +85,7 @@ public class SSTableNamesIterator extend
 
             // we can stop early if bloom filter says none of the columns 
actually exist -- but,
             // we can't stop before initializing the cf above, in case there's 
a relevant tombstone
-            cf = 
ColumnFamily.serializer().deserializeFromSSTableNoColumns(ssTable.makeColumnFamily(),
 file);
+            cf = 
ColumnFamily.serializer().deserializeFromSSTableNoColumns(sstable.makeColumnFamily(),
 file);
 
             List<byte[]> filteredColumnNames1 = new 
ArrayList<byte[]>(columnNames.size());
             for (byte[] name : columnNames)
@@ -99,39 +99,10 @@ public class SSTableNamesIterator extend
             if (filteredColumnNames.isEmpty())
                 return;
 
-            file.readInt(); // column count
-
-            /* get the various column ranges we have to read */
-            AbstractType comparator = ssTable.getColumnComparator();
-            SortedSet<IndexHelper.IndexInfo> ranges = new 
TreeSet<IndexHelper.IndexInfo>(IndexHelper.getComparator(comparator));
-            for (byte[] name : filteredColumnNames)
-            {
-                int index = IndexHelper.indexFor(name, indexList, comparator, 
false);
-                if (index == indexList.size())
-                    continue;
-                IndexHelper.IndexInfo indexInfo = indexList.get(index);
-                if (comparator.compare(name, indexInfo.firstName) < 0)
-                    continue;
-                ranges.add(indexInfo);
-            }
-
-            FileMark mark = file.mark();
-            for (IndexHelper.IndexInfo indexInfo : ranges)
-            {
-                file.reset(mark);
-                long curOffsert = file.skipBytes((int) indexInfo.offset);
-                assert curOffsert == indexInfo.offset;
-                // TODO only completely deserialize columns we are interested 
in
-                while (file.bytesPastMark(mark) < indexInfo.offset + 
indexInfo.width)
-                {
-                    final IColumn column = 
cf.getColumnSerializer().deserialize(file);
-                    // we check vs the original Set, not the filtered List, 
for efficiency
-                    if (columnNames.contains(column.name()))
-                    {
-                        cf.addColumn(column);
-                    }
-                }
-            }
+            if (indexList == null)
+                readSimpleColumns(file, columnNames, filteredColumnNames);
+            else
+                readIndexedColumns(sstable, file, columnNames, 
filteredColumnNames, indexList);
 
             // create an iterator view of the columns we read
             iter = cf.getSortedColumns().iterator();
@@ -156,6 +127,60 @@ public class SSTableNamesIterator extend
         }
     }
 
+    private void readSimpleColumns(FileDataInput file, SortedSet<byte[]> 
columnNames, List<byte[]> filteredColumnNames) throws IOException
+    {
+        int columns = file.readInt();
+        int n = 0;
+        for (int i = 0; i < columns; i++)
+        {
+            IColumn column = cf.getColumnSerializer().deserialize(file);
+            if (columnNames.contains(column.name()))
+            {
+                cf.addColumn(column);
+                if (n++ > filteredColumnNames.size())
+                    break;
+            }
+        }
+    }
+
+    private void readIndexedColumns(SSTableReader sstable, FileDataInput file, 
SortedSet<byte[]> columnNames, List<byte[]> filteredColumnNames, 
List<IndexHelper.IndexInfo> indexList)
+    throws IOException
+    {
+        file.readInt(); // column count
+
+        /* get the various column ranges we have to read */
+        AbstractType comparator = sstable.getColumnComparator();
+        SortedSet<IndexHelper.IndexInfo> ranges = new 
TreeSet<IndexHelper.IndexInfo>(IndexHelper.getComparator(comparator));
+        for (byte[] name : filteredColumnNames)
+        {
+            int index = IndexHelper.indexFor(name, indexList, comparator, 
false);
+            if (index == indexList.size())
+                continue;
+            IndexHelper.IndexInfo indexInfo = indexList.get(index);
+            if (comparator.compare(name, indexInfo.firstName) < 0)
+                continue;
+            ranges.add(indexInfo);
+        }
+
+        FileMark mark = file.mark();
+        for (IndexHelper.IndexInfo indexInfo : ranges)
+        {
+            file.reset(mark);
+            long curOffsert = file.skipBytes((int) indexInfo.offset);
+            assert curOffsert == indexInfo.offset;
+            // TODO only completely deserialize columns we are interested in
+            while (file.bytesPastMark(mark) < indexInfo.offset + 
indexInfo.width)
+            {
+                IColumn column = cf.getColumnSerializer().deserialize(file);
+                // we check vs the original Set, not the filtered List, for 
efficiency
+                if (columnNames.contains(column.name()))
+                {
+                    cf.addColumn(column);
+                }
+            }
+        }
+    }
+
     public DecoratedKey getKey()
     {
         return decoratedKey;

Modified: 
cassandra/trunk/src/java/org/apache/cassandra/db/columniterator/SSTableSliceIterator.java
URL: http://svn.apache.org/viewvc/cassandra/trunk/src/java/org/apache/cassandra/db/columniterator/SSTableSliceIterator.java?rev=981546&r1=981545&r2=981546&view=diff
==============================================================================
--- 
cassandra/trunk/src/java/org/apache/cassandra/db/columniterator/SSTableSliceIterator.java
 (original)
+++ 
cassandra/trunk/src/java/org/apache/cassandra/db/columniterator/SSTableSliceIterator.java
 Mon Aug  2 15:06:12 2010
@@ -118,7 +118,7 @@ public class SSTableSliceIterator implem
 
     public void close() throws IOException
     {
-        if (closeFileWhenDone)
+        if (closeFileWhenDone && file != null)
             file.close();
     }
 

Modified: 
cassandra/trunk/src/java/org/apache/cassandra/io/sstable/IndexHelper.java
URL: http://svn.apache.org/viewvc/cassandra/trunk/src/java/org/apache/cassandra/io/sstable/IndexHelper.java?rev=981546&r1=981545&r2=981546&view=diff
==============================================================================
--- cassandra/trunk/src/java/org/apache/cassandra/io/sstable/IndexHelper.java 
(original)
+++ cassandra/trunk/src/java/org/apache/cassandra/io/sstable/IndexHelper.java 
Mon Aug  2 15:06:12 2010
@@ -68,9 +68,10 @@ public class IndexHelper
      */
        public static ArrayList<IndexInfo> deserializeIndex(FileDataInput in) 
throws IOException
        {
-        ArrayList<IndexInfo> indexList = new ArrayList<IndexInfo>();
-
                int columnIndexSize = in.readInt();
+        if (columnIndexSize == 0)
+            return null;
+        ArrayList<IndexInfo> indexList = new ArrayList<IndexInfo>();
         FileMark mark = in.mark();
         while (in.bytesPastMark(mark) < columnIndexSize)
         {

svn commit: r981546 - in /cassandra/trunk/src/java/org/apache/cassandra: db/ColumnIndexer.java db/columniterator/IndexedSliceReader.java db/columniterator/SSTableNamesIterator.java db/columniterator/SSTableSliceIterator.java io/sstable/IndexHelper.java

Reply via email to