[1/2] git commit: Reduce memory used by primary index sample patch by Radim Kolar and yukim; reviewed by jbellis for CASSANDRA-3743

jbellis Tue, 24 Jan 2012 18:20:02 -0800

Updated Branches:
  refs/heads/trunk 37b079352 -> 86a7a3d1a


Reduce memory used by primary index sample
patch by Radim Kolar and yukim; reviewed by jbellis for CASSANDRA-3743


Project: http://git-wip-us.apache.org/repos/asf/cassandra/repo
Commit: http://git-wip-us.apache.org/repos/asf/cassandra/commit/86a7a3d1
Tree: http://git-wip-us.apache.org/repos/asf/cassandra/tree/86a7a3d1
Diff: http://git-wip-us.apache.org/repos/asf/cassandra/diff/86a7a3d1

Branch: refs/heads/trunk
Commit: 86a7a3d1a0ba38e9c1f110814517f1b7630cfa51
Parents: 372a3ad
Author: Jonathan Ellis <[email protected]>
Authored: Tue Jan 24 20:17:47 2012 -0600
Committer: Jonathan Ellis <[email protected]>
Committed: Tue Jan 24 20:19:27 2012 -0600

----------------------------------------------------------------------
 CHANGES.txt                                        |    1 +
 .../apache/cassandra/io/sstable/IndexSummary.java  |   55 +++++----------
 .../apache/cassandra/io/sstable/SSTableReader.java |   46 +++++-------
 3 files changed, 37 insertions(+), 65 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/cassandra/blob/86a7a3d1/CHANGES.txt
----------------------------------------------------------------------
diff --git a/CHANGES.txt b/CHANGES.txt
index 06ac8a0..07b6213 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -1,4 +1,5 @@
 1.1-dev
+ * Reduce memory used by primary index sample (CASSANDRA-3743)
  * (Hadoop) separate input/output configurations (CASSANDRA-3197, 3765)
  * avoid returning internal Cassandra classes over JMX (CASSANDRA-2805)
  * add row-level isolation via SnapTree (CASSANDRA-2893)

http://git-wip-us.apache.org/repos/asf/cassandra/blob/86a7a3d1/src/java/org/apache/cassandra/io/sstable/IndexSummary.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/io/sstable/IndexSummary.java 
b/src/java/org/apache/cassandra/io/sstable/IndexSummary.java
index 68530cf..4c6efb5 100644
--- a/src/java/org/apache/cassandra/io/sstable/IndexSummary.java
+++ b/src/java/org/apache/cassandra/io/sstable/IndexSummary.java
@@ -1,6 +1,6 @@
 package org.apache.cassandra.io.sstable;
 /*
- * 
+ *
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
@@ -8,16 +8,16 @@ package org.apache.cassandra.io.sstable;
  * to you under the Apache License, Version 2.0 (the
  * "License"); you may not use this file except in compliance
  * with the License.  You may obtain a copy of the License at
- * 
+ *
  *   http://www.apache.org/licenses/LICENSE-2.0
- * 
+ *
  * Unless required by applicable law or agreed to in writing,
  * software distributed under the License is distributed on an
  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
  * KIND, either express or implied.  See the License for the
  * specific language governing permissions and limitations
  * under the License.
- * 
+ *
  */
 
 
@@ -35,7 +35,8 @@ import org.apache.cassandra.db.RowPosition;
  */
 public class IndexSummary
 {
-    private ArrayList<KeyPosition> indexPositions;
+    private ArrayList<Long> positions;
+    private ArrayList<DecoratedKey> keys;
     private long keysWritten = 0;
 
     public IndexSummary(long expectedKeys)
@@ -44,7 +45,8 @@ public class IndexSummary
         if (expectedEntries > Integer.MAX_VALUE)
             // TODO: that's a _lot_ of keys, or a very low interval
             throw new RuntimeException("Cannot use index_interval of " + 
DatabaseDescriptor.getIndexInterval() + " with " + expectedKeys + " (expected) 
keys.");
-        indexPositions = new ArrayList<KeyPosition>((int)expectedEntries);
+        positions = new ArrayList<Long>((int)expectedEntries);
+        keys = new ArrayList<DecoratedKey>((int)expectedEntries);
     }
 
     public void incrementRowid()
@@ -59,7 +61,8 @@ public class IndexSummary
 
     public void addEntry(DecoratedKey<?> key, long indexPosition)
     {
-        indexPositions.add(new KeyPosition(SSTable.getMinimalKey(key), 
indexPosition));
+        keys.add(SSTable.getMinimalKey(key));
+        positions.add(indexPosition);
     }
 
     public void maybeAddEntry(DecoratedKey<?> decoratedKey, long indexPosition)
@@ -69,43 +72,19 @@ public class IndexSummary
         incrementRowid();
     }
 
-    public List<KeyPosition> getIndexPositions()
+    public List<DecoratedKey> getKeys()
     {
-        return indexPositions;
+        return keys;
     }
 
-    public void complete()
+    public long getPosition(int index)
     {
-        indexPositions.trimToSize();
+        return positions.get(index);
     }
 
-    /**
-     * This is a simple container for the index Key and its corresponding 
position
-     * in the index file. Binary search is performed on a list of these objects
-     * to find where to start looking for the index entry containing the data 
position
-     * (which will be turned into a PositionSize object)
-     */
-    public static final class KeyPosition implements Comparable<KeyPosition>
+    public void complete()
     {
-        // We allow RowPosition for the purpose of being able to select keys 
given a token, but the index
-        // should only contain true user provided keys, i.e. DecoratedKey, 
which is enforced by addEntry.
-        public final RowPosition key;
-        public final long indexPosition;
-
-        public KeyPosition(RowPosition key, long indexPosition)
-        {
-            this.key = key;
-            this.indexPosition = indexPosition;
-        }
-
-        public int compareTo(KeyPosition kp)
-        {
-            return key.compareTo(kp.key);
-        }
-
-        public String toString()
-        {
-            return key + ":" + indexPosition;
-        }
+        keys.trimToSize();
+        positions.trimToSize();
     }
 }

http://git-wip-us.apache.org/repos/asf/cassandra/blob/86a7a3d1/src/java/org/apache/cassandra/io/sstable/SSTableReader.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/io/sstable/SSTableReader.java 
b/src/java/org/apache/cassandra/io/sstable/SSTableReader.java
index 324c460..0162249 100644
--- a/src/java/org/apache/cassandra/io/sstable/SSTableReader.java
+++ b/src/java/org/apache/cassandra/io/sstable/SSTableReader.java
@@ -6,9 +6,9 @@
  * to you under the Apache License, Version 2.0 (the
  * "License"); you may not use this file except in compliance
  * with the License.  You may obtain a copy of the License at
- * 
+ *
  *   http://www.apache.org/licenses/LICENSE-2.0
- * 
+ *
  * Unless required by applicable law or agreed to in writing,
  * software distributed under the License is distributed on an
  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
@@ -413,22 +413,22 @@ public class SSTableReader extends SSTable
     }
 
     /** get the position in the index file to start scanning to find the given 
key (at most indexInterval keys away) */
-    private IndexSummary.KeyPosition getIndexScanPosition(RowPosition key)
+    private long getIndexScanPosition(RowPosition key)
     {
-        assert indexSummary.getIndexPositions() != null && 
indexSummary.getIndexPositions().size() > 0;
-        int index = Collections.binarySearch(indexSummary.getIndexPositions(), 
new IndexSummary.KeyPosition(key, -1));
+        assert indexSummary.getKeys() != null && indexSummary.getKeys().size() 
> 0;
+        int index = Collections.binarySearch(indexSummary.getKeys(), key);
         if (index < 0)
         {
             // binary search gives us the first index _greater_ than the key 
searched for,
             // i.e., its insertion position
             int greaterThan = (index + 1) * -1;
             if (greaterThan == 0)
-                return null;
-            return indexSummary.getIndexPositions().get(greaterThan - 1);
+                return -1;
+            return indexSummary.getPosition(greaterThan - 1);
         }
         else
         {
-            return indexSummary.getIndexPositions().get(index);
+            return indexSummary.getPosition(index);
         }
     }
 
@@ -470,7 +470,7 @@ public class SSTableReader extends SSTable
      */
     public long estimatedKeys()
     {
-        return indexSummary.getIndexPositions().size() * 
DatabaseDescriptor.getIndexInterval();
+        return indexSummary.getKeys().size() * 
DatabaseDescriptor.getIndexInterval();
     }
 
     /**
@@ -480,7 +480,7 @@ public class SSTableReader extends SSTable
     public long estimatedKeysForRanges(Collection<Range<Token>> ranges)
     {
         long sampleKeyCount = 0;
-        List<Pair<Integer, Integer>> sampleIndexes = 
getSampleIndexesForRanges(indexSummary.getIndexPositions(), ranges);
+        List<Pair<Integer, Integer>> sampleIndexes = 
getSampleIndexesForRanges(indexSummary.getKeys(), ranges);
         for (Pair<Integer, Integer> sampleIndexRange : sampleIndexes)
             sampleKeyCount += (sampleIndexRange.right - sampleIndexRange.left 
+ 1);
         return Math.max(1, sampleKeyCount * 
DatabaseDescriptor.getIndexInterval());
@@ -491,18 +491,10 @@ public class SSTableReader extends SSTable
      */
     public Collection<DecoratedKey> getKeySamples()
     {
-        return Collections2.transform(indexSummary.getIndexPositions(),
-                                      new Function<IndexSummary.KeyPosition, 
DecoratedKey>(){
-                                          public DecoratedKey 
apply(IndexSummary.KeyPosition kp)
-                                          {
-                                              // the index should only contain 
valid row key, we only allow RowPosition in KeyPosition for search purposes
-                                              assert kp.key instanceof 
DecoratedKey;
-                                              return (DecoratedKey)kp.key;
-                                          }
-                                      });
+        return indexSummary.getKeys();
     }
 
-    private static List<Pair<Integer,Integer>> 
getSampleIndexesForRanges(List<IndexSummary.KeyPosition> samples, 
Collection<Range<Token>> ranges)
+    private static List<Pair<Integer,Integer>> 
getSampleIndexesForRanges(List<DecoratedKey> samples, Collection<Range<Token>> 
ranges)
     {
         // use the index to determine a minimal section for each range
         List<Pair<Integer,Integer>> positions = new 
ArrayList<Pair<Integer,Integer>>();
@@ -514,7 +506,7 @@ public class SSTableReader extends SSTable
             RowPosition leftPosition = range.left.maxKeyBound();
             RowPosition rightPosition = range.left.maxKeyBound();
 
-            int left = Collections.binarySearch(samples, new 
IndexSummary.KeyPosition(leftPosition, -1));
+            int left = Collections.binarySearch(samples, leftPosition);
             if (left < 0)
                 left = (left + 1) * -1;
             else
@@ -526,7 +518,7 @@ public class SSTableReader extends SSTable
 
             int right = Range.isWrapAround(range.left, range.right)
                       ? samples.size() - 1
-                      : Collections.binarySearch(samples, new 
IndexSummary.KeyPosition(rightPosition, -1));
+                      : Collections.binarySearch(samples, rightPosition);
             if (right < 0)
             {
                 // range are end inclusive so we use the previous index from 
what binarySearch give us
@@ -548,7 +540,7 @@ public class SSTableReader extends SSTable
 
     public Iterable<DecoratedKey> getKeySamples(final Range<Token> range)
     {
-        final List<IndexSummary.KeyPosition> samples = 
indexSummary.getIndexPositions();
+        final List<DecoratedKey> samples = indexSummary.getKeys();
 
         final List<Pair<Integer, Integer>> indexRanges = 
getSampleIndexesForRanges(samples, Collections.singletonList(range));
 
@@ -583,7 +575,7 @@ public class SSTableReader extends SSTable
 
                     public DecoratedKey next()
                     {
-                        RowPosition k = samples.get(idx++).key;
+                        RowPosition k = samples.get(idx++);
                         // the index should only contain valid row key, we 
only allow RowPosition in KeyPosition for search purposes
                         assert k instanceof DecoratedKey;
                         return (DecoratedKey)k;
@@ -674,8 +666,8 @@ public class SSTableReader extends SSTable
         }
 
         // next, see if the sampled index says it's impossible for the key to 
be present
-        IndexSummary.KeyPosition sampledPosition = getIndexScanPosition(key);
-        if (sampledPosition == null)
+        long sampledPosition = getIndexScanPosition(key);
+        if (sampledPosition == -1)
         {
             if (op == Operator.EQ)
                 bloomFilterTracker.addFalsePositive();
@@ -684,7 +676,7 @@ public class SSTableReader extends SSTable
         }
 
         // scan the on-disk index, starting at the nearest sampled position
-        Iterator<FileDataInput> segments = 
ifile.iterator(sampledPosition.indexPosition, INDEX_FILE_BUFFER_BYTES);
+        Iterator<FileDataInput> segments = ifile.iterator(sampledPosition, 
INDEX_FILE_BUFFER_BYTES);
         while (segments.hasNext())
         {
             FileDataInput input = segments.next();

[1/2] git commit: Reduce memory used by primary index sample patch by Radim Kolar and yukim; reviewed by jbellis for CASSANDRA-3743

Reply via email to