Merge branch 'cassandra-2.0' into cassandra-2.1

Conflicts:
        CHANGES.txt
        src/java/org/apache/cassandra/db/DeletionTime.java
        src/java/org/apache/cassandra/db/RangeTombstone.java


Project: http://git-wip-us.apache.org/repos/asf/cassandra/repo
Commit: http://git-wip-us.apache.org/repos/asf/cassandra/commit/655f0569
Tree: http://git-wip-us.apache.org/repos/asf/cassandra/tree/655f0569
Diff: http://git-wip-us.apache.org/repos/asf/cassandra/diff/655f0569

Branch: refs/heads/trunk
Commit: 655f0569874b7f1997214cb9fe0bda64c7cdf0d5
Parents: b9a89a3 b0dbea3
Author: Sylvain Lebresne <sylv...@datastax.com>
Authored: Wed Jun 3 14:22:44 2015 +0200
Committer: Sylvain Lebresne <sylv...@datastax.com>
Committed: Wed Jun 3 14:22:44 2015 +0200

----------------------------------------------------------------------
 CHANGES.txt                                     |   1 +
 .../org/apache/cassandra/db/DeletionTime.java   |   5 +
 .../org/apache/cassandra/db/RangeTombstone.java | 194 +++++++++++++------
 .../db/compaction/LazilyCompactedRow.java       |   3 +
 4 files changed, 148 insertions(+), 55 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/cassandra/blob/655f0569/CHANGES.txt
----------------------------------------------------------------------
diff --cc CHANGES.txt
index 57bbfcc,16ce060..f5c3b41
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@@ -1,38 -1,5 +1,39 @@@
 -2.0.16:
 +2.1.6
 + * Ensure truncate without snapshot cannot produce corrupt responses 
(CASSANDRA-9388) 
 + * Consistent error message when a table mixes counter and non-counter
 +   columns (CASSANDRA-9492)
 + * Avoid getting unreadable keys during anticompaction (CASSANDRA-9508)
 + * (cqlsh) Better float precision by default (CASSANDRA-9224)
 + * Improve estimated row count (CASSANDRA-9107)
 + * Optimize range tombstone memory footprint (CASSANDRA-8603)
 + * Use configured gcgs in anticompaction (CASSANDRA-9397)
 + * Warn on misuse of unlogged batches (CASSANDRA-9282)
 + * Failure detector detects and ignores local pauses (CASSANDRA-9183)
 + * Add utility class to support for rate limiting a given log statement 
(CASSANDRA-9029)
 + * Add missing consistency levels to cassandra-stress (CASSANDRA-9361)
 + * Fix commitlog getCompletedTasks to not increment (CASSANDRA-9339)
 + * Fix for harmless exceptions logged as ERROR (CASSANDRA-8564)
 + * Delete processed sstables in sstablesplit/sstableupgrade (CASSANDRA-8606)
 + * Improve sstable exclusion from partition tombstones (CASSANDRA-9298)
 + * Validate the indexed column rather than the cell's contents for 2i 
(CASSANDRA-9057)
 + * Add support for top-k custom 2i queries (CASSANDRA-8717)
 + * Fix error when dropping table during compaction (CASSANDRA-9251)
 + * cassandra-stress supports validation operations over user profiles 
(CASSANDRA-8773)
 + * Add support for rate limiting log messages (CASSANDRA-9029)
 + * Log the partition key with tombstone warnings (CASSANDRA-8561)
 + * Reduce runWithCompactionsDisabled poll interval to 1ms (CASSANDRA-9271)
 + * Fix PITR commitlog replay (CASSANDRA-9195)
 + * GCInspector logs very different times (CASSANDRA-9124)
 + * Fix deleting from an empty list (CASSANDRA-9198)
 + * Update tuple and collection types that use a user-defined type when that 
UDT
 +   is modified (CASSANDRA-9148, CASSANDRA-9192)
 + * Use higher timeout for prepair and snapshot in repair (CASSANDRA-9261)
 + * Fix anticompaction blocking ANTI_ENTROPY stage (CASSANDRA-9151)
 + * Repair waits for anticompaction to finish (CASSANDRA-9097)
 + * Fix streaming not holding ref when stream error (CASSANDRA-9295)
 + * Fix canonical view returning early opened SSTables (CASSANDRA-9396)
 +Merged from 2.0:
+  * Don't accumulate more range than necessary in RangeTombstone.Tracker 
(CASSANDRA-9486)
   * Add broadcast and rpc addresses to system.local (CASSANDRA-9436)
   * Always mark sstable suspect when corrupted (CASSANDRA-9478)
   * Add database users and permissions to CQL3 documentation (CASSANDRA-7558)

http://git-wip-us.apache.org/repos/asf/cassandra/blob/655f0569/src/java/org/apache/cassandra/db/DeletionTime.java
----------------------------------------------------------------------
diff --cc src/java/org/apache/cassandra/db/DeletionTime.java
index 0e5f13f,b39d681..7165417
--- a/src/java/org/apache/cassandra/db/DeletionTime.java
+++ b/src/java/org/apache/cassandra/db/DeletionTime.java
@@@ -113,19 -109,25 +113,24 @@@ public class DeletionTime implements Co
          return localDeletionTime < gcBefore;
      }
  
 -    public boolean isDeleted(Column column)
 +    public boolean isDeleted(OnDiskAtom atom)
      {
 -        return column.timestamp() <= markedForDeleteAt;
 +        return atom.timestamp() <= markedForDeleteAt;
      }
  
+     public boolean supersedes(DeletionTime dt)
+     {
+         return this.markedForDeleteAt > dt.markedForDeleteAt;
+     }
+ 
 -    public long memorySize()
 +    public long unsharedHeapSize()
      {
 -        long fields = TypeSizes.NATIVE.sizeof(markedForDeleteAt) + 
TypeSizes.NATIVE.sizeof(localDeletionTime);
 -        return ObjectSizes.getFieldSize(fields);
 +        return EMPTY_SIZE;
      }
  
 -    private static class Serializer implements ISerializer<DeletionTime>
 +    public static class Serializer implements ISerializer<DeletionTime>
      {
 -        public void serialize(DeletionTime delTime, DataOutput out) throws 
IOException
 +        public void serialize(DeletionTime delTime, DataOutputPlus out) 
throws IOException
          {
              out.writeInt(delTime.localDeletionTime);
              out.writeLong(delTime.markedForDeleteAt);

http://git-wip-us.apache.org/repos/asf/cassandra/blob/655f0569/src/java/org/apache/cassandra/db/RangeTombstone.java
----------------------------------------------------------------------
diff --cc src/java/org/apache/cassandra/db/RangeTombstone.java
index feeadbb,fe9da20..590b005
--- a/src/java/org/apache/cassandra/db/RangeTombstone.java
+++ b/src/java/org/apache/cassandra/db/RangeTombstone.java
@@@ -94,40 -114,57 +94,62 @@@ public class RangeTombstone extends Int
          return comparator.compare(min, rt.min) <= 0 && 
comparator.compare(max, rt.max) >= 0;
      }
  
 +    public boolean includes(Comparator<Composite> comparator, Composite name)
 +    {
 +        return comparator.compare(name, min) >= 0 && comparator.compare(name, 
max) <= 0;
 +    }
 +
+     /**
+      * Tracks opened RangeTombstones when iterating over a partition.
+      * <p>
+      * This tracker must be provided all the atoms of a given partition in
+      * order (to the {@code update} method). Given this, it keeps enough
+      * information to be able to decide if an atom is deleted (shadowed)
+      * by a previously open RT. Once the tracker can prove a given range
+      * tombstone cannot be useful anymore (that is, as soon as we've seen an
+      * atom that is after the end of that RT), it discards this RT. In other
+      * words, the maximum memory used by this object should be proportional to
+      * the maximum number of RT that can be simultaneously open (and this
+      * should be fairly low in practice).
+      */
      public static class Tracker
      {
 -        private final Comparator<ByteBuffer> comparator;
 +        private final Comparator<Composite> comparator;
-         private final Deque<RangeTombstone> ranges = new 
ArrayDeque<RangeTombstone>();
-         private final SortedSet<RangeTombstone> maxOrderingSet = new 
TreeSet<RangeTombstone>(new Comparator<RangeTombstone>()
-         {
-             public int compare(RangeTombstone t1, RangeTombstone t2)
-             {
-                 return comparator.compare(t1.max, t2.max);
-             }
-         });
-         public final Set<RangeTombstone> expired = new 
HashSet<RangeTombstone>();
+ 
+         // A list of the currently open RTs. We keep the list sorted in order of growing end bounds as for a
+         // new atom, this allows to efficiently find the RTs that are now 
useless (if any). Also note that because
+         // atom are passed to the tracker in order, any RT that is tracked 
can be assumed as opened, i.e. we
+         // never have to test the RTs start since it's always assumed to be 
less than what we have.
+         // Also note that this will store expired RTs (#7810). Those will be 
of type ExpiredRangeTombstone and
+         // will be ignored by writeOpenedMarker.
+         private final List<RangeTombstone> openedTombstones = new 
LinkedList<RangeTombstone>();
+ 
+         // Total number of atoms written by writeOpenedMarker().
          private int atomCount;
  
+         /**
+          * Creates a new tracker given the table comparator.
+          *
+          * @param comparator the comparator for the table this will track 
atoms
+          * for. The tracker assumes that atoms will be later provided to the
+          * tracker in {@code comparator} order.
+          */
 -        public Tracker(Comparator<ByteBuffer> comparator)
 +        public Tracker(Comparator<Composite> comparator)
          {
              this.comparator = comparator;
          }
  
          /**
-          * Compute RangeTombstone that are needed at the beginning of an index
+          * Computes the RangeTombstone that are needed at the beginning of an 
index
           * block starting with {@code firstColumn}.
-          * Returns the total serialized size of said tombstones and write them
-          * to {@code out} it if isn't null.
+          *
+          * @return the total serialized size of said tombstones and write them to
+          * {@code out} if it isn't null.
           */
 -        public long writeOpenedMarker(OnDiskAtom firstColumn, DataOutput out, 
OnDiskAtom.Serializer atomSerializer) throws IOException
 +        public long writeOpenedMarker(OnDiskAtom firstColumn, DataOutputPlus 
out, OnDiskAtom.Serializer atomSerializer) throws IOException
          {
              long size = 0;
-             if (ranges.isEmpty())
+             if (openedTombstones.isEmpty())
                  return size;
  
              /*
@@@ -228,16 -290,41 +275,41 @@@
              }
          }
  
+         /**
+          * Adds the provided {@code tombstone} _before_ the last element 
returned by {@code iterator.next()}.
+          * <p>
+          * This method assumes that {@code iterator.next()} has been called 
prior to this method call, i.e. that
+          * {@code iterator.hasPrevious() == true}.
+          */
+         private static void insertBefore(RangeTombstone tombstone, 
ListIterator<RangeTombstone> iterator)
+         {
+             assert iterator.hasPrevious();
+             iterator.previous();
+             iterator.add(tombstone);
+             iterator.next();
+         }
+ 
+         /**
+          * Tests if the provided column is deleted by one of the tombstone
+          * tracked by this tracker.
+          * <p>
+          * This method should be called on columns in the same order as for the update()
+          * method. Note that this method does not update the tracker so the 
update() method
+          * should still be called on {@code column} (it doesn't matter if 
update is called
+          * before or after this call).
+          */
 -        public boolean isDeleted(Column column)
 +        public boolean isDeleted(Cell cell)
          {
-             for (RangeTombstone tombstone : ranges)
+             // We know every tombstone kept are "open", start before the 
column. So the
+             // column is deleted if any of the tracked tombstone ends after 
the column
+             // (this will be the case of every RT if update() has been called 
before this
+             // method, but we might have a few RT to skip otherwise) and the 
RT deletion is
+             // actually more recent than the column timestamp.
+             for (RangeTombstone tombstone : openedTombstones)
              {
-                 if (comparator.compare(cell.name(), tombstone.min) >= 0
-                     && comparator.compare(cell.name(), tombstone.max) <= 0
 -                if (comparator.compare(column.name(), tombstone.max) <= 0
 -                    && tombstone.maxTimestamp() >= column.timestamp())
++                if (comparator.compare(cell.name(), tombstone.max) <= 0
 +                    && tombstone.timestamp() >= cell.timestamp())
-                 {
                      return true;
-                 }
              }
              return false;
          }

http://git-wip-us.apache.org/repos/asf/cassandra/blob/655f0569/src/java/org/apache/cassandra/db/compaction/LazilyCompactedRow.java
----------------------------------------------------------------------
diff --cc src/java/org/apache/cassandra/db/compaction/LazilyCompactedRow.java
index 56a4ede,43801c6..941557b
--- a/src/java/org/apache/cassandra/db/compaction/LazilyCompactedRow.java
+++ b/src/java/org/apache/cassandra/db/compaction/LazilyCompactedRow.java
@@@ -291,29 -290,23 +291,32 @@@ public class LazilyCompactedRow extend
              {
                  // when we clear() the container, it removes the deletion 
info, so this needs to be reset each time
                  container.delete(maxRowTombstone);
 -                ColumnFamily purged = PrecompactedRow.removeDeleted(key, 
shouldPurge, controller, container);
 -                if (purged == null || !purged.iterator().hasNext())
 +                Iterator<Cell> iter = container.iterator();
 +                Cell c = iter.next();
 +                boolean shouldPurge = c.getLocalDeletionTime() < 
Integer.MAX_VALUE && c.timestamp() < getMaxPurgeableTimestamp();
 +                removeDeleted(container, shouldPurge, key, controller);
 +                iter = container.iterator();
 +                if (!iter.hasNext())
                  {
                      // don't call clear() because that resets the deletion 
time. See CASSANDRA-7808.
 -                    container = 
ArrayBackedSortedColumns.factory.create(emptyColumnFamily.metadata());;
 +                    container = 
ArrayBackedSortedColumns.factory.create(emptyColumnFamily.metadata());
                      return null;
                  }
 -                Column reduced = purged.iterator().next();
 +
 +                int localDeletionTime = 
container.deletionInfo().getTopLevelDeletion().localDeletionTime;
 +                if (localDeletionTime < Integer.MAX_VALUE)
 +                    tombstones.update(localDeletionTime);
 +
 +                Cell reduced = iter.next();
                  container = 
ArrayBackedSortedColumns.factory.create(emptyColumnFamily.metadata());
  
 -                // PrecompactedRow.removeDeleted has only checked the 
top-level CF deletion times,
 -                // not the range tombstones. For that we use the 
columnIndexer tombstone tracker.
 +                // removeDeleted have only checked the top-level CF deletion 
times,
 +                // not the range tombstone. For that we use the columnIndexer 
tombstone tracker.
                  if (indexBuilder.tombstoneTracker().isDeleted(reduced))
                  {
+                     // We skip that column so it won't be passed to the tracker by the index builder. So pass it now to
+                     // make sure we still discard potentially un-needed RT as soon as possible.
+                     indexBuilder.tombstoneTracker().update(reduced, false);
                      indexer.remove(reduced);
                      return null;
                  }

Reply via email to