Merge branch 'cassandra-2.0' into cassandra-2.1
Conflicts:
CHANGES.txt
src/java/org/apache/cassandra/db/DeletionTime.java
src/java/org/apache/cassandra/db/RangeTombstone.java
Project: http://git-wip-us.apache.org/repos/asf/cassandra/repo
Commit: http://git-wip-us.apache.org/repos/asf/cassandra/commit/655f0569
Tree: http://git-wip-us.apache.org/repos/asf/cassandra/tree/655f0569
Diff: http://git-wip-us.apache.org/repos/asf/cassandra/diff/655f0569
Branch: refs/heads/trunk
Commit: 655f0569874b7f1997214cb9fe0bda64c7cdf0d5
Parents: b9a89a3 b0dbea3
Author: Sylvain Lebresne <[email protected]>
Authored: Wed Jun 3 14:22:44 2015 +0200
Committer: Sylvain Lebresne <[email protected]>
Committed: Wed Jun 3 14:22:44 2015 +0200
----------------------------------------------------------------------
CHANGES.txt | 1 +
.../org/apache/cassandra/db/DeletionTime.java | 5 +
.../org/apache/cassandra/db/RangeTombstone.java | 194 +++++++++++++------
.../db/compaction/LazilyCompactedRow.java | 3 +
4 files changed, 148 insertions(+), 55 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/cassandra/blob/655f0569/CHANGES.txt
----------------------------------------------------------------------
diff --cc CHANGES.txt
index 57bbfcc,16ce060..f5c3b41
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@@ -1,38 -1,5 +1,39 @@@
-2.0.16:
+2.1.6
+ * Ensure truncate without snapshot cannot produce corrupt responses
(CASSANDRA-9388)
+ * Consistent error message when a table mixes counter and non-counter
+ columns (CASSANDRA-9492)
+ * Avoid getting unreadable keys during anticompaction (CASSANDRA-9508)
+ * (cqlsh) Better float precision by default (CASSANDRA-9224)
+ * Improve estimated row count (CASSANDRA-9107)
+ * Optimize range tombstone memory footprint (CASSANDRA-8603)
+ * Use configured gcgs in anticompaction (CASSANDRA-9397)
+ * Warn on misuse of unlogged batches (CASSANDRA-9282)
+ * Failure detector detects and ignores local pauses (CASSANDRA-9183)
+ * Add utility class to support rate limiting a given log statement
(CASSANDRA-9029)
+ * Add missing consistency levels to cassandra-stress (CASSANDRA-9361)
+ * Fix commitlog getCompletedTasks to not increment (CASSANDRA-9339)
+ * Fix for harmless exceptions logged as ERROR (CASSANDRA-8564)
+ * Delete processed sstables in sstablesplit/sstableupgrade (CASSANDRA-8606)
+ * Improve sstable exclusion from partition tombstones (CASSANDRA-9298)
+ * Validate the indexed column rather than the cell's contents for 2i
(CASSANDRA-9057)
+ * Add support for top-k custom 2i queries (CASSANDRA-8717)
+ * Fix error when dropping table during compaction (CASSANDRA-9251)
+ * cassandra-stress supports validation operations over user profiles
(CASSANDRA-8773)
+ * Add support for rate limiting log messages (CASSANDRA-9029)
+ * Log the partition key with tombstone warnings (CASSANDRA-8561)
+ * Reduce runWithCompactionsDisabled poll interval to 1ms (CASSANDRA-9271)
+ * Fix PITR commitlog replay (CASSANDRA-9195)
+ * GCInspector logs very different times (CASSANDRA-9124)
+ * Fix deleting from an empty list (CASSANDRA-9198)
+ * Update tuple and collection types that use a user-defined type when that
UDT
+ is modified (CASSANDRA-9148, CASSANDRA-9192)
+ * Use higher timeout for prepare and snapshot in repair (CASSANDRA-9261)
+ * Fix anticompaction blocking ANTI_ENTROPY stage (CASSANDRA-9151)
+ * Repair waits for anticompaction to finish (CASSANDRA-9097)
+ * Fix streaming not holding ref when stream error (CASSANDRA-9295)
+ * Fix canonical view returning early opened SSTables (CASSANDRA-9396)
+Merged from 2.0:
+ * Don't accumulate more range than necessary in RangeTombstone.Tracker
(CASSANDRA-9486)
* Add broadcast and rpc addresses to system.local (CASSANDRA-9436)
* Always mark sstable suspect when corrupted (CASSANDRA-9478)
* Add database users and permissions to CQL3 documentation (CASSANDRA-7558)
http://git-wip-us.apache.org/repos/asf/cassandra/blob/655f0569/src/java/org/apache/cassandra/db/DeletionTime.java
----------------------------------------------------------------------
diff --cc src/java/org/apache/cassandra/db/DeletionTime.java
index 0e5f13f,b39d681..7165417
--- a/src/java/org/apache/cassandra/db/DeletionTime.java
+++ b/src/java/org/apache/cassandra/db/DeletionTime.java
@@@ -113,19 -109,25 +113,24 @@@ public class DeletionTime implements Co
return localDeletionTime < gcBefore;
}
- public boolean isDeleted(Column column)
+ public boolean isDeleted(OnDiskAtom atom)
{
- return column.timestamp() <= markedForDeleteAt;
+ return atom.timestamp() <= markedForDeleteAt;
}
+ public boolean supersedes(DeletionTime dt)
+ {
+ return this.markedForDeleteAt > dt.markedForDeleteAt;
+ }
+
- public long memorySize()
+ public long unsharedHeapSize()
{
- long fields = TypeSizes.NATIVE.sizeof(markedForDeleteAt) +
TypeSizes.NATIVE.sizeof(localDeletionTime);
- return ObjectSizes.getFieldSize(fields);
+ return EMPTY_SIZE;
}
- private static class Serializer implements ISerializer<DeletionTime>
+ public static class Serializer implements ISerializer<DeletionTime>
{
- public void serialize(DeletionTime delTime, DataOutput out) throws
IOException
+ public void serialize(DeletionTime delTime, DataOutputPlus out)
throws IOException
{
out.writeInt(delTime.localDeletionTime);
out.writeLong(delTime.markedForDeleteAt);
http://git-wip-us.apache.org/repos/asf/cassandra/blob/655f0569/src/java/org/apache/cassandra/db/RangeTombstone.java
----------------------------------------------------------------------
diff --cc src/java/org/apache/cassandra/db/RangeTombstone.java
index feeadbb,fe9da20..590b005
--- a/src/java/org/apache/cassandra/db/RangeTombstone.java
+++ b/src/java/org/apache/cassandra/db/RangeTombstone.java
@@@ -94,40 -114,57 +94,62 @@@ public class RangeTombstone extends Int
return comparator.compare(min, rt.min) <= 0 &&
comparator.compare(max, rt.max) >= 0;
}
+ public boolean includes(Comparator<Composite> comparator, Composite name)
+ {
+ return comparator.compare(name, min) >= 0 && comparator.compare(name,
max) <= 0;
+ }
+
+ /**
+ * Tracks opened RangeTombstones when iterating over a partition.
+ * <p>
+ * This tracker must be provided all the atoms of a given partition in
+ * order (to the {@code update} method). Given this, it keeps enough
+ * information to be able to decide if a given atom is deleted
(shadowed)
+ * by a previously open RT. Once the tracker can prove a given range
+ * tombstone cannot be useful anymore (that is, as soon as we've seen an
+ * atom that is after the end of that RT), it discards this RT. In other
+ * words, the maximum memory used by this object should be proportional to
+ * the maximum number of RT that can be simultaneously open (and this
+ * should be fairly low in practice).
+ */
public static class Tracker
{
- private final Comparator<ByteBuffer> comparator;
+ private final Comparator<Composite> comparator;
- private final Deque<RangeTombstone> ranges = new
ArrayDeque<RangeTombstone>();
- private final SortedSet<RangeTombstone> maxOrderingSet = new
TreeSet<RangeTombstone>(new Comparator<RangeTombstone>()
- {
- public int compare(RangeTombstone t1, RangeTombstone t2)
- {
- return comparator.compare(t1.max, t2.max);
- }
- });
- public final Set<RangeTombstone> expired = new
HashSet<RangeTombstone>();
+
+ // A list of the currently open RTs. We keep the list sorted in order of
growing end bounds as for a
+ // new atom, this allows to efficiently find the RTs that are now
useless (if any). Also note that because
+ // atoms are passed to the tracker in order, any RT that is tracked
can be assumed as opened, i.e. we
+ // never have to test the RTs start since it's always assumed to be
less than what we have.
+ // Also note that this will store expired RTs (#7810). Those will be
of type ExpiredRangeTombstone and
+ // will be ignored by writeOpenedMarker.
+ private final List<RangeTombstone> openedTombstones = new
LinkedList<RangeTombstone>();
+
+ // Total number of atoms written by writeOpenedMarker().
private int atomCount;
+ /**
+ * Creates a new tracker given the table comparator.
+ *
+ * @param comparator the comparator for the table this will track
atoms
+ * for. The tracker assumes that atoms will be later provided to the
+ * tracker in {@code comparator} order.
+ */
- public Tracker(Comparator<ByteBuffer> comparator)
+ public Tracker(Comparator<Composite> comparator)
{
this.comparator = comparator;
}
/**
- * Compute RangeTombstone that are needed at the beginning of an index
+ * Computes the RangeTombstone that are needed at the beginning of an
index
* block starting with {@code firstColumn}.
- * Returns the total serialized size of said tombstones and write them
- * to {@code out} it if isn't null.
+ *
+ * @return the total serialized size of said tombstones and write
them to
+ * {@code out} if it isn't null.
*/
- public long writeOpenedMarker(OnDiskAtom firstColumn, DataOutput out,
OnDiskAtom.Serializer atomSerializer) throws IOException
+ public long writeOpenedMarker(OnDiskAtom firstColumn, DataOutputPlus
out, OnDiskAtom.Serializer atomSerializer) throws IOException
{
long size = 0;
- if (ranges.isEmpty())
+ if (openedTombstones.isEmpty())
return size;
/*
@@@ -228,16 -290,41 +275,41 @@@
}
}
+ /**
+ * Adds the provided {@code tombstone} _before_ the last element
returned by {@code iterator.next()}.
+ * <p>
+ * This method assumes that {@code iterator.next()} has been called
prior to this method call, i.e. that
+ * {@code iterator.hasPrevious() == true}.
+ */
+ private static void insertBefore(RangeTombstone tombstone,
ListIterator<RangeTombstone> iterator)
+ {
+ assert iterator.hasPrevious();
+ iterator.previous();
+ iterator.add(tombstone);
+ iterator.next();
+ }
+
+ /**
+ * Tests if the provided column is deleted by one of the tombstones
+ * tracked by this tracker.
+ * <p>
+ * This method should be called on columns in the same order as for
the update()
+ * method. Note that this method does not update the tracker so the
update() method
+ * should still be called on {@code column} (it doesn't matter if
update is called
+ * before or after this call).
+ */
- public boolean isDeleted(Column column)
+ public boolean isDeleted(Cell cell)
{
- for (RangeTombstone tombstone : ranges)
+ // We know every tombstone kept is "open", i.e. starts before the
column. So the
+ // column is deleted if any of the tracked tombstone ends after
the column
+ // (this will be the case of every RT if update() has been called
before this
+ // method, but we might have a few RT to skip otherwise) and the
RT deletion is
+ // actually more recent than the column timestamp.
+ for (RangeTombstone tombstone : openedTombstones)
{
- if (comparator.compare(cell.name(), tombstone.min) >= 0
- && comparator.compare(cell.name(), tombstone.max) <= 0
- if (comparator.compare(column.name(), tombstone.max) <= 0
- && tombstone.maxTimestamp() >= column.timestamp())
++ if (comparator.compare(cell.name(), tombstone.max) <= 0
+ && tombstone.timestamp() >= cell.timestamp())
- {
return true;
- }
}
return false;
}
http://git-wip-us.apache.org/repos/asf/cassandra/blob/655f0569/src/java/org/apache/cassandra/db/compaction/LazilyCompactedRow.java
----------------------------------------------------------------------
diff --cc src/java/org/apache/cassandra/db/compaction/LazilyCompactedRow.java
index 56a4ede,43801c6..941557b
--- a/src/java/org/apache/cassandra/db/compaction/LazilyCompactedRow.java
+++ b/src/java/org/apache/cassandra/db/compaction/LazilyCompactedRow.java
@@@ -291,29 -290,23 +291,32 @@@ public class LazilyCompactedRow extend
{
// when we clear() the container, it removes the deletion
info, so this needs to be reset each time
container.delete(maxRowTombstone);
- ColumnFamily purged = PrecompactedRow.removeDeleted(key,
shouldPurge, controller, container);
- if (purged == null || !purged.iterator().hasNext())
+ Iterator<Cell> iter = container.iterator();
+ Cell c = iter.next();
+ boolean shouldPurge = c.getLocalDeletionTime() <
Integer.MAX_VALUE && c.timestamp() < getMaxPurgeableTimestamp();
+ removeDeleted(container, shouldPurge, key, controller);
+ iter = container.iterator();
+ if (!iter.hasNext())
{
// don't call clear() because that resets the deletion
time. See CASSANDRA-7808.
- container =
ArrayBackedSortedColumns.factory.create(emptyColumnFamily.metadata());;
+ container =
ArrayBackedSortedColumns.factory.create(emptyColumnFamily.metadata());
return null;
}
- Column reduced = purged.iterator().next();
+
+ int localDeletionTime =
container.deletionInfo().getTopLevelDeletion().localDeletionTime;
+ if (localDeletionTime < Integer.MAX_VALUE)
+ tombstones.update(localDeletionTime);
+
+ Cell reduced = iter.next();
container =
ArrayBackedSortedColumns.factory.create(emptyColumnFamily.metadata());
- // PrecompactedRow.removeDeleted has only checked the
top-level CF deletion times,
- // not the range tombstones. For that we use the
columnIndexer tombstone tracker.
+ // removeDeleted has only checked the top-level CF deletion
times,
+ // not the range tombstone. For that we use the columnIndexer
tombstone tracker.
if (indexBuilder.tombstoneTracker().isDeleted(reduced))
{
+ // We skip that column so it won't be passed to the
tracker by the index builder. So pass it now to
+ // make sure we still discard potentially un-needed RT as
soon as possible.
+ indexBuilder.tombstoneTracker().update(reduced, false);
indexer.remove(reduced);
return null;
}