Repository: cassandra Updated Branches: refs/heads/trunk c0400b312 -> f713be4aa
Fix reading 2.1 sstable post-9705 (CASSANDR-9705 followup) Project: http://git-wip-us.apache.org/repos/asf/cassandra/repo Commit: http://git-wip-us.apache.org/repos/asf/cassandra/commit/f713be4a Tree: http://git-wip-us.apache.org/repos/asf/cassandra/tree/f713be4a Diff: http://git-wip-us.apache.org/repos/asf/cassandra/diff/f713be4a Branch: refs/heads/trunk Commit: f713be4aa2c80c1a72927a74c19c3adfafb55994 Parents: c0400b3 Author: Sylvain Lebresne <[email protected]> Authored: Thu Jul 23 12:56:44 2015 +0200 Committer: Sylvain Lebresne <[email protected]> Committed: Thu Jul 23 14:38:50 2015 +0200 ---------------------------------------------------------------------- src/java/org/apache/cassandra/db/Keyspace.java | 1 - .../org/apache/cassandra/db/LegacyLayout.java | 92 ++++++++++++++++++-- .../cassandra/db/UnfilteredDeserializer.java | 12 ++- .../cassandra/db/rows/ArrayBackedRow.java | 2 +- .../cassandra/schema/LegacySchemaMigrator.java | 5 +- 5 files changed, 99 insertions(+), 13 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/cassandra/blob/f713be4a/src/java/org/apache/cassandra/db/Keyspace.java ---------------------------------------------------------------------- diff --git a/src/java/org/apache/cassandra/db/Keyspace.java b/src/java/org/apache/cassandra/db/Keyspace.java index 9d3b178..07f3e6f 100644 --- a/src/java/org/apache/cassandra/db/Keyspace.java +++ b/src/java/org/apache/cassandra/db/Keyspace.java @@ -395,7 +395,6 @@ public class Keyspace replayPosition = CommitLog.instance.add(mutation); } - DecoratedKey key = mutation.key(); for (PartitionUpdate upd : mutation.getPartitionUpdates()) { ColumnFamilyStore cfs = columnFamilyStores.get(upd.metadata().cfId); http://git-wip-us.apache.org/repos/asf/cassandra/blob/f713be4a/src/java/org/apache/cassandra/db/LegacyLayout.java ---------------------------------------------------------------------- diff --git a/src/java/org/apache/cassandra/db/LegacyLayout.java b/src/java/org/apache/cassandra/db/LegacyLayout.java index 8242ab7..dc13700 100644 --- a/src/java/org/apache/cassandra/db/LegacyLayout.java +++ b/src/java/org/apache/cassandra/db/LegacyLayout.java @@ -140,7 +140,16 @@ public abstract class LegacyLayout ByteBuffer column = metadata.isCompound() ? CompositeType.extractComponent(cellname, metadata.comparator.size()) : cellname; if (column == null) + { + // 2ndary indexes tables used to be compound but dense, but we've transformed then into regular tables + // (non compact ones) but with no regular column (i.e. we only care about the clustering). So we'll get here + // in that case, and what we want to return is basically a row marker. + if (metadata.partitionColumns().isEmpty()) + return new LegacyCellName(clustering, null, null); + + // Otherwise, we shouldn't get there throw new IllegalArgumentException("No column name component found in cell name"); + } // Row marker, this is ok if (!column.hasRemaining()) @@ -345,6 +354,7 @@ public abstract class LegacyLayout columnsToFetch.add(column.name.bytes); Row.Builder builder = ArrayBackedRow.unsortedBuilder(statics, FBUtilities.nowInSeconds()); + builder.newRow(Clustering.STATIC_CLUSTERING); boolean foundOne = false; LegacyAtom atom; @@ -416,7 +426,7 @@ public abstract class LegacyLayout } }; List<Iterator<LegacyAtom>> iterators = Arrays.asList(asLegacyAtomIterator(cells), asLegacyAtomIterator(delInfo.inRowRangeTombstones())); - Iterator<LegacyAtom> merged = MergeIterator.get(iterators, grouper.metadata.comparator, reducer); + Iterator<LegacyAtom> merged = MergeIterator.get(iterators, legacyAtomComparator(grouper.metadata), reducer); final PeekingIterator<LegacyAtom> atoms = Iterators.peekingIterator(merged); return new AbstractIterator<Row>() @@ -602,6 +612,74 @@ public abstract class LegacyLayout }; } + private static Comparator<LegacyAtom> legacyAtomComparator(CFMetaData metadata) + { + return (o1, o2) -> + { + // First we want to compare by clustering, but we have to be careful with range tombstone, because + // we can have collection deletion and we want those to sort properly just before the column they + // delete, not before the whole row. + ClusteringPrefix c1 = o1.clustering(); + ClusteringPrefix c2 = o2.clustering(); + + int clusteringComparison; + if (c1.size() != c2.size() || (o1.isCell() == o2.isCell())) + { + clusteringComparison = metadata.comparator.compare(c1, c2); + } + else + { + // one is a cell and one is a range tombstone, and both have the same prefix size (that is, the + // range tombstone is either a row deletion or a collection deletion). + LegacyRangeTombstone rt = o1.isCell() ? o2.asRangeTombstone() : o1.asRangeTombstone(); + clusteringComparison = rt.isCollectionTombstone() + ? 0 + : metadata.comparator.compare(c1, c2); + } + + // Note that if both are range tombstones and have the same clustering, then they are equal. + if (clusteringComparison != 0) + return clusteringComparison; + + if (o1.isCell()) + { + LegacyCell cell1 = o1.asCell(); + if (o2.isCell()) + { + LegacyCell cell2 = o2.asCell(); + // Check for row marker cells + if (cell1.name.column == null) + return cell2.name.column == null ? 0 : -1; + return cell2.name.column == null ? 1 : cell1.name.column.compareTo(cell2.name.column); + } + + LegacyRangeTombstone rt2 = o2.asRangeTombstone(); + assert rt2.isCollectionTombstone(); // otherwise, we shouldn't have got a clustering equality + if (cell1.name.column == null) + return -1; + int cmp = cell1.name.column.compareTo(rt2.start.collectionName); + // If both are for the same column, then the RT should come first + return cmp == 0 ? 1 : cmp; + } + else + { + assert o2.isCell(); + LegacyCell cell2 = o2.asCell(); + + LegacyRangeTombstone rt1 = o1.asRangeTombstone(); + assert rt1.isCollectionTombstone(); // otherwise, we shouldn't have got a clustering equality + + if (cell2.name.column == null) + return 1; + + int cmp = rt1.start.collectionName.compareTo(cell2.name.column); + // If both are for the same column, then the RT should come first + return cmp == 0 ? -1 : cmp; + } + }; + } + + public static LegacyAtom readLegacyAtom(CFMetaData metadata, DataInputPlus in, boolean readAllAsDynamic) throws IOException { while (true) @@ -768,14 +846,10 @@ public abstract class LegacyLayout public boolean addCell(LegacyCell cell) { - if (isStatic) - { - if (cell.name.clustering != Clustering.STATIC_CLUSTERING) - return false; - } - else if (clustering == null) + if (clustering == null) { clustering = cell.name.clustering; + assert !isStatic || clustering == Clustering.STATIC_CLUSTERING; builder.newRow(clustering); } else if (!clustering.equals(cell.name.clustering)) @@ -941,7 +1015,7 @@ public abstract class LegacyLayout } } - public interface LegacyAtom extends Clusterable + public interface LegacyAtom { public boolean isCell(); @@ -1259,7 +1333,7 @@ public abstract class LegacyLayout while (iter.hasNext()) { LegacyRangeTombstone tombstone = iter.next(); - if (metadata.comparator.compare(atom, tombstone.stop.bound) >= 0) + if (metadata.comparator.compare(atom.clustering(), tombstone.stop.bound) >= 0) iter.remove(); } } http://git-wip-us.apache.org/repos/asf/cassandra/blob/f713be4a/src/java/org/apache/cassandra/db/UnfilteredDeserializer.java ---------------------------------------------------------------------- diff --git a/src/java/org/apache/cassandra/db/UnfilteredDeserializer.java b/src/java/org/apache/cassandra/db/UnfilteredDeserializer.java index b3709d2..36a372f 100644 --- a/src/java/org/apache/cassandra/db/UnfilteredDeserializer.java +++ b/src/java/org/apache/cassandra/db/UnfilteredDeserializer.java @@ -340,7 +340,17 @@ public abstract class UnfilteredDeserializer public int compareNextTo(Slice.Bound bound) throws IOException { checkReady(); - return metadata.comparator.compare(nextAtom, bound); + int cmp = metadata.comparator.compare(nextAtom.clustering(), bound); + if (cmp != 0 || nextAtom.isCell() || !nextIsRow()) + return cmp; + + // Comparing the clustering of the LegacyAtom to the bound work most of the time. There is the case + // of LegacyRangeTombstone that are either a collectionTombstone or a rowDeletion. In those case, their + // clustering will be the inclusive start of the row they are a tombstone for, which can be equal to + // the slice bound. But we don't want to return equality because the LegacyTombstone should stand for + // it's row and should sort accordingly. This matter particularly because SSTableIterator will skip + // equal results for the start bound (see SSTableIterator.handlePreSliceData for details). + return bound.isStart() ? 1 : -1; } public boolean nextIsRow() throws IOException http://git-wip-us.apache.org/repos/asf/cassandra/blob/f713be4a/src/java/org/apache/cassandra/db/rows/ArrayBackedRow.java ---------------------------------------------------------------------- diff --git a/src/java/org/apache/cassandra/db/rows/ArrayBackedRow.java b/src/java/org/apache/cassandra/db/rows/ArrayBackedRow.java index 016b4fa..12b23e1 100644 --- a/src/java/org/apache/cassandra/db/rows/ArrayBackedRow.java +++ b/src/java/org/apache/cassandra/db/rows/ArrayBackedRow.java @@ -510,7 +510,7 @@ public class ArrayBackedRow extends AbstractRow public void newRow(Clustering clustering) { - assert cells.isEmpty(); // Ensures we've properly called build() if we've use this builder before + assert this.clustering == null; // Ensures we've properly called build() if we've use this builder before this.clustering = clustering; } http://git-wip-us.apache.org/repos/asf/cassandra/blob/f713be4a/src/java/org/apache/cassandra/schema/LegacySchemaMigrator.java ---------------------------------------------------------------------- diff --git a/src/java/org/apache/cassandra/schema/LegacySchemaMigrator.java b/src/java/org/apache/cassandra/schema/LegacySchemaMigrator.java index 50644bb..f554ffb 100644 --- a/src/java/org/apache/cassandra/schema/LegacySchemaMigrator.java +++ b/src/java/org/apache/cassandra/schema/LegacySchemaMigrator.java @@ -457,7 +457,10 @@ public final class LegacySchemaMigrator kind = ColumnDefinition.Kind.STATIC; Integer componentIndex = null; - if (row.has("component_index")) + // Note that the component_index is not useful for non-primary key parts (it never really in fact since there is + // no particular ordering of non-PK columns, we only used to use it as a simplification but that's not needed + // anymore) + if (kind.isPrimaryKeyKind() && row.has("component_index")) componentIndex = row.getInt("component_index"); // Note: we save the column name as string, but we should not assume that it is an UTF8 name, we
