krummas commented on code in PR #3299: URL: https://github.com/apache/cassandra/pull/3299#discussion_r1971481820
########## src/java/org/apache/cassandra/db/lifecycle/SSTableIntervalTree.java: ########## @@ -40,21 +39,49 @@ public class SSTableIntervalTree extends IntervalTree<PartitionPosition, SSTable super(intervals); } + private SSTableIntervalTree(Interval<PartitionPosition, SSTableReader>[] minOrder, Interval<PartitionPosition, SSTableReader>[] maxOrder) + { + super(minOrder, maxOrder); + } + + @Override + protected SSTableIntervalTree create(Interval<PartitionPosition, SSTableReader>[] minOrder, Interval<PartitionPosition, SSTableReader>[] maxOrder) + { + return new SSTableIntervalTree(minOrder, maxOrder); + } + public static SSTableIntervalTree empty() { return EMPTY; } - public static SSTableIntervalTree build(Iterable<SSTableReader> sstables) + public static SSTableIntervalTree buildSSTableIntervalTree(Collection<SSTableReader> sstables) { + if (sstables.isEmpty()) + return EMPTY; return new SSTableIntervalTree(buildIntervals(sstables)); } - public static List<Interval<PartitionPosition, SSTableReader>> buildIntervals(Iterable<SSTableReader> sstables) + public static List<Interval<PartitionPosition, SSTableReader>> buildIntervals(Collection<SSTableReader> sstables) { - List<Interval<PartitionPosition, SSTableReader>> intervals = new ArrayList<>(Iterables.size(sstables)); + if (sstables == null || sstables.isEmpty()) + return Collections.emptyList(); + return Arrays.asList(buildIntervalsArray(sstables)); + } + + public static Interval<PartitionPosition, SSTableReader>[] buildIntervalsArray(Collection<SSTableReader> sstables) + { + if (sstables == null || !sstables.isEmpty()) Review Comment: this should be `if (sstables == null || stables.isEmpty())` otherwise we only build empty trees test (though, I would hope there are at least some other tests failing due to the above, since nodes can't start): ``` @Test public void nonEmptyIT() { ColumnFamilyStore cfs = MockSchema.newCFS("st"); SSTableReader sstable1 = MockSchema.sstable(1, 10, 10, cfs); SSTableIntervalTree tree = SSTableIntervalTree.empty(); tree = SSTableIntervalTree.update(tree, Collections.emptyList(), Collections.singleton(sstable1)); SSTableReader sstable2 = MockSchema.sstable(2, 20, 20, cfs); tree = SSTableIntervalTree.update(tree, Collections.emptyList(), Collections.singleton(sstable2)); BufferDecoratedKey search = readerBounds(10); assert tree.search(search).size() == 1; } ``` ########## src/java/org/apache/cassandra/dht/LocalPartitioner.java: ########## @@ -49,9 +55,28 @@ public DecoratedKey decorateKey(ByteBuffer key) return new CachedHashDecoratedKey(getToken(key), key); } - public Token midpoint(Token left, Token right) + /** + * Convert a ByteBuffer containing the most significant of 'sigbytes' bytes + * representing a big-endian magnitude into a BigInteger. + */ + private BigInteger bigForByteBuffer(ByteBuffer buffer, int sigbytes) { - throw new UnsupportedOperationException(); + byte[] b = new byte[sigbytes]; + buffer.get(b, buffer.position(), buffer.remaining()); + return new BigInteger(1, b); + } + + public Token midpoint(Token lt, Token rt) Review Comment: Is this change needed? I don't think it is called ########## src/java/org/apache/cassandra/dht/ByteOrderedPartitioner.java: ########## @@ -194,7 +193,7 @@ private BigInteger bigForBytes(byte[] bytes, int sigbytes) * If remainder is true, an additional byte with the high order bit enabled * will be added to the end of the array */ - private byte[] bytesForBig(BigInteger big, int sigbytes, boolean remainder) + public static byte[] bytesForBig(BigInteger big, int sigbytes, boolean remainder) Review Comment: nit; could be package-private ########## src/java/org/apache/cassandra/index/sasi/conf/view/RangeTermTree.java: ########## @@ -23,10 +23,10 @@ import java.util.List; import java.util.Set; +import org.apache.cassandra.db.marshal.AbstractType; Review Comment: unrelated change ########## src/java/org/apache/cassandra/utils/Interval.java: ########## @@ -42,6 +46,21 @@ public static <C, D> Interval<C, D> create(C min, C max, D data) return new Interval(min, max, data); } + + public C midpoint(BiFunction<C, C, C> calculateMidpoint) + { + C midpoint = this.midpoint; + if (midpoint != null) + return midpoint; + midpoint = this.midpoint = calculateMidpoint.apply(min, max); + return midpoint; + } + + public void clearMidpoint() Review Comment: unused ########## src/java/org/apache/cassandra/utils/IntervalTree.java: ########## @@ -172,51 +352,85 @@ public IntervalNode(Collection<I> toBisect) intersectsRight = l; left = null; right = null; + return; + } + + low = minOrder.get(0).min; + high = maxOrder.get(maxOrder.size() - 1).max; + + int totalPoints = minOrder.size() * 2; + int midIndex = totalPoints / 2; + int i = 0, j = 0, count = 0; + while (count < midIndex) Review Comment: this could be just `while (count < minOrder.size())` and remove `totalPoints` and `midIndex` above ########## src/java/org/apache/cassandra/index/sasi/conf/view/PrefixTermTree.java: ########## @@ -22,18 +22,18 @@ import java.util.Map; import java.util.Set; +import com.google.common.collect.Sets; + +import org.apache.cassandra.db.marshal.AbstractType; Review Comment: unrelated change ########## src/java/org/apache/cassandra/utils/IntervalTree.java: ########## @@ -110,6 +159,123 @@ public List<D> search(C point) return search(Interval.<C, D>create(point, point, null)); } + /** + * The input arrays aren't defensively copied and will be sorted. The update method doesn't allow duplicates or elements to be removed + * to be missing and this differs from the constructor which does not duplicate checking at all. + * + * It made more sense for update to be stricter because it is tracking removals and additions explicitly instead of building + * a list from scratch and in the targeted use case of a list of SSTables there are no duplicates. At a given point in time + * an sstable represents exactly one interval (although it may switch via removal and addition as in early open). + */ + public IntervalTree<C, D, I> update(I[] removals, I[] additions) + { + if (removals == null) + removals = (I[])EMPTY_ARRAY; + if (additions == null) + additions = (I[])EMPTY_ARRAY; + + if (removals.length == 0 && additions.length == 0) + { + return this; + } + + Arrays.sort(removals, Interval.<C, D>minOrdering()); + Arrays.sort(additions, Interval.<C, D>minOrdering()); + + for (int i = 1; i < additions.length; i++) + checkState( Interval.<C, D>minOrdering().compare(additions[i], additions[i-1]) != 0, "Duplicate interval in additions %s", additions[i]); + + I[] newByMin = buildUpdatedArray( + intervalsByMinOrder, + removals, + additions, + Interval.<C, D>minOrdering() + ); + + Arrays.sort(removals, Interval.<C, D>maxOrdering()); + Arrays.sort(additions, Interval.<C, D>maxOrdering()); + + I[] newByMax = buildUpdatedArray( + intervalsByMaxOrder, + removals, + additions, + Interval.<C, D>maxOrdering() + ); + + return create(newByMin, newByMax); + } + + @SuppressWarnings("unchecked") + private I[] buildUpdatedArray(I[] existingSorted, + I[] removalsSorted, + I[] additionsSorted, + AsymmetricOrdering<Interval<C, D>, C> cmp) + { + int finalSize = existingSorted.length + additionsSorted.length - removalsSorted.length; + I[] result = (I[]) new Interval[finalSize]; + + int existingIndex = 0; + int removalsIndex = 0; + int additionsIndex = 0; + int resultIndex = 0; + + while (existingIndex < existingSorted.length) + { + I currentExisting = existingSorted[existingIndex]; + + while (removalsIndex < removalsSorted.length + && cmp.compare(removalsSorted[removalsIndex], currentExisting) <= 0) Review Comment: could avoid this double cmp.compare with something like: ``` + int c; while (removalsIndex < removalsSorted.length - && cmp.compare(removalsSorted[removalsIndex], currentExisting) <= 0) + && (c = cmp.compare(removalsSorted[removalsIndex], currentExisting)) <= 0) { - int c = cmp.compare(removalsSorted[removalsIndex], currentExisting); if (c < 0) ``` ########## src/java/org/apache/cassandra/index/sasi/conf/view/View.java: ########## @@ -18,22 +18,30 @@ package org.apache.cassandra.index.sasi.conf.view; import java.nio.ByteBuffer; -import java.util.*; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Set; import java.util.stream.Collectors; -import org.apache.cassandra.index.sasi.SSTableIndex; -import org.apache.cassandra.index.sasi.conf.ColumnIndex; -import org.apache.cassandra.index.sasi.plan.Expression; +import com.google.common.collect.Iterables; + import org.apache.cassandra.db.marshal.AbstractType; import org.apache.cassandra.db.marshal.AsciiType; import org.apache.cassandra.db.marshal.UTF8Type; +import org.apache.cassandra.index.sasi.SSTableIndex; +import org.apache.cassandra.index.sasi.conf.ColumnIndex; +import org.apache.cassandra.index.sasi.plan.Expression; Review Comment: unrelated ########## src/java/org/apache/cassandra/utils/Interval.java: ########## @@ -42,6 +46,21 @@ public static <C, D> Interval<C, D> create(C min, C max, D data) return new Interval(min, max, data); } + + public C midpoint(BiFunction<C, C, C> calculateMidpoint) Review Comment: unused -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: pr-unsubscr...@cassandra.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: pr-unsubscr...@cassandra.apache.org For additional commands, e-mail: pr-h...@cassandra.apache.org