bbotella commented on code in PR #125: URL: https://github.com/apache/cassandra-sidecar/pull/125#discussion_r1625078750
########## server-common/src/main/java/org/apache/cassandra/sidecar/common/server/cluster/locator/Token.java: ########## @@ -0,0 +1,108 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.sidecar.common.server.cluster.locator; + +import java.math.BigInteger; +import java.util.Comparator; +import java.util.Objects; + +import org.jetbrains.annotations.NotNull; + +/** + * Token, i.e. hashed partition key, in Cassandra + */ +public final class Token implements Comparable<Token> +{ + private static final Comparator<Token> TOKEN_COMPARATOR = Comparator.comparing(Token::toBigInteger); + + private final BigInteger value; + private final int hashCode; + + public static Token from(BigInteger value) + { + return new Token(value); + } + + /** + * Create token from its string literal + * @param valueStr token value + * @throws NumberFormatException {@code valueStr} is not a valid representation + * of a BigInteger. + * @return token + */ + public static Token from(String valueStr) + { + return new Token(new BigInteger(valueStr)); + } + + public static Token from(long value) + { + return new Token(BigInteger.valueOf(value)); + } + + private Token(BigInteger value) + { + this.value = value; + this.hashCode = value.hashCode(); + } + + public BigInteger toBigInteger() + { + return value; + } + + public Token increment() + { + return new Token(value.add(BigInteger.ONE)); Review Comment: Do we need to create a new object instead of just incrementing `value` and returning this? ########## server-common/src/main/java/org/apache/cassandra/sidecar/common/server/cluster/locator/TokenRange.java: ########## @@ -75,18 +75,83 @@ public TokenRange(long start, long end) public TokenRange(BigInteger start, BigInteger end) { - this.start = start; - this.end = end; + this(Token.from(start), Token.from(end)); + } + + public TokenRange(Token start, Token end) + { + this.range = Range.openClosed(start, end); + } + + /** + * @return start token. It is not enclosed in the range. + */ + public Token start() + { + return range.lowerEndpoint(); + } + + /** + * @return end token. It is the last token enclosed in the range. + */ + public Token end() + { + return range.upperEndpoint(); + } + + /** + * @return the first token enclosed in the range + */ + @Nullable + public Token firstToken() + { + if (range.isEmpty()) + { + return null; + } + return range.lowerEndpoint().increment(); Review Comment: Curious. Why returning the increment here? Shouldn't it return `range.lowerEndpoint()`? ########## src/main/java/org/apache/cassandra/sidecar/restore/RestoreSliceTask.java: ########## @@ -430,22 +450,79 @@ private Future<File> validateFiles(File directory) } // all files match with the provided checksums - promise.tryComplete(directory); + return directory; }, false); // unordered return StopWatch.measureTimeTaken(future, d -> instanceMetrics.restore().sliceValidationTime.metric.update(d, TimeUnit.NANOSECONDS)); } - private void compareChecksums(Map<String, String> expectedChecksums, File[] files, Promise<?> promise) + // Remove all the SSTables that does not belong this node + // The method modifies the input manifest and delete files under directory, if out of range sstables are found + private void removeOutOfRangeSSTables(File directory, RestoreSliceManifest manifest) throws RestoreJobException, IOException + { + Set<TokenRange> ranges = localTokenRangesProvider.localTokenRanges(slice.keyspace()).get(slice.owner().id()); + if (ranges == null || ranges.isEmpty()) + { + // Note: retry is allowed for the failure + throw new RestoreJobException("Unable to fetch local range, retry later"); + } + + // 1. remove the sstables that are fully out of range + // 2. detect if there is any range that partially overlaps. In that case, signal that this node is required to run nodetool cleanup on job completion + Iterator<Map.Entry<String, RestoreSliceManifest.ManifestEntry>> it = manifest.entrySet().iterator(); + while (it.hasNext()) + { + RestoreSliceManifest.ManifestEntry entry = it.next().getValue(); + // TokenRange is open-closed, hence subtracting one from the rangeStart read from manifest + TokenRange sstableRange = new TokenRange(entry.startToken().subtract(BigInteger.ONE), + entry.endToken()); + + boolean hasOverlap = false; + boolean fullyEnclosed = false; + for (TokenRange owningRange : ranges) + { + if (hasOverlap) + { + break; + } + + hasOverlap = owningRange.overlaps(sstableRange); + + if (hasOverlap) + { + fullyEnclosed = owningRange.encloses(sstableRange); + } + } + + // fully out of range + if (!hasOverlap) + { + // remove the entry from manifest + it.remove(); + // delete the files + for (String fileName : entry.componentsChecksum().keySet()) + { + Path path = directory.toPath().resolve(fileName); + Files.deleteIfExists(path); + } Review Comment: Nit: Maybe extracting this to a private method? It may help with readability (the method is already big) ########## server-common/src/main/java/org/apache/cassandra/sidecar/common/server/cluster/locator/TokenRange.java: ########## @@ -75,18 +75,83 @@ public TokenRange(long start, long end) public TokenRange(BigInteger start, BigInteger end) { - this.start = start; - this.end = end; + this(Token.from(start), Token.from(end)); Review Comment: Should we validate that end is bigger than start? ########## server-common/src/test/java/org/apache/cassandra/sidecar/common/server/cluster/locator/TokenTest.java: ########## @@ -0,0 +1,54 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.sidecar.common.server.cluster.locator; + +import java.math.BigInteger; + +import org.junit.jupiter.api.Test; + +import static org.assertj.core.api.Assertions.assertThat; + +class TokenTest +{ + @Test + void testCreateToken() + { + Token t1 = Token.from(1); + Token t2 = Token.from(BigInteger.ONE); + Token t3 = Token.from("1"); Review Comment: ``` Token t4 = Token.from(1L); ``` ? ########## src/main/java/org/apache/cassandra/sidecar/restore/RestoreJobDiscoverer.java: ########## @@ -264,7 +264,7 @@ private void findSlicesOfRangeAndSubmit(InstanceMetadata instance, RestoreJob re { short bucketId = 0; // TODO: update the implementation to pick proper bucketId restoreSliceDatabaseAccessor - .selectByJobByBucketByTokenRange(restoreJob.jobId, bucketId, range.start, range.end) + .selectByJobByBucketByTokenRange(restoreJob.jobId, bucketId, range.start().toBigInteger(), range.end().toBigInteger()) Review Comment: Should we make `selectByJobByBucketByTokenRange` just accept a `TokenRange range` instead of a `range.start()` and a `range.end()`? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected] --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]

