aweisberg commented on code in PR #4118: URL: https://github.com/apache/cassandra/pull/4118#discussion_r2076241247
########## src/java/org/apache/cassandra/db/CoordinatorLogBoundaries.java: ########## @@ -0,0 +1,155 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.db; + +import java.io.IOException; +import java.util.Iterator; +import java.util.NoSuchElementException; + +import org.apache.cassandra.io.IVersionedSerializer; +import org.apache.cassandra.io.sstable.metadata.StatsMetadata; +import org.apache.cassandra.io.util.DataInputPlus; +import org.apache.cassandra.io.util.DataOutputPlus; +import org.apache.cassandra.net.MessagingService; +import org.apache.cassandra.replication.CoordinatorLogId; +import org.apache.cassandra.replication.MutationId; +import org.apache.cassandra.utils.vint.VIntCoding; + +/** + * Max mutation ID present in this SSTable for each coordinator log, to determine whether an SSTable is reconciled or + * not. Once max mutation IDs are reconciled, next compaction can safely mark this SSTabled as repaired. Note that peers + * may have reconciled all mutations included in an SSTable, but {@link StatsMetadata#repairedAt} is dependent on + * compaction timing, so "nodetool repair --validate" may report temporary disagreements on the repaired set. 
+ * <p> + * Iterable over {@link CoordinatorLogId}. + */ +public abstract class CoordinatorLogBoundaries implements Iterable<Long> +{ + public static class Builder + { + private final MutableCoordinatorLogBoundaries boundaries = new MutableCoordinatorLogBoundaries(); + + public CoordinatorLogBoundaries build() + { + return boundaries; + } + + public void add(MutationId id) + { + boundaries.add(id); + } + + public void addAll(CoordinatorLogBoundaries from) + { + for (long logId : from) + { + MutationId max = from.max(logId); + if (!max.isNone()) + boundaries.add(max); + } + } + } + + public static Builder builder() + { + return new Builder(); + } + + public abstract int maxOffset(long logId); + protected abstract MutationId max(long logId); + protected abstract int size(); + + public static final IVersionedSerializer<CoordinatorLogBoundaries> serializer = new IVersionedSerializer<>() + { + @Override + public void serialize(CoordinatorLogBoundaries boundaries, DataOutputPlus out, int version) throws IOException + { + if (version < MessagingService.VERSION_52) + return; + out.writeUnsignedVInt32(boundaries.size()); + for (long logId : boundaries) + MutationId.serializer.serialize(boundaries.max(logId), out, version); + } + + @Override + public CoordinatorLogBoundaries deserialize(DataInputPlus in, int version) throws IOException + { + int size = in.readUnsignedVInt32(); + Builder builder = CoordinatorLogBoundaries.builder(); + for (int i = 0; i < size; i++) + { + MutationId mutationId = MutationId.serializer.deserialize(in, version); + builder.add(mutationId); + } + return builder.build(); + } + + @Override + public long serializedSize(CoordinatorLogBoundaries boundaries, int version) + { + if (version < MessagingService.VERSION_52) + return 0; + long size = 0; + size += VIntCoding.computeUnsignedVIntSize(boundaries.size()); + for (long logId : boundaries) + size += MutationId.serializer.serializedSize(boundaries.max(logId), version); + return size; + } + }; + + 
+ public static final CoordinatorLogBoundaries NONE = new CoordinatorLogBoundaries() Review Comment: I think for this class it's probably not very important, but it's better to instantiate `MutableCoordinatorLogBoundaries` rather than create a new class that implements `CoordinatorLogBoundaries` because it makes call sites for the methods bimorphic instead of monomorphic so they don't inline quite as efficiently. ########## src/java/org/apache/cassandra/db/CoordinatorLogBoundaries.java: ########## @@ -0,0 +1,155 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.cassandra.db; + +import java.io.IOException; +import java.util.Iterator; +import java.util.NoSuchElementException; + +import org.apache.cassandra.io.IVersionedSerializer; +import org.apache.cassandra.io.sstable.metadata.StatsMetadata; +import org.apache.cassandra.io.util.DataInputPlus; +import org.apache.cassandra.io.util.DataOutputPlus; +import org.apache.cassandra.net.MessagingService; +import org.apache.cassandra.replication.CoordinatorLogId; +import org.apache.cassandra.replication.MutationId; +import org.apache.cassandra.utils.vint.VIntCoding; + +/** + * Max mutation ID present in this SSTable for each coordinator log, to determine whether an SSTable is reconciled or + * not. Once max mutation IDs are reconciled, next compaction can safely mark this SSTabled as repaired. Note that peers + * may have reconciled all mutations included in an SSTable, but {@link StatsMetadata#repairedAt} is dependent on + * compaction timing, so "nodetool repair --validate" may report temporary disagreements on the repaired set. + * <p> + * Iterable over {@link CoordinatorLogId}. 
+ */ +public abstract class CoordinatorLogBoundaries implements Iterable<Long> +{ + public static class Builder + { + private final MutableCoordinatorLogBoundaries boundaries = new MutableCoordinatorLogBoundaries(); + + public CoordinatorLogBoundaries build() + { + return boundaries; + } + + public void add(MutationId id) + { + boundaries.add(id); + } + + public void addAll(CoordinatorLogBoundaries from) + { + for (long logId : from) + { + MutationId max = from.max(logId); + if (!max.isNone()) + boundaries.add(max); + } + } + } + + public static Builder builder() + { + return new Builder(); + } + + public abstract int maxOffset(long logId); + protected abstract MutationId max(long logId); + protected abstract int size(); + + public static final IVersionedSerializer<CoordinatorLogBoundaries> serializer = new IVersionedSerializer<>() + { + @Override + public void serialize(CoordinatorLogBoundaries boundaries, DataOutputPlus out, int version) throws IOException + { + if (version < MessagingService.VERSION_52) + return; + out.writeUnsignedVInt32(boundaries.size()); + for (long logId : boundaries) + MutationId.serializer.serialize(boundaries.max(logId), out, version); + } + + @Override + public CoordinatorLogBoundaries deserialize(DataInputPlus in, int version) throws IOException + { + int size = in.readUnsignedVInt32(); Review Comment: Why is there no check here on version? 
########## src/java/org/apache/cassandra/db/memtable/AbstractMemtable.java: ########## @@ -46,7 +45,8 @@ public abstract class AbstractMemtable implements Memtable protected final AtomicLong currentOperations = new AtomicLong(0); protected final ColumnsCollector columnsCollector; protected final StatsCollector statsCollector = new StatsCollector(); - protected final MutationIdCollector mutationIdCollector = new MutationIdCollector(); + // TODO: Handle concurrency + protected final MutableCoordinatorLogBoundaries coordinatorLogBoundaries = new MutableCoordinatorLogBoundaries(); Review Comment: Things seem a little mixed up. `AbstractMemtable` has storage for the coordinator log boundaries, and this is used in `SkipListMemtable`, but not in `ShardedSkipListMemtable` or `TrieMemtable`. `ShardedSkipListMemtable` also has a locking version which is a bit bonkers because it has all the non-blocking code and associated overhead in it even when locking. Some low-hanging-fruit performance improvements to be had there, but that is a tomorrow problem. I think the thing to do here is to have `MutableCoordinatorLogBoundaries` just be thread-safe and move the copy in `AbstractMemtable` into `SkipListMemtable` since all other versions use the shard. And really once we have sharded versions of these I think an `AtomicReference` for padding is redundant with the sharding so just have `MutableCoordinatorLogBoundaries` extend `NonBlockingHashMapLong`. `NonBlockingHashMap` is quite fast and it's fine to use it where you would use a regular hash map, at least according to Cliff Click. 
########## src/java/org/apache/cassandra/db/memtable/Memtable.java: ########## @@ -202,6 +202,10 @@ interface Owner /** Size of the data not accounting for any metadata / mapping overheads */ long getLiveDataSize(); + /** Snapshot of the mutation id ranges applied to this memtable */ + @VisibleForTesting Review Comment: Bump: this should still be removed. ########## src/java/org/apache/cassandra/db/MutableCoordinatorLogBoundaries.java: ########## @@ -0,0 +1,111 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.db; + +import java.util.Iterator; +import java.util.Objects; + +import javax.annotation.concurrent.ThreadSafe; + +import org.apache.cassandra.replication.MutationId; +import org.apache.cassandra.replication.ShortMutationId; +import org.jctools.maps.NonBlockingHashMapLong; + +@ThreadSafe +public class MutableCoordinatorLogBoundaries extends CoordinatorLogBoundaries Review Comment: This could extend the actual collection it uses like I suggested earlier and then it's one less indirection to call `max`. I can't see where `max` is used outside of tests so I am not sure if it's in a tight loop somewhere that would actually benefit. 
You can put the mutability into an interface to hide all the methods from the base class that you don't want to expose since it's messy to have all those extra methods. ########## src/java/org/apache/cassandra/db/CoordinatorLogBoundaries.java: ########## @@ -0,0 +1,155 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.db; + +import java.io.IOException; +import java.util.Iterator; +import java.util.NoSuchElementException; + +import org.apache.cassandra.io.IVersionedSerializer; +import org.apache.cassandra.io.sstable.metadata.StatsMetadata; +import org.apache.cassandra.io.util.DataInputPlus; +import org.apache.cassandra.io.util.DataOutputPlus; +import org.apache.cassandra.net.MessagingService; +import org.apache.cassandra.replication.CoordinatorLogId; +import org.apache.cassandra.replication.MutationId; +import org.apache.cassandra.utils.vint.VIntCoding; + +/** + * Max mutation ID present in this SSTable for each coordinator log, to determine whether an SSTable is reconciled or + * not. Once max mutation IDs are reconciled, next compaction can safely mark this SSTabled as repaired. 
Note that peers + * may have reconciled all mutations included in an SSTable, but {@link StatsMetadata#repairedAt} is dependent on + * compaction timing, so "nodetool repair --validate" may report temporary disagreements on the repaired set. + * <p> + * Iterable over {@link CoordinatorLogId}. + */ +public abstract class CoordinatorLogBoundaries implements Iterable<Long> Review Comment: I think this makes more sense as an interface because that frees implementing classes to extend something else. ########## src/java/org/apache/cassandra/db/memtable/ShardedSkipListMemtable.java: ########## @@ -351,7 +357,7 @@ static class MemtableShard private final ColumnsCollector columnsCollector; private final StatsCollector statsCollector; - private final MutationIdCollector mutationIdCollector; + private final MutableCoordinatorLogBoundaries coordinatorLogBoundaries = new MutableCoordinatorLogBoundaries(); Review Comment: This is used mutably and then needs to be effectively immutable. The mutable reference should be set to `null` before it is used in a way that needs it to be immutable. So have a reference to the immutable version with private visibility, a reference to the mutable version, and a getter for the immutable version that will null out the mutable version when it is called. ########## src/java/org/apache/cassandra/db/memtable/TrieMemtable.java: ########## @@ -452,7 +462,7 @@ static class MemtableShard private final ColumnsCollector columnsCollector; private final StatsCollector statsCollector; - private final MutationIdCollector mutationIdCollector; + private final MutableCoordinatorLogBoundaries coordinatorLogBoundaries = new MutableCoordinatorLogBoundaries(); Review Comment: Same comment as `ShardedSkipListMemtable` where this should be `null` once it starts being shared. 
########## src/java/org/apache/cassandra/db/CoordinatorLogBoundaries.java: ########## @@ -0,0 +1,155 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.db; + +import java.io.IOException; +import java.util.Iterator; +import java.util.NoSuchElementException; + +import org.apache.cassandra.io.IVersionedSerializer; +import org.apache.cassandra.io.sstable.metadata.StatsMetadata; +import org.apache.cassandra.io.util.DataInputPlus; +import org.apache.cassandra.io.util.DataOutputPlus; +import org.apache.cassandra.net.MessagingService; +import org.apache.cassandra.replication.CoordinatorLogId; +import org.apache.cassandra.replication.MutationId; +import org.apache.cassandra.utils.vint.VIntCoding; + +/** + * Max mutation ID present in this SSTable for each coordinator log, to determine whether an SSTable is reconciled or + * not. Once max mutation IDs are reconciled, next compaction can safely mark this SSTabled as repaired. Note that peers + * may have reconciled all mutations included in an SSTable, but {@link StatsMetadata#repairedAt} is dependent on + * compaction timing, so "nodetool repair --validate" may report temporary disagreements on the repaired set. 
+ * <p> + * Iterable over {@link CoordinatorLogId}. + */ +public abstract class CoordinatorLogBoundaries implements Iterable<Long> +{ + public static class Builder Review Comment: This particular class isn't built that often so a wrapper builder isn't a big deal, but it is a wasted allocation. Since the boundaries returned is the mutable instance anyways it doesn't help with blocking access to mutability which is one of the reasons to use a builder. I know Blake advocated for this approach and it's definitely the idiomatic way to do it when you aren't being picky about performance. ########## src/java/org/apache/cassandra/db/MutableCoordinatorLogBoundaries.java: ########## @@ -0,0 +1,111 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.cassandra.db; + +import java.util.Iterator; +import java.util.Objects; + +import javax.annotation.concurrent.ThreadSafe; + +import org.apache.cassandra.replication.MutationId; +import org.apache.cassandra.replication.ShortMutationId; +import org.jctools.maps.NonBlockingHashMapLong; + +@ThreadSafe +public class MutableCoordinatorLogBoundaries extends CoordinatorLogBoundaries +{ + private static final MutationId NONE = MutationId.none(); + private static final int NONE_OFFSET = NONE.offset(); + + // A replica can only receive writes from another replica it shares ranges with, and tracked writes are executed by + // coordinators, so this should contain up to (2*RF - 1) keys + private final NonBlockingHashMapLong<MutationId> ids = new NonBlockingHashMapLong<>(); + + public void add(MutationId mutationId) + { + long logId = mutationId.logId(); + ids.merge(logId, mutationId, (existing, updating) -> { + if (ShortMutationId.comparator.compare(existing, updating) < 0) + return updating; + return existing; + }); + } + + @Override + public int maxOffset(long logId) + { + MutationId id = ids.get(logId); + return id == null ? NONE_OFFSET : id.offset(); + } + + @Override + protected MutationId max(long logId) + { + return ids.getOrDefault(logId, NONE); + } + + @Override + protected int size() + { + return ids.size(); + } + + @Override + public Iterator<Long> iterator() Review Comment: Why is there a wrapping iterator? Why not return `ids.keySet().iterator()`? ########## src/java/org/apache/cassandra/io/sstable/metadata/MetadataSerializer.java: ########## @@ -123,17 +124,10 @@ public Map<MetadataType, MetadataComponent> deserialize(Descriptor descriptor, E logger.trace("Load metadata for {}", descriptor); File statsFile = descriptor.fileFor(Components.STATS); if (!statsFile.exists()) + throw new NoSuchFileException("Stats component of sstable " + descriptor + " is missing"); Review Comment: This is a major change? What is the reasoning behind this? 
Does this create scenarios where people can't access data due to missing or corrupted stats files? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: pr-unsubscr...@cassandra.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: pr-unsubscr...@cassandra.apache.org For additional commands, e-mail: pr-h...@cassandra.apache.org