dcapwell commented on code in PR #3574: URL: https://github.com/apache/cassandra/pull/3574#discussion_r1778813778
########## src/java/org/apache/cassandra/tcm/sequences/DropAccordTable.java: ########## @@ -0,0 +1,364 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.tcm.sequences; + +import java.io.IOException; +import java.time.Duration; +import java.util.Objects; +import java.util.concurrent.ExecutionException; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import org.apache.cassandra.cql3.ColumnIdentifier; +import org.apache.cassandra.db.TypeSizes; +import org.apache.cassandra.io.util.DataInputPlus; +import org.apache.cassandra.io.util.DataOutputPlus; +import org.apache.cassandra.schema.Keyspaces; +import org.apache.cassandra.schema.TableId; +import org.apache.cassandra.schema.TableMetadata; +import org.apache.cassandra.service.accord.AccordService; +import org.apache.cassandra.tcm.ClusterMetadata; +import org.apache.cassandra.tcm.ClusterMetadataService; +import org.apache.cassandra.tcm.Epoch; +import org.apache.cassandra.tcm.MultiStepOperation; +import org.apache.cassandra.tcm.Transformation; +import org.apache.cassandra.tcm.serialization.AsymmetricMetadataSerializer; +import org.apache.cassandra.tcm.serialization.MetadataSerializer; +import 
org.apache.cassandra.tcm.serialization.Version; +import org.apache.cassandra.tcm.transformations.FinishDropAccordTable; +import org.apache.cassandra.utils.JVMStabilityInspector; + +import static java.lang.String.format; +import static org.apache.cassandra.tcm.Transformation.Kind.FINISH_DROP_ACCORD_TABLE; +import static org.apache.cassandra.tcm.sequences.SequenceState.continuable; +import static org.apache.cassandra.tcm.sequences.SequenceState.error; +import static org.apache.cassandra.tcm.sequences.SequenceState.halted; +import static org.apache.cassandra.utils.Clock.Global.nanoTime; + +/** + * A slightly atypical implementation as it consists of only a single step. To perform the drop of an + * Accord table, we first commit a PrepareDropAccordTable transformation. Upon enactment, that + * marks the table as pending drop, which blocks any new transactions from being started. It also + * instantiates an instance of this operation and adds it to the set of in progress operations. + * + * The intention is to introduce a barrier which blocks until the Accord service acknowledges that + * it has learned of the epoch in which the table was marked for deletion and that all prior transactions + * are completed. Once this is complete, we can proceed to actually drop the table. The transformation + * which performs that schema modification also removes this MSO from ClusterMetadata's in-flight set. + * This obviates the need to 'advance' this MSO in the way that other implementations with more steps do. + * + */ +public class DropAccordTable extends MultiStepOperation<Epoch> +{ + private static final Logger logger = LoggerFactory.getLogger(DropAccordTable.class); + + public static final Serializer serializer = new Serializer(); + + public final TableReference table; + + /** + * Used by factory method for external callers and by the serializer. 
+ * We don't need to include the serialized FinishDropAccordTable step in the serialization + * of the MSO itself because they have no parameters other than the table reference and so + * we can just construct a new one when we execute it + */ + private DropAccordTable(TableReference table, Epoch latestModification) + { + super(0, latestModification); + this.table = table; + } + + public static DropAccordTable newSequence(TableReference table, Epoch preparedAt) + { + return new DropAccordTable(table, preparedAt); + } + + @Override + public boolean equals(Object o) + { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + DropAccordTable that = (DropAccordTable) o; + return latestModification.equals(that.latestModification) + && table.equals(that.table); + } + + @Override + public int hashCode() + { + return Objects.hash(latestModification, table); + } + + @Override + public Kind kind() + { + return Kind.DROP_ACCORD_TABLE; + } + + @Override + protected SequenceKey sequenceKey() + { + return table; + } + + @Override + public MetadataSerializer<? 
extends SequenceKey> keySerializer() + { + return TableReference.serializer; + } + + @Override + public Transformation.Kind nextStep() + { + return FINISH_DROP_ACCORD_TABLE; + } + + @Override + public SequenceState executeNext() + { + try + { + SequenceState failure = awaitSafeFromAccord(); + if (failure != null) return failure; + } + catch (Throwable t) + { + JVMStabilityInspector.inspectThrowable(t); + logger.warn("Exception while waiting for Accord service to notify all table txns are complete", t); + // this is actually continuable as we can simply retry + return continuable(); + } + try + { + // Now we're satisfied that all Accord txns have finished for the table, + // go ahead and actually drop it + ClusterMetadataService.instance().commit(new FinishDropAccordTable(table)); + return continuable(); + } + catch (Throwable t) + { + JVMStabilityInspector.inspectThrowable(t); + logger.warn("Exception committing finish_drop_accord_table. " + + "Accord service has acknowledged the operation but table remains present in schema", t); + return halted(); + } + } + + private SequenceState awaitSafeFromAccord() throws ExecutionException, InterruptedException + { + // make sure that Accord sees the current epoch, which must necessarily follow the + // one which marked the table as pending drop + ClusterMetadata metadata = ClusterMetadata.current(); + // just for the sake of paranoia, assert that the table is actually marked as being dropped + if (!verifyTableMarked(metadata.schema.getKeyspaces())) + return error(new IllegalStateException(String.format("Table %s is in an invalid state to be dropped", table.toCQLString()))); + + long startNanos = nanoTime(); + AccordService.instance().epochReady(metadata.epoch).get(); + long epochEndNanos = nanoTime(); + + //TODO (correctness, operability): this is more than likely to timeout, so best to "await" an existing txn_id rather than creating a new one... 
+ AccordService.instance().awaitForTableDrop(table.id); + long awaitEndNanos = nanoTime(); + logger.info("Wait for Accord to see the drop table was success. " + + "Took {} to wait for Accord to learn about the change, then {} to process everything", + Duration.ofNanos(epochEndNanos - startNanos), Duration.ofNanos(awaitEndNanos - epochEndNanos)); + return null; + } + + private boolean verifyTableMarked(Keyspaces keyspaces) + { + TableMetadata tm = keyspaces.getTableOrViewNullable(table.id); + if (tm == null) + { + logger.warn("Unable to drop accord table {}, table not found", table); + return false; + } + + if (!tm.params.pendingDrop) + { + logger.warn("Unexpected state, table {} was not marked pending drop", table); + return false; + } + + return true; + } + + @Override + public Transformation.Result applyTo(ClusterMetadata metadata) + { + // note: that this will apply the finish drop transformation to the supplied metadata. It's + // not used to actually execute the MSO, but to determine what the metadata state will/would + // be if it were executed. + return new FinishDropAccordTable(table).execute(metadata); + } + + @Override + public DropAccordTable advance(Epoch epoch) + { + // note: this isn't really used by this MSO impl as it consists of a single step so there's nothing + // to advance. 
An action of the single step is to remove the MSO from the set of in progress sequences + return new DropAccordTable(this.table, epoch); + } + + @Override + public ProgressBarrier barrier() + { + return ProgressBarrier.immediate(); + } + + public static class TableReference implements SequenceKey, Comparable<TableReference> + { + public static final Serializer serializer = new Serializer(); + + public final String keyspace, name; + public final TableId id; + + public TableReference(String keyspace, String name, TableId id) + { + this.keyspace = normalize(keyspace); Review Comment: There was an issue I never figured out, where the strings matched but Java said that they were not equal... After forcing the strings to be the same charset it started working, but then I switched to trying to find where it happened and it stopped happening... Your comment is to drop the strings anyway... -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected] --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]

