nvharikrishna commented on code in PR #4149:
URL: https://github.com/apache/cassandra/pull/4149#discussion_r2913723350


##########
src/java/org/apache/cassandra/tools/CMSOfflineTool.java:
##########
@@ -0,0 +1,838 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.cassandra.tools;
+
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.EnumSet;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+import java.util.stream.Collectors;
+
+import org.apache.cassandra.config.DatabaseDescriptor;
+import org.apache.cassandra.dht.IPartitioner;
+import org.apache.cassandra.dht.Range;
+import org.apache.cassandra.dht.Token;
+import org.apache.cassandra.io.util.File;
+import org.apache.cassandra.io.util.FileInputStreamPlus;
+import org.apache.cassandra.io.util.FileOutputStreamPlus;
+import org.apache.cassandra.locator.InetAddressAndPort;
+import org.apache.cassandra.locator.MetaStrategy;
+import org.apache.cassandra.locator.Replica;
+import org.apache.cassandra.schema.KeyspaceMetadata;
+import org.apache.cassandra.schema.ReplicationParams;
+import org.apache.cassandra.tcm.ClusterMetadata;
+import org.apache.cassandra.tcm.ClusterMetadataService;
+import org.apache.cassandra.tcm.MultiStepOperation;
+import org.apache.cassandra.tcm.membership.Directory;
+import org.apache.cassandra.tcm.membership.Location;
+import org.apache.cassandra.tcm.membership.NodeAddresses;
+import org.apache.cassandra.tcm.membership.NodeId;
+import org.apache.cassandra.tcm.membership.NodeState;
+import org.apache.cassandra.tcm.membership.NodeVersion;
+import org.apache.cassandra.tcm.ownership.DataPlacement;
+import org.apache.cassandra.tcm.ownership.ReplicaGroups;
+import org.apache.cassandra.tcm.ownership.UniformRangePlacement;
+import org.apache.cassandra.tcm.sequences.BootstrapAndJoin;
+import org.apache.cassandra.tcm.sequences.Move;
+import org.apache.cassandra.tcm.sequences.ReconfigureCMS;
+import org.apache.cassandra.tcm.serialization.VerboseMetadataSerializer;
+import org.apache.cassandra.tcm.serialization.Version;
+import org.apache.cassandra.tcm.transformations.Assassinate;
+import org.apache.cassandra.tcm.transformations.CancelInProgressSequence;
+import org.apache.cassandra.tcm.transformations.PrepareMove;
+import org.apache.cassandra.tcm.transformations.Unregister;
+import org.apache.cassandra.tcm.transformations.UnsafeJoin;
+import org.apache.cassandra.tcm.transformations.cms.PrepareCMSReconfiguration;
+import org.apache.cassandra.utils.FBUtilities;
+
+import picocli.CommandLine;
+import picocli.CommandLine.Command;
+import picocli.CommandLine.Option;
+
+import static com.google.common.base.Throwables.getStackTraceAsString;
+import static 
org.apache.cassandra.tcm.transformations.cms.PrepareCMSReconfiguration.needsReconfiguration;
+
+/**
+ * Offline tool to print or update cluster metadata stored in a dump file.
+ * <p>
+ * The tool operates entirely offline: it reads a metadata dump file produced 
by
+ * {@code nodetool cms dump} (or equivalent), applies the requested 
transformation,
+ * and writes the result to a new file. The original dump file is never 
modified.
+ * <p>
+ * Run without a subcommand to print usage information.
+ */
+@SuppressWarnings({ "unused", "DefaultAnnotationParam", 
"UseOfSystemOutOrSystemErr" })
+@Command(name = "cmsofflinetool",
+mixinStandardHelpOptions = true,
+description = "Offline tool to print or update cluster metadata dump.",
+subcommands = { CMSOfflineTool.AbortBootstrap.class,
+                CMSOfflineTool.AbortDecommission.class,
+                CMSOfflineTool.AbortMove.class,
+                CMSOfflineTool.AssassinateNode.class,
+                CMSOfflineTool.Describe.class,
+                CMSOfflineTool.ForceJoin.class,
+                CommandLine.HelpCommand.class,
+                CMSOfflineTool.MoveToken.class,
+                CMSOfflineTool.Print.class,
+                CMSOfflineTool.PrintDataPlacements.class,
+                CMSOfflineTool.PrintDirectoryCmd.class,
+                CMSOfflineTool.ResetCMS.class })
+public class CMSOfflineTool implements Runnable
+{
+    private final Output output;
+
    /**
     * @param output sink for the tool's normal and error text; shared with all subcommands
     *               via {@code @ParentCommand} injection
     */
    public CMSOfflineTool(Output output)
    {
        this.output = output;
    }
+
+    public static void main(String[] args) throws IOException
+    {
+        CMSOfflineTool tool = new CMSOfflineTool(new Output(System.out, 
System.err));
+        CommandLine cli = new CommandLine(tool)
+                          
.setColorScheme(CommandLine.Help.defaultColorScheme(CommandLine.Help.Ansi.OFF))
+                          .setExecutionExceptionHandler((ex, cmd, parseResult) 
-> {
+                              cmd.getErr().println("Error: " + 
ex.getMessage());
+                              cmd.getErr().println("-- StackTrace --");
+                              cmd.getErr().println(getStackTraceAsString(ex));
+                              return 2;
+                          });
+        int status = cli.execute(args);
+        System.exit(status);
+    }
+
    @Override
    public void run()
    {
        // Invoked when no subcommand is supplied: print usage to stdout, ANSI colors off.
        CommandLine.usage(this, output.out, CommandLine.Help.Ansi.OFF);
    }
+
+    public static abstract class ClusterMetadataToolCmd implements Runnable
+    {
+        @Option(names = { "-f", "--file" }, description = "Cluster metadata 
dump file path.", required = true)
+        protected String metadataDumpFile;
+
+        @Option(names = { "-sv", "--serialization-version" }, description = 
"Serialization version to use.")
+        private Version serializationVersion;
+
+        @CommandLine.ParentCommand
+        private CMSOfflineTool parent;
+
+        @Override
+        public void run()
+        {
+            try
+            {
+                execute(parent.output);
+                parent.output.out.flush();
+                parent.output.err.flush();
+            }
+            catch (IOException e)
+            {
+                throw new RuntimeException(e);
+            }
+        }
+
+        protected abstract void execute(Output output) throws IOException;
+
+        public ClusterMetadata parseClusterMetadata() throws IOException
+        {
+            File file = new File(metadataDumpFile);
+            if (!file.exists())
+            {
+                throw new IllegalArgumentException("Cluster metadata dump file 
" + metadataDumpFile + " does not exist.");
+            }
+
+            // Make sure the partitioner we use to manipulate the metadata is 
the same one used to generate it
+            IPartitioner partitioner;
+            try (FileInputStreamPlus fisp = new 
FileInputStreamPlus(metadataDumpFile))
+            {
+                int x = fisp.readUnsignedVInt32();
+                Version version = Version.fromInt(x);
+                partitioner = ClusterMetadata.Serializer.getPartitioner(fisp, 
version);
+            }
+            DatabaseDescriptor.toolInitialization();
+            DatabaseDescriptor.setPartitionerUnsafe(partitioner);
+            ClusterMetadataService.initializeForTools(false);
+
+            return 
ClusterMetadataService.deserializeClusterMetadata(metadataDumpFile);
+        }
+
+        public void writeMetadata(Output output, ClusterMetadata metadata, 
String outputFilePath) throws IOException
+        {
+            Path p = outputFilePath != null ?
+                     Files.createFile(Path.of(outputFilePath)) :
+                     Files.createTempFile("clustermetadata", "dump");
+
+            try (FileOutputStreamPlus out = new FileOutputStreamPlus(p))
+            {
+                VerboseMetadataSerializer.serialize(ClusterMetadata.serializer,
+                                                    metadata,
+                                                    out,
+                                                    
getSerializationVersion(metadata));
+                output.out.println("Updated cluster metadata written to file " 
+ p.toAbsolutePath());
+            }
+        }
+
+        Version getSerializationVersion(ClusterMetadata metadata)
+        {
+            Version currentVersion = 
NodeVersion.CURRENT.serializationVersion();
+            if (serializationVersion != null)
+            {
+                // Not a good idea to write metadata using lower serialization 
version. Ref: CASSANDRA-21174
+                if (currentVersion.isBefore(serializationVersion))
+                {
+                    throw new IllegalArgumentException("Given serialization 
version " + serializationVersion +
+                                                       " is newer compared to 
current version " + currentVersion + '.');
+                }
+
+                return serializationVersion;
+            }
+
+            Version metadataVersion = 
metadata.directory.commonSerializationVersion;
+            if (!currentVersion.isAtLeast(metadataVersion))
+            {
+                throw new IllegalArgumentException("Current version " + 
currentVersion +
+                                                   " is older than version in 
cluster metadata (" +
+                                                   metadataVersion + "). 
Cannot proceed further. " +
+                                                   "Try modifying cluster 
metadata using binaries that support " +
+                                                   "minimum serialization 
version: " + metadataVersion + '.');
+            }
+
+            return metadataVersion;
+        }
+    }
+
+    /**
+     * Cancels a JOIN or REPLACE bootstrap sequence for the given node and 
unregisters it
+     * from the cluster. Use this when a node is stuck in bootstrapping or 
replacement.
+     * Fails if no in-progress sequence exists, or if the sequence is not of 
kind JOIN or REPLACE.
+     */
+    @Command(name = "abortbootstrap",
+    description = "Aborts bootstrap for given node if in progress.")
+    public static class AbortBootstrap extends ClusterMetadataToolCmd
+    {
+        @CommandLine.ArgGroup(exclusive = true, multiplicity = "1")
+        NodeIdentifierOption nodeIdentifierOption;
+
+        @Option(names = { "-o", "--output-file" },
+        description = "Output file path for storing the updated cluster 
metadata.")
+        private String outputFilePath;
+
+        @Override
+        protected void execute(Output output) throws IOException
+        {
+            ClusterMetadata metadata = parseClusterMetadata();
+            NodeId nodeId = nodeIdentifierOption.getNodeId(metadata);
+            if (!metadata.inProgressSequences.contains(nodeId))
+            {
+                throw new IllegalArgumentException("Did not find any sequences 
in progress for node " +
+                                                   
nodeIdentifierOption.getNodeIpOrId() + '.');
+            }
+
+            MultiStepOperation<?> multiStepOperation = 
metadata.inProgressSequences.get(nodeId);
+            MultiStepOperation.Kind sequenceKind = multiStepOperation.kind();
+            if (sequenceKind != MultiStepOperation.Kind.JOIN
+                && sequenceKind != MultiStepOperation.Kind.REPLACE)
+            {
+                throw new IllegalArgumentException("abortbootstrap is not a 
valid operation when sequence of kind " +
+                                                   sequenceKind + " is in 
progress.");
+            }
+
+            CancelInProgressSequence cancelSequence = new 
CancelInProgressSequence(nodeId);
+            ClusterMetadata updatedMetadata = 
cancelSequence.execute(metadata).success().metadata;
+
+            // Cancelling the sequence is not enough, but we need to 
unregister as well
+            Unregister unregister = new Unregister(nodeId, 
EnumSet.of(NodeState.REGISTERED),
+                                                   
ClusterMetadataService.instance().placementProvider());
+
+            updatedMetadata = 
unregister.execute(updatedMetadata).success().metadata;
+
+            writeMetadata(output, updatedMetadata, outputFilePath);
+        }
+    }
+
+    /**
+     * Cancels an in-progress MOVE sequence for the given node, returning it 
to its
+     * pre-move token assignment. Fails if no in-progress sequence exists, or 
if the
+     * sequence is not of kind MOVE.
+     */
+    @Command(name = "abortmove", description = "Aborts in progress move 
sequence for given node.")
+    static class AbortMove extends ClusterMetadataToolCmd
+    {
+        @CommandLine.ArgGroup(exclusive = true, multiplicity = "1")
+        NodeIdentifierOption nodeIdentifierOption;
+
+        @Option(names = { "-o", "--output-file" },
+        description = "Output file path for storing the updated Cluster 
Metadata.")
+        private String outputFilePath;
+
+        @Override
+        protected void execute(Output output) throws IOException
+        {
+            ClusterMetadata metadata = parseClusterMetadata();
+            NodeId nodeId = nodeIdentifierOption.getNodeId(metadata);
+
+            MultiStepOperation<?> multiStepOperation = 
metadata.inProgressSequences.get(nodeId);
+            if (multiStepOperation == null)
+            {
+                throw new IllegalArgumentException("No transformation sequence 
is in progress for " +
+                                                   
nodeIdentifierOption.getNodeIpOrId() + '.');
+            }
+
+            if (multiStepOperation.kind() != MultiStepOperation.Kind.MOVE)
+            {
+                throw new IllegalArgumentException("Multi step operation of 
kind " + multiStepOperation.kind() +
+                                                   " is in progress for node " 
+ nodeIdentifierOption.getNodeIpOrId() +
+                                                   ". Cannot proceed with 
abort move.");
+            }
+
+            CancelInProgressSequence cancelSequence = new 
CancelInProgressSequence(nodeId);
+            ClusterMetadata updatedMetadata = 
cancelSequence.execute(metadata).success().metadata;
+            writeMetadata(output, updatedMetadata, outputFilePath);
+        }
+    }
+
+    /**
+     * Cancels an in-progress LEAVE (decommission) sequence for the given 
node, keeping
+     * it as an active member of the ring. Fails if no in-progress sequence 
exists, or if
+     * the sequence is not of kind LEAVE.
+     */
+    @Command(name = "abortdecommission", description = "Aborts in progress 
decommission sequence for given node.")
+    static class AbortDecommission extends ClusterMetadataToolCmd
+    {
+        @CommandLine.ArgGroup(exclusive = true, multiplicity = "1")
+        NodeIdentifierOption nodeIdentifierOption;
+
+        @Option(names = { "-o", "--output-file" },
+        description = "Output file path for storing the updated Cluster 
Metadata.")
+        private String outputFilePath;
+
+        @Override
+        protected void execute(Output output) throws IOException
+        {
+            ClusterMetadata metadata = parseClusterMetadata();
+            NodeId nodeId = nodeIdentifierOption.getNodeId(metadata);
+
+            MultiStepOperation<?> multiStepOperation = 
metadata.inProgressSequences.get(nodeId);
+            if (multiStepOperation == null)
+            {
+                throw new IllegalArgumentException("No transformation sequence 
is in progress for " +
+                                                   
nodeIdentifierOption.getNodeIpOrId() + '.');
+            }
+
+            if (multiStepOperation.kind() != MultiStepOperation.Kind.LEAVE)
+            {
+                throw new IllegalArgumentException("Multi step operation of 
kind " + multiStepOperation.kind() +
+                                                   " is in progress for node " 
+ nodeIdentifierOption.getNodeIpOrId() +
+                                                   ". Cannot proceed with 
abort decommission.");
+            }
+
+            CancelInProgressSequence cancelSequence = new 
CancelInProgressSequence(nodeId);
+            ClusterMetadata updatedMetadata = 
cancelSequence.execute(metadata).success().metadata;
+            writeMetadata(output, updatedMetadata, outputFilePath);
+        }
+    }
+
+    /**
+     * Removes a node from cluster metadata by applying the {@link 
Assassinate} transformation.
+     * If a MOVE or LEAVE sequence is in progress for the node, it is 
cancelled first.
+     * If the node is a CMS member, it is removed from CMS before 
assassination.
+     * Fails if the node is in a JOIN or REPLACE sequence; use {@code 
abortbootstrap} instead.
+     */
+    @Command(name = "assassinate", description = "Assassinates given node from 
Cluster metadata.")
+    static class AssassinateNode extends ClusterMetadataToolCmd
+    {
+        private final EnumSet<MultiStepOperation.Kind> 
supportedCancelSequences =
+        EnumSet.of(MultiStepOperation.Kind.MOVE, 
MultiStepOperation.Kind.LEAVE);
+        @CommandLine.ArgGroup(exclusive = true, multiplicity = "1")
+        NodeIdentifierOption nodeIdentifierOption;
+        @Option(names = { "-o", "--output-file" },
+        description = "Output file path for storing the updated Cluster 
Metadata.")
+        private String outputFilePath;
+
+        @Override
+        protected void execute(Output output) throws IOException
+        {
+            ClusterMetadata metadata = parseClusterMetadata();
+            NodeId nodeId = nodeIdentifierOption.getNodeId(metadata);
+
+            // Check if there are any in-progress sequences for given node
+            // If any, then cancel the sequence and then assassinate it
+            if (metadata.inProgressSequences.contains(nodeId))
+            {
+                MultiStepOperation<?> multiStepOperation = 
metadata.inProgressSequences.get(nodeId);
+                MultiStepOperation.Kind sequenceKind = 
multiStepOperation.kind();
+                if (!supportedCancelSequences.contains(sequenceKind))
+                {
+                    if (sequenceKind == MultiStepOperation.Kind.JOIN || 
sequenceKind == MultiStepOperation.Kind.REPLACE)
+                    {
+                        throw new IllegalArgumentException("Cannot assassinate 
the node when sequence of kind " +

Review Comment:
   Assassinate internally calls PrepareLeave, which checks that the node is 
in the JOINED state 
([ref](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/tcm/transformations/PrepareLeave.java#L92)).
 After the in-progress BootstrapAndJoin/BootstrapAndReplace sequence is 
cancelled, the node returns to the REGISTERED state, so Assassinate fails 
because the node is no longer JOINED. At that point the node only needs to be 
unregistered, so I suggest running `abortbootstrap` here instead of 
`assassinate`.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to