This is an automated email from the ASF dual-hosted git repository.
nanda pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/ozone.git
The following commit(s) were added to refs/heads/master by this push:
new 34792eda72 HDDS-12010. Block ozone repair if service is running (#7758)
34792eda72 is described below
commit 34792eda720842b3c00ea88ed23f8098b26931e8
Author: Doroszlai, Attila <[email protected]>
AuthorDate: Thu Jan 30 07:41:46 2025 +0100
HDDS-12010. Block ozone repair if service is running (#7758)
---
.../org/apache/hadoop/ozone/repair/RepairTool.java | 64 ++++++++++++++++------
.../hadoop/ozone/repair/TransactionInfoRepair.java | 28 +++++-----
.../hadoop/ozone/repair/om/FSORepairTool.java | 10 +++-
.../ozone/repair/om/SnapshotChainRepair.java | 10 +++-
.../repair/scm/cert/RecoverSCMCertificate.java | 10 +++-
5 files changed, 82 insertions(+), 40 deletions(-)
diff --git a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/RepairTool.java b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/RepairTool.java
index 1ae033e2e7..3fa1033fb2 100644
--- a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/RepairTool.java
+++ b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/RepairTool.java
@@ -17,6 +17,7 @@
*/
package org.apache.hadoop.ozone.repair;
+import jakarta.annotation.Nullable;
import org.apache.hadoop.hdds.cli.AbstractSubcommand;
import picocli.CommandLine;
@@ -45,35 +46,57 @@ public abstract class RepairTool extends AbstractSubcommand implements Callable<
/** Hook method for subclasses for performing actual repair task. */
protected abstract void execute() throws Exception;
+ /** Which Ozone component should be verified to be offline. */
+ @Nullable
+ protected Component serviceToBeOffline() {
+ return null;
+ }
+
@Override
public final Void call() throws Exception {
if (!dryRun) {
confirmUser();
}
- execute();
+ if (isServiceStateOK()) {
+ execute();
+ }
return null;
}
- protected boolean checkIfServiceIsRunning(String serviceName) {
- String runningEnvVar = String.format("OZONE_%s_RUNNING", serviceName);
- String pidEnvVar = String.format("OZONE_%s_PID", serviceName);
- String isServiceRunning = System.getenv(runningEnvVar);
- String servicePid = System.getenv(pidEnvVar);
- if ("true".equals(isServiceRunning)) {
- if (!force) {
- error("Error: %s is currently running on this host with PID %s. " +
- "Stop the service before running the repair tool.", serviceName, servicePid);
- return true;
- } else {
- info("Warning: --force flag used. Proceeding despite %s being detected as running with PID %s.",
- serviceName, servicePid);
- }
- } else {
- info("No running %s service detected. Proceeding with repair.", serviceName);
+ private boolean isServiceStateOK() {
+ final Component service = serviceToBeOffline();
+
+ if (service == null) {
+ return true; // online tool
+ }
+
+ if (!isServiceRunning(service)) {
+ info("No running %s service detected. Proceeding with repair.", service);
+ return true;
+ }
+
+ String servicePid = getServicePid(service);
+
+ if (force) {
+ info("Warning: --force flag used. Proceeding despite %s being detected as running with PID %s.",
+ service, servicePid);
+ return true;
}
+
+ error("Error: %s is currently running on this host with PID %s. " +
+ "Stop the service before running the repair tool.", service, servicePid);
+
return false;
}
+ private static String getServicePid(Component service) {
+ return System.getenv(String.format("OZONE_%s_PID", service));
+ }
+
+ private static boolean isServiceRunning(Component service) {
+ return "true".equals(System.getenv(String.format("OZONE_%s_RUNNING", service)));
+ }
+
protected boolean isDryRun() {
return dryRun;
}
@@ -117,4 +140,11 @@ private String getConsoleReadLineWithFormat(String currentUser) {
.nextLine()
.trim();
}
+
+ /** Ozone component for offline tools. */
+ protected enum Component {
+ DATANODE,
+ OM,
+ SCM,
+ }
}
diff --git a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/TransactionInfoRepair.java b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/TransactionInfoRepair.java
index 4fca8e40a0..8b44c30877 100644
--- a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/TransactionInfoRepair.java
+++ b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/TransactionInfoRepair.java
@@ -21,6 +21,7 @@
*/
package org.apache.hadoop.ozone.repair;
+import jakarta.annotation.Nonnull;
import org.apache.hadoop.hdds.cli.HddsVersionProvider;
import org.apache.hadoop.hdds.scm.metadata.SCMDBDefinition;
import org.apache.hadoop.hdds.utils.IOUtils;
@@ -69,16 +70,12 @@ public class TransactionInfoRepair extends RepairTool {
@Override
public void execute() throws Exception {
- final Component component = getComponent();
- if (checkIfServiceIsRunning(component.name())) {
- return;
- }
List<ColumnFamilyHandle> cfHandleList = new ArrayList<>();
List<ColumnFamilyDescriptor> cfDescList =
RocksDBUtils.getColumnFamilyDescriptors(
dbPath);
+ String columnFamilyName = getColumnFamily().getName();
try (ManagedRocksDB db = ManagedRocksDB.open(dbPath, cfDescList,
cfHandleList)) {
- String columnFamilyName = component.columnFamilyDefinition.getName();
ColumnFamilyHandle transactionInfoCfh =
RocksDBUtils.getColumnFamilyHandle(columnFamilyName, cfHandleList);
if (transactionInfoCfh == null) {
throw new IllegalArgumentException(columnFamilyName +
@@ -111,7 +108,9 @@ public void execute() throws Exception {
}
}
- private Component getComponent() {
+ @Override
+ @Nonnull
+ protected Component serviceToBeOffline() {
final String parent = spec().parent().name();
switch (parent) {
case "om":
@@ -123,14 +122,15 @@ private Component getComponent() {
}
}
- private enum Component {
- OM(OMDBDefinition.TRANSACTION_INFO_TABLE),
- SCM(SCMDBDefinition.TRANSACTIONINFO);
-
- private final DBColumnFamilyDefinition<String, TransactionInfo> columnFamilyDefinition;
-
- Component(DBColumnFamilyDefinition<String, TransactionInfo> columnFamilyDefinition) {
- this.columnFamilyDefinition = columnFamilyDefinition;
+ private DBColumnFamilyDefinition<String, TransactionInfo> getColumnFamily() {
+ Component component = serviceToBeOffline();
+ switch (component) {
+ case OM:
+ return OMDBDefinition.TRANSACTION_INFO_TABLE;
+ case SCM:
+ return SCMDBDefinition.TRANSACTIONINFO;
+ default:
+ throw new IllegalStateException("This tool does not support component: " + component);
}
}
}
diff --git a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/om/FSORepairTool.java b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/om/FSORepairTool.java
index eb5a5dd9a2..7e22536a1b 100644
--- a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/om/FSORepairTool.java
+++ b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/om/FSORepairTool.java
@@ -17,6 +17,7 @@
*/
package org.apache.hadoop.ozone.repair.om;
+import jakarta.annotation.Nonnull;
import org.apache.commons.io.FileUtils;
import org.apache.hadoop.hdds.conf.ConfigurationSource;
import org.apache.hadoop.hdds.conf.OzoneConfiguration;
@@ -97,11 +98,14 @@ public class FSORepairTool extends RepairTool {
description = "Verbose output. Show all intermediate steps.")
private boolean verbose;
+ @Nonnull
+ @Override
+ protected Component serviceToBeOffline() {
+ return Component.OM;
+ }
+
@Override
public void execute() throws Exception {
- if (checkIfServiceIsRunning("OM")) {
- return;
- }
try {
Impl repairTool = new Impl();
repairTool.run();
diff --git a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/om/SnapshotChainRepair.java b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/om/SnapshotChainRepair.java
index bafd2f8937..06cb8b1e11 100644
--- a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/om/SnapshotChainRepair.java
+++ b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/om/SnapshotChainRepair.java
@@ -18,6 +18,7 @@
package org.apache.hadoop.ozone.repair.om;
+import jakarta.annotation.Nonnull;
import org.apache.hadoop.hdds.utils.IOUtils;
import org.apache.hadoop.hdds.utils.db.StringCodec;
import org.apache.hadoop.hdds.utils.db.managed.ManagedRocksDB;
@@ -76,11 +77,14 @@ public class SnapshotChainRepair extends RepairTool {
description = "Path previous snapshotId to set for the given snapshot")
private UUID pathPreviousSnapshotId;
+ @Nonnull
+ @Override
+ protected Component serviceToBeOffline() {
+ return Component.OM;
+ }
+
@Override
public void execute() throws Exception {
- if (checkIfServiceIsRunning("OM")) {
- return;
- }
List<ColumnFamilyHandle> cfHandleList = new ArrayList<>();
List<ColumnFamilyDescriptor> cfDescList =
RocksDBUtils.getColumnFamilyDescriptors(dbPath);
diff --git a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/scm/cert/RecoverSCMCertificate.java b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/scm/cert/RecoverSCMCertificate.java
index 2fac9d5329..1461e4aa73 100644
--- a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/scm/cert/RecoverSCMCertificate.java
+++ b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/scm/cert/RecoverSCMCertificate.java
@@ -17,6 +17,7 @@
*/
package org.apache.hadoop.ozone.repair.scm.cert;
+import jakarta.annotation.Nonnull;
import org.apache.hadoop.hdds.conf.OzoneConfiguration;
import org.apache.hadoop.hdds.security.SecurityConfig;
import org.apache.hadoop.hdds.security.x509.certificate.authority.CAType;
@@ -71,11 +72,14 @@ public class RecoverSCMCertificate extends RepairTool {
description = "SCM DB Path")
private String dbPath;
+ @Nonnull
+ @Override
+ protected Component serviceToBeOffline() {
+ return Component.SCM;
+ }
+
@Override
public void execute() throws Exception {
- if (checkIfServiceIsRunning("SCM")) {
- return;
- }
dbPath = removeTrailingSlashIfNeeded(dbPath);
String tableName = VALID_SCM_CERTS.getName();
DBDefinition dbDefinition =
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]