This is an automated email from the ASF dual-hosted git repository.
erose pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/ozone.git
The following commit(s) were added to refs/heads/master by this push:
new 786da39cf5 HDDS-12057. Implement command ozone debug replicas verify checksums (#7748)
786da39cf5 is described below
commit 786da39cf5a5d9c0f766ac42390e06b62aac0d38
Author: Rishabh Patel <[email protected]>
AuthorDate: Thu Mar 13 14:43:52 2025 -0700
HDDS-12057. Implement command ozone debug replicas verify checksums (#7748)
Co-authored-by: Ethan Rose <[email protected]>
---
.../debug/ozone-debug-corrupt-block.robot | 4 +-
.../debug/ozone-debug-dead-datanode.robot | 6 +-
.../debug/ozone-debug-stale-datanode.robot | 6 +-
.../smoketest/debug/ozone-debug-tests-ec3-2.robot | 14 +-
.../smoketest/debug/ozone-debug-tests-ec6-3.robot | 20 +--
.../main/smoketest/debug/ozone-debug-tests.robot | 2 +-
.../src/main/smoketest/debug/ozone-debug.robot | 4 +-
.../{ReadReplicas.java => replicas/Checksums.java} | 163 ++++++++++-----------
.../debug/{ => replicas}/FindMissingPadding.java | 125 +++++-----------
.../{ReplicasDebug.java => ReplicaVerifier.java} | 20 +--
.../hadoop/ozone/debug/replicas/ReplicasDebug.java | 3 +-
.../ozone/debug/replicas/ReplicasVerify.java | 142 ++++++++++++++++++
12 files changed, 292 insertions(+), 217 deletions(-)
diff --git
a/hadoop-ozone/dist/src/main/smoketest/debug/ozone-debug-corrupt-block.robot
b/hadoop-ozone/dist/src/main/smoketest/debug/ozone-debug-corrupt-block.robot
index e0fcd50ac1..20689b7c0f 100644
--- a/hadoop-ozone/dist/src/main/smoketest/debug/ozone-debug-corrupt-block.robot
+++ b/hadoop-ozone/dist/src/main/smoketest/debug/ozone-debug-corrupt-block.robot
@@ -27,8 +27,8 @@ ${TESTFILE} testfile
${CORRUPT_DATANODE} ozone_datanode_1.ozone_default
*** Test Cases ***
-Test ozone debug read-replicas with corrupt block replica
- ${directory} = Execute read-replicas CLI tool
+Test ozone debug checksums with corrupt block replica
+ ${directory} = Execute replicas verify checksums CLI
tool
Set Test Variable ${DIR} ${directory}
${count_files} = Count Files In Directory
${directory}
diff --git
a/hadoop-ozone/dist/src/main/smoketest/debug/ozone-debug-dead-datanode.robot
b/hadoop-ozone/dist/src/main/smoketest/debug/ozone-debug-dead-datanode.robot
index e8385bb593..42ae5dec7e 100644
--- a/hadoop-ozone/dist/src/main/smoketest/debug/ozone-debug-dead-datanode.robot
+++ b/hadoop-ozone/dist/src/main/smoketest/debug/ozone-debug-dead-datanode.robot
@@ -14,7 +14,7 @@
# limitations under the License.
*** Settings ***
-Documentation Test read-replicas in case of one datanode is dead
+Documentation Test checksums in case of one datanode is dead
Library OperatingSystem
Resource ../lib/os.robot
Resource ozone-debug.robot
@@ -26,8 +26,8 @@ ${BUCKET} cli-debug-bucket
${TESTFILE} testfile
*** Test Cases ***
-Test ozone debug read-replicas with one datanode DEAD
- ${directory} = Execute read-replicas CLI tool
+Test ozone debug checksums with one datanode DEAD
+ ${directory} = Execute replicas verify checksums CLI tool
Set Test Variable ${DIR} ${directory}
${count_files} = Count Files In Directory ${directory}
diff --git
a/hadoop-ozone/dist/src/main/smoketest/debug/ozone-debug-stale-datanode.robot
b/hadoop-ozone/dist/src/main/smoketest/debug/ozone-debug-stale-datanode.robot
index c7cc7aaf3a..36cef5e665 100644
---
a/hadoop-ozone/dist/src/main/smoketest/debug/ozone-debug-stale-datanode.robot
+++
b/hadoop-ozone/dist/src/main/smoketest/debug/ozone-debug-stale-datanode.robot
@@ -14,7 +14,7 @@
# limitations under the License.
*** Settings ***
-Documentation Test read-replicas in case of one datanode is stale
+Documentation Test checksums in case of one datanode is stale
Library OperatingSystem
Resource ../lib/os.robot
Resource ozone-debug.robot
@@ -27,8 +27,8 @@ ${TESTFILE} testfile
${STALE_DATANODE} ozone_datanode_1.ozone_default
*** Test Cases ***
-Test ozone debug read-replicas with one datanode STALE
- ${directory} = Execute read-replicas CLI tool
+Test ozone debug checksums with one datanode STALE
+ ${directory} = Execute replicas verify checksums CLI tool
Set Test Variable ${DIR} ${directory}
${count_files} = Count Files In Directory ${directory}
diff --git
a/hadoop-ozone/dist/src/main/smoketest/debug/ozone-debug-tests-ec3-2.robot
b/hadoop-ozone/dist/src/main/smoketest/debug/ozone-debug-tests-ec3-2.robot
index 5b3638040a..57227458cc 100644
--- a/hadoop-ozone/dist/src/main/smoketest/debug/ozone-debug-tests-ec3-2.robot
+++ b/hadoop-ozone/dist/src/main/smoketest/debug/ozone-debug-tests-ec3-2.robot
@@ -44,13 +44,13 @@ Create EC key
*** Test Cases ***
0 data block
Create EC key 1000 0
- ${directory} = Execute read-replicas CLI tool
+ ${directory} = Execute replicas verify checksums CLI
tool
${count_files} = Count Files In Directory
${directory}
Should Be Equal As Integers ${count_files} 1
1 data block
Create EC key 1048576 1
- ${directory} = Execute read-replicas CLI tool
+ ${directory} = Execute replicas verify checksums CLI
tool
${count_files} = Count Files In Directory
${directory}
Should Be Equal As Integers ${count_files} 6
${sum_size} = Evaluate 1048576 * 3
@@ -58,7 +58,7 @@ Create EC key
2 data blocks
Create EC key 1048576 2
- ${directory} = Execute read-replicas CLI tool
+ ${directory} = Execute replicas verify checksums CLI
tool
${sum_size} = Evaluate 1048576 * 4
${count_files} = Count Files In Directory
${directory}
Should Be Equal As Integers ${count_files} 6
@@ -66,7 +66,7 @@ Create EC key
3 data blocks
Create EC key 1048576 3
- ${directory} = Execute read-replicas CLI tool
+ ${directory} = Execute replicas verify checksums CLI
tool
${sum_size} = Evaluate 1048576 * 5
${count_files} = Count Files In Directory
${directory}
Should Be Equal As Integers ${count_files} 6
@@ -74,7 +74,7 @@ Create EC key
3 data blocks and partial stripe
Create EC key 1000000 4
- ${directory} = Execute read-replicas CLI tool
+ ${directory} = Execute replicas verify checksums CLI
tool
${count_files} = Count Files In Directory
${directory}
${sum_size} = Evaluate 1048576 * 5
${sum_size_last_stripe} = Evaluate ((1000000 * 4) % 1048576)
* 3
@@ -84,7 +84,7 @@ Create EC key
4 data blocks and partial stripe
Create EC key 1000000 5
- ${directory} = Execute read-replicas CLI tool
+ ${directory} = Execute replicas verify checksums CLI
tool
${count_files} = Count Files In Directory
${directory}
${sum_size} = Evaluate 1048576 * 5
${sum_size_last_stripe} = Evaluate 1048576 * 3 + ((1000000 *
5) % 1048576)
@@ -94,7 +94,7 @@ Create EC key
6 data blocks
Create EC key 1048576 6
- ${directory} = Execute read-replicas CLI tool
+ ${directory} = Execute replicas verify checksums CLI
tool
${count_files} = Count Files In Directory
${directory}
${sum_size} = Evaluate 1048576 * 5
Should Be Equal As Integers ${count_files} 11
diff --git
a/hadoop-ozone/dist/src/main/smoketest/debug/ozone-debug-tests-ec6-3.robot
b/hadoop-ozone/dist/src/main/smoketest/debug/ozone-debug-tests-ec6-3.robot
index 692f2791e2..52d48c25f7 100644
--- a/hadoop-ozone/dist/src/main/smoketest/debug/ozone-debug-tests-ec6-3.robot
+++ b/hadoop-ozone/dist/src/main/smoketest/debug/ozone-debug-tests-ec6-3.robot
@@ -39,13 +39,13 @@ Create EC key
*** Test Cases ***
0 data block
Create EC key 1048576 0
- ${directory} = Execute read-replicas CLI tool
+ ${directory} = Execute replicas verify checksums CLI
tool
${count_files} = Count Files In Directory
${directory}
Should Be Equal As Integers ${count_files} 1
1 data block
Create EC key 1048576 1
- ${directory} = Execute read-replicas CLI tool
+ ${directory} = Execute replicas verify checksums CLI
tool
${count_files} = Count Files In Directory
${directory}
Should Be Equal As Integers ${count_files} 10
${sum_size} = Evaluate 1048576 * 4
@@ -53,7 +53,7 @@ Create EC key
2 data blocks
Create EC key 1048576 2
- ${directory} = Execute read-replicas CLI tool
+ ${directory} = Execute replicas verify checksums CLI
tool
${sum_size} = Evaluate 1048576 * 5
${count_files} = Count Files In Directory
${directory}
Should Be Equal As Integers ${count_files} 10
@@ -61,7 +61,7 @@ Create EC key
3 data blocks
Create EC key 1048576 3
- ${directory} = Execute read-replicas CLI tool
+ ${directory} = Execute replicas verify checksums CLI
tool
${sum_size} = Evaluate 1048576 * 6
${count_files} = Count Files In Directory
${directory}
Should Be Equal As Integers ${count_files} 10
@@ -69,7 +69,7 @@ Create EC key
4 data blocks
Create EC key 1048576 4
- ${directory} = Execute read-replicas CLI tool
+ ${directory} = Execute replicas verify checksums CLI
tool
${count_files} = Count Files In Directory
${directory}
${sum_size} = Evaluate 1048576 * 7
Should Be Equal As Integers ${count_files} 10
@@ -77,7 +77,7 @@ Create EC key
5 data blocks
Create EC key 1048576 5
- ${directory} = Execute read-replicas CLI tool
+ ${directory} = Execute replicas verify checksums CLI
tool
${count_files} = Count Files In Directory
${directory}
${sum_size} = Evaluate 1048576 * 8
Should Be Equal As Integers ${count_files} 10
@@ -85,7 +85,7 @@ Create EC key
6 data blocks
Create EC key 1048576 6
- ${directory} = Execute read-replicas CLI tool
+ ${directory} = Execute replicas verify checksums CLI
tool
${count_files} = Count Files In Directory
${directory}
${sum_size} = Evaluate 1048576 * 9
Should Be Equal As Integers ${count_files} 10
@@ -93,7 +93,7 @@ Create EC key
6 data blocks and partial stripe
Create EC key 1000000 7
- ${directory} = Execute read-replicas CLI tool
+ ${directory} = Execute replicas verify checksums CLI
tool
${count_files} = Count Files In Directory
${directory}
${sum_size} = Evaluate 1048576 * 9
${sum_size_last_stripe} = Evaluate ((1000000 * 7) % 1048576)
* 4
@@ -103,10 +103,10 @@ Create EC key
7 data blocks and partial stripe
Create EC key 1000000 8
- ${directory} = Execute read-replicas CLI tool
+ ${directory} = Execute replicas verify checksums CLI
tool
${count_files} = Count Files In Directory
${directory}
${sum_size} = Evaluate 1048576 * 9
${sum_size_last_stripe} = Evaluate 1048576 * 4 + ((1000000 *
8) % 1048576)
Should Be Equal As Integers ${count_files} 19
Verify Healthy EC Replica ${directory} 1 ${sum_size}
- Verify Healthy EC Replica ${directory} 2
${sum_size_last_stripe}
\ No newline at end of file
+ Verify Healthy EC Replica ${directory} 2
${sum_size_last_stripe}
diff --git a/hadoop-ozone/dist/src/main/smoketest/debug/ozone-debug-tests.robot
b/hadoop-ozone/dist/src/main/smoketest/debug/ozone-debug-tests.robot
index 4e013e2a64..803ab19ade 100644
--- a/hadoop-ozone/dist/src/main/smoketest/debug/ozone-debug-tests.robot
+++ b/hadoop-ozone/dist/src/main/smoketest/debug/ozone-debug-tests.robot
@@ -37,7 +37,7 @@ Write keys
*** Test Cases ***
Test ozone debug read-replicas
- ${directory} = Execute read-replicas CLI tool
+ ${directory} = Execute replicas verify checksums CLI
tool
Set Test Variable ${DIR} ${directory}
${count_files} = Count Files In Directory
${directory}
diff --git a/hadoop-ozone/dist/src/main/smoketest/debug/ozone-debug.robot
b/hadoop-ozone/dist/src/main/smoketest/debug/ozone-debug.robot
index fb3e0f4158..9bb77d00d6 100644
--- a/hadoop-ozone/dist/src/main/smoketest/debug/ozone-debug.robot
+++ b/hadoop-ozone/dist/src/main/smoketest/debug/ozone-debug.robot
@@ -19,8 +19,8 @@ Library Collections
Resource ../lib/os.robot
*** Keywords ***
-Execute read-replicas CLI tool
-    Execute    ozone debug -Dozone.network.topology.aware.read=true read-replicas --output-dir ${TEMP_DIR} o3://om/${VOLUME}/${BUCKET}/${TESTFILE}
+Execute replicas verify checksums CLI tool
+    Execute    ozone debug -Dozone.network.topology.aware.read=true replicas verify --checksums --output-dir ${TEMP_DIR} o3://om/${VOLUME}/${BUCKET}/${TESTFILE}
${directory} = Execute ls -d
${TEMP_DIR}/${VOLUME}_${BUCKET}_${TESTFILE}_*/ | tail -n 1
Directory Should Exist ${directory}
File Should Exist ${directory}/${TESTFILE}_manifest
diff --git
a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/debug/ReadReplicas.java
b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/debug/replicas/Checksums.java
similarity index 78%
rename from
hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/debug/ReadReplicas.java
rename to
hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/debug/replicas/Checksums.java
index 0ea53f5bb1..de6aa05de7 100644
---
a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/debug/ReadReplicas.java
+++
b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/debug/replicas/Checksums.java
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package org.apache.hadoop.ozone.debug;
+package org.apache.hadoop.ozone.debug.replicas;
import static java.util.Collections.emptyMap;
@@ -33,7 +33,6 @@
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.Map;
-import org.apache.hadoop.hdds.cli.DebugSubcommand;
import org.apache.hadoop.hdds.client.BlockID;
import org.apache.hadoop.hdds.conf.OzoneConfiguration;
import org.apache.hadoop.hdds.protocol.DatanodeDetails;
@@ -46,27 +45,14 @@
import org.apache.hadoop.ozone.client.rpc.RpcClient;
import org.apache.hadoop.ozone.common.OzoneChecksumException;
import org.apache.hadoop.ozone.om.helpers.OmKeyLocationInfo;
-import org.apache.hadoop.ozone.shell.OzoneAddress;
-import org.apache.hadoop.ozone.shell.keys.KeyHandler;
-import org.kohsuke.MetaInfServices;
-import picocli.CommandLine;
+import org.slf4j.Logger;
/**
* Class that downloads every replica for all the blocks associated with a
* given key. It also generates a manifest file with information about the
* downloaded replicas.
*/
[email protected](name = "read-replicas",
- description = "Reads every replica for all the blocks associated with a " +
- "given key.")
-@MetaInfServices(DebugSubcommand.class)
-public class ReadReplicas extends KeyHandler implements DebugSubcommand {
-
- @CommandLine.Option(names = {"--outputDir", "-o", "--output-dir"},
- description = "Destination where the directory will be created" +
- " for the downloaded replicas and the manifest file.",
- defaultValue = "/opt/hadoop")
- private String outputDir;
+public class Checksums implements ReplicaVerifier {
private static final String JSON_PROPERTY_FILE_NAME = "filename";
private static final String JSON_PROPERTY_FILE_SIZE = "datasize";
@@ -81,70 +67,18 @@ public class ReadReplicas extends KeyHandler implements
DebugSubcommand {
private static final String JSON_PROPERTY_REPLICA_UUID = "uuid";
private static final String JSON_PROPERTY_REPLICA_EXCEPTION = "exception";
- @Override
- protected void execute(OzoneClient client, OzoneAddress address)
- throws IOException {
-
- address.ensureKeyAddress();
- boolean isChecksumVerifyEnabled
- = getConf().getBoolean("ozone.client.verify.checksum", true);
- OzoneConfiguration configuration = new OzoneConfiguration(getConf());
- configuration.setBoolean("ozone.client.verify.checksum",
- !isChecksumVerifyEnabled);
-
- RpcClient newClient = new RpcClient(configuration, null);
- try {
- ClientProtocol noChecksumClient;
- ClientProtocol checksumClient;
- if (isChecksumVerifyEnabled) {
- checksumClient = client.getObjectStore().getClientProxy();
- noChecksumClient = newClient;
- } else {
- checksumClient = newClient;
- noChecksumClient = client.getObjectStore().getClientProxy();
- }
-
- String volumeName = address.getVolumeName();
- String bucketName = address.getBucketName();
- String keyName = address.getKeyName();
- // Multilevel keys will have a '/' in their names. This interferes with
- // directory and file creation process. Flatten the keys to fix this.
- String sanitizedKeyName = address.getKeyName().replace("/", "_");
-
- File dir = createDirectory(volumeName, bucketName, sanitizedKeyName);
-
- OzoneKeyDetails keyInfoDetails
- = checksumClient.getKeyDetails(volumeName, bucketName, keyName);
-
- Map<OmKeyLocationInfo, Map<DatanodeDetails, OzoneInputStream>> replicas =
- checksumClient.getKeysEveryReplicas(volumeName, bucketName, keyName);
-
- Map<OmKeyLocationInfo, Map<DatanodeDetails, OzoneInputStream>>
- replicasWithoutChecksum = noChecksumClient
- .getKeysEveryReplicas(volumeName, bucketName, keyName);
-
- ObjectNode result = JsonUtils.createObjectNode(null);
- result.put(JSON_PROPERTY_FILE_NAME,
- volumeName + "/" + bucketName + "/" + keyName);
- result.put(JSON_PROPERTY_FILE_SIZE, keyInfoDetails.getDataSize());
-
- ArrayNode blocks = JsonUtils.createArrayNode();
- downloadReplicasAndCreateManifest(sanitizedKeyName, replicas,
- replicasWithoutChecksum, dir, blocks);
- result.set(JSON_PROPERTY_FILE_BLOCKS, blocks);
-
- String prettyJson =
JsonUtils.toJsonStringWithDefaultPrettyPrinter(result);
-
- String manifestFileName = sanitizedKeyName + "_manifest";
- System.out.println("Writing manifest file : " + manifestFileName);
- File manifestFile
- = new File(dir, manifestFileName);
- Files.write(manifestFile.toPath(),
- prettyJson.getBytes(StandardCharsets.UTF_8));
- } finally {
- newClient.close();
- }
+ private String outputDir;
+ private RpcClient rpcClient = null;
+ private OzoneClient client;
+ private Logger log;
+ private OzoneConfiguration ozoneConfiguration;
+
+  public Checksums(OzoneClient client, String outputDir, Logger log, OzoneConfiguration conf) {
+ this.client = client;
+ this.outputDir = outputDir;
+ this.log = log;
+ this.ozoneConfiguration = conf;
}
private void downloadReplicasAndCreateManifest(
@@ -161,8 +95,8 @@ private void downloadReplicasAndCreateManifest(
ArrayNode replicasJson = JsonUtils.createArrayNode();
blockIndex += 1;
- blockJson.put(JSON_PROPERTY_BLOCK_INDEX, blockIndex);
OmKeyLocationInfo locationInfo = block.getKey();
+ blockJson.put(JSON_PROPERTY_BLOCK_INDEX, blockIndex);
blockJson.put(JSON_PROPERTY_BLOCK_CONTAINERID,
locationInfo.getContainerID());
blockJson.put(JSON_PROPERTY_BLOCK_LOCALID, locationInfo.getLocalID());
@@ -184,7 +118,6 @@ private void downloadReplicasAndCreateManifest(
String fileName = keyName + "_block" + blockIndex + "_" +
datanode.getHostName();
- System.out.println("Writing : " + fileName);
Path path = new File(dir, fileName).toPath();
try (InputStream is = replica.getValue()) {
@@ -204,7 +137,7 @@ private void downloadReplicasAndCreateManifest(
blockJson.set(JSON_PROPERTY_BLOCK_REPLICAS, replicasJson);
blocks.add(blockJson);
- IOUtils.close(LOG, blockReplicasWithoutChecksum.values());
+ IOUtils.close(log, blockReplicasWithoutChecksum.values());
}
}
@@ -236,7 +169,7 @@ private File createDirectory(String volumeName, String
bucketName,
System.out.println("Creating directory : " + directoryName);
File dir = new File(outputDir, directoryName);
if (!dir.exists()) {
- if (dir.mkdir()) {
+ if (dir.mkdirs()) {
System.out.println("Successfully created!");
} else {
throw new IOException(String.format(
@@ -245,4 +178,66 @@ private File createDirectory(String volumeName, String
bucketName,
}
return dir;
}
+
+ @Override
+ public void verifyKey(OzoneKeyDetails keyDetails) {
+ String volumeName = keyDetails.getVolumeName();
+ String bucketName = keyDetails.getBucketName();
+ String keyName = keyDetails.getName();
+ System.out.println("Processing key : " + volumeName + "/" + bucketName +
"/" + keyName);
+ boolean isChecksumVerifyEnabled =
ozoneConfiguration.getBoolean("ozone.client.verify.checksum", true);
+ RpcClient newClient = null;
+ try {
+ OzoneConfiguration configuration = new
OzoneConfiguration(ozoneConfiguration);
+ configuration.setBoolean("ozone.client.verify.checksum",
!isChecksumVerifyEnabled);
+ newClient = getClient(isChecksumVerifyEnabled);
+ ClientProtocol noChecksumClient;
+ ClientProtocol checksumClient;
+ if (isChecksumVerifyEnabled) {
+ checksumClient = client.getObjectStore().getClientProxy();
+ noChecksumClient = newClient;
+ } else {
+ checksumClient = newClient;
+ noChecksumClient = client.getObjectStore().getClientProxy();
+ }
+
+ // Multilevel keys will have a '/' in their names. This interferes with
+ // directory and file creation process. Flatten the keys to fix this.
+ String sanitizedKeyName = keyName.replace("/", "_");
+
+ File dir = createDirectory(volumeName, bucketName, sanitizedKeyName);
+ OzoneKeyDetails keyInfoDetails =
checksumClient.getKeyDetails(volumeName, bucketName, keyName);
+ Map<OmKeyLocationInfo, Map<DatanodeDetails, OzoneInputStream>> replicas =
+ checksumClient.getKeysEveryReplicas(volumeName, bucketName, keyName);
+ Map<OmKeyLocationInfo, Map<DatanodeDetails, OzoneInputStream>>
replicasWithoutChecksum =
+ noChecksumClient.getKeysEveryReplicas(volumeName, bucketName,
keyName);
+
+ ObjectNode result = JsonUtils.createObjectNode(null);
+ result.put(JSON_PROPERTY_FILE_NAME, volumeName + "/" + bucketName + "/"
+ keyName);
+ result.put(JSON_PROPERTY_FILE_SIZE, keyInfoDetails.getDataSize());
+
+ ArrayNode blocks = JsonUtils.createArrayNode();
+ downloadReplicasAndCreateManifest(sanitizedKeyName, replicas,
replicasWithoutChecksum, dir, blocks);
+ result.set(JSON_PROPERTY_FILE_BLOCKS, blocks);
+
+ String prettyJson =
JsonUtils.toJsonStringWithDefaultPrettyPrinter(result);
+
+ String manifestFileName = sanitizedKeyName + "_manifest";
+ File manifestFile = new File(dir, manifestFileName);
+ Files.write(manifestFile.toPath(),
prettyJson.getBytes(StandardCharsets.UTF_8));
+ } catch (IOException e) {
+ throw new RuntimeException(e);
+ }
+ }
+
+ private RpcClient getClient(boolean isChecksumVerifyEnabled) throws
IOException {
+ if (rpcClient != null) {
+ return rpcClient;
+ }
+
+ OzoneConfiguration configuration = new
OzoneConfiguration(ozoneConfiguration);
+ configuration.setBoolean("ozone.client.verify.checksum",
!isChecksumVerifyEnabled);
+ rpcClient = new RpcClient(configuration, null);
+ return rpcClient;
+ }
}
diff --git
a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/debug/FindMissingPadding.java
b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/debug/replicas/FindMissingPadding.java
similarity index 65%
rename from
hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/debug/FindMissingPadding.java
rename to
hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/debug/replicas/FindMissingPadding.java
index 29acef2b0d..b5c11d41d0 100644
---
a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/debug/FindMissingPadding.java
+++
b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/debug/replicas/FindMissingPadding.java
@@ -15,24 +15,24 @@
* limitations under the License.
*/
-package org.apache.hadoop.ozone.debug;
+package org.apache.hadoop.ozone.debug.replicas;
import static java.util.Collections.emptySet;
import static java.util.Comparator.comparing;
import java.io.IOException;
+import java.io.PrintWriter;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
-import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;
-import org.apache.hadoop.hdds.cli.DebugSubcommand;
import org.apache.hadoop.hdds.client.ECReplicationConfig;
import org.apache.hadoop.hdds.client.StandaloneReplicationConfig;
+import org.apache.hadoop.hdds.conf.OzoneConfiguration;
import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos;
import org.apache.hadoop.hdds.protocol.proto.HddsProtos;
import org.apache.hadoop.hdds.scm.XceiverClientFactory;
@@ -48,41 +48,26 @@
import org.apache.hadoop.hdds.scm.storage.ContainerProtocolCalls;
import org.apache.hadoop.hdds.security.SecurityConfig;
import org.apache.hadoop.hdds.utils.HAUtils;
-import org.apache.hadoop.ozone.client.ObjectStore;
-import org.apache.hadoop.ozone.client.OzoneBucket;
import org.apache.hadoop.ozone.client.OzoneClient;
-import org.apache.hadoop.ozone.client.OzoneClientException;
import org.apache.hadoop.ozone.client.OzoneKey;
import org.apache.hadoop.ozone.client.OzoneKeyDetails;
import org.apache.hadoop.ozone.client.OzoneKeyLocation;
-import org.apache.hadoop.ozone.client.OzoneVolume;
-import org.apache.hadoop.ozone.client.protocol.ClientProtocol;
import org.apache.hadoop.ozone.client.rpc.RpcClient;
-import org.apache.hadoop.ozone.shell.Handler;
-import org.apache.hadoop.ozone.shell.OzoneAddress;
-import org.apache.hadoop.ozone.shell.Shell;
import org.apache.hadoop.security.token.Token;
import org.apache.hadoop.security.token.TokenIdentifier;
import org.apache.hadoop.util.StringUtils;
-import org.kohsuke.MetaInfServices;
-import picocli.CommandLine;
+import org.slf4j.Logger;
/**
* Find EC keys affected by missing padding blocks (HDDS-10681).
*/
[email protected](name = "find-missing-padding",
- aliases = { "fmp" },
- description = "List all keys with any missing padding, optionally limited
to a volume/bucket/key URI.")
-@MetaInfServices(DebugSubcommand.class)
-public class FindMissingPadding extends Handler implements DebugSubcommand {
+public class FindMissingPadding implements ReplicaVerifier {
- @CommandLine.Mixin
+ private OzoneClient ozoneClient;
private ScmOption scmOption;
-
- @CommandLine.Parameters(arity = "0..1",
- description = Shell.OZONE_URI_DESCRIPTION)
- private String uri;
-
+ private Logger log;
+ private PrintWriter printWriter;
+ private OzoneConfiguration ozoneConfiguration;
/**
* Keys possibly affected (those with any block under threshold size),
* grouped by container ID and block (local) ID.
@@ -91,69 +76,31 @@ public class FindMissingPadding extends Handler implements
DebugSubcommand {
private final Set<OzoneKey> affectedKeys = new HashSet<>();
- @Override
- protected OzoneAddress getAddress() throws OzoneClientException {
- return new OzoneAddress(uri);
+ public FindMissingPadding(OzoneClient ozoneClient, ScmOption scmOption,
Logger log,
+ PrintWriter printWriter, OzoneConfiguration ozoneConfiguration) {
+ this.ozoneClient = ozoneClient;
+ this.scmOption = scmOption;
+ this.log = log;
+ this.printWriter = printWriter;
+ this.ozoneConfiguration = ozoneConfiguration;
}
- @Override
- protected void execute(OzoneClient ozoneClient, OzoneAddress address) throws
IOException {
- findCandidateKeys(ozoneClient, address);
- checkContainers(ozoneClient);
+ protected void execute() throws IOException {
+ checkContainers();
handleAffectedKeys();
}
- private void findCandidateKeys(OzoneClient ozoneClient, OzoneAddress
address) throws IOException {
- ObjectStore objectStore = ozoneClient.getObjectStore();
- ClientProtocol rpcClient = objectStore.getClientProxy();
- String volumeName = address.getVolumeName();
- String bucketName = address.getBucketName();
- String keyName = address.getKeyName();
- if (!keyName.isEmpty()) {
- checkKey(rpcClient, volumeName, bucketName, keyName);
- } else if (!bucketName.isEmpty()) {
- OzoneVolume volume = objectStore.getVolume(volumeName);
- OzoneBucket bucket = volume.getBucket(bucketName);
- checkBucket(bucket, rpcClient);
- } else if (!volumeName.isEmpty()) {
- OzoneVolume volume = objectStore.getVolume(volumeName);
- checkVolume(volume, rpcClient);
- } else {
- for (Iterator<? extends OzoneVolume> it = objectStore.listVolumes(null);
it.hasNext();) {
- checkVolume(it.next(), rpcClient);
- }
- }
- }
-
- private void checkVolume(OzoneVolume volume, ClientProtocol rpcClient)
throws IOException {
- for (Iterator<? extends OzoneBucket> it = volume.listBuckets(null);
it.hasNext();) {
- OzoneBucket bucket = it.next();
- checkBucket(bucket, rpcClient);
- }
- }
-
- private void checkBucket(OzoneBucket bucket, ClientProtocol rpcClient)
throws IOException {
- String volumeName = bucket.getVolumeName();
- String bucketName = bucket.getName();
- for (Iterator<? extends OzoneKey> it = bucket.listKeys(null);
it.hasNext();) {
- OzoneKey key = it.next();
- if (isEC(key)) {
- checkKey(rpcClient, volumeName, bucketName, key.getName());
- } else {
- LOG.trace("Key {}/{}/{} is not EC", volumeName, bucketName,
key.getName());
- }
- }
+ @Override
+ public void verifyKey(OzoneKeyDetails keyDetails) {
+ checkECKey(keyDetails);
}
- private void checkKey(ClientProtocol rpcClient, String volumeName, String
bucketName, String keyName)
- throws IOException {
- OzoneKeyDetails keyDetails = rpcClient.getKeyDetails(volumeName,
bucketName, keyName);
- if (isEC(keyDetails)) {
- checkECKey(keyDetails);
+ private void checkECKey(OzoneKeyDetails keyDetails) {
+ if (!isEC(keyDetails)) {
+ log.trace("Key {}/{}/{} is not EC", keyDetails.getVolumeName(),
keyDetails.getBucketName(), keyDetails.getName());
+ return;
}
- }
- private void checkECKey(OzoneKeyDetails keyDetails) {
List<OzoneKeyLocation> locations = keyDetails.getOzoneKeyLocations();
if (!locations.isEmpty()) {
ECReplicationConfig ecConfig = (ECReplicationConfig)
keyDetails.getReplicationConfig();
@@ -167,7 +114,7 @@ private void checkECKey(OzoneKeyDetails keyDetails) {
}
}
} else {
- LOG.trace("Key {}/{}/{} has no locations",
+ log.trace("Key {}/{}/{} has no locations",
keyDetails.getVolumeName(), keyDetails.getBucketName(),
keyDetails.getName());
}
}
@@ -176,14 +123,14 @@ private static boolean isEC(OzoneKey key) {
return key.getReplicationConfig().getReplicationType() ==
HddsProtos.ReplicationType.EC;
}
- private void checkContainers(OzoneClient ozoneClient) throws IOException {
+ private void checkContainers() throws IOException {
if (candidateKeys.isEmpty()) {
return;
}
- SecurityConfig securityConfig = new SecurityConfig(getConf());
+ SecurityConfig securityConfig = new SecurityConfig(ozoneConfiguration);
final boolean tokenEnabled = securityConfig.isSecurityEnabled() &&
securityConfig.isContainerTokenEnabled();
- StorageContainerLocationProtocol scmContainerClient =
HAUtils.getScmContainerClient(getConf());
+ StorageContainerLocationProtocol scmContainerClient =
HAUtils.getScmContainerClient(ozoneConfiguration);
RpcClient rpcClient = (RpcClient) ozoneClient.getProxy();
XceiverClientFactory xceiverClientManager =
rpcClient.getXceiverClientManager();
Pipeline.Builder pipelineBuilder = Pipeline.newBuilder()
@@ -198,7 +145,7 @@ private void checkContainers(OzoneClient ozoneClient)
throws IOException {
ContainerInfo container = scmClient.getContainer(containerID);
if (container.getState() != HddsProtos.LifeCycleState.CLOSED) {
- LOG.trace("Skip container {} as it is not CLOSED, rather {}",
containerID, container.getState());
+ log.trace("Skip container {} as it is not CLOSED, rather {}",
containerID, container.getState());
continue;
}
@@ -208,7 +155,7 @@ private void checkContainers(OzoneClient ozoneClient)
throws IOException {
List<ContainerReplicaInfo> containerReplicas =
scmClient.getContainerReplicas(containerID);
- LOG.debug("Container {} replicas: {}", containerID,
containerReplicas.stream()
+ log.debug("Container {} replicas: {}", containerID,
containerReplicas.stream()
.sorted(comparing(ContainerReplicaInfo::getReplicaIndex)
.thenComparing(ContainerReplicaInfo::getState)
.thenComparing(r -> r.getDatanodeDetails().getUuidString()))
@@ -219,7 +166,7 @@ private void checkContainers(OzoneClient ozoneClient)
throws IOException {
for (ContainerReplicaInfo replica : containerReplicas) {
if
(!HddsProtos.LifeCycleState.CLOSED.name().equals(replica.getState())) {
- LOG.trace("Ignore container {} replica {} at {} in {} state",
+ log.trace("Ignore container {} replica {} at {} in {} state",
replica.getContainerID(), replica.getReplicaIndex(),
replica.getDatanodeDetails(), replica.getState());
continue;
}
@@ -236,10 +183,10 @@ private void checkContainers(OzoneClient ozoneClient)
throws IOException {
missingBlocks.remove(blockData.getBlockID().getLocalID());
}
if (missingBlocks.isEmpty()) {
- LOG.debug("All {} blocks in container {} found on replica {} at
{}",
+ log.debug("All {} blocks in container {} found on replica {} at
{}",
blockToKeysMap.keySet().size(), containerID,
replica.getReplicaIndex(), replica.getDatanodeDetails());
} else {
- LOG.info("Found {} blocks missing from container {} on replica
{} at {}",
+ log.info("Found {} blocks missing from container {} on replica
{} at {}",
missingBlocks.size(), containerID,
replica.getReplicaIndex(), replica.getDatanodeDetails());
missingBlocks.forEach(b ->
affectedKeys.addAll(blockToKeysMap.getOrDefault(b, emptySet())));
}
@@ -253,11 +200,11 @@ private void checkContainers(OzoneClient ozoneClient)
throws IOException {
private void handleAffectedKeys() {
if (!affectedKeys.isEmpty()) {
- out().println(StringUtils.join("\t", Arrays.asList(
+ printWriter.println(StringUtils.join("\t", Arrays.asList(
"Key", "Size", "Replication"
)));
for (OzoneKey key : affectedKeys) {
- out().println(StringUtils.join("\t", Arrays.asList(
+ printWriter.println(StringUtils.join("\t", Arrays.asList(
key.getVolumeName() + "/" + key.getBucketName() + "/" +
key.getName(),
key.getDataSize(),
key.getReplicationConfig().getReplication()
diff --git
a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/debug/replicas/ReplicasDebug.java
b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/debug/replicas/ReplicaVerifier.java
similarity index 58%
copy from
hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/debug/replicas/ReplicasDebug.java
copy to
hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/debug/replicas/ReplicaVerifier.java
index 19531136f4..cbb5f31c98 100644
---
a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/debug/replicas/ReplicasDebug.java
+++
b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/debug/replicas/ReplicaVerifier.java
@@ -17,22 +17,12 @@
package org.apache.hadoop.ozone.debug.replicas;
-import org.apache.hadoop.hdds.cli.DebugSubcommand;
-import org.apache.hadoop.ozone.debug.replicas.chunk.ChunkKeyHandler;
-import org.kohsuke.MetaInfServices;
-import picocli.CommandLine;
+import org.apache.hadoop.ozone.client.OzoneKeyDetails;
/**
- * Replicas debug related commands.
+ * Functional interface for implementing a key verifier.
*/
[email protected](
- name = "replicas",
- description = "Debug commands for replica-related issues, retrieving
replica information from the OM and " +
- "performing checks over the network against a running cluster.",
- subcommands = {
- ChunkKeyHandler.class
- }
-)
-@MetaInfServices(DebugSubcommand.class)
-public class ReplicasDebug implements DebugSubcommand {
+@FunctionalInterface
+public interface ReplicaVerifier {
+ void verifyKey(OzoneKeyDetails keyDetails);
}
diff --git
a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/debug/replicas/ReplicasDebug.java
b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/debug/replicas/ReplicasDebug.java
index 19531136f4..30d5d69f5f 100644
---
a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/debug/replicas/ReplicasDebug.java
+++
b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/debug/replicas/ReplicasDebug.java
@@ -30,7 +30,8 @@
description = "Debug commands for replica-related issues, retrieving
replica information from the OM and " +
"performing checks over the network against a running cluster.",
subcommands = {
- ChunkKeyHandler.class
+ ChunkKeyHandler.class,
+ ReplicasVerify.class
}
)
@MetaInfServices(DebugSubcommand.class)
diff --git
a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/debug/replicas/ReplicasVerify.java
b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/debug/replicas/ReplicasVerify.java
new file mode 100644
index 0000000000..a5d474219e
--- /dev/null
+++
b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/debug/replicas/ReplicasVerify.java
@@ -0,0 +1,142 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.ozone.debug.replicas;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.List;
+import org.apache.hadoop.hdds.scm.cli.ScmOption;
+import org.apache.hadoop.ozone.client.ObjectStore;
+import org.apache.hadoop.ozone.client.OzoneBucket;
+import org.apache.hadoop.ozone.client.OzoneClient;
+import org.apache.hadoop.ozone.client.OzoneClientException;
+import org.apache.hadoop.ozone.client.OzoneKey;
+import org.apache.hadoop.ozone.client.OzoneKeyDetails;
+import org.apache.hadoop.ozone.client.OzoneVolume;
+import org.apache.hadoop.ozone.shell.Handler;
+import org.apache.hadoop.ozone.shell.OzoneAddress;
+import org.apache.hadoop.ozone.shell.Shell;
+import picocli.CommandLine;
+
+/**
+ * Verify replicas command.
+ */
+
[email protected](
+ name = "verify",
+ description = "Run checks to verify data across replicas")
+public class ReplicasVerify extends Handler {
+ @CommandLine.Mixin
+ private ScmOption scmOption;
+
+ @CommandLine.Parameters(arity = "1",
+ description = Shell.OZONE_URI_DESCRIPTION)
+ private String uri;
+
+ @CommandLine.Option(names = {"-o", "--output-dir"},
+ description = "Destination directory to save the generated output.",
+ required = true)
+ private String outputDir;
+
+ @CommandLine.ArgGroup(exclusive = false, multiplicity = "1")
+ private Verification verification;
+
+ static class Verification {
+ @CommandLine.Option(names = "--checksums",
+ description = "Do client side data checksum validation of all
replicas.",
+ // value will be true only if the "--checksums" option was specified
on the CLI
+ defaultValue = "false")
+ private boolean doExecuteChecksums;
+
+ @CommandLine.Option(names = "--padding",
+ description = "Check for missing padding in erasure coded replicas.",
+ defaultValue = "false")
+ private boolean doExecutePadding;
+ }
+ private FindMissingPadding findMissingPadding;
+ private List<ReplicaVerifier> replicaVerifiers;
+
+ @Override
+ protected void execute(OzoneClient client, OzoneAddress address) throws
IOException {
+ replicaVerifiers = new ArrayList<>();
+
+ if (verification.doExecuteChecksums) {
+ replicaVerifiers.add(new Checksums(client, outputDir, LOG, getConf()));
+ }
+
+ if (verification.doExecutePadding) {
+ findMissingPadding = new FindMissingPadding(client, scmOption, LOG,
out(), getConf());
+ replicaVerifiers.add(findMissingPadding);
+ }
+
+ findCandidateKeys(client, address);
+
+ if (verification.doExecutePadding) {
+ findMissingPadding.execute();
+ }
+ }
+
+ @Override
+ protected OzoneAddress getAddress() throws OzoneClientException {
+ return new OzoneAddress(uri);
+ }
+
+ void findCandidateKeys(OzoneClient ozoneClient, OzoneAddress address) throws
IOException {
+ ObjectStore objectStore = ozoneClient.getObjectStore();
+ String volumeName = address.getVolumeName();
+ String bucketName = address.getBucketName();
+ String keyName = address.getKeyName();
+ if (!keyName.isEmpty()) {
+ OzoneKeyDetails keyDetails =
ozoneClient.getProxy().getKeyDetails(volumeName, bucketName, keyName);
+ processKey(keyDetails);
+ } else if (!bucketName.isEmpty()) {
+ OzoneVolume volume = objectStore.getVolume(volumeName);
+ OzoneBucket bucket = volume.getBucket(bucketName);
+ checkBucket(bucket);
+ } else if (!volumeName.isEmpty()) {
+ OzoneVolume volume = objectStore.getVolume(volumeName);
+ checkVolume(volume);
+ } else {
+ for (Iterator<? extends OzoneVolume> it = objectStore.listVolumes(null);
it.hasNext();) {
+ checkVolume(it.next());
+ }
+ }
+ }
+
+ void checkVolume(OzoneVolume volume) throws IOException {
+ for (Iterator<? extends OzoneBucket> it = volume.listBuckets(null);
it.hasNext();) {
+ OzoneBucket bucket = it.next();
+ checkBucket(bucket);
+ }
+ }
+
+ void checkBucket(OzoneBucket bucket) throws IOException {
+ for (Iterator<? extends OzoneKey> it = bucket.listKeys(null);
it.hasNext();) {
+ OzoneKey key = it.next();
+ // TODO: Remove this check once HDDS-12094 is fixed
+ if (!key.getName().endsWith("/")) {
+ processKey(bucket.getKey(key.getName()));
+ }
+ }
+ }
+
+ void processKey(OzoneKeyDetails keyDetails) {
+ replicaVerifiers.forEach(verifier -> verifier.verifyKey(keyDetails));
+ }
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]