aswinshakil commented on code in PR #7401:
URL: https://github.com/apache/ozone/pull/7401#discussion_r1866770483
##########
hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ec/reconstruction/ECValidator.java:
##########
@@ -0,0 +1,114 @@
+package org.apache.hadoop.ozone.container.ec.reconstruction;
+
+import org.apache.hadoop.hdds.client.ECReplicationConfig;
+import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos;
+import org.apache.hadoop.hdds.scm.OzoneClientConfig;
+import org.apache.hadoop.hdds.scm.storage.ECBlockOutputStream;
+import org.apache.hadoop.ozone.common.Checksum;
+import org.apache.hadoop.ozone.common.ChecksumData;
+import org.apache.hadoop.ozone.common.ChunkBuffer;
+import org.apache.hadoop.ozone.common.OzoneChecksumException;
+import org.apache.hadoop.ozone.container.common.helpers.BlockData;
+import org.apache.hadoop.ozone.container.common.helpers.ChunkInfo;
+import org.apache.hadoop.ozone.container.common.interfaces.Container;
+import org.apache.ratis.thirdparty.com.google.protobuf.ByteString;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.nio.ByteBuffer;
+import java.util.*;
+import java.util.stream.Collectors;
+
+public class ECValidator {
+
+ private static final Logger LOG =
+ LoggerFactory.getLogger(ECValidator.class);
+ private final boolean isValidationEnabled;
+ private Collection<Integer> reconstructionIndexes;
+ private final int parityCount;
+ private long blockLength;
+ private final ECReplicationConfig ecReplicationConfig;
+
+ ECValidator(OzoneClientConfig config, ECReplicationConfig ecReplConfig) {
+ // We fetch the configuration value beforehand to avoid re-fetching on
every validation call
+ isValidationEnabled = config.getEcReconstructionValidation();
+ ecReplicationConfig = ecReplConfig;
+ parityCount = ecReplConfig.getParity();
+ }
+
+ public void setReconstructionIndexes(Collection<Integer>
reconstructionIndexes) {
+ this.reconstructionIndexes = reconstructionIndexes;
+ }
+
+ public void setBlockLength(long blockLength) {
+ this.blockLength = blockLength;
+ }
+
+ private void validateChecksumInStripe(ContainerProtos.ChecksumData
checksumData,
+ ByteString stripeChecksum, int
chunkIndex)
+ throws OzoneChecksumException {
+
+ // If we have say 100 bytes per checksum, in the stripe the first 100
bytes should
+ // correspond to the fist chunk checksum, next 100 should be the second
chunk checksum
+ // and so on. So the checksum should range from (numOfBytes * index of
chunk) to ((numOfBytes * index of chunk) + numOfBytes)
+ int bytesPerChecksum = checksumData.getBytesPerChecksum();
+
+ int checksumIdxStart = (bytesPerChecksum * chunkIndex);
+ ByteString expectedChecksum = stripeChecksum.substring(checksumIdxStart,
Review Comment:
Instead of `ByteString` and `substring`, we can use `ByteBuffer` for
fine-grained, byte-level buffer manipulation.
`ECBlockChecksumComputer#computeCompositeCrc()` has a similar implementation
for this.
##########
hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ec/reconstruction/ECValidator.java:
##########
@@ -0,0 +1,114 @@
+package org.apache.hadoop.ozone.container.ec.reconstruction;
+
+import org.apache.hadoop.hdds.client.ECReplicationConfig;
+import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos;
+import org.apache.hadoop.hdds.scm.OzoneClientConfig;
+import org.apache.hadoop.hdds.scm.storage.ECBlockOutputStream;
+import org.apache.hadoop.ozone.common.Checksum;
+import org.apache.hadoop.ozone.common.ChecksumData;
+import org.apache.hadoop.ozone.common.ChunkBuffer;
+import org.apache.hadoop.ozone.common.OzoneChecksumException;
+import org.apache.hadoop.ozone.container.common.helpers.BlockData;
+import org.apache.hadoop.ozone.container.common.helpers.ChunkInfo;
+import org.apache.hadoop.ozone.container.common.interfaces.Container;
+import org.apache.ratis.thirdparty.com.google.protobuf.ByteString;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.nio.ByteBuffer;
+import java.util.*;
+import java.util.stream.Collectors;
+
+public class ECValidator {
+
+ private static final Logger LOG =
+ LoggerFactory.getLogger(ECValidator.class);
+ private final boolean isValidationEnabled;
+ private Collection<Integer> reconstructionIndexes;
+ private final int parityCount;
+ private long blockLength;
+ private final ECReplicationConfig ecReplicationConfig;
+
+ ECValidator(OzoneClientConfig config, ECReplicationConfig ecReplConfig) {
+ // We fetch the configuration value beforehand to avoid re-fetching on
every validation call
+ isValidationEnabled = config.getEcReconstructionValidation();
+ ecReplicationConfig = ecReplConfig;
+ parityCount = ecReplConfig.getParity();
+ }
+
+ public void setReconstructionIndexes(Collection<Integer>
reconstructionIndexes) {
+ this.reconstructionIndexes = reconstructionIndexes;
+ }
+
+ public void setBlockLength(long blockLength) {
+ this.blockLength = blockLength;
+ }
+
+ private void validateChecksumInStripe(ContainerProtos.ChecksumData
checksumData,
+ ByteString stripeChecksum, int
chunkIndex)
+ throws OzoneChecksumException {
+
+ // If we have say 100 bytes per checksum, in the stripe the first 100
bytes should
+ // correspond to the fist chunk checksum, next 100 should be the second
chunk checksum
Review Comment:
A chunk can have multiple checksums depending on the size of the chunk and
`bytesPerCrc`.
For example, if we have EC 3-2-1024k, each chunk is 1 MB. The calculation
would be correct if `bytesPerCrc` were also 1 MB, but `bytesPerCrc` is
configurable, and #6331 changes its default value to 16 KB. That means we
would have (1024/16) = 64 checksums for each chunk. We need to take that into
account as well.
You can take a look at #7230, where I have added changes to split the
`stripeChecksum` into parts. But the core idea is the one I mentioned above.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]