This is an automated email from the ASF dual-hosted git repository. elek pushed a commit to branch HDDS-2204 in repository https://gitbox.apache.org/repos/asf/hadoop-ozone.git
commit a76bdf71fd6ceac7edc9f688722c59c4771bf188 Author: Tsz Wo Nicholas Sze <szets...@apache.org> AuthorDate: Fri Oct 4 17:58:37 2019 +0800 HDDS-2204. Avoid buffer coping in checksum verification. --- .../hdds/scm/storage/ContainerProtocolCalls.java | 2 +- .../org/apache/hadoop/ozone/common/Checksum.java | 218 ++++++++++----------- .../apache/hadoop/ozone/common/ChecksumData.java | 4 +- .../apache/hadoop/ozone/common/TestChecksum.java | 6 +- 4 files changed, 108 insertions(+), 122 deletions(-) diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/storage/ContainerProtocolCalls.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/storage/ContainerProtocolCalls.java index d0ba60d..e549eec 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/storage/ContainerProtocolCalls.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/storage/ContainerProtocolCalls.java @@ -337,7 +337,7 @@ public final class ContainerProtocolCalls { KeyValue.newBuilder().setKey("OverWriteRequested").setValue("true") .build(); Checksum checksum = new Checksum(); - ChecksumData checksumData = checksum.computeChecksum(data, 0, data.length); + final ChecksumData checksumData = checksum.computeChecksum(data); ChunkInfo chunk = ChunkInfo.newBuilder() .setChunkName(blockID.getLocalID() + "_chunk") diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/common/Checksum.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/common/Checksum.java index 0e70515..7db1d2b 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/common/Checksum.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/common/Checksum.java @@ -18,22 +18,21 @@ package org.apache.hadoop.ozone.common; import com.google.common.annotations.VisibleForTesting; -import com.google.common.primitives.Longs; +import java.io.DataOutputStream; +import java.io.IOException; import java.nio.ByteBuffer; import java.security.MessageDigest; import java.security.NoSuchAlgorithmException; import java.util.ArrayList; import java.util.List; +import java.util.function.Function; +import java.util.function.Supplier; import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos; import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos .ChecksumType; -import org.apache.hadoop.io.MD5Hash; import org.apache.hadoop.ozone.OzoneConfigKeys; -import org.apache.hadoop.ozone.OzoneConsts; -import org.apache.hadoop.util.PureJavaCrc32; -import org.apache.hadoop.util.PureJavaCrc32C; import org.apache.ratis.thirdparty.com.google.protobuf.ByteString; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -44,16 +43,71 @@ import org.slf4j.LoggerFactory; * This class is not thread safe. */ public class Checksum { - public static final Logger LOG = LoggerFactory.getLogger(Checksum.class); + private static Function<ByteBuffer, ByteString> newMessageDigestFunction( + String algorithm) { + final MessageDigest md; + try { + md = MessageDigest.getInstance(algorithm); + } catch (NoSuchAlgorithmException e) { + throw new IllegalStateException( + "Failed to get MessageDigest for " + algorithm, e); + } + return data -> { + md.reset(); + md.update(data); + return ByteString.copyFrom(md.digest()); + }; + } + + private static ByteString int2ByteString(int n) { + final ByteString.Output out = ByteString.newOutput(); + try(DataOutputStream dataOut = new DataOutputStream(out)) { + dataOut.writeInt(n); + } catch (IOException e) { + throw new IllegalStateException( + "Failed to write integer n = " + n + " to a ByteString", e); + } + return out.toByteString(); + } + + private static Function<ByteBuffer, ByteString> newChecksumByteBufferFunction( + Supplier<ChecksumByteBuffer> constructor) { + final ChecksumByteBuffer algorithm = constructor.get(); + return data -> { + algorithm.reset(); + algorithm.update(data); + return int2ByteString((int)algorithm.getValue()); + }; + } + + /** The algorithms for {@link ChecksumType}. */ + enum Algorithm { + NONE(() -> data -> ByteString.EMPTY), + CRC32(() -> newChecksumByteBufferFunction(PureJavaCrc32ByteBuffer::new)), + CRC32C(() -> newChecksumByteBufferFunction(PureJavaCrc32CByteBuffer::new)), + SHA256(() -> newMessageDigestFunction("SHA-256")), + MD5(() -> newMessageDigestFunction("MD5")); + + private final Supplier<Function<ByteBuffer, ByteString>> constructor; + + static Algorithm valueOf(ChecksumType type) { + return valueOf(type.name()); + } + + Algorithm(Supplier<Function<ByteBuffer, ByteString>> constructor) { + this.constructor = constructor; + } + + Function<ByteBuffer, ByteString> newChecksumFunction() { + return constructor.get(); + } + } + private final ChecksumType checksumType; private final int bytesPerChecksum; - private PureJavaCrc32 crc32Checksum; - private PureJavaCrc32C crc32cChecksum; - private MessageDigest sha; - /** * Constructs a Checksum object. * @param type type of Checksum @@ -78,13 +132,12 @@ public class Checksum { /** * Computes checksum for give data. - * @param byteBuffer input data in the form of ByteString. + * @param data input data. * @return ChecksumData computed for input data. */ - public ChecksumData computeChecksum(ByteBuffer byteBuffer) + public ChecksumData computeChecksum(byte[] data, int off, int len) throws OzoneChecksumException { - return computeChecksum(byteBuffer.array(), byteBuffer.position(), - byteBuffer.limit()); + return computeChecksum(ByteBuffer.wrap(data, off, len)); } /** @@ -94,46 +147,38 @@ public class Checksum { */ public ChecksumData computeChecksum(byte[] data) throws OzoneChecksumException { - return computeChecksum(data, 0, data.length); + return computeChecksum(ByteBuffer.wrap(data)); } /** * Computes checksum for give data. - * @param data input data in the form of byte array. + * @param data input data. * @return ChecksumData computed for input data. + * @throws OzoneChecksumException thrown when ChecksumType is not recognized */ - public ChecksumData computeChecksum(byte[] data, int offset, int len) + public ChecksumData computeChecksum(ByteBuffer data) throws OzoneChecksumException { - ChecksumData checksumData = new ChecksumData(this.checksumType, this - .bytesPerChecksum); + if (!data.isReadOnly()) { + data = data.asReadOnlyBuffer(); + } + + final ChecksumData checksumData = new ChecksumData( + checksumType, bytesPerChecksum); if (checksumType == ChecksumType.NONE) { // Since type is set to NONE, we do not need to compute the checksums return checksumData; } - switch (checksumType) { - case CRC32: - crc32Checksum = new PureJavaCrc32(); - break; - case CRC32C: - crc32cChecksum = new PureJavaCrc32C(); - break; - case SHA256: - try { - sha = MessageDigest.getInstance(OzoneConsts.FILE_HASH); - } catch (NoSuchAlgorithmException e) { - throw new OzoneChecksumException(OzoneConsts.FILE_HASH, e); - } - break; - case MD5: - break; - default: + final Function<ByteBuffer, ByteString> function; + try { + function = Algorithm.valueOf(checksumType).newChecksumFunction(); + } catch (Exception e) { throw new OzoneChecksumException(checksumType); } // Compute number of checksums needs for given data length based on bytes // per checksum. - int dataSize = len - offset; + final int dataSize = data.remaining(); int numChecksums = (dataSize + bytesPerChecksum - 1) / bytesPerChecksum; // Checksum is computed for each bytesPerChecksum number of bytes of data @@ -141,7 +186,7 @@ public class Checksum { // remaining data with length less than bytesPerChecksum. List<ByteString> checksumList = new ArrayList<>(numChecksums); for (int index = 0; index < numChecksums; index++) { - checksumList.add(computeChecksumAtIndex(data, index, offset, len)); + checksumList.add(computeChecksum(data, function, bytesPerChecksum)); } checksumData.setChecksums(checksumList); @@ -149,78 +194,24 @@ public class Checksum { } /** - * Computes checksum based on checksumType for a data block at given index - * and a max length of bytesPerChecksum. + * Compute checksum using the algorithm for the data upto the max length. * @param data input data - * @param index index to compute the offset from where data must be read - * @param start start pos of the array where the computation has to start - * @length length of array till which checksum needs to be computed + * @param function the checksum function + * @param maxLength the max length of data * @return computed checksum ByteString - * @throws OzoneChecksumException thrown when ChecksumType is not recognized */ - private ByteString computeChecksumAtIndex(byte[] data, int index, int start, - int length) - throws OzoneChecksumException { - int offset = start + index * bytesPerChecksum; - int dataLength = length - start; - int len = bytesPerChecksum; - if ((offset + len) > dataLength) { - len = dataLength - offset; - } - byte[] checksumBytes = null; - switch (checksumType) { - case CRC32: - checksumBytes = computeCRC32Checksum(data, offset, len); - break; - case CRC32C: - checksumBytes = computeCRC32CChecksum(data, offset, len); - break; - case SHA256: - checksumBytes = computeSHA256Checksum(data, offset, len); - break; - case MD5: - checksumBytes = computeMD5Checksum(data, offset, len); - break; - default: - throw new OzoneChecksumException(checksumType); + private static ByteString computeChecksum(ByteBuffer data, + Function<ByteBuffer, ByteString> function, int maxLength) { + final int limit = data.limit(); + try { + final int maxIndex = data.position() + maxLength; + if (limit > maxIndex) { + data.limit(maxIndex); + } + return function.apply(data); + } finally { + data.limit(limit); } - - return ByteString.copyFrom(checksumBytes); - } - - /** - * Computes CRC32 checksum. - */ - private byte[] computeCRC32Checksum(byte[] data, int offset, int len) { - crc32Checksum.reset(); - crc32Checksum.update(data, offset, len); - return Longs.toByteArray(crc32Checksum.getValue()); - } - - /** - * Computes CRC32C checksum. - */ - private byte[] computeCRC32CChecksum(byte[] data, int offset, int len) { - crc32cChecksum.reset(); - crc32cChecksum.update(data, offset, len); - return Longs.toByteArray(crc32cChecksum.getValue()); - } - - /** - * Computes SHA-256 checksum. - */ - private byte[] computeSHA256Checksum(byte[] data, int offset, int len) { - sha.reset(); - sha.update(data, offset, len); - return sha.digest(); - } - - /** - * Computes MD5 checksum. - */ - private byte[] computeMD5Checksum(byte[] data, int offset, int len) { - MD5Hash md5out = MD5Hash.digest(data, offset, len); - return md5out.getDigest(); } /** @@ -235,7 +226,8 @@ public class Checksum { */ public static boolean verifyChecksum(ByteString byteString, ChecksumData checksumData, int startIndex) throws OzoneChecksumException { - return verifyChecksum(byteString.toByteArray(), checksumData, startIndex); + final ByteBuffer buffer = byteString.asReadOnlyByteBuffer(); + return verifyChecksum(buffer, checksumData, startIndex); } /** @@ -247,7 +239,7 @@ public class Checksum { */ public static boolean verifyChecksum(byte[] data, ChecksumData checksumData) throws OzoneChecksumException { - return verifyChecksum(data, checksumData, 0); + return verifyChecksum(ByteBuffer.wrap(data), checksumData, 0); } /** @@ -259,7 +251,8 @@ public class Checksum { * data's computed checksum. * @throws OzoneChecksumException is thrown if checksums do not match */ - public static boolean verifyChecksum(byte[] data, ChecksumData checksumData, + private static boolean verifyChecksum(ByteBuffer data, + ChecksumData checksumData, int startIndex) throws OzoneChecksumException { ChecksumType checksumType = checksumData.getChecksumType(); if (checksumType == ChecksumType.NONE) { @@ -269,11 +262,8 @@ public class Checksum { int bytesPerChecksum = checksumData.getBytesPerChecksum(); Checksum checksum = new Checksum(checksumType, bytesPerChecksum); - ChecksumData computedChecksumData = - checksum.computeChecksum(data, 0, data.length); - - return checksumData.verifyChecksumDataMatches(computedChecksumData, - startIndex); + final ChecksumData computed = checksum.computeChecksum(data); + return checksumData.verifyChecksumDataMatches(computed, startIndex); } /** diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/common/ChecksumData.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/common/ChecksumData.java index 4a927fb..3658b40 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/common/ChecksumData.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/common/ChecksumData.java @@ -36,8 +36,8 @@ public class ChecksumData { private ChecksumType type; // Checksum will be computed for every bytesPerChecksum number of bytes and // stored sequentially in checksumList - private int bytesPerChecksum; - private List<ByteString> checksums; + private final int bytesPerChecksum; + private final List<ByteString> checksums; public ChecksumData(ChecksumType checksumType, int bytesPerChecksum) { this(checksumType, bytesPerChecksum, Lists.newArrayList()); diff --git a/hadoop-hdds/common/src/test/java/org/apache/hadoop/ozone/common/TestChecksum.java b/hadoop-hdds/common/src/test/java/org/apache/hadoop/ozone/common/TestChecksum.java index 819c29f..a924321 100644 --- a/hadoop-hdds/common/src/test/java/org/apache/hadoop/ozone/common/TestChecksum.java +++ b/hadoop-hdds/common/src/test/java/org/apache/hadoop/ozone/common/TestChecksum.java @@ -82,16 +82,12 @@ public class TestChecksum { * not match. */ @Test - public void testChecksumMismatchForDifferentChecksumTypes() throws Exception { - byte[] data = RandomStringUtils.randomAlphabetic(55).getBytes(); - + public void testChecksumMismatchForDifferentChecksumTypes() { // Checksum1 of type SHA-256 Checksum checksum1 = getChecksum(null); - ChecksumData checksumData1 = checksum1.computeChecksum(data); // Checksum2 of type CRC32 Checksum checksum2 = getChecksum(ContainerProtos.ChecksumType.CRC32); - ChecksumData checksumData2 = checksum2.computeChecksum(data); // The two checksums should not match as they have different types Assert.assertNotEquals( --------------------------------------------------------------------- To unsubscribe, e-mail: hdfs-commits-unsubscr...@hadoop.apache.org For additional commands, e-mail: hdfs-commits-h...@hadoop.apache.org