mridulm commented on a change in pull request #32401:
URL: https://github.com/apache/spark/pull/32401#discussion_r669342242



##########
File path: core/src/main/java/org/apache/spark/shuffle/checksum/ShuffleChecksumHelper.java
##########
@@ -0,0 +1,66 @@
+package org.apache.spark.shuffle.checksum;
+
+import java.util.Locale;
+import java.util.zip.Adler32;
+import java.util.zip.CRC32;
+import java.util.zip.Checksum;
+
+import org.apache.spark.SparkConf;
+import org.apache.spark.SparkException;
+import org.apache.spark.internal.config.package$;
+import org.apache.spark.storage.ShuffleChecksumBlockId;
+
+public class ShuffleChecksumHelper {
+
+  public static boolean isShuffleChecksumEnabled(SparkConf conf) {
+    return (boolean) conf.get(package$.MODULE$.SHUFFLE_CHECKSUM_ENABLED());
+  }
+
+  public static Checksum[] createPartitionChecksumsIfEnabled(int numPartitions, SparkConf conf)
+    throws SparkException {
+    Checksum[] partitionChecksums;
+
+    if (!isShuffleChecksumEnabled(conf)) {
+      partitionChecksums = new Checksum[0];
+      return partitionChecksums;
+    }
+
+    String checksumAlgo = shuffleChecksumAlgorithm(conf).toLowerCase(Locale.ROOT);
+    switch (checksumAlgo) {
+      case "adler32":
+        partitionChecksums = new Adler32[numPartitions];
+        for (int i = 0; i < numPartitions; i ++) {
+          partitionChecksums[i] = new Adler32();
+        }
+        return partitionChecksums;
+
+      case "crc32":
+        partitionChecksums = new CRC32[numPartitions];
+        for (int i = 0; i < numPartitions; i ++) {
+          partitionChecksums[i] = new CRC32();
+        }
+        return partitionChecksums;
+
+      default:
+        throw new SparkException("Unsupported shuffle checksum algorithm: " + checksumAlgo);
+    }
+  }
+
+  public static long[] getChecksumValues(Checksum[] partitionChecksums) {

Review comment:
       I was looking to avoid the allocations and the looping, but that is a
nit-level micro-optimization that is not worth it.




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]



---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to