otterc commented on a change in pull request #32401:
URL: https://github.com/apache/spark/pull/32401#discussion_r668069285
##########
File path:
core/src/main/java/org/apache/spark/shuffle/checksum/ShuffleChecksumHelper.java
##########
@@ -0,0 +1,81 @@
+package org.apache.spark.shuffle.checksum;
+
+import java.util.zip.Adler32;
+import java.util.zip.CRC32;
+import java.util.zip.Checksum;
+
+import org.apache.spark.SparkConf;
+import org.apache.spark.SparkException;
+import org.apache.spark.internal.config.package$;
+import org.apache.spark.storage.ShuffleChecksumBlockId;
+
+public class ShuffleChecksumHelper {
Review comment:
Nit: documentation is missing
##########
File path:
core/src/main/java/org/apache/spark/shuffle/api/ShuffleMapOutputWriter.java
##########
@@ -59,6 +59,10 @@
* available to downstream reduce tasks. If this method throws any
exception, this module's
* {@link #abort(Throwable)} method will be invoked before propagating the
exception.
* <p>
+ * Shuffle extension who cares about the cause of shuffle data corruption
should store
Review comment:
Nit: `Shuffle extensions which care about the cause of shuffle data
corruption...`
##########
File path:
core/src/main/java/org/apache/spark/shuffle/checksum/ShuffleChecksumHelper.java
##########
@@ -0,0 +1,81 @@
+package org.apache.spark.shuffle.checksum;
+
+import java.util.zip.Adler32;
+import java.util.zip.CRC32;
+import java.util.zip.Checksum;
+
+import org.apache.spark.SparkConf;
+import org.apache.spark.SparkException;
+import org.apache.spark.internal.config.package$;
+import org.apache.spark.storage.ShuffleChecksumBlockId;
+
+public class ShuffleChecksumHelper {
+
+ /** Used when the checksum is disabled for shuffle. */
+ private static final Checksum[] EMPTY_CHECKSUM = new Checksum[0];
+ public static final long[] EMPTY_CHECKSUM_VALUE = new long[0];
+
+ public static boolean isShuffleChecksumEnabled(SparkConf conf) {
+ return (boolean) conf.get(package$.MODULE$.SHUFFLE_CHECKSUM_ENABLED());
+ }
+
+ public static Checksum[] createPartitionChecksumsIfEnabled(int
numPartitions, SparkConf conf)
+ throws SparkException {
+ Checksum[] partitionChecksums;
Review comment:
Nit: Doesn't look like this is needed here?
##########
File path: core/src/main/scala/org/apache/spark/storage/BlockId.scala
##########
@@ -92,6 +92,11 @@ case class ShuffleIndexBlockId(shuffleId: Int, mapId: Long,
reduceId: Int) exten
override def name: String = "shuffle_" + shuffleId + "_" + mapId + "_" +
reduceId + ".index"
}
+@DeveloperApi
Review comment:
Nit: `@Since` is missing
##########
File path:
core/src/main/scala/org/apache/spark/shuffle/IndexShuffleBlockResolver.scala
##########
@@ -360,13 +389,41 @@ private[spark] class IndexShuffleBlockResolver(
if (dataTmp != null && dataTmp.exists() &&
!dataTmp.renameTo(dataFile)) {
throw new IOException("fail to rename file " + dataTmp + " to " +
dataFile)
}
+
+ // write the checksum file
+ checksumTmpOpt.zip(checksumFileOpt).foreach { case (checksumTmp,
checksumFile) =>
+ val out = new DataOutputStream(
+ new BufferedOutputStream(
+ new FileOutputStream(checksumTmp)
+ )
+ )
+ Utils.tryWithSafeFinally {
+ checksums.foreach(out.writeLong)
+ } {
+ out.close()
+ }
+
+ if (checksumFile.exists()) {
+ checksumFile.delete()
+ }
+ if (!checksumTmp.renameTo(checksumFile)) {
+ // It's not worthwhile to fail here after index file and data
file are already
+ // successfully stored due to checksum is only used for the
corner error case.
+ logWarning("fail to rename file " + checksumTmp + " to " +
checksumFile)
+ }
+ }
}
}
} finally {
logDebug(s"Shuffle index for mapId $mapId: ${lengths.mkString("[", ",",
"]")}")
if (indexTmp.exists() && !indexTmp.delete()) {
logError(s"Failed to delete temporary index file at
${indexTmp.getAbsolutePath}")
}
+ checksumTmpOpt.foreach { checksumTmp =>
+ if (checksumTmp.exists() && !checksumTmp.delete()) {
Review comment:
Here `checksumTmp.delete()` can also throw IOException. Should that be
propagated to the caller, given that writing the checksum is best-effort?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]