vanzin commented on a change in pull request #25007: 
[SPARK-28209][CORE][SHUFFLE] Proposed new shuffle writer API 
URL: https://github.com/apache/spark/pull/25007#discussion_r308855608
 
 

 ##########
 File path: 
core/src/main/java/org/apache/spark/shuffle/sort/BypassMergeSortShuffleWriter.java
 ##########
 @@ -179,43 +193,79 @@ public void write(Iterator<Product2<K, V>> records) throws IOException {
    *
    * @return array of lengths, in bytes, of each partition of the file (used by map output tracker).
    */
-  private long[] writePartitionedFile(File outputFile) throws IOException {
+  private long[] writePartitionedData(ShuffleMapOutputWriter mapOutputWriter) throws IOException {
     // Track location of the partition starts in the output file
     final long[] lengths = new long[numPartitions];
     if (partitionWriters == null) {
       // We were passed an empty iterator
       return lengths;
     }
-
-    final FileOutputStream out = new FileOutputStream(outputFile, true);
     final long writeStartTime = System.nanoTime();
-    boolean threwException = true;
     try {
       for (int i = 0; i < numPartitions; i++) {
         final File file = partitionWriterSegments[i].file();
+        ShufflePartitionWriter writer = mapOutputWriter.getPartitionWriter(i);
         if (file.exists()) {
-          final FileInputStream in = new FileInputStream(file);
-          boolean copyThrewException = true;
-          try {
-            lengths[i] = Utils.copyStream(in, out, false, transferToEnabled);
-            copyThrewException = false;
-          } finally {
-            Closeables.close(in, copyThrewException);
+          if (transferToEnabled) {
+            // Using WritableByteChannelWrapper to make resource closing consistent between
+            // this implementation and UnsafeShuffleWriter.
+            Optional<WritableByteChannelWrapper> maybeOutputChannel = writer.openChannelWrapper();
+            if (maybeOutputChannel.isPresent()) {
+              writePartitionedDataWithChannel(file, maybeOutputChannel.get());
+            } else {
+              writePartitionedDataWithStream(file, writer);
+            }
+          } else {
+            writePartitionedDataWithStream(file, writer);
           }
           if (!file.delete()) {
             logger.error("Unable to delete file for partition {}", i);
           }
         }
+        lengths[i] = writer.getNumBytesWritten();
       }
-      threwException = false;
     } finally {
-      Closeables.close(out, threwException);
       writeMetrics.incWriteTime(System.nanoTime() - writeStartTime);
     }
     partitionWriters = null;
     return lengths;
   }
 
+  private void writePartitionedDataWithChannel(
+      File file, WritableByteChannelWrapper outputChannel) throws IOException {
 
 Review comment:
   nit: put each argument on its own line.

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
[email protected]


With regards,
Apache Git Services

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to