YannByron commented on code in PR #1454:
URL: https://github.com/apache/incubator-paimon/pull/1454#discussion_r1247519322


##########
paimon-spark/paimon-spark-common/src/main/scala/org/apache/paimon/spark/commands/WriteIntoPaimonTable.scala:
##########
@@ -88,29 +88,32 @@ case class WriteIntoPaimonTable(table: FileStoreTable, 
overwrite: Boolean, data:
       
withBucketCol.mapPartitions(commonBucketProcessor.processPartition)(withBucketDataEncoder)
     }
 
-    val commitMessages =
-      withAssignedBucket
+    val commitMessages = {
+      var df = withAssignedBucket
         .toDF()
-        .repartition(partitionCols ++ Seq(col(BUCKET_COL)): _*)
-        .mapPartitions {
-          iter =>
-            val write = writeBuilder.newWrite()
-            write.withIOManager(createIOManager)
-            try {
-              iter.foreach {
-                row =>
-                  val bucket = row.getInt(bucketColIdx)
-                  val bucketColDropped = originFromRow(toRow(row))
-                  write.write(new DynamicBucketRow(new SparkRow(rowType, 
bucketColDropped), bucket))
-              }
-              val serializer = new CommitMessageSerializer
-              
write.prepareCommit().asScala.map(serializer.serialize).toIterator
-            } finally {
-              write.close()
+      if (!isUnawareBucketTable) {
+        // unaware bucket mode, we don't shuffle while writing
+        df = df.repartition(partitionCols ++ Seq(col(BUCKET_COL)): _*)
+      }

Review Comment:
   Suggestion: move this `repartition` logic outside of the `val commitMessages = 
...` block.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to