Re: [PR] [spark] Support report scan ordering [paimon]

via GitHub Sun, 25 Aug 2024 22:30:12 -0700


JingsongLi commented on code in PR #4026:
URL: https://github.com/apache/paimon/pull/4026#discussion_r1730686410



##########
paimon-spark/paimon-spark-common/src/main/scala/org/apache/paimon/spark/PaimonScan.scala:
##########
@@ -72,17 +73,56 @@ case class PaimonScan(
     }
   }
 
+  private def shouldDoBucketedScan: Boolean = {
+    !bucketedScanDisabled && conf.v2BucketingEnabled && extractBucketTransform.isDefined
+  }
+
+  // Since Spark 3.3
   override def outputPartitioning: Partitioning = {
     extractBucketTransform
       .map(bucket => new KeyGroupedPartitioning(Array(bucket), lazyInputPartitions.size))
       .getOrElse(new UnknownPartitioning(0))
   }
 
-  override def getInputPartitions(splits: Array[Split]): Seq[PaimonInputPartition] = {
+  // Since Spark 3.4
+  override def outputOrdering(): Array[SortOrder] = {
     if (
-      bucketedScanDisabled || !conf.v2BucketingEnabled || extractBucketTransform.isEmpty ||
-      splits.exists(!_.isInstanceOf[DataSplit])
+      !shouldDoBucketedScan || lazyInputPartitions.exists(
+        !_.isInstanceOf[PaimonBucketedInputPartition])
     ) {
+      return Array.empty
+    }
+
+    val primaryKeys = table match {
+      case fileStoreTable: FileStoreTable => fileStoreTable.primaryKeys().asScala
+      case _ => Seq.empty
+    }
+    if (primaryKeys.isEmpty) {
+      return Array.empty
+    }
+
+    val allSplitsKeepOrdering = lazyInputPartitions.toSeq

Review Comment:
   Merging only occurs within a single split.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscr...@paimon.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

Re: [PR] [spark] Support report scan ordering [paimon]

Reply via email to