JingsongLi commented on code in PR #4026: URL: https://github.com/apache/paimon/pull/4026#discussion_r1730686410
##########
paimon-spark/paimon-spark-common/src/main/scala/org/apache/paimon/spark/PaimonScan.scala:
##########
@@ -72,17 +73,56 @@ case class PaimonScan(
     }
   }
 
+  private def shouldDoBucketedScan: Boolean = {
+    !bucketedScanDisabled && conf.v2BucketingEnabled && extractBucketTransform.isDefined
+  }
+
+  // Since Spark 3.3
   override def outputPartitioning: Partitioning = {
     extractBucketTransform
       .map(bucket => new KeyGroupedPartitioning(Array(bucket), lazyInputPartitions.size))
       .getOrElse(new UnknownPartitioning(0))
   }
 
-  override def getInputPartitions(splits: Array[Split]): Seq[PaimonInputPartition] = {
+  // Since Spark 3.4
+  override def outputOrdering(): Array[SortOrder] = {
     if (
-      bucketedScanDisabled || !conf.v2BucketingEnabled || extractBucketTransform.isEmpty ||
-      splits.exists(!_.isInstanceOf[DataSplit])
+      !shouldDoBucketedScan || lazyInputPartitions.exists(
+        !_.isInstanceOf[PaimonBucketedInputPartition])
     ) {
+      return Array.empty
+    }
+
+    val primaryKeys = table match {
+      case fileStoreTable: FileStoreTable => fileStoreTable.primaryKeys().asScala
+      case _ => Seq.empty
+    }
+    if (primaryKeys.isEmpty) {
+      return Array.empty
+    }
+
+    val allSplitsKeepOrdering = lazyInputPartitions.toSeq

Review Comment:
   Merging only occurs within a single split.

-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscr...@paimon.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org