This is an automated email from the ASF dual-hosted git repository.
lzljs3620320 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/paimon.git
The following commit(s) were added to refs/heads/master by this push:
new 88ad1bffe6 [spark] optimize data-evolution performance (#7054)
88ad1bffe6 is described below
commit 88ad1bffe68c65c2c7a8d69f577ce0ae3453ec08
Author: shidayang <[email protected]>
AuthorDate: Thu Jan 15 17:52:51 2026 +0800
[spark] optimize data-evolution performance (#7054)
---
.../spark/commands/MergeIntoPaimonDataEvolutionTable.scala | 10 ++++------
1 file changed, 4 insertions(+), 6 deletions(-)
diff --git a/paimon-spark/paimon-spark-common/src/main/scala/org/apache/paimon/spark/commands/MergeIntoPaimonDataEvolutionTable.scala b/paimon-spark/paimon-spark-common/src/main/scala/org/apache/paimon/spark/commands/MergeIntoPaimonDataEvolutionTable.scala
index e2eaed8fe5..ff6b8ccbb4 100644
--- a/paimon-spark/paimon-spark-common/src/main/scala/org/apache/paimon/spark/commands/MergeIntoPaimonDataEvolutionTable.scala
+++ b/paimon-spark/paimon-spark-common/src/main/scala/org/apache/paimon/spark/commands/MergeIntoPaimonDataEvolutionTable.scala
@@ -41,9 +41,9 @@ import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Relation
import org.apache.spark.sql.functions.{col, udf}
import org.apache.spark.sql.types.StructType
+import scala.collection.{immutable, mutable}
import scala.collection.JavaConverters._
import scala.collection.Searching.{search, Found, InsertionPoint}
-import scala.collection.mutable
import scala.collection.mutable.ListBuffer
/** Command for Merge Into for Data Evolution paimon table. */
@@ -75,7 +75,7 @@ case class MergeIntoPaimonDataEvolutionTable(
import MergeIntoPaimonDataEvolutionTable._
override val table: FileStoreTable =
v2Table.getTable.asInstanceOf[FileStoreTable]
- private val firstRowIds: Seq[Long] = table
+ private val firstRowIds: immutable.IndexedSeq[Long] = table
.store()
.newScan()
.withManifestEntryFilter(
@@ -90,7 +90,7 @@ case class MergeIntoPaimonDataEvolutionTable(
.map(file => file.file().firstRowId().asInstanceOf[Long])
.distinct
.sorted
- .toSeq
+ .toIndexedSeq
private val firstRowIdToBlobFirstRowIds = {
val map = new mutable.HashMap[Long, List[Long]]()
@@ -516,9 +516,7 @@ object MergeIntoPaimonDataEvolutionTable {
final private val redundantColumns =
Seq(PaimonMetadataColumn.ROW_ID.toAttribute)
- def floorBinarySearch(sortedSeq: Seq[Long], value: Long): Long = {
- val indexed = sortedSeq.toIndexedSeq
-
+ def floorBinarySearch(indexed: immutable.IndexedSeq[Long], value: Long): Long = {
if (indexed.isEmpty) {
throw new IllegalArgumentException("The input sorted sequence is empty.")
}