WweiL commented on code in PR #48355:
URL: https://github.com/apache/spark/pull/48355#discussion_r1880824115


##########
sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/RocksDB.scala:
##########
@@ -1533,3 +1723,40 @@ case class AcquiredThreadInfo(
   }
 }
 
+/**
+ * A helper class to manage the lineage information when checkpoint unique id is enabled.
+ * "lineage" is an array of LineageItem (version, uniqueId) pair.
+ *
+ * The first item of "lineage" should normally be the version of a snapshot, except
+ * for the first few versions, because they are loaded solely from changelog files
+ * (e.g. with the default minDeltasForSnapshot, there is only 1_uuid1.changelog, no 1_uuid1.zip).
+ *
+ * The last item of "lineage" corresponds to one version before the to-be-committed version.
+ */
+private[sql] class RocksDBLineageManager {
+  @volatile private var lineage: Array[LineageItem] = Array.empty
+
+  override def toString: String = lineage.map {
+    case LineageItem(version, uuid) => s"$version: $uuid"
+  }.mkString(" ")
+
+  def appendLineageItem(item: LineageItem): Unit = {
+    lineage = lineage :+ item
+  }
+
+  def resetLineage(newLineage: Array[LineageItem]): Unit = {
+    lineage = newLineage
+  }
+
+  def getLineageForCurrVersion(): Array[LineageItem] = {
+    lineage.clone()
+  }
+
+  def contains(item: LineageItem): Boolean = {
+    lineage.contains(item)
+  }

Review Comment:
   It is controlled by this config: 
   
https://github.com/apache/spark/blob/d0dbc6c5e5c44f64c3f13e676e0fb468a3ae7f57/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala#L2135-L2142
   
   The default is 10; I believe we store at most 2x the min_delta_version here, so it should be a fairly small array.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to