vicennial commented on code in PR #48120:
URL: https://github.com/apache/spark/pull/48120#discussion_r1815495236
##########
sql/core/src/main/scala/org/apache/spark/sql/artifact/ArtifactManager.scala:
##########
@@ -68,18 +69,66 @@ class ArtifactManager(session: SparkSession) extends Logging {
s"$artifactRootURI${File.separator}${session.sessionUUID}")
// The base directory/URI where all class file artifacts are stored for this `sessionUUID`.
- protected[artifact] val (classDir, classURI): (Path, String) =
+ protected[artifact] val (classDir, replClassURI): (Path, String) =
(ArtifactUtils.concatenatePaths(artifactPath, "classes"),
s"$artifactURI${File.separator}classes${File.separator}")
- protected[artifact] val state: JobArtifactState =
- JobArtifactState(session.sessionUUID, Option(classURI))
- def withResources[T](f: => T): T = {
- Utils.withContextClassLoader(classloader) {
- JobArtifactSet.withActiveJobArtifactState(state) {
+ private lazy val sessionIsolated =
+ session.conf.get("spark.session.isolate.artifacts", "true") == "true"
+ private lazy val replIsolated =
+ session.conf.get("spark.repl.isolate.artifacts", "false") == "true"
Review Comment:
(please also add documentation for each config)
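For illustration, the documented entries could look roughly like this (a sketch only; the final names, wording, and `version` are up to you):

```scala
import org.apache.spark.internal.config.ConfigBuilder

// Sketch: documented config entries for the two isolation flags.
val SESSION_ISOLATE_ARTIFACTS = ConfigBuilder("spark.session.isolate.artifacts")
  .doc("When true, artifacts (jars, files, class files) added through this session are " +
    "visible only to jobs launched from this session.")
  .version("4.0.0")
  .booleanConf
  .createWithDefault(true)

val REPL_ISOLATE_ARTIFACTS = ConfigBuilder("spark.repl.isolate.artifacts")
  .doc("When true, classes generated by this session's REPL are served from a " +
    "session-specific class directory. Requires spark.session.isolate.artifacts to be true.")
  .version("4.0.0")
  .booleanConf
  .createWithDefault(false)
```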
##########
sql/core/src/main/scala/org/apache/spark/sql/artifact/ArtifactManager.scala:
##########
@@ -68,18 +69,66 @@ class ArtifactManager(session: SparkSession) extends Logging {
s"$artifactRootURI${File.separator}${session.sessionUUID}")
// The base directory/URI where all class file artifacts are stored for this `sessionUUID`.
- protected[artifact] val (classDir, classURI): (Path, String) =
+ protected[artifact] val (classDir, replClassURI): (Path, String) =
(ArtifactUtils.concatenatePaths(artifactPath, "classes"),
s"$artifactURI${File.separator}classes${File.separator}")
- protected[artifact] val state: JobArtifactState =
- JobArtifactState(session.sessionUUID, Option(classURI))
- def withResources[T](f: => T): T = {
- Utils.withContextClassLoader(classloader) {
- JobArtifactSet.withActiveJobArtifactState(state) {
+ private lazy val sessionIsolated =
+ session.conf.get("spark.session.isolate.artifacts", "true") == "true"
+ private lazy val replIsolated =
+ session.conf.get("spark.repl.isolate.artifacts", "false") == "true"
+
+ protected[sql] lazy val state: JobArtifactState = {
+ (sessionIsolated, replIsolated) match {
+ case (true, true) => JobArtifactState(session.sessionUUID, Some(replClassURI))
+ case (true, false) => JobArtifactState(session.sessionUUID, None)
+ case (false, true) => throw SparkException.internalError(
+ "To enable REPL isolation, session isolation must also be enabled.")
+ case (false, false) => null
+ }
+ }
+
+ private val shouldApplyClassLoader = new AtomicBoolean(false)
Review Comment:
Can we add a short blurb on when this boolean should be set to true?
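For instance, something along these lines (I'm guessing at the exact semantics and call sites, so please correct as needed):

```scala
import java.util.concurrent.atomic.AtomicBoolean

// Sketch: flipped to true once an artifact that affects class resolution (a
// class file or jar) is added to this session; until then the default
// classloader suffices and we can skip swapping in the session-specific one.
// Written by the artifact-registration path, read (never reset) wherever user
// code is executed.
private val shouldApplyClassLoader = new AtomicBoolean(false)
```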
##########
sql/core/src/main/scala/org/apache/spark/sql/artifact/ArtifactManager.scala:
##########
@@ -68,18 +69,66 @@ class ArtifactManager(session: SparkSession) extends Logging {
s"$artifactRootURI${File.separator}${session.sessionUUID}")
// The base directory/URI where all class file artifacts are stored for this `sessionUUID`.
- protected[artifact] val (classDir, classURI): (Path, String) =
+ protected[artifact] val (classDir, replClassURI): (Path, String) =
(ArtifactUtils.concatenatePaths(artifactPath, "classes"),
s"$artifactURI${File.separator}classes${File.separator}")
- protected[artifact] val state: JobArtifactState =
- JobArtifactState(session.sessionUUID, Option(classURI))
- def withResources[T](f: => T): T = {
- Utils.withContextClassLoader(classloader) {
- JobArtifactSet.withActiveJobArtifactState(state) {
+ private lazy val sessionIsolated =
+ session.conf.get("spark.session.isolate.artifacts", "true") == "true"
+ private lazy val replIsolated =
+ session.conf.get("spark.repl.isolate.artifacts", "false") == "true"
Review Comment:
Why are these not simply defined as SQL configs with default values?
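If they were registered as SQL configs (e.g. defined through `SQLConf.buildConf` with the docs sketched above), the read sites would also get typed, default-aware access instead of string comparison. Roughly:

```scala
// Sketch: typed reads via SQLConf rather than comparing raw strings.
// Assumes SESSION_ISOLATE_ARTIFACTS / REPL_ISOLATE_ARTIFACTS are registered
// ConfigEntry[Boolean]s (hypothetical names from the sketch above).
private lazy val sessionIsolated: Boolean =
  session.sessionState.conf.getConf(SESSION_ISOLATE_ARTIFACTS)
private lazy val replIsolated: Boolean =
  session.sessionState.conf.getConf(REPL_ISOLATE_ARTIFACTS)
```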
##########
sql/core/src/main/scala/org/apache/spark/sql/artifact/ArtifactManager.scala:
##########
@@ -68,18 +69,66 @@ class ArtifactManager(session: SparkSession) extends Logging {
s"$artifactRootURI${File.separator}${session.sessionUUID}")
// The base directory/URI where all class file artifacts are stored for this `sessionUUID`.
- protected[artifact] val (classDir, classURI): (Path, String) =
+ protected[artifact] val (classDir, replClassURI): (Path, String) =
(ArtifactUtils.concatenatePaths(artifactPath, "classes"),
s"$artifactURI${File.separator}classes${File.separator}")
- protected[artifact] val state: JobArtifactState =
- JobArtifactState(session.sessionUUID, Option(classURI))
- def withResources[T](f: => T): T = {
- Utils.withContextClassLoader(classloader) {
- JobArtifactSet.withActiveJobArtifactState(state) {
+ private lazy val sessionIsolated =
+ session.conf.get("spark.session.isolate.artifacts", "true") == "true"
+ private lazy val replIsolated =
+ session.conf.get("spark.repl.isolate.artifacts", "false") == "true"
+
+ protected[sql] lazy val state: JobArtifactState = {
+ (sessionIsolated, replIsolated) match {
+ case (true, true) => JobArtifactState(session.sessionUUID, Some(replClassURI))
+ case (true, false) => JobArtifactState(session.sessionUUID, None)
+ case (false, true) => throw SparkException.internalError(
+ "To enable REPL isolation, session isolation must also be enabled.")
Review Comment:
Let's also make sure we capture this dependency in the config definitions. Further, are these going to be static confs?
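Since a cross-config constraint can't be expressed in a single entry's `checkValue`, one option is to keep the check but fail fast when the manager is constructed rather than lazily on first use of `state`. A rough sketch:

```scala
// Sketch: eager validation of the REPL-isolation dependency at construction
// time, reusing the same error as the match arm above.
private def validateIsolationConfs(): Unit = {
  if (replIsolated && !sessionIsolated) {
    throw SparkException.internalError(
      "To enable REPL isolation, session isolation must also be enabled.")
  }
}
validateIsolationConfs()
```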