This is an automated email from the ASF dual-hosted git repository.

gurwls223 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new a2dbfeefa98 [SPARK-41056][R] Fix new R_LIBS_SITE behavior introduced 
in R 4.2
a2dbfeefa98 is described below

commit a2dbfeefa98e561734c10fc25cd44017882313d8
Author: Hyukjin Kwon <gurwls...@apache.org>
AuthorDate: Wed Nov 9 16:41:07 2022 +0900

    [SPARK-41056][R] Fix new R_LIBS_SITE behavior introduced in R 4.2
    
    ### What changes were proposed in this pull request?
    
    This PR proposes to keep the `R_LIBS_SITE` as was. It has been changed from 
R 4.2.
    
    ### Why are the changes needed?
    
    To keep the behaviour the same as before. This especially affects the 
external libraries installed on SparkR worker sides. In particular, this can break 
the user-installed libraries. See the paths below:
    
    **R 4.2**
    
    ```r
    # R
    > Sys.getenv("R_LIBS_SITE")
    [1] 
"/usr/local/lib/R/site-library/:/usr/lib/R/site-library:/usr/lib/R/library'"
    # R --vanilla
    > Sys.getenv("R_LIBS_SITE")
    [1] "/usr/lib/R/site-library"
    ```
    
    **R 4.1**
    
    ```r
    # R
    > Sys.getenv("R_LIBS_SITE")
    [1] 
"/usr/local/lib/R/site-library:/usr/lib/R/site-library:/usr/lib/R/library"
    # R --vanilla
    > Sys.getenv("R_LIBS_SITE")
    [1] 
"/usr/local/lib/R/site-library:/usr/lib/R/site-library:/usr/lib/R/library"
    ```
    
    ### Does this PR introduce _any_ user-facing change?
    
    Yes. Without this fix, under R 4.2, user-installed libraries won't be found in SparkR workers.
    
    ### How was this patch tested?
    
    Manually tested, unit test added. It's difficult to add an e2e test.
    
    I also manually tested `getROptions` in the Scala shell.
    
    Closes #38570 from HyukjinKwon/SPARK-41056.
    
    Authored-by: Hyukjin Kwon <gurwls...@apache.org>
    Signed-off-by: Hyukjin Kwon <gurwls...@apache.org>
---
 .../scala/org/apache/spark/api/r/BaseRRunner.scala | 12 +++++++-
 .../org/apache/spark/api/r/BaseRRunnerSuite.scala  | 36 ++++++++++++++++++++++
 2 files changed, 47 insertions(+), 1 deletion(-)

diff --git a/core/src/main/scala/org/apache/spark/api/r/BaseRRunner.scala 
b/core/src/main/scala/org/apache/spark/api/r/BaseRRunner.scala
index fdfe5f5b41d..0f93873c06a 100644
--- a/core/src/main/scala/org/apache/spark/api/r/BaseRRunner.scala
+++ b/core/src/main/scala/org/apache/spark/api/r/BaseRRunner.scala
@@ -278,6 +278,16 @@ private[r] object BaseRRunner {
     thread
   }
 
+  private[r] def getROptions(rCommand: String): String = Try {
+    val result = scala.sys.process.Process(Seq(rCommand, "--version")).!!
+    "([0-9]+)\\.([0-9]+)\\.([0-9]+)".r.findFirstMatchIn(result).map { m =>
+      val major = m.group(1).toInt
+      val minor = m.group(2).toInt
+      val shouldUseNoRestore = major > 4 || major == 4 && minor >= 2
+      if (shouldUseNoRestore) "--no-restore" else "--vanilla"
+    }.getOrElse("--vanilla")
+  }.getOrElse("--vanilla")
+
   private def createRProcess(port: Int, script: String): BufferedStreamThread 
= {
     // "spark.sparkr.r.command" is deprecated and replaced by 
"spark.r.command",
     // but kept here for backward compatibility.
@@ -286,7 +296,7 @@ private[r] object BaseRRunner {
     rCommand = sparkConf.get(R_COMMAND).orElse(Some(rCommand)).get
 
     val rConnectionTimeout = sparkConf.get(R_BACKEND_CONNECTION_TIMEOUT)
-    val rOptions = "--vanilla"
+    val rOptions = getROptions(rCommand)
     val rLibDir = RUtils.sparkRPackagePath(isDriver = false)
     val rExecScript = rLibDir(0) + "/SparkR/worker/" + script
     val pb = new ProcessBuilder(Arrays.asList(rCommand, rOptions, rExecScript))
diff --git a/core/src/test/scala/org/apache/spark/api/r/BaseRRunnerSuite.scala 
b/core/src/test/scala/org/apache/spark/api/r/BaseRRunnerSuite.scala
new file mode 100644
index 00000000000..01dd7df3a1a
--- /dev/null
+++ b/core/src/test/scala/org/apache/spark/api/r/BaseRRunnerSuite.scala
@@ -0,0 +1,36 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.api.r
+
+import org.apache.spark.SparkFunSuite
+import org.apache.spark.TestUtils.testCommandAvailable
+import org.apache.spark.internal.config.R.SPARKR_COMMAND
+
+class BaseRRunnerSuite extends SparkFunSuite {
+  test("Retrieve R options from R command") {
+    val rCommand = SPARKR_COMMAND.defaultValue.get
+    assume(testCommandAvailable(rCommand))
+    assert(BaseRRunner.getROptions(rCommand) === "--no-restore"
+      || BaseRRunner.getROptions(rCommand) === "--vanilla")
+  }
+
+  test("Should return the default value if the R command does not exist") {
+    assume(!testCommandAvailable("nonexistent"))
+    assert(BaseRRunner.getROptions("nonexistent") === "--vanilla")
+  }
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

Reply via email to