This is an automated email from the ASF dual-hosted git repository. gurwls223 pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new a2dbfeefa98 [SPARK-41056][R] Fix new R_LIBS_SITE behavior introduced in R 4.2 a2dbfeefa98 is described below commit a2dbfeefa98e561734c10fc25cd44017882313d8 Author: Hyukjin Kwon <gurwls...@apache.org> AuthorDate: Wed Nov 9 16:41:07 2022 +0900 [SPARK-41056][R] Fix new R_LIBS_SITE behavior introduced in R 4.2 ### What changes were proposed in this pull request? This PR proposes to keep the `R_LIBS_SITE` as was. It has been changed from R 4.2. ### Why are the changes needed? To keep the behaviour the same as before. This especially affects the external libraries installed on SparkR worker sides. Especially this can break the user-installed libraries. See the paths below: **R 4.2** ```r # R > Sys.getenv("R_LIBS_SITE") [1] "/usr/local/lib/R/site-library/:/usr/lib/R/site-library:/usr/lib/R/library'" # R --vanilla > Sys.getenv("R_LIBS_SITE") [1] "/usr/lib/R/site-library" ``` **R 4.1** ```r # R > Sys.getenv("R_LIBS_SITE") [1] "/usr/local/lib/R/site-library:/usr/lib/R/site-library:/usr/lib/R/library" # R --vanilla > Sys.getenv("R_LIBS_SITE") [1] "/usr/local/lib/R/site-library:/usr/lib/R/site-library:/usr/lib/R/library" ``` ### Does this PR introduce _any_ user-facing change? Yes. With R 4.2, user-installed libraries won't be found in SparkR workers. ### How was this patch tested? Manually tested, unittest added. It's difficult to add an e2e test. I also manually tested `getROptions` in the Scala shell. Closes #38570 from HyukjinKwon/SPARK-41056. 
Authored-by: Hyukjin Kwon <gurwls...@apache.org> Signed-off-by: Hyukjin Kwon <gurwls...@apache.org> --- .../scala/org/apache/spark/api/r/BaseRRunner.scala | 12 +++++++- .../org/apache/spark/api/r/BaseRRunnerSuite.scala | 36 ++++++++++++++++++++++ 2 files changed, 47 insertions(+), 1 deletion(-) diff --git a/core/src/main/scala/org/apache/spark/api/r/BaseRRunner.scala b/core/src/main/scala/org/apache/spark/api/r/BaseRRunner.scala index fdfe5f5b41d..0f93873c06a 100644 --- a/core/src/main/scala/org/apache/spark/api/r/BaseRRunner.scala +++ b/core/src/main/scala/org/apache/spark/api/r/BaseRRunner.scala @@ -278,6 +278,16 @@ private[r] object BaseRRunner { thread } + private[r] def getROptions(rCommand: String): String = Try { + val result = scala.sys.process.Process(Seq(rCommand, "--version")).!! + "([0-9]+)\\.([0-9]+)\\.([0-9]+)".r.findFirstMatchIn(result).map { m => + val major = m.group(1).toInt + val minor = m.group(2).toInt + val shouldUseNoRestore = major > 4 || major == 4 && minor >= 2 + if (shouldUseNoRestore) "--no-restore" else "--vanilla" + }.getOrElse("--vanilla") + }.getOrElse("--vanilla") + private def createRProcess(port: Int, script: String): BufferedStreamThread = { // "spark.sparkr.r.command" is deprecated and replaced by "spark.r.command", // but kept here for backward compatibility. 
@@ -286,7 +296,7 @@ private[r] object BaseRRunner { rCommand = sparkConf.get(R_COMMAND).orElse(Some(rCommand)).get val rConnectionTimeout = sparkConf.get(R_BACKEND_CONNECTION_TIMEOUT) - val rOptions = "--vanilla" + val rOptions = getROptions(rCommand) val rLibDir = RUtils.sparkRPackagePath(isDriver = false) val rExecScript = rLibDir(0) + "/SparkR/worker/" + script val pb = new ProcessBuilder(Arrays.asList(rCommand, rOptions, rExecScript)) diff --git a/core/src/test/scala/org/apache/spark/api/r/BaseRRunnerSuite.scala b/core/src/test/scala/org/apache/spark/api/r/BaseRRunnerSuite.scala new file mode 100644 index 00000000000..01dd7df3a1a --- /dev/null +++ b/core/src/test/scala/org/apache/spark/api/r/BaseRRunnerSuite.scala @@ -0,0 +1,36 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.api.r + +import org.apache.spark.SparkFunSuite +import org.apache.spark.TestUtils.testCommandAvailable +import org.apache.spark.internal.config.R.SPARKR_COMMAND + +class BaseRRunnerSuite extends SparkFunSuite { + test("Retrieve R options from R command") { + val rCommand = SPARKR_COMMAND.defaultValue.get + assume(testCommandAvailable(rCommand)) + assert(BaseRRunner.getROptions(rCommand) === "--no-restore" + || BaseRRunner.getROptions(rCommand) === "--vanilla") + } + + test("Should return the default value if the R command does not exist") { + assume(!testCommandAvailable("nonexistent")) + assert(BaseRRunner.getROptions("nonexistent") === "--vanilla") + } +} --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org