Github user holdenk commented on a diff in the pull request:

    https://github.com/apache/spark/pull/13599#discussion_r160069334
  
    --- Diff: 
core/src/main/scala/org/apache/spark/api/python/VirtualEnvFactory.scala ---
    @@ -0,0 +1,151 @@
    +/*
    + * Licensed to the Apache Software Foundation (ASF) under one or more
    + * contributor license agreements.  See the NOTICE file distributed with
    + * this work for additional information regarding copyright ownership.
    + * The ASF licenses this file to You under the Apache License, Version 2.0
    + * (the "License"); you may not use this file except in compliance with
    + * the License.  You may obtain a copy of the License at
    + *
    + *    http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS,
    + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + * See the License for the specific language governing permissions and
    + * limitations under the License.
    + */
    +
    +package org.apache.spark.api.python
    +
    +import java.io.File
    +import java.util.{Map => JMap}
    +import java.util.Arrays
    +import java.util.concurrent.atomic.AtomicInteger
    +
    +import scala.collection.JavaConverters._
    +
    +import com.google.common.io.Files
    +
    +import org.apache.spark.SparkConf
    +import org.apache.spark.internal.Logging
    +
    +
    +private[spark] class VirtualEnvFactory(pythonExec: String, conf: 
SparkConf, isDriver: Boolean)
    +  extends Logging {
    +
    +  private var virtualEnvType = conf.get("spark.pyspark.virtualenv.type", 
"native")
    +  private var virtualEnvPath = 
conf.get("spark.pyspark.virtualenv.bin.path", "")
    +  private var virtualEnvName: String = _
    +  private var virtualPythonExec: String = _
    +  private val VIRTUALENV_ID = new AtomicInteger()
    +  private var isLauncher: Boolean = false
    +
    +  // Used by the launcher when the user wants to use virtualenv in the pyspark shell. 
The launcher needs this class
    +  // to create the virtualenv for the driver.
    +  def this(pythonExec: String, properties: JMap[String, String], isDriver: 
java.lang.Boolean) {
    +    this(pythonExec, new SparkConf(), isDriver)
    +    properties.asScala.foreach(entry => this.conf.set(entry._1, entry._2))
    +    virtualEnvType = conf.get("spark.pyspark.virtualenv.type", "native")
    +    virtualEnvPath = conf.get("spark.pyspark.virtualenv.bin.path")
    +    this.isLauncher = true
    +  }
    +
    +  /*
    +   * Create virtualenv using native virtualenv or conda
    +   *
    +   * Native Virtualenv:
    +   *   -  Execute command: virtualenv -p <pythonExec> --no-site-packages 
<virtualenvName>
    +   *   -  Execute command: python -m pip --cache-dir <cache-dir> install 
-r <requirement_file>
    +   *
    +   * Conda
    +   *   -  Execute command: conda create --prefix <prefix> --file 
<requirement_file> -y
    +   *
    +   */
    +  def setupVirtualEnv(): String = {
    +    logInfo("Start to setup virtualenv...")
    +    logDebug("user.dir=" + System.getProperty("user.dir"))
    +    logDebug("user.home=" + System.getProperty("user.home"))
    +
    +    require(virtualEnvType == "native" || virtualEnvType == "conda",
    +      s"VirtualEnvType: ${virtualEnvType} is not supported." )
    +    require(new File(virtualEnvPath).exists(),
    +      s"VirtualEnvPath: ${virtualEnvPath} is not defined or doesn't 
exist.")
    +    // Use a temp directory for virtualenv in the following cases:
    +    // 1. driver of pyspark shell
    +    // 2. driver of yarn-client mode
    +    // otherwise create the virtualenv folder under the executor working 
directory.
    --- End diff --
    
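    Maybe we can clarify (in the comment) why we do the logic this way, i.e. why a temp directory is used for the driver cases and the executor working directory otherwise?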


---

---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org

Reply via email to