Github user jiangxb1987 commented on a diff in the pull request:

    https://github.com/apache/spark/pull/18648#discussion_r132155005
  
    --- Diff: 
sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala 
---
    @@ -105,107 +105,69 @@ private[hive] class HiveClientImpl(
       // Create an internal session state for this HiveClientImpl.
       val state: SessionState = {
         val original = Thread.currentThread().getContextClassLoader
    -    // Switch to the initClassLoader.
    -    Thread.currentThread().setContextClassLoader(initClassLoader)
    -
    -    // Set up kerberos credentials for UserGroupInformation.loginUser 
within
    -    // current class loader
    -    if (sparkConf.contains("spark.yarn.principal") && 
sparkConf.contains("spark.yarn.keytab")) {
    -      val principalName = sparkConf.get("spark.yarn.principal")
    -      val keytabFileName = sparkConf.get("spark.yarn.keytab")
    -      if (!new File(keytabFileName).exists()) {
    -        throw new SparkException(s"Keytab file: ${keytabFileName}" +
    -          " specified in spark.yarn.keytab does not exist")
    -      } else {
    -        logInfo("Attempting to login to Kerberos" +
    -          s" using principal: ${principalName} and keytab: 
${keytabFileName}")
    -        UserGroupInformation.loginUserFromKeytab(principalName, 
keytabFileName)
    -      }
    -    }
    -
    -    def isCliSessionState(state: SessionState): Boolean = {
    -      var temp: Class[_] = if (state != null) state.getClass else null
    -      var found = false
    -      while (temp != null && !found) {
    -        found = temp.getName == 
"org.apache.hadoop.hive.cli.CliSessionState"
    -        temp = temp.getSuperclass
    +    if (clientLoader.isolationOn) {
    +      // Switch to the initClassLoader.
    +      Thread.currentThread().setContextClassLoader(initClassLoader)
    +      // Set up kerberos credentials for UserGroupInformation.loginUser 
within
    +      // current class loader
    +      if (sparkConf.contains("spark.yarn.principal") && 
sparkConf.contains("spark.yarn.keytab")) {
    +        val principal = sparkConf.get("spark.yarn.principal")
    +        val keytab = sparkConf.get("spark.yarn.keytab")
    +        SparkHadoopUtil.get.loginUserFromKeytab(principal, keytab)
           }
    -      found
    -    }
    -
    -    val ret = try {
    -      // originState will be created if not exists, will never be null
    -      val originalState = SessionState.get()
    -      if (isCliSessionState(originalState)) {
    -        // In `SparkSQLCLIDriver`, we have already started a 
`CliSessionState`,
    -        // which contains information like configurations from command 
line. Later
    -        // we call `SparkSQLEnv.init()` there, which would run into this 
part again.
    -        // so we should keep `conf` and reuse the existing instance of 
`CliSessionState`.
    -        originalState
    -      } else {
    -        val hiveConf = new HiveConf(classOf[SessionState])
    -        // 1: we set all confs in the hadoopConf to this hiveConf.
    -        // This hadoopConf contains user settings in Hadoop's 
core-site.xml file
    -        // and Hive's hive-site.xml file. Note, we load hive-site.xml file 
manually in
    -        // SharedState and put settings in this hadoopConf instead of 
relying on HiveConf
    -        // to load user settings. Otherwise, HiveConf's initialize method 
will override
    -        // settings in the hadoopConf. This issue only shows up when 
spark.sql.hive.metastore.jars
    -        // is not set to builtin. When spark.sql.hive.metastore.jars is 
builtin, the classpath
    -        // has hive-site.xml. So, HiveConf will use that to override its 
default values.
    -        hadoopConf.iterator().asScala.foreach { entry =>
    -          val key = entry.getKey
    -          val value = entry.getValue
    -          if (key.toLowerCase(Locale.ROOT).contains("password")) {
    -            logDebug(s"Applying Hadoop and Hive config to Hive Conf: 
$key=xxx")
    -          } else {
    -            logDebug(s"Applying Hadoop and Hive config to Hive Conf: 
$key=$value")
    -          }
    -          hiveConf.set(key, value)
    -        }
    -        // HiveConf is a Hadoop Configuration, which has a field of 
classLoader and
    -        // the initial value will be the current thread's context class 
loader
    -        // (i.e. initClassLoader at here).
    -        // We call initialConf.setClassLoader(initClassLoader) at here to 
make
    -        // this action explicit.
    -        hiveConf.setClassLoader(initClassLoader)
    -        // 2: we set all spark confs to this hiveConf.
    -        sparkConf.getAll.foreach { case (k, v) =>
    -          if (k.toLowerCase(Locale.ROOT).contains("password")) {
    -            logDebug(s"Applying Spark config to Hive Conf: $k=xxx")
    -          } else {
    -            logDebug(s"Applying Spark config to Hive Conf: $k=$v")
    -          }
    -          hiveConf.set(k, v)
    -        }
    -        // 3: we set all entries in config to this hiveConf.
    -        extraConfig.foreach { case (k, v) =>
    -          if (k.toLowerCase(Locale.ROOT).contains("password")) {
    -            logDebug(s"Applying extra config to HiveConf: $k=xxx")
    -          } else {
    -            logDebug(s"Applying extra config to HiveConf: $k=$v")
    -          }
    -          hiveConf.set(k, v)
    -        }
    -        val state = new SessionState(hiveConf)
    -        if (clientLoader.cachedHive != null) {
    -          Hive.set(clientLoader.cachedHive.asInstanceOf[Hive])
    -        }
    -        SessionState.start(state)
    -        state.out = new PrintStream(outputBuffer, true, "UTF-8")
    -        state.err = new PrintStream(outputBuffer, true, "UTF-8")
    -        state
    +      try {
    +        newState()
    +      } finally {
    +        Thread.currentThread().setContextClassLoader(original)
           }
    -    } finally {
    -      Thread.currentThread().setContextClassLoader(original)
    +    } else {
    +      Option(SessionState.get()).getOrElse(newState())
         }
    -    ret
       }
     
       // Log the default warehouse location.
       logInfo(
         s"Warehouse location for Hive client " +
           s"(version ${version.fullVersion}) is 
${conf.get("hive.metastore.warehouse.dir")}")
     
    +  private def newState(): SessionState = {
    +    val hiveConf = new HiveConf(classOf[SessionState])
    +    // HiveConf is a Hadoop Configuration, which has a field of 
classLoader and
    +    // the initial value will be the current thread's context class loader
    +    // (i.e. initClassLoader at here).
    +    // We call initialConf.setClassLoader(initClassLoader) at here to make
    +    // this action explicit.
    +    hiveConf.setClassLoader(initClassLoader)
    +
    +    // 1: Take all from the hadoopConf to this hiveConf.
    +    // This hadoopConf contains user settings in Hadoop's core-site.xml 
file
    +    // and Hive's hive-site.xml file. Note, we load hive-site.xml file 
manually in
    +    // SharedState and put settings in this hadoopConf instead of relying 
on HiveConf
    +    // to load user settings. Otherwise, HiveConf's initialize method will 
override
    +    // settings in the hadoopConf. This issue only shows up when 
spark.sql.hive.metastore.jars
    +    // is not set to builtin. When spark.sql.hive.metastore.jars is 
builtin, the classpath
    +    // has hive-site.xml. So, HiveConf will use that to override its 
default values.
    +    // 2: we set all spark confs to this hiveConf.
    +    // 3: we set all entries in config to this hiveConf.
    +    (hadoopConf.iterator().asScala.map(kv => kv.getKey -> kv.getValue)
    +      ++ sparkConf.getAll.toMap ++ extraConfig).foreach { case (k, v) =>
    +      if (k.toLowerCase(Locale.ROOT).contains("password")) {
    +        logDebug(s"Applying Spark config to Hive Conf: $k=xxx")
    --- End diff --
    
    This log message hard-codes "Applying Spark config", but the keys merged at this point may also originate from the Hadoop/Hive configuration (`hadoopConf`) or from `extraConfig`, so the message is misleading for those sources. Consider a neutral wording that covers all three, e.g. "Applying Hadoop/Hive/Spark and extra properties to Hive Conf: $k=...".


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at infrastructure@apache.org or file a JIRA ticket
with INFRA.
---

---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org

Reply via email to