Repository: spark
Updated Branches:
  refs/heads/master d0990e3df -> c3c45cbd7
[SPARK-25540][SQL][PYSPARK] Make HiveContext in PySpark behave the same as in Scala.

## What changes were proposed in this pull request?

In Scala, `HiveContext` sets the config `spark.sql.catalogImplementation` to `hive` on the given `SparkContext` and then passes that `SparkContext` to `SparkSession.builder`. `HiveContext` in PySpark should behave the same way.

## How was this patch tested?

Existing tests.

Closes #22552 from ueshin/issues/SPARK-25540/hive_context.

Authored-by: Takuya UESHIN <ues...@databricks.com>
Signed-off-by: Wenchen Fan <wenc...@databricks.com>

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/c3c45cbd
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/c3c45cbd
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/c3c45cbd

Branch: refs/heads/master
Commit: c3c45cbd76d91d591d98cf8411fcfd30079f5969
Parents: d0990e3
Author: Takuya UESHIN <ues...@databricks.com>
Authored: Thu Sep 27 09:51:20 2018 +0800
Committer: Wenchen Fan <wenc...@databricks.com>
Committed: Thu Sep 27 09:51:20 2018 +0800

----------------------------------------------------------------------
 python/pyspark/sql/context.py |  3 ++-
 python/pyspark/sql/session.py | 19 ++++++++++++++-----
 2 files changed, 16 insertions(+), 6 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/c3c45cbd/python/pyspark/sql/context.py
----------------------------------------------------------------------
diff --git a/python/pyspark/sql/context.py b/python/pyspark/sql/context.py
index 9c094dd..1938965 100644
--- a/python/pyspark/sql/context.py
+++ b/python/pyspark/sql/context.py
@@ -485,7 +485,8 @@ class HiveContext(SQLContext):
             "SparkSession.builder.enableHiveSupport().getOrCreate() instead.",
             DeprecationWarning)
         if jhiveContext is None:
-            sparkSession = SparkSession.builder.enableHiveSupport().getOrCreate()
+            sparkContext._conf.set("spark.sql.catalogImplementation", "hive")
+            sparkSession = SparkSession.builder._sparkContext(sparkContext).getOrCreate()
         else:
             sparkSession = SparkSession(sparkContext, jhiveContext.sparkSession())
         SQLContext.__init__(self, sparkContext, sparkSession, jhiveContext)
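For illustration only (not part of the commit): a minimal sketch of the behavior this patch gives `HiveContext`, assuming a local PySpark build with Hive support and no pre-existing `SparkSession`; the master and app name are arbitrary, and `_conf` is an internal `SparkContext` attribute.

```python
from pyspark import SparkConf, SparkContext
from pyspark.sql import HiveContext

# Build (or reuse) a SparkContext; master and app name are arbitrary here.
conf = SparkConf().setMaster("local[2]").setAppName("hive-context-demo")
sc = SparkContext.getOrCreate(conf)

# After this patch, HiveContext sets the catalog implementation on the
# given SparkContext's conf and reuses that same context, as Scala does.
# (Assumes no SparkSession existed before this call.)
hc = HiveContext(sc)
assert sc._conf.get("spark.sql.catalogImplementation") == "hive"
assert hc.sparkSession.sparkContext is sc
```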
http://git-wip-us.apache.org/repos/asf/spark/blob/c3c45cbd/python/pyspark/sql/session.py
----------------------------------------------------------------------
diff --git a/python/pyspark/sql/session.py b/python/pyspark/sql/session.py
index 51a38eb..a5e2872 100644
--- a/python/pyspark/sql/session.py
+++ b/python/pyspark/sql/session.py
@@ -83,6 +83,7 @@ class SparkSession(object):
 
         _lock = RLock()
         _options = {}
+        _sc = None
 
         @since(2.0)
         def config(self, key=None, value=None, conf=None):
@@ -139,6 +140,11 @@ class SparkSession(object):
             """
             return self.config("spark.sql.catalogImplementation", "hive")
 
+        def _sparkContext(self, sc):
+            with self._lock:
+                self._sc = sc
+                return self
+
         @since(2.0)
         def getOrCreate(self):
             """Gets an existing :class:`SparkSession` or, if there is no existing one, creates a
@@ -167,11 +173,14 @@ class SparkSession(object):
                 from pyspark.conf import SparkConf
                 session = SparkSession._instantiatedSession
                 if session is None or session._sc._jsc is None:
-                    sparkConf = SparkConf()
-                    for key, value in self._options.items():
-                        sparkConf.set(key, value)
-                    sc = SparkContext.getOrCreate(sparkConf)
-                    # This SparkContext may be an existing one.
+                    if self._sc is not None:
+                        sc = self._sc
+                    else:
+                        sparkConf = SparkConf()
+                        for key, value in self._options.items():
+                            sparkConf.set(key, value)
+                        sc = SparkContext.getOrCreate(sparkConf)
+                    # This SparkContext may be an existing one.
                     for key, value in self._options.items():
                         # we need to propagate the confs
                         # before we create the SparkSession. Otherwise, confs like
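For illustration only (not part of the commit): a rough sketch of the new builder path that `HiveContext` relies on. `_sparkContext` is an internal helper added by this patch, not public API, and the final assertion assumes no other `SparkSession` was created first.

```python
from pyspark import SparkContext
from pyspark.sql import SparkSession

# An existing SparkContext, e.g. one created earlier by the application.
sc = SparkContext.getOrCreate()

# getOrCreate() now reuses an explicitly supplied SparkContext instead of
# constructing one from the builder's SparkConf options.
spark = SparkSession.builder._sparkContext(sc).getOrCreate()
assert spark.sparkContext is sc
```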