Github user vanzin commented on a diff in the pull request:
https://github.com/apache/spark/pull/21368#discussion_r189964634
--- Diff: python/pyspark/sql/session.py ---
@@ -547,6 +547,40 @@ def _create_from_pandas_with_arrow(self, pdf, schema,
timezone):
df._schema = schema
return df
+ @staticmethod
+ def _create_shell_session():
+ """
+ Initialize a SparkSession for a pyspark shell session. This is
called from shell.py
+ to make error handling simpler without needing to declare local
variables in that
+ script, which would expose those to users.
+ """
+ import py4j
+ from pyspark.conf import SparkConf
+ from pyspark.context import SparkContext
+ try:
+ # Try to access HiveConf, it will raise exception if Hive is
not added
+ conf = SparkConf()
+ if conf.get('spark.sql.catalogImplementation', 'hive').lower()
== 'hive':
+ SparkContext._jvm.org.apache.hadoop.hive.conf.HiveConf()
+ return SparkSession.builder\
+ .enableHiveSupport()\
+ .getOrCreate()
+ else:
+ return SparkSession.builder.getOrCreate()
+ except py4j.protocol.Py4JError:
+ if conf.get('spark.sql.catalogImplementation', '').lower() ==
'hive':
+ warnings.warn("Fall back to non-hive support because
failing to access HiveConf, "
+ "please make sure you build spark with hive")
+
+ try:
+ return SparkSession.builder.getOrCreate()
--- End diff --
This is intentional, to keep the Python exception readable (see the
commit description for details).
The actual flow logic is unchanged.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]