spark git commit: [SPARK-24665][PYSPARK] Use SQLConf in PySpark to manage all sql configs

gurwls223 Sun, 01 Jul 2018 23:36:02 -0700

Repository: spark
Updated Branches:
  refs/heads/master f825847c8 -> 8f91c697e



[SPARK-24665][PYSPARK] Use SQLConf in PySpark to manage all sql configs

## What changes were proposed in this pull request?

Use SQLConf for PySpark to manage all sql configs, drop all the hard code in 
config usage.

## How was this patch tested?

Existing UT.

Author: Yuanjian Li <[email protected]>

Closes #21648 from xuanyuanking/SPARK-24665.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/8f91c697
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/8f91c697
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/8f91c697

Branch: refs/heads/master
Commit: 8f91c697e251423b826cd6ac4ddd9e2dac15b96e
Parents: f825847
Author: Yuanjian Li <[email protected]>
Authored: Mon Jul 2 14:35:37 2018 +0800
Committer: hyukjinkwon <[email protected]>
Committed: Mon Jul 2 14:35:37 2018 +0800

----------------------------------------------------------------------
 python/pyspark/sql/context.py                   |  5 +++
 python/pyspark/sql/dataframe.py                 | 42 +++++---------------
 .../org/apache/spark/sql/internal/SQLConf.scala |  6 +++
 3 files changed, 21 insertions(+), 32 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/8f91c697/python/pyspark/sql/context.py
----------------------------------------------------------------------
diff --git a/python/pyspark/sql/context.py b/python/pyspark/sql/context.py
index e9ec7ba..9c094dd 100644
--- a/python/pyspark/sql/context.py
+++ b/python/pyspark/sql/context.py
@@ -93,6 +93,11 @@ class SQLContext(object):
         """
         return self._jsqlContext
 
+    @property
+    def _conf(self):
+        """Accessor for the JVM SQL-specific configurations"""
+        return self.sparkSession._jsparkSession.sessionState().conf()
+
     @classmethod
     @since(1.6)
     def getOrCreate(cls, sc):

http://git-wip-us.apache.org/repos/asf/spark/blob/8f91c697/python/pyspark/sql/dataframe.py
----------------------------------------------------------------------
diff --git a/python/pyspark/sql/dataframe.py b/python/pyspark/sql/dataframe.py
index cb3fe44..c40aea9 100644
--- a/python/pyspark/sql/dataframe.py
+++ b/python/pyspark/sql/dataframe.py
@@ -354,32 +354,12 @@ class DataFrame(object):
         else:
             print(self._jdf.showString(n, int(truncate), vertical))
 
-    @property
-    def _eager_eval(self):
-        """Returns true if the eager evaluation enabled.
-        """
-        return self.sql_ctx.getConf(
-            "spark.sql.repl.eagerEval.enabled", "false").lower() == "true"
-
-    @property
-    def _max_num_rows(self):
-        """Returns the max row number for eager evaluation.
-        """
-        return int(self.sql_ctx.getConf(
-            "spark.sql.repl.eagerEval.maxNumRows", "20"))
-
-    @property
-    def _truncate(self):
-        """Returns the truncate length for eager evaluation.
-        """
-        return int(self.sql_ctx.getConf(
-            "spark.sql.repl.eagerEval.truncate", "20"))
-
     def __repr__(self):
-        if not self._support_repr_html and self._eager_eval:
+        if not self._support_repr_html and 
self.sql_ctx._conf.isReplEagerEvalEnabled():
             vertical = False
             return self._jdf.showString(
-                self._max_num_rows, self._truncate, vertical)
+                self.sql_ctx._conf.replEagerEvalMaxNumRows(),
+                self.sql_ctx._conf.replEagerEvalTruncate(), vertical)
         else:
             return "DataFrame[%s]" % (", ".join("%s: %s" % c for c in 
self.dtypes))
 
@@ -391,10 +371,10 @@ class DataFrame(object):
         import cgi
         if not self._support_repr_html:
             self._support_repr_html = True
-        if self._eager_eval:
-            max_num_rows = max(self._max_num_rows, 0)
+        if self.sql_ctx._conf.isReplEagerEvalEnabled():
+            max_num_rows = max(self.sql_ctx._conf.replEagerEvalMaxNumRows(), 0)
             sock_info = self._jdf.getRowsToPython(
-                max_num_rows, self._truncate)
+                max_num_rows, self.sql_ctx._conf.replEagerEvalTruncate())
             rows = list(_load_from_socket(sock_info, 
BatchedSerializer(PickleSerializer())))
             head = rows[0]
             row_data = rows[1:]
@@ -2049,13 +2029,12 @@ class DataFrame(object):
 
         import pandas as pd
 
-        if 
self.sql_ctx.getConf("spark.sql.execution.pandas.respectSessionTimeZone").lower()
 \
-           == "true":
-            timezone = self.sql_ctx.getConf("spark.sql.session.timeZone")
+        if self.sql_ctx._conf.pandasRespectSessionTimeZone():
+            timezone = self.sql_ctx._conf.sessionLocalTimeZone()
         else:
             timezone = None
 
-        if self.sql_ctx.getConf("spark.sql.execution.arrow.enabled", 
"false").lower() == "true":
+        if self.sql_ctx._conf.arrowEnabled():
             use_arrow = True
             try:
                 from pyspark.sql.types import to_arrow_schema
@@ -2065,8 +2044,7 @@ class DataFrame(object):
                 to_arrow_schema(self.schema)
             except Exception as e:
 
-                if 
self.sql_ctx.getConf("spark.sql.execution.arrow.fallback.enabled", "true") \
-                        .lower() == "true":
+                if self.sql_ctx._conf.arrowFallbackEnabled():
                     msg = (
                         "toPandas attempted Arrow optimization because "
                         "'spark.sql.execution.arrow.enabled' is set to true; 
however, "

http://git-wip-us.apache.org/repos/asf/spark/blob/8f91c697/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
----------------------------------------------------------------------
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
index da1c34c..e2c48e2 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
@@ -1728,6 +1728,12 @@ class SQLConf extends Serializable with Logging {
 
   def legacySizeOfNull: Boolean = getConf(SQLConf.LEGACY_SIZE_OF_NULL)
 
+  def isReplEagerEvalEnabled: Boolean = 
getConf(SQLConf.REPL_EAGER_EVAL_ENABLED)
+
+  def replEagerEvalMaxNumRows: Int = 
getConf(SQLConf.REPL_EAGER_EVAL_MAX_NUM_ROWS)
+
+  def replEagerEvalTruncate: Int = getConf(SQLConf.REPL_EAGER_EVAL_TRUNCATE)
+
   /** ********************** SQLConf functionality methods ************ */
 
   /** Set Spark SQL configuration properties. */


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

spark git commit: [SPARK-24665][PYSPARK] Use SQLConf in PySpark to manage all sql configs

Reply via email to