This is an automated email from the ASF dual-hosted git repository.

gurwls223 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new 362d1c7361e2 [SPARK-49530][PYTHON] Get active session from dataframes
362d1c7361e2 is described below

commit 362d1c7361e270ea0cfa33b9c22c6623861da982
Author: Ruifeng Zheng <[email protected]>
AuthorDate: Fri Nov 1 19:39:48 2024 +0900

    [SPARK-49530][PYTHON] Get active session from dataframes
    
    ### What changes were proposed in this pull request?
    Get active session from dataframes
    
    ### Why are the changes needed?
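    The session can be read directly from the DataFrame (`sdf._session`), so looking up the active session via `SparkSession.getActiveSession()` and raising `NO_ACTIVE_SESSION` is no longer needed.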
    
    ### Does this PR introduce _any_ user-facing change?
    no
    
    ### How was this patch tested?
    CI
    
    ### Was this patch authored or co-authored using generative AI tooling?
    no
    
    Closes #48735 from zhengruifeng/py_plot_session.
    
    Authored-by: Ruifeng Zheng <[email protected]>
    Signed-off-by: Hyukjin Kwon <[email protected]>
---
 python/pyspark/sql/plot/core.py | 23 ++++-------------------
 1 file changed, 4 insertions(+), 19 deletions(-)

diff --git a/python/pyspark/sql/plot/core.py b/python/pyspark/sql/plot/core.py
index 70164d782283..9e67b6bac8b5 100644
--- a/python/pyspark/sql/plot/core.py
+++ b/python/pyspark/sql/plot/core.py
@@ -19,11 +19,7 @@ import math
 
 from typing import Any, TYPE_CHECKING, List, Optional, Union, Sequence
 from types import ModuleType
-from pyspark.errors import (
-    PySparkRuntimeError,
-    PySparkTypeError,
-    PySparkValueError,
-)
+from pyspark.errors import PySparkTypeError, PySparkValueError
 from pyspark.sql import Column, functions as F
 from pyspark.sql.internal import InternalFunction as SF
 from pyspark.sql.pandas.utils import require_minimum_pandas_version
@@ -38,14 +34,8 @@ if TYPE_CHECKING:
 
 class PySparkTopNPlotBase:
     def get_top_n(self, sdf: "DataFrame") -> "pd.DataFrame":
-        from pyspark.sql import SparkSession
-
-        session = SparkSession.getActiveSession()
-        if session is None:
-            raise PySparkRuntimeError(errorClass="NO_ACTIVE_SESSION", messageParameters=dict())
-
         max_rows = int(
-            session.conf.get("spark.sql.pyspark.plotting.max_rows")  # type: ignore[arg-type]
+            sdf._session.conf.get("spark.sql.pyspark.plotting.max_rows")  # type: ignore[arg-type]
         )
         pdf = sdf.limit(max_rows + 1).toPandas()
 
@@ -59,16 +49,11 @@ class PySparkTopNPlotBase:
 
 class PySparkSampledPlotBase:
     def get_sampled(self, sdf: "DataFrame") -> "pd.DataFrame":
-        from pyspark.sql import SparkSession, Observation, functions as F
-
-        session = SparkSession.getActiveSession()
-        if session is None:
-            raise PySparkRuntimeError(errorClass="NO_ACTIVE_SESSION", messageParameters=dict())
+        from pyspark.sql import Observation, functions as F
 
         max_rows = int(
-            session.conf.get("spark.sql.pyspark.plotting.max_rows")  # type: ignore[arg-type]
+            sdf._session.conf.get("spark.sql.pyspark.plotting.max_rows")  # type: ignore[arg-type]
         )
-
         observation = Observation("pyspark plotting")
 
         rand_col_name = "__pyspark_plotting_sampled_plot_base_rand__"


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to