This is an automated email from the ASF dual-hosted git repository.
gurwls223 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 362d1c7361e2 [SPARK-49530][PYTHON] Get active session from dataframes
362d1c7361e2 is described below
commit 362d1c7361e270ea0cfa33b9c22c6623861da982
Author: Ruifeng Zheng <[email protected]>
AuthorDate: Fri Nov 1 19:39:48 2024 +0900
[SPARK-49530][PYTHON] Get active session from dataframes
### What changes were proposed in this pull request?
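In the plotting helpers (`PySparkTopNPlotBase.get_top_n` and `PySparkSampledPlotBase.get_sampled`), read the configuration from the session attached to the DataFrame (`sdf._session`) instead of looking up the active session via `SparkSession.getActiveSession()`.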
### Why are the changes needed?
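We can get the session directly from the DataFrame, so the separate active-session lookup and its `NO_ACTIVE_SESSION` error handling are no longer needed.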
### Does this PR introduce _any_ user-facing change?
no
### How was this patch tested?
Existing CI.
### Was this patch authored or co-authored using generative AI tooling?
no
Closes #48735 from zhengruifeng/py_plot_session.
Authored-by: Ruifeng Zheng <[email protected]>
Signed-off-by: Hyukjin Kwon <[email protected]>
---
python/pyspark/sql/plot/core.py | 23 ++++-------------------
1 file changed, 4 insertions(+), 19 deletions(-)
diff --git a/python/pyspark/sql/plot/core.py b/python/pyspark/sql/plot/core.py
index 70164d782283..9e67b6bac8b5 100644
--- a/python/pyspark/sql/plot/core.py
+++ b/python/pyspark/sql/plot/core.py
@@ -19,11 +19,7 @@ import math
from typing import Any, TYPE_CHECKING, List, Optional, Union, Sequence
from types import ModuleType
-from pyspark.errors import (
- PySparkRuntimeError,
- PySparkTypeError,
- PySparkValueError,
-)
+from pyspark.errors import PySparkTypeError, PySparkValueError
from pyspark.sql import Column, functions as F
from pyspark.sql.internal import InternalFunction as SF
from pyspark.sql.pandas.utils import require_minimum_pandas_version
@@ -38,14 +34,8 @@ if TYPE_CHECKING:
class PySparkTopNPlotBase:
def get_top_n(self, sdf: "DataFrame") -> "pd.DataFrame":
- from pyspark.sql import SparkSession
-
- session = SparkSession.getActiveSession()
- if session is None:
- raise PySparkRuntimeError(errorClass="NO_ACTIVE_SESSION", messageParameters=dict())
-
max_rows = int(
- session.conf.get("spark.sql.pyspark.plotting.max_rows") # type: ignore[arg-type]
+ sdf._session.conf.get("spark.sql.pyspark.plotting.max_rows") # type: ignore[arg-type]
)
pdf = sdf.limit(max_rows + 1).toPandas()
@@ -59,16 +49,11 @@ class PySparkTopNPlotBase:
class PySparkSampledPlotBase:
def get_sampled(self, sdf: "DataFrame") -> "pd.DataFrame":
- from pyspark.sql import SparkSession, Observation, functions as F
-
- session = SparkSession.getActiveSession()
- if session is None:
- raise PySparkRuntimeError(errorClass="NO_ACTIVE_SESSION", messageParameters=dict())
+ from pyspark.sql import Observation, functions as F
max_rows = int(
- session.conf.get("spark.sql.pyspark.plotting.max_rows") # type: ignore[arg-type]
+ sdf._session.conf.get("spark.sql.pyspark.plotting.max_rows") # type: ignore[arg-type]
)
-
observation = Observation("pyspark plotting")
rand_col_name = "__pyspark_plotting_sampled_plot_base_rand__"
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]