This is an automated email from the ASF dual-hosted git repository.

gurwls223 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
     new 1f449817af93 [SPARK-52554][PS] Avoid multiple roundtrips for config check in Spark Connect
1f449817af93 is described below

commit 1f449817af93978141aefe1c387b10c3c97930f1
Author: Takuya Ueshin <ues...@databricks.com>
AuthorDate: Tue Jun 24 09:21:04 2025 +0900

    [SPARK-52554][PS] Avoid multiple roundtrips for config check in Spark Connect

    ### What changes were proposed in this pull request?

    Avoids multiple roundtrips for config check in Spark Connect.

    ### Why are the changes needed?

    Some APIs for pandas API on Spark now need to check the server configs, which could cause a performance issue in Spark Connect.

    ### Does this PR introduce _any_ user-facing change?

    No.

    ### How was this patch tested?

    Manually, and the existing tests should pass.

    ### Was this patch authored or co-authored using generative AI tooling?

    No.

    Closes #51252 from ueshin/issues/SPARK-52554/is_ansi_mode_enabled.

    Authored-by: Takuya Ueshin <ues...@databricks.com>
    Signed-off-by: Hyukjin Kwon <gurwls...@apache.org>
---
 python/pyspark/pandas/utils.py | 27 +++++++++++++++++++++++----
 1 file changed, 23 insertions(+), 4 deletions(-)

diff --git a/python/pyspark/pandas/utils.py b/python/pyspark/pandas/utils.py
index 23350c06a147..b91f7011851c 100644
--- a/python/pyspark/pandas/utils.py
+++ b/python/pyspark/pandas/utils.py
@@ -20,6 +20,7 @@ Commonly used utils in pandas-on-Spark.
 
 import functools
 from contextlib import contextmanager
+import json
 import os
 from typing import (
     Any,
@@ -1071,10 +1072,28 @@ def xor(df1: PySparkDataFrame, df2: PySparkDataFrame) -> PySparkDataFrame:
 
 
 def is_ansi_mode_enabled(spark: SparkSession) -> bool:
-    return (
-        ps.get_option("compute.ansi_mode_support", spark_session=spark)
-        and spark.conf.get("spark.sql.ansi.enabled") == "true"
-    )
+    if is_remote():
+        from pyspark.sql.connect.session import SparkSession as ConnectSession
+        from pyspark.pandas.config import _key_format, _options_dict
+
+        client = cast(ConnectSession, spark).client
+        (ansi_mode_support, ansi_enabled) = client.get_config_with_defaults(
+            (
+                _key_format("compute.ansi_mode_support"),
+                json.dumps(_options_dict["compute.ansi_mode_support"].default),
+            ),
+            ("spark.sql.ansi.enabled", None),
+        )
+        if ansi_enabled is None:
+            ansi_enabled = spark.conf.get("spark.sql.ansi.enabled")
+            # Explicitly set the default value to reduce the roundtrip for the next time.
+            spark.conf.set("spark.sql.ansi.enabled", ansi_enabled)
+        return json.loads(ansi_mode_support) and ansi_enabled.lower() == "true"
+    else:
+        return (
+            ps.get_option("compute.ansi_mode_support", spark_session=spark)
+            and spark.conf.get("spark.sql.ansi.enabled").lower() == "true"
+        )
 
 
 def _test() -> None:
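For context, below is a minimal, self-contained sketch of the pattern this patch applies: batch all config lookups into a single roundtrip with client-side defaults, and write the resolved value back so later checks skip the fallback path entirely. FakeConnectClient and its get_config/set_config methods are hypothetical stand-ins for the Spark Connect client (only get_config_with_defaults appears in the patch), and the "pandas_on_Spark." key prefix is an assumption about what _key_format produces, not a verified key.

import json
from typing import Dict, Optional, Tuple


class FakeConnectClient:
    """Hypothetical stand-in for the Spark Connect client; every method
    call below counts as one simulated server roundtrip."""

    def __init__(self, server_confs: Dict[str, str]) -> None:
        self._server_confs = server_confs
        self.roundtrips = 0

    def get_config_with_defaults(
        self, *pairs: Tuple[str, Optional[str]]
    ) -> Tuple[Optional[str], ...]:
        # One roundtrip resolves every requested key, falling back to the
        # caller-supplied default when the key is unset on the server.
        self.roundtrips += 1
        return tuple(self._server_confs.get(key, default) for key, default in pairs)

    def get_config(self, key: str) -> Optional[str]:
        self.roundtrips += 1
        return self._server_confs.get(key)

    def set_config(self, key: str, value: str) -> None:
        self.roundtrips += 1
        self._server_confs[key] = value


def is_ansi_mode_enabled(client: FakeConnectClient) -> bool:
    # Batch both lookups into one roundtrip, mirroring the patched utils.py.
    # The JSON-encoded default plays the role of _options_dict[...].default;
    # the "pandas_on_Spark." prefix is assumed for illustration.
    ansi_mode_support, ansi_enabled = client.get_config_with_defaults(
        ("pandas_on_Spark.compute.ansi_mode_support", json.dumps(True)),
        ("spark.sql.ansi.enabled", None),
    )
    if ansi_enabled is None:
        # No explicit server value: resolve the effective value once ...
        ansi_enabled = client.get_config("spark.sql.ansi.enabled") or "false"
        # ... and pin it so the next batched get succeeds without fallback.
        client.set_config("spark.sql.ansi.enabled", ansi_enabled)
    return json.loads(ansi_mode_support) and ansi_enabled.lower() == "true"


if __name__ == "__main__":
    client = FakeConnectClient({})
    is_ansi_mode_enabled(client)  # 3 roundtrips: batched get, fallback get, set
    is_ansi_mode_enabled(client)  # 1 roundtrip: the batched get now suffices
    print(client.roundtrips)      # 4

The write-back is what the in-code comment "Explicitly set the default value to reduce the roundtrip for the next time." refers to: after the first check, the batched lookup alone answers every subsequent call.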