This is an automated email from the ASF dual-hosted git repository.
dongjoon pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new fa6a0786bb4b Revert "[SPARK-49531][PYTHON][CONNECT] Support line plot with plotly backend"
fa6a0786bb4b is described below
commit fa6a0786bb4b23a895e68a721df9ee88684c4fab
Author: Dongjoon Hyun <[email protected]>
AuthorDate: Sat Sep 14 17:57:35 2024 -0700
Revert "[SPARK-49531][PYTHON][CONNECT] Support line plot with plotly backend"
This reverts commit 3b8dddac65bce6f88f51e23e777d521d65fa3373.
---
dev/sparktestsupport/modules.py | 4 -
python/pyspark/errors/error-conditions.json | 5 -
python/pyspark/sql/classic/dataframe.py | 5 -
python/pyspark/sql/connect/dataframe.py | 5 -
python/pyspark/sql/dataframe.py | 27 -----
python/pyspark/sql/plot/__init__.py | 21 ----
python/pyspark/sql/plot/core.py | 135 ---------------------
python/pyspark/sql/plot/plotly.py | 30 -----
.../sql/tests/connect/test_parity_frame_plot.py | 36 ------
.../tests/connect/test_parity_frame_plot_plotly.py | 36 ------
python/pyspark/sql/tests/plot/__init__.py | 16 ---
python/pyspark/sql/tests/plot/test_frame_plot.py | 79 ------------
.../sql/tests/plot/test_frame_plot_plotly.py | 64 ----------
python/pyspark/sql/utils.py | 17 ---
python/pyspark/testing/sqlutils.py | 7 --
.../org/apache/spark/sql/internal/SQLConf.scala | 27 -----
16 files changed, 514 deletions(-)
diff --git a/dev/sparktestsupport/modules.py b/dev/sparktestsupport/modules.py
index b9a4bed715f6..34fbb8450d54 100644
--- a/dev/sparktestsupport/modules.py
+++ b/dev/sparktestsupport/modules.py
@@ -548,8 +548,6 @@ pyspark_sql = Module(
"pyspark.sql.tests.test_udtf",
"pyspark.sql.tests.test_utils",
"pyspark.sql.tests.test_resources",
- "pyspark.sql.tests.plot.test_frame_plot",
- "pyspark.sql.tests.plot.test_frame_plot_plotly",
],
)
@@ -1053,8 +1051,6 @@ pyspark_connect = Module(
"pyspark.sql.tests.connect.test_parity_arrow_cogrouped_map",
"pyspark.sql.tests.connect.test_parity_python_datasource",
"pyspark.sql.tests.connect.test_parity_python_streaming_datasource",
- "pyspark.sql.tests.connect.test_parity_frame_plot",
- "pyspark.sql.tests.connect.test_parity_frame_plot_plotly",
"pyspark.sql.tests.connect.test_utils",
"pyspark.sql.tests.connect.client.test_artifact",
"pyspark.sql.tests.connect.client.test_artifact_localcluster",
diff --git a/python/pyspark/errors/error-conditions.json
b/python/pyspark/errors/error-conditions.json
index 92aeb15e21d1..4061d024a83c 100644
--- a/python/pyspark/errors/error-conditions.json
+++ b/python/pyspark/errors/error-conditions.json
@@ -1088,11 +1088,6 @@
"Function `<func_name>` should use only POSITIONAL or POSITIONAL OR
KEYWORD arguments."
]
},
- "UNSUPPORTED_PLOT_BACKEND": {
- "message": [
- "`<backend>` is not supported, it should be one of the values from
<supported_backends>"
- ]
- },
"UNSUPPORTED_SIGNATURE": {
"message": [
"Unsupported signature: <signature>."
diff --git a/python/pyspark/sql/classic/dataframe.py
b/python/pyspark/sql/classic/dataframe.py
index d174f7774cc5..91b959162590 100644
--- a/python/pyspark/sql/classic/dataframe.py
+++ b/python/pyspark/sql/classic/dataframe.py
@@ -58,7 +58,6 @@ from pyspark.sql.column import Column
from pyspark.sql.classic.column import _to_seq, _to_list, _to_java_column
from pyspark.sql.readwriter import DataFrameWriter, DataFrameWriterV2
from pyspark.sql.merge import MergeIntoWriter
-from pyspark.sql.plot import PySparkPlotAccessor
from pyspark.sql.streaming import DataStreamWriter
from pyspark.sql.types import (
StructType,
@@ -1863,10 +1862,6 @@ class DataFrame(ParentDataFrame, PandasMapOpsMixin,
PandasConversionMixin):
messageParameters={"member": "queryExecution"},
)
- @property
- def plot(self) -> PySparkPlotAccessor:
- return PySparkPlotAccessor(self)
-
class DataFrameNaFunctions(ParentDataFrameNaFunctions):
def __init__(self, df: ParentDataFrame):
diff --git a/python/pyspark/sql/connect/dataframe.py
b/python/pyspark/sql/connect/dataframe.py
index e3b1d35b2d5d..768abd655d49 100644
--- a/python/pyspark/sql/connect/dataframe.py
+++ b/python/pyspark/sql/connect/dataframe.py
@@ -83,7 +83,6 @@ from pyspark.sql.connect.expressions import (
UnresolvedStar,
)
from pyspark.sql.connect.functions import builtin as F
-from pyspark.sql.plot import PySparkPlotAccessor
from pyspark.sql.pandas.types import from_arrow_schema, to_arrow_schema
from pyspark.sql.pandas.functions import _validate_pandas_udf # type:
ignore[attr-defined]
@@ -2240,10 +2239,6 @@ class DataFrame(ParentDataFrame):
def executionInfo(self) -> Optional["ExecutionInfo"]:
return self._execution_info
- @property
- def plot(self) -> PySparkPlotAccessor:
- return PySparkPlotAccessor(self)
-
class DataFrameNaFunctions(ParentDataFrameNaFunctions):
def __init__(self, df: ParentDataFrame):
diff --git a/python/pyspark/sql/dataframe.py b/python/pyspark/sql/dataframe.py
index 7748510258ea..ef35b7333257 100644
--- a/python/pyspark/sql/dataframe.py
+++ b/python/pyspark/sql/dataframe.py
@@ -39,7 +39,6 @@ from pyspark.resource import ResourceProfile
from pyspark.sql.column import Column
from pyspark.sql.readwriter import DataFrameWriter, DataFrameWriterV2
from pyspark.sql.merge import MergeIntoWriter
-from pyspark.sql.plot import PySparkPlotAccessor
from pyspark.sql.streaming import DataStreamWriter
from pyspark.sql.types import StructType, Row
from pyspark.sql.utils import dispatch_df_method
@@ -6395,32 +6394,6 @@ class DataFrame:
"""
...
- @property
- def plot(self) -> PySparkPlotAccessor:
- """
- Returns a :class:`PySparkPlotAccessor` for plotting functions.
-
- .. versionadded:: 4.0.0
-
- Returns
- -------
- :class:`PySparkPlotAccessor`
-
- Notes
- -----
- This API is experimental.
-
- Examples
- --------
- >>> data = [("A", 10, 1.5), ("B", 30, 2.5), ("C", 20, 3.5)]
- >>> columns = ["category", "int_val", "float_val"]
- >>> df = spark.createDataFrame(data, columns)
- >>> type(df.plot)
- <class 'pyspark.sql.plot.core.PySparkPlotAccessor'>
- >>> df.plot.line(x="category", y=["int_val", "float_val"]) # doctest: +SKIP
- """
- ...
-
class DataFrameNaFunctions:
"""Functionality for working with missing data in :class:`DataFrame`.
diff --git a/python/pyspark/sql/plot/__init__.py
b/python/pyspark/sql/plot/__init__.py
deleted file mode 100644
index 6da07061b2a0..000000000000
--- a/python/pyspark/sql/plot/__init__.py
+++ /dev/null
@@ -1,21 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements. See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-"""
-This package includes the plotting APIs for PySpark DataFrame.
-"""
-from pyspark.sql.plot.core import * # noqa: F403, F401
diff --git a/python/pyspark/sql/plot/core.py b/python/pyspark/sql/plot/core.py
deleted file mode 100644
index baee610dc6bd..000000000000
--- a/python/pyspark/sql/plot/core.py
+++ /dev/null
@@ -1,135 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements. See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-from typing import Any, TYPE_CHECKING, Optional, Union
-from types import ModuleType
-from pyspark.errors import PySparkRuntimeError, PySparkValueError
-from pyspark.sql.utils import require_minimum_plotly_version
-
-
-if TYPE_CHECKING:
- from pyspark.sql import DataFrame
- import pandas as pd
- from plotly.graph_objs import Figure
-
-
-class PySparkTopNPlotBase:
- def get_top_n(self, sdf: "DataFrame") -> "pd.DataFrame":
- from pyspark.sql import SparkSession
-
- session = SparkSession.getActiveSession()
- if session is None:
- raise PySparkRuntimeError(errorClass="NO_ACTIVE_SESSION",
messageParameters=dict())
-
- max_rows = int(
- session.conf.get("spark.sql.pyspark.plotting.max_rows") # type:
ignore[arg-type]
- )
- pdf = sdf.limit(max_rows + 1).toPandas()
-
- self.partial = False
- if len(pdf) > max_rows:
- self.partial = True
- pdf = pdf.iloc[:max_rows]
-
- return pdf
-
-
-class PySparkSampledPlotBase:
- def get_sampled(self, sdf: "DataFrame") -> "pd.DataFrame":
- from pyspark.sql import SparkSession
-
- session = SparkSession.getActiveSession()
- if session is None:
- raise PySparkRuntimeError(errorClass="NO_ACTIVE_SESSION",
messageParameters=dict())
-
- sample_ratio = session.conf.get("spark.sql.pyspark.plotting.sample_ratio")
- max_rows = int(
- session.conf.get("spark.sql.pyspark.plotting.max_rows") # type:
ignore[arg-type]
- )
-
- if sample_ratio is None:
- fraction = 1 / (sdf.count() / max_rows)
- fraction = min(1.0, fraction)
- else:
- fraction = float(sample_ratio)
-
- sampled_sdf = sdf.sample(fraction=fraction)
- pdf = sampled_sdf.toPandas()
-
- return pdf
-
-
-class PySparkPlotAccessor:
- plot_data_map = {
- "line": PySparkSampledPlotBase().get_sampled,
- }
- _backends = {} # type: ignore[var-annotated]
-
- def __init__(self, data: "DataFrame"):
- self.data = data
-
- def __call__(
- self, kind: str = "line", backend: Optional[str] = None, **kwargs: Any
- ) -> "Figure":
- plot_backend = PySparkPlotAccessor._get_plot_backend(backend)
-
- return plot_backend.plot_pyspark(self.data, kind=kind, **kwargs)
-
- @staticmethod
- def _get_plot_backend(backend: Optional[str] = None) -> ModuleType:
- backend = backend or "plotly"
-
- if backend in PySparkPlotAccessor._backends:
- return PySparkPlotAccessor._backends[backend]
-
- if backend == "plotly":
- require_minimum_plotly_version()
- else:
- raise PySparkValueError(
- errorClass="UNSUPPORTED_PLOT_BACKEND",
- messageParameters={"backend": backend, "supported_backends":
", ".join(["plotly"])},
- )
- from pyspark.sql.plot import plotly as module
-
- return module
-
- def line(self, x: str, y: Union[str, list[str]], **kwargs: Any) ->
"Figure":
- """
- Plot DataFrame as lines.
-
- Parameters
- ----------
- x : str
- Name of column to use for the horizontal axis.
- y : str or list of str
- Name(s) of the column(s) to use for the vertical axis. Multiple
columns can be plotted.
- **kwds : optional
- Additional keyword arguments.
-
- Returns
- -------
- :class:`plotly.graph_objs.Figure`
-
- Examples
- --------
- >>> data = [("A", 10, 1.5), ("B", 30, 2.5), ("C", 20, 3.5)]
- >>> columns = ["category", "int_val", "float_val"]
- >>> df = spark.createDataFrame(data, columns)
- >>> df.plot.line(x="category", y="int_val") # doctest: +SKIP
- >>> df.plot.line(x="category", y=["int_val", "float_val"]) # doctest: +SKIP
- """
- return self(kind="line", x=x, y=y, **kwargs)
diff --git a/python/pyspark/sql/plot/plotly.py
b/python/pyspark/sql/plot/plotly.py
deleted file mode 100644
index 5efc19476057..000000000000
--- a/python/pyspark/sql/plot/plotly.py
+++ /dev/null
@@ -1,30 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements. See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-from typing import TYPE_CHECKING, Any
-
-from pyspark.sql.plot import PySparkPlotAccessor
-
-if TYPE_CHECKING:
- from pyspark.sql import DataFrame
- from plotly.graph_objs import Figure
-
-
-def plot_pyspark(data: "DataFrame", kind: str, **kwargs: Any) -> "Figure":
- import plotly
-
- return plotly.plot(PySparkPlotAccessor.plot_data_map[kind](data), kind,
**kwargs)
diff --git a/python/pyspark/sql/tests/connect/test_parity_frame_plot.py
b/python/pyspark/sql/tests/connect/test_parity_frame_plot.py
deleted file mode 100644
index c69e438bf7eb..000000000000
--- a/python/pyspark/sql/tests/connect/test_parity_frame_plot.py
+++ /dev/null
@@ -1,36 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements. See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-from pyspark.testing.connectutils import ReusedConnectTestCase
-from pyspark.sql.tests.plot.test_frame_plot import DataFramePlotTestsMixin
-
-
-class FramePlotParityTests(DataFramePlotTestsMixin, ReusedConnectTestCase):
- pass
-
-
-if __name__ == "__main__":
- import unittest
- from pyspark.sql.tests.connect.test_parity_frame_plot import * # noqa:
F401
-
- try:
- import xmlrunner # type: ignore[import]
-
- testRunner = xmlrunner.XMLTestRunner(output="target/test-reports",
verbosity=2)
- except ImportError:
- testRunner = None
- unittest.main(testRunner=testRunner, verbosity=2)
diff --git a/python/pyspark/sql/tests/connect/test_parity_frame_plot_plotly.py
b/python/pyspark/sql/tests/connect/test_parity_frame_plot_plotly.py
deleted file mode 100644
index 78508fe53337..000000000000
--- a/python/pyspark/sql/tests/connect/test_parity_frame_plot_plotly.py
+++ /dev/null
@@ -1,36 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements. See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-from pyspark.testing.connectutils import ReusedConnectTestCase
-from pyspark.sql.tests.plot.test_frame_plot_plotly import
DataFramePlotPlotlyTestsMixin
-
-
-class FramePlotPlotlyParityTests(DataFramePlotPlotlyTestsMixin,
ReusedConnectTestCase):
- pass
-
-
-if __name__ == "__main__":
- import unittest
- from pyspark.sql.tests.connect.test_parity_frame_plot_plotly import * #
noqa: F401
-
- try:
- import xmlrunner # type: ignore[import]
-
- testRunner = xmlrunner.XMLTestRunner(output="target/test-reports",
verbosity=2)
- except ImportError:
- testRunner = None
- unittest.main(testRunner=testRunner, verbosity=2)
diff --git a/python/pyspark/sql/tests/plot/__init__.py
b/python/pyspark/sql/tests/plot/__init__.py
deleted file mode 100644
index cce3acad34a4..000000000000
--- a/python/pyspark/sql/tests/plot/__init__.py
+++ /dev/null
@@ -1,16 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements. See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
diff --git a/python/pyspark/sql/tests/plot/test_frame_plot.py
b/python/pyspark/sql/tests/plot/test_frame_plot.py
deleted file mode 100644
index 19ef53e46b2f..000000000000
--- a/python/pyspark/sql/tests/plot/test_frame_plot.py
+++ /dev/null
@@ -1,79 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements. See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-from pyspark.errors import PySparkValueError
-from pyspark.sql import Row
-from pyspark.sql.plot import PySparkSampledPlotBase, PySparkTopNPlotBase
-from pyspark.testing.sqlutils import ReusedSQLTestCase
-
-
-class DataFramePlotTestsMixin:
- def test_backend(self):
- accessor = self.spark.range(2).plot
- backend = accessor._get_plot_backend()
- self.assertEqual(backend.__name__, "pyspark.sql.plot.plotly")
-
- with self.assertRaises(PySparkValueError) as pe:
- accessor._get_plot_backend("matplotlib")
-
- self.check_error(
- exception=pe.exception,
- errorClass="UNSUPPORTED_PLOT_BACKEND",
- messageParameters={"backend": "matplotlib", "supported_backends":
"plotly"},
- )
-
- def test_topn_max_rows(self):
- try:
- self.spark.conf.set("spark.sql.pyspark.plotting.max_rows", "1000")
- sdf = self.spark.range(2500)
- pdf = PySparkTopNPlotBase().get_top_n(sdf)
- self.assertEqual(len(pdf), 1000)
- finally:
- self.spark.conf.unset("spark.sql.pyspark.plotting.max_rows")
-
- def test_sampled_plot_with_ratio(self):
- try:
- self.spark.conf.set("spark.sql.pyspark.plotting.sample_ratio",
"0.5")
- data = [Row(a=i, b=i + 1, c=i + 2, d=i + 3) for i in range(2500)]
- sdf = self.spark.createDataFrame(data)
- pdf = PySparkSampledPlotBase().get_sampled(sdf)
- self.assertEqual(round(len(pdf) / 2500, 1), 0.5)
- finally:
- self.spark.conf.unset("spark.sql.pyspark.plotting.sample_ratio")
-
- def test_sampled_plot_with_max_rows(self):
- data = [Row(a=i, b=i + 1, c=i + 2, d=i + 3) for i in range(2000)]
- sdf = self.spark.createDataFrame(data)
- pdf = PySparkSampledPlotBase().get_sampled(sdf)
- self.assertEqual(round(len(pdf) / 2000, 1), 0.5)
-
-
-class DataFramePlotTests(DataFramePlotTestsMixin, ReusedSQLTestCase):
- pass
-
-
-if __name__ == "__main__":
- import unittest
- from pyspark.sql.tests.plot.test_frame_plot import * # noqa: F401
-
- try:
- import xmlrunner
-
- testRunner = xmlrunner.XMLTestRunner(output="target/test-reports",
verbosity=2)
- except ImportError:
- testRunner = None
- unittest.main(testRunner=testRunner, verbosity=2)
diff --git a/python/pyspark/sql/tests/plot/test_frame_plot_plotly.py
b/python/pyspark/sql/tests/plot/test_frame_plot_plotly.py
deleted file mode 100644
index 72a3ed267d19..000000000000
--- a/python/pyspark/sql/tests/plot/test_frame_plot_plotly.py
+++ /dev/null
@@ -1,64 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements. See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-import unittest
-import pyspark.sql.plot # noqa: F401
-from pyspark.testing.sqlutils import ReusedSQLTestCase, have_plotly,
plotly_requirement_message
-
-
-@unittest.skipIf(not have_plotly, plotly_requirement_message)
-class DataFramePlotPlotlyTestsMixin:
- @property
- def sdf(self):
- data = [("A", 10, 1.5), ("B", 30, 2.5), ("C", 20, 3.5)]
- columns = ["category", "int_val", "float_val"]
- return self.spark.createDataFrame(data, columns)
-
- def _check_fig_data(self, fig_data, expected_x, expected_y,
expected_name=""):
- self.assertEqual(fig_data["mode"], "lines")
- self.assertEqual(fig_data["type"], "scatter")
- self.assertEqual(fig_data["xaxis"], "x")
- self.assertEqual(list(fig_data["x"]), expected_x)
- self.assertEqual(fig_data["yaxis"], "y")
- self.assertEqual(list(fig_data["y"]), expected_y)
- self.assertEqual(fig_data["name"], expected_name)
-
- def test_line_plot(self):
- # single column as vertical axis
- fig = self.sdf.plot(kind="line", x="category", y="int_val")
- self._check_fig_data(fig["data"][0], ["A", "B", "C"], [10, 30, 20])
-
- # multiple columns as vertical axis
- fig = self.sdf.plot.line(x="category", y=["int_val", "float_val"])
- self._check_fig_data(fig["data"][0], ["A", "B", "C"], [10, 30, 20],
"int_val")
- self._check_fig_data(fig["data"][1], ["A", "B", "C"], [1.5, 2.5, 3.5],
"float_val")
-
-
-class DataFramePlotPlotlyTests(DataFramePlotPlotlyTestsMixin,
ReusedSQLTestCase):
- pass
-
-
-if __name__ == "__main__":
- from pyspark.sql.tests.plot.test_frame_plot_plotly import * # noqa: F401
-
- try:
- import xmlrunner
-
- testRunner = xmlrunner.XMLTestRunner(output="target/test-reports",
verbosity=2)
- except ImportError:
- testRunner = None
- unittest.main(testRunner=testRunner, verbosity=2)
diff --git a/python/pyspark/sql/utils.py b/python/pyspark/sql/utils.py
index 5d9ec92cbc83..11b91612419a 100644
--- a/python/pyspark/sql/utils.py
+++ b/python/pyspark/sql/utils.py
@@ -41,7 +41,6 @@ from pyspark.errors import ( # noqa: F401
PythonException,
UnknownException,
SparkUpgradeException,
- PySparkImportError,
PySparkNotImplementedError,
PySparkRuntimeError,
)
@@ -116,22 +115,6 @@ def require_test_compiled() -> None:
)
-def require_minimum_plotly_version() -> None:
- """Raise ImportError if plotly is not installed"""
- minimum_plotly_version = "4.8"
-
- try:
- import plotly # noqa: F401
- except ImportError as error:
- raise PySparkImportError(
- errorClass="PACKAGE_NOT_INSTALLED",
- messageParameters={
- "package_name": "plotly",
- "minimum_version": str(minimum_plotly_version),
- },
- ) from error
-
-
class ForeachBatchFunction:
"""
This is the Python implementation of Java interface
'ForeachBatchFunction'. This wraps
diff --git a/python/pyspark/testing/sqlutils.py
b/python/pyspark/testing/sqlutils.py
index 00ad40e68bd7..9f07c44c084c 100644
--- a/python/pyspark/testing/sqlutils.py
+++ b/python/pyspark/testing/sqlutils.py
@@ -48,13 +48,6 @@ try:
except Exception as e:
test_not_compiled_message = str(e)
-plotly_requirement_message = None
-try:
- import plotly
-except ImportError as e:
- plotly_requirement_message = str(e)
-have_plotly = plotly_requirement_message is None
-
from pyspark.sql import SparkSession
from pyspark.sql.types import ArrayType, DoubleType, UserDefinedType, Row
from pyspark.testing.utils import ReusedPySparkTestCase, PySparkErrorTestUtils
diff --git
a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
index c3a42dfd62a0..094fb8f050bc 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
@@ -3169,29 +3169,6 @@ object SQLConf {
.version("4.0.0")
.fallbackConf(Python.PYTHON_WORKER_FAULTHANLDER_ENABLED)
- val PYSPARK_PLOT_MAX_ROWS =
- buildConf("spark.sql.pyspark.plotting.max_rows")
- .doc(
- "The visual limit on top-n-based plots. If set to 1000, the first 1000
data points " +
- "will be used for plotting.")
- .version("4.0.0")
- .intConf
- .createWithDefault(1000)
-
- val PYSPARK_PLOT_SAMPLE_RATIO =
- buildConf("spark.sql.pyspark.plotting.sample_ratio")
- .doc(
- "The proportion of data that will be plotted for sample-based plots.
It is determined " +
- "based on spark.sql.pyspark.plotting.max_rows if not explicitly set."
- )
- .version("4.0.0")
- .doubleConf
- .checkValue(
- ratio => ratio >= 0.0 && ratio <= 1.0,
- "The value should be between 0.0 and 1.0 inclusive."
- )
- .createOptional
-
val ARROW_SPARKR_EXECUTION_ENABLED =
buildConf("spark.sql.execution.arrow.sparkr.enabled")
.doc("When true, make use of Apache Arrow for columnar data transfers in
SparkR. " +
@@ -5887,10 +5864,6 @@ class SQLConf extends Serializable with Logging with
SqlApiConf {
def pythonUDFWorkerFaulthandlerEnabled: Boolean =
getConf(PYTHON_UDF_WORKER_FAULTHANLDER_ENABLED)
- def pysparkPlotMaxRows: Int = getConf(PYSPARK_PLOT_MAX_ROWS)
-
- def pysparkPlotSampleRatio: Option[Double] =
getConf(PYSPARK_PLOT_SAMPLE_RATIO)
-
def arrowSparkREnabled: Boolean = getConf(ARROW_SPARKR_EXECUTION_ENABLED)
def arrowPySparkFallbackEnabled: Boolean =
getConf(ARROW_PYSPARK_FALLBACK_ENABLED)
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]