This is an automated email from the ASF dual-hosted git repository.
dongjoon pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 37f1fae3514 [SPARK-41394][PYTHON][TESTS] Skip `MemoryProfilerTests`
when pandas is not installed
37f1fae3514 is described below
commit 37f1fae35149391db80c7b33b6716ab97e0b46a2
Author: Dongjoon Hyun <[email protected]>
AuthorDate: Mon Dec 5 14:29:54 2022 -0800
[SPARK-41394][PYTHON][TESTS] Skip `MemoryProfilerTests` when pandas is not
installed
### What changes were proposed in this pull request?
This PR aims to skip `pandas`-related tests of `MemoryProfilerTests` when
`pandas` is not installed.
### Why are the changes needed?
For Apache Spark 3.4, to make the module-level tests (like `pyspark-core`)
pass again, as they did before SPARK-40281 (#38584).
### Does this PR introduce _any_ user-facing change?
No.
### How was this patch tested?
Run individual test.
```
python/run-tests --testnames pyspark.tests.test_memory_profiler
--python-executables python3
```
Closes #38920 from dongjoon-hyun/SPARK-41394.
Authored-by: Dongjoon Hyun <[email protected]>
Signed-off-by: Dongjoon Hyun <[email protected]>
---
python/pyspark/tests/test_memory_profiler.py | 14 ++++++++++++--
1 file changed, 12 insertions(+), 2 deletions(-)
diff --git a/python/pyspark/tests/test_memory_profiler.py
b/python/pyspark/tests/test_memory_profiler.py
index 3dc8ce4ce22..cdb75e5b6aa 100644
--- a/python/pyspark/tests/test_memory_profiler.py
+++ b/python/pyspark/tests/test_memory_profiler.py
@@ -24,16 +24,16 @@ from io import StringIO
from typing import Iterator
from unittest import mock
-import pandas as pd
-
from pyspark import SparkConf, SparkContext
from pyspark.profiler import has_memory_profiler
from pyspark.sql import SparkSession
from pyspark.sql.functions import pandas_udf, udf
+from pyspark.testing.sqlutils import have_pandas, pandas_requirement_message
from pyspark.testing.utils import PySparkTestCase
@unittest.skipIf(not has_memory_profiler, "Must have memory-profiler
installed.")
[email protected](not have_pandas, pandas_requirement_message) # type: ignore
class MemoryProfilerTests(PySparkTestCase):
def setUp(self):
self._old_sys_path = list(sys.path)
@@ -103,6 +103,8 @@ class MemoryProfilerTests(PySparkTestCase):
self.spark.range(10).select(plus_one("id")).collect()
def exec_pandas_udf_ser_to_ser(self):
+ import pandas as pd
+
@pandas_udf("int")
def ser_to_ser(ser: pd.Series) -> pd.Series:
return ser + 1
@@ -110,6 +112,8 @@ class MemoryProfilerTests(PySparkTestCase):
self.spark.range(10).select(ser_to_ser("id")).collect()
def exec_pandas_udf_ser_to_scalar(self):
+ import pandas as pd
+
@pandas_udf("int")
def ser_to_scalar(ser: pd.Series) -> float:
return ser.median()
@@ -118,6 +122,8 @@ class MemoryProfilerTests(PySparkTestCase):
# Unsupported
def exec_pandas_udf_iter_to_iter(self):
+ import pandas as pd
+
@pandas_udf("int")
def iter_to_iter(batch_ser: Iterator[pd.Series]) ->
Iterator[pd.Series]:
for ser in batch_ser:
@@ -126,6 +132,8 @@ class MemoryProfilerTests(PySparkTestCase):
self.spark.range(10).select(iter_to_iter("id")).collect()
def exec_grouped_map(self):
+ import pandas as pd
+
def grouped_map(pdf: pd.DataFrame) -> pd.DataFrame:
return pdf.assign(v=pdf.v - pdf.v.mean())
@@ -134,6 +142,8 @@ class MemoryProfilerTests(PySparkTestCase):
# Unsupported
def exec_map(self):
+ import pandas as pd
+
def map(pdfs: Iterator[pd.DataFrame]) -> Iterator[pd.DataFrame]:
for pdf in pdfs:
yield pdf[pdf.id == 1]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]