This is an automated email from the ASF dual-hosted git repository.

gurwls223 pushed a commit to branch branch-3.3
in repository https://gitbox.apache.org/repos/asf/spark.git

The following commit(s) were added to refs/heads/branch-3.3 by this push:
     new 37a2416ca4c [SPARK-39252][PYSPARK][TESTS] Remove flaky test_df_is_empty
37a2416ca4c is described below

commit 37a2416ca4c37eebeabfefc3be812594804f5ff5
Author: Ivan Sadikov <ivan.sadi...@databricks.com>
AuthorDate: Wed May 25 11:39:54 2022 +0900

    [SPARK-39252][PYSPARK][TESTS] Remove flaky test_df_is_empty

    ### What changes were proposed in this pull request?

    ### Why are the changes needed?

    This PR removes flaky `test_df_is_empty` as reported in https://issues.apache.org/jira/browse/SPARK-39252. I will open a follow-up PR to reintroduce the test and fix the flakiness (or see if it was a regression).

    ### Does this PR introduce _any_ user-facing change?

    No.

    ### How was this patch tested?

    Existing unit tests.

    Closes #36656 from sadikovi/SPARK-39252.

    Authored-by: Ivan Sadikov <ivan.sadi...@databricks.com>
    Signed-off-by: Hyukjin Kwon <gurwls...@apache.org>
    (cherry picked from commit 9823bb385cd6dca7c4fb5a6315721420ad42f80a)
    Signed-off-by: Hyukjin Kwon <gurwls...@apache.org>
---
 python/pyspark/sql/tests/test_dataframe.py | 36 ------------------------------
 1 file changed, 36 deletions(-)

diff --git a/python/pyspark/sql/tests/test_dataframe.py b/python/pyspark/sql/tests/test_dataframe.py
index fd54c25c705..be5e1d9a6e5 100644
--- a/python/pyspark/sql/tests/test_dataframe.py
+++ b/python/pyspark/sql/tests/test_dataframe.py
@@ -22,7 +22,6 @@ import shutil
 import tempfile
 import time
 import unittest
-import uuid
 from typing import cast
 
 from pyspark.sql import SparkSession, Row
@@ -1142,41 +1141,6 @@ class DataFrameTests(ReusedSQLTestCase):
         with self.assertRaisesRegex(TypeError, "Parameter 'truncate=foo'"):
             df.show(truncate="foo")
 
-    def test_df_is_empty(self):
-        # SPARK-39084: Fix df.rdd.isEmpty() resulting in JVM crash.
-
-        # This particular example of DataFrame reproduces an issue in isEmpty call
-        # which could result in JVM crash.
-        data = []
-        for t in range(0, 10000):
-            id = str(uuid.uuid4())
-            if t == 0:
-                for i in range(0, 99):
-                    data.append((id,))
-            elif t < 10:
-                for i in range(0, 75):
-                    data.append((id,))
-            elif t < 100:
-                for i in range(0, 50):
-                    data.append((id,))
-            elif t < 1000:
-                for i in range(0, 25):
-                    data.append((id,))
-            else:
-                for i in range(0, 10):
-                    data.append((id,))
-
-        tmpPath = tempfile.mkdtemp()
-        shutil.rmtree(tmpPath)
-        try:
-            df = self.spark.createDataFrame(data, ["col"])
-            df.coalesce(1).write.parquet(tmpPath)
-
-            res = self.spark.read.parquet(tmpPath).groupBy("col").count()
-            self.assertFalse(res.rdd.isEmpty())
-        finally:
-            shutil.rmtree(tmpPath)
-
     @unittest.skipIf(
         not have_pandas or not have_pyarrow,
         cast(str, pandas_requirement_message or pyarrow_requirement_message),


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org