This is an automated email from the ASF dual-hosted git repository.
gurwls223 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 745ed93fe45 [SPARK-43423][PYTHON][ML][TESTS] Retry when `test_gmm`
fails
745ed93fe45 is described below
commit 745ed93fe451b3f9e8148b06356c28b889a4db5a
Author: Ruifeng Zheng <[email protected]>
AuthorDate: Tue May 9 20:16:39 2023 +0900
[SPARK-43423][PYTHON][ML][TESTS] Retry when `test_gmm` fails
### What changes were proposed in this pull request?
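`test_gmm` is a bit flaky: it fails about 1~3 times per week in CI, for
example, https://github.com/apache/spark/actions/runs/4921792416/jobs/8791985336
This PR wraps the test body in `eventually` (with a 60-second timeout and
`catch_assertions=True`) so that the test is retried if it fails.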
### Why are the changes needed?
To make CI more stable.
### Does this PR introduce _any_ user-facing change?
No, dev-only.
### How was this patch tested?
Updated the existing test (`test_gmm`).
Closes #41101 from zhengruifeng/py_gmm_test.
Authored-by: Ruifeng Zheng <[email protected]>
Signed-off-by: Hyukjin Kwon <[email protected]>
---
python/pyspark/mllib/tests/test_algorithms.py | 31 ++++++++++++++++-----------
1 file changed, 19 insertions(+), 12 deletions(-)
diff --git a/python/pyspark/mllib/tests/test_algorithms.py
b/python/pyspark/mllib/tests/test_algorithms.py
index 6a9be99ecdf..dc48c2c021d 100644
--- a/python/pyspark/mllib/tests/test_algorithms.py
+++ b/python/pyspark/mllib/tests/test_algorithms.py
@@ -28,6 +28,7 @@ from pyspark.mllib.recommendation import Rating
from pyspark.mllib.regression import LabeledPoint
from pyspark.serializers import CPickleSerializer
from pyspark.testing.mllibutils import MLlibTestCase
+from pyspark.testing.utils import eventually
class ListTests(MLlibTestCase):
@@ -99,18 +100,24 @@ class ListTests(MLlibTestCase):
def test_gmm(self):
from pyspark.mllib.clustering import GaussianMixture
- data = self.sc.parallelize(
- [
- [1, 2],
- [8, 9],
- [-4, -3],
- [-6, -7],
- ]
- )
- clusters = GaussianMixture.train(data, 2, convergenceTol=0.001,
maxIterations=10, seed=1)
- labels = clusters.predict(data).collect()
- self.assertEqual(labels[0], labels[1])
- self.assertEqual(labels[2], labels[3])
+ def condition():
+ data = self.sc.parallelize(
+ [
+ [1, 2],
+ [8, 9],
+ [-4, -3],
+ [-6, -7],
+ ]
+ )
+ clusters = GaussianMixture.train(
+ data, 2, convergenceTol=0.001, maxIterations=10, seed=1
+ )
+ labels = clusters.predict(data).collect()
+ self.assertEqual(labels[0], labels[1])
+ self.assertEqual(labels[2], labels[3])
+ return True
+
+ eventually(condition, timeout=60, catch_assertions=True)
def test_gmm_deterministic(self):
from pyspark.mllib.clustering import GaussianMixture
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]