This is an automated email from the ASF dual-hosted git repository.
gurwls223 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 89a7a5b2981 [SPARK-38854][PYTHON][TEST] Improve the test coverage for
pyspark/statcounter.py
89a7a5b2981 is described below
commit 89a7a5b29815d9547e1d652d97ea07a9b5e9fecf
Author: pralabhkumar <[email protected]>
AuthorDate: Tue Apr 12 20:41:11 2022 +0900
[SPARK-38854][PYTHON][TEST] Improve the test coverage for
pyspark/statcounter.py
### What changes were proposed in this pull request?
This PR adds tests that cover:
- `mergeStats` when one StatCounter is much larger than the other.
- The statistics reported when an empty list or None is passed to StatCounter.
### Why are the changes needed?
To cover corner cases and increase test coverage.
### Does this PR introduce _any_ user-facing change?
No - test only
### How was this patch tested?
The CI run for this PR exercises the newly added tests.
Closes #36145 from pralabhkumar/rk_increase_coverage_statcounter.
Authored-by: pralabhkumar <[email protected]>
Signed-off-by: Hyukjin Kwon <[email protected]>
---
python/pyspark/tests/test_statcounter.py | 26 ++++++++++++++++++++++++++
1 file changed, 26 insertions(+)
diff --git a/python/pyspark/tests/test_statcounter.py
b/python/pyspark/tests/test_statcounter.py
index 9651871e113..b10fe7cd911 100644
--- a/python/pyspark/tests/test_statcounter.py
+++ b/python/pyspark/tests/test_statcounter.py
@@ -16,6 +16,7 @@
#
from pyspark.statcounter import StatCounter
from pyspark.testing.utils import ReusedPySparkTestCase
+import math
class StatCounterTests(ReusedPySparkTestCase):
@@ -76,6 +77,31 @@ class StatCounterTests(ReusedPySparkTestCase):
self.assertEqual(stats.sum(), 20.0)
self.assertAlmostEqual(stats.variance(), 1.25)
self.assertAlmostEqual(stats.sampleVariance(), 1.4285714285714286)
+ execution_statements = [
+ StatCounter([1.0, 2.0]).mergeStats(StatCounter(range(1, 301))),
+ StatCounter(range(1, 301)).mergeStats(StatCounter([1.0, 2.0])),
+ ]
+ for stats in execution_statements:
+ self.assertEqual(stats.count(), 302)
+ self.assertEqual(stats.max(), 300.0)
+ self.assertEqual(stats.min(), 1.0)
+ self.assertAlmostEqual(stats.mean(), 149.51324503311)
+ self.assertAlmostEqual(stats.variance(), 7596.302804701549)
+ self.assertAlmostEqual(stats.sampleVariance(), 7621.539691095905)
+
+ def test_variance_when_size_zero(self):
+ # SPARK-38854: Test case to improve test coverage when
+ # StatCounter argument is empty list or None
+ arguments = [[], None]
+
+ for arg in arguments:
+ stats = StatCounter(arg)
+ self.assertTrue(math.isnan(stats.variance()))
+ self.assertTrue(math.isnan(stats.sampleVariance()))
+ self.assertEqual(stats.count(), 0)
+ self.assertTrue(math.isinf(stats.max()))
+ self.assertTrue(math.isinf(stats.min()))
+ self.assertEqual(stats.mean(), 0.0)
def test_merge_stats_with_self(self):
stats = StatCounter([1.0, 2.0, 3.0, 4.0])
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]