itholic commented on code in PR #37845:
URL: https://github.com/apache/spark/pull/37845#discussion_r969070379
##########
python/pyspark/pandas/tests/test_stats.py:
##########
@@ -257,6 +257,32 @@ def test_skew_kurt_numerical_stability(self):
self.assert_eq(psdf.skew(), pdf.skew(), almost=True)
self.assert_eq(psdf.kurt(), pdf.kurt(), almost=True)
+ def test_dataframe_corr(self):
+ pdf = makeMissingDataframe(0.3, 42)
+ psdf = ps.from_pandas(pdf)
+
+ with self.assertRaisesRegex(ValueError, "Invalid method"):
+ psdf.corr("std")
+ with self.assertRaisesRegex(NotImplementedError, "kendall for now"):
+ psdf.corr("kendall")
+ with self.assertRaisesRegex(TypeError, "Invalid min_periods type"):
+ psdf.corr(min_periods="3")
+ with self.assertRaisesRegex(NotImplementedError, "spearman for now"):
+ psdf.corr(method="spearman", min_periods=3)
+
+ self.assert_eq(psdf.corr(), pdf.corr(), check_exact=False)
+ self.assert_eq(psdf.corr(min_periods=1), pdf.corr(min_periods=1), check_exact=False)
+ self.assert_eq(psdf.corr(min_periods=3), pdf.corr(min_periods=3), check_exact=False)
+
+ # multi-index columns
+ columns = pd.MultiIndex.from_tuples([("X", "A"), ("X", "B"), ("Y", "C"), ("Z", "D")])
+ pdf.columns = columns
+ psdf.columns = columns
+
+ self.assert_eq(psdf.corr(), pdf.corr(), check_exact=False)
+ self.assert_eq(psdf.corr(min_periods=1), pdf.corr(min_periods=1), check_exact=False)
+ self.assert_eq(psdf.corr(min_periods=3), pdf.corr(min_periods=3), check_exact=False)
+
def test_corr(self):
Review Comment:
Can we add a comment about this at the top of `test_dataframe_corr` so that we don't forget?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]