This is an automated email from the ASF dual-hosted git repository. ueshin pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
     new c91ae54  [SPARK-36388][SPARK-36386][PYTHON][FOLLOWUP] Fix DataFrame groupby-rolling and groupby-expanding to follow pandas 1.3
c91ae54 is described below

commit c91ae544fdd44c67fe1e4c73825570dbe71a3206
Author: itholic <haejoon....@databricks.com>
AuthorDate: Wed Aug 18 11:17:01 2021 -0700

    [SPARK-36388][SPARK-36386][PYTHON][FOLLOWUP] Fix DataFrame groupby-rolling and groupby-expanding to follow pandas 1.3

    ### What changes were proposed in this pull request?

    This PR is followup for https://github.com/apache/spark/pull/33646 to add missing tests.

    ### Why are the changes needed?

    Some tests are missing

    ### Does this PR introduce _any_ user-facing change?

    No

    ### How was this patch tested?

    Unittest

    Closes #33776 from itholic/SPARK-36388-followup.

    Authored-by: itholic <haejoon....@databricks.com>
    Signed-off-by: Takuya UESHIN <ues...@databricks.com>
---
 .../pandas/tests/test_ops_on_diff_frames_groupby_expanding.py | 9 ++++++---
 .../pandas/tests/test_ops_on_diff_frames_groupby_rolling.py | 9 ++++++---
 2 files changed, 12 insertions(+), 6 deletions(-)

diff --git a/python/pyspark/pandas/tests/test_ops_on_diff_frames_groupby_expanding.py b/python/pyspark/pandas/tests/test_ops_on_diff_frames_groupby_expanding.py
index 223adea..634cbd7 100644
--- a/python/pyspark/pandas/tests/test_ops_on_diff_frames_groupby_expanding.py
+++ b/python/pyspark/pandas/tests/test_ops_on_diff_frames_groupby_expanding.py
@@ -52,14 +52,17 @@ class OpsOnDiffFramesGroupByExpandingTest(PandasOnSparkTestCase, TestUtils):
         psdf = ps.from_pandas(pdf)
         kkey = ps.from_pandas(pkey)
 
+        # The behavior of GroupBy.expanding is changed from pandas 1.3.
         if LooseVersion(pd.__version__) >= LooseVersion("1.3"):
-            # TODO(SPARK-36367): Fix the behavior to follow pandas >= 1.3
-            pass
-        else:
             self.assert_eq(
                 getattr(psdf.groupby(kkey).expanding(2), f)().sort_index(),
                 getattr(pdf.groupby(pkey).expanding(2), f)().sort_index(),
             )
+        else:
+            self.assert_eq(
+                getattr(psdf.groupby(kkey).expanding(2), f)().sort_index(),
+                getattr(pdf.groupby(pkey).expanding(2), f)().drop("a", axis=1).sort_index(),
+            )
 
         self.assert_eq(
             getattr(psdf.groupby(kkey)["b"].expanding(2), f)().sort_index(),
diff --git a/python/pyspark/pandas/tests/test_ops_on_diff_frames_groupby_rolling.py b/python/pyspark/pandas/tests/test_ops_on_diff_frames_groupby_rolling.py
index 4f97769..04ea448 100644
--- a/python/pyspark/pandas/tests/test_ops_on_diff_frames_groupby_rolling.py
+++ b/python/pyspark/pandas/tests/test_ops_on_diff_frames_groupby_rolling.py
@@ -50,14 +50,17 @@ class OpsOnDiffFramesGroupByRollingTest(PandasOnSparkTestCase, TestUtils):
         psdf = ps.from_pandas(pdf)
         kkey = ps.from_pandas(pkey)
 
+        # The behavior of GroupBy.rolling is changed from pandas 1.3.
         if LooseVersion(pd.__version__) >= LooseVersion("1.3"):
-            # TODO(SPARK-36367): Fix the behavior to follow pandas >= 1.3
-            pass
-        else:
             self.assert_eq(
                 getattr(psdf.groupby(kkey).rolling(2), f)().sort_index(),
                 getattr(pdf.groupby(pkey).rolling(2), f)().sort_index(),
             )
+        else:
+            self.assert_eq(
+                getattr(psdf.groupby(kkey).rolling(2), f)().sort_index(),
+                getattr(pdf.groupby(pkey).rolling(2), f)().drop("a", axis=1).sort_index(),
+            )
 
         self.assert_eq(
             getattr(psdf.groupby(kkey)["b"].rolling(2), f)().sort_index(),

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org