This is an automated email from the ASF dual-hosted git repository.
dongjoon pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 98fef6ea5855 [SPARK-46471][PS][TESTS][FOLLOWUPS] Reorganize
`OpsOnDiffFramesEnabledTests`: Factor out `test_assignment_*`
98fef6ea5855 is described below
commit 98fef6ea5855580b46d41e269e0ddcd9a2c8bbe8
Author: Ruifeng Zheng <[email protected]>
AuthorDate: Sat Dec 23 15:13:26 2023 -0800
[SPARK-46471][PS][TESTS][FOLLOWUPS] Reorganize
`OpsOnDiffFramesEnabledTests`: Factor out `test_assignment_*`
### What changes were proposed in this pull request?
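Factor the `test_assignment_*` and `test_multi_index_assignment_*` tests out of `OpsOnDiffFramesEnabledTests` into the new `diff_frames_ops/test_assign_frame.py` and `diff_frames_ops/test_assign_series.py` modules, add the matching Spark Connect parity tests, and register the new modules in `dev/sparktestsupport/modules.py`.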
### Why are the changes needed?
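To improve test parallelism: splitting the large `OpsOnDiffFramesEnabledTests` suite into smaller test modules lets them run in parallel.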
### Does this PR introduce _any_ user-facing change?
No, this is a test-only change.
### How was this patch tested?
Verified by CI.
### Was this patch authored or co-authored using generative AI tooling?
no
Closes #44461 from zhengruifeng/ps_test_diff_ops_1.
Authored-by: Ruifeng Zheng <[email protected]>
Signed-off-by: Dongjoon Hyun <[email protected]>
---
dev/sparktestsupport/modules.py | 4 +
.../diff_frames_ops/test_parity_assign_frame.py | 41 ++++
.../diff_frames_ops/test_parity_assign_series.py | 41 ++++
.../tests/diff_frames_ops/test_assign_frame.py | 243 +++++++++++++++++++++
.../tests/diff_frames_ops/test_assign_series.py | 241 ++++++++++++++++++++
.../pandas/tests/test_ops_on_diff_frames.py | 174 ---------------
6 files changed, 570 insertions(+), 174 deletions(-)
diff --git a/dev/sparktestsupport/modules.py b/dev/sparktestsupport/modules.py
index 47db204e2fa1..33e7dd3af97a 100644
--- a/dev/sparktestsupport/modules.py
+++ b/dev/sparktestsupport/modules.py
@@ -870,6 +870,8 @@ pyspark_pandas_slow = Module(
"pyspark.pandas.tests.diff_frames_ops.test_arithmetic_chain",
"pyspark.pandas.tests.diff_frames_ops.test_arithmetic_chain_ext",
"pyspark.pandas.tests.diff_frames_ops.test_arithmetic_chain_ext_float",
+ "pyspark.pandas.tests.diff_frames_ops.test_assign_frame",
+ "pyspark.pandas.tests.diff_frames_ops.test_assign_series",
"pyspark.pandas.tests.diff_frames_ops.test_basic_slow",
"pyspark.pandas.tests.diff_frames_ops.test_cov",
"pyspark.pandas.tests.diff_frames_ops.test_corrwith",
@@ -1235,6 +1237,8 @@ pyspark_pandas_connect_part3 = Module(
"pyspark.pandas.tests.connect.diff_frames_ops.test_parity_arithmetic_chain",
"pyspark.pandas.tests.connect.diff_frames_ops.test_parity_arithmetic_chain_ext",
"pyspark.pandas.tests.connect.diff_frames_ops.test_parity_arithmetic_chain_ext_float",
+
"pyspark.pandas.tests.connect.diff_frames_ops.test_parity_assign_frame",
+
"pyspark.pandas.tests.connect.diff_frames_ops.test_parity_assign_series",
"pyspark.pandas.tests.connect.diff_frames_ops.test_parity_groupby",
"pyspark.pandas.tests.connect.diff_frames_ops.test_parity_groupby_aggregate",
"pyspark.pandas.tests.connect.diff_frames_ops.test_parity_groupby_apply",
diff --git
a/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_assign_frame.py
b/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_assign_frame.py
new file mode 100644
index 000000000000..82ce5a2e15bb
--- /dev/null
+++
b/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_assign_frame.py
@@ -0,0 +1,41 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+import unittest
+
+from pyspark.pandas.tests.diff_frames_ops.test_assign_frame import
AssignFrameMixin
+from pyspark.testing.connectutils import ReusedConnectTestCase
+from pyspark.testing.pandasutils import PandasOnSparkTestUtils
+
+
+class AssignFrameParityTests(
+ AssignFrameMixin,
+ PandasOnSparkTestUtils,
+ ReusedConnectTestCase,
+):
+ pass
+
+
+if __name__ == "__main__":
+ from pyspark.pandas.tests.connect.diff_frames_ops.test_parity_assign_frame
import * # noqa
+
+ try:
+ import xmlrunner # type: ignore[import]
+
+ testRunner = xmlrunner.XMLTestRunner(output="target/test-reports",
verbosity=2)
+ except ImportError:
+ testRunner = None
+ unittest.main(testRunner=testRunner, verbosity=2)
diff --git
a/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_assign_series.py
b/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_assign_series.py
new file mode 100644
index 000000000000..24a1e9b966cf
--- /dev/null
+++
b/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_assign_series.py
@@ -0,0 +1,41 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+import unittest
+
+from pyspark.pandas.tests.diff_frames_ops.test_assign_series import
AssignSeriesMixin
+from pyspark.testing.connectutils import ReusedConnectTestCase
+from pyspark.testing.pandasutils import PandasOnSparkTestUtils
+
+
+class AssignSeriesParityTests(
+ AssignSeriesMixin,
+ PandasOnSparkTestUtils,
+ ReusedConnectTestCase,
+):
+ pass
+
+
+if __name__ == "__main__":
+ from
pyspark.pandas.tests.connect.diff_frames_ops.test_parity_assign_series import *
# noqa
+
+ try:
+ import xmlrunner # type: ignore[import]
+
+ testRunner = xmlrunner.XMLTestRunner(output="target/test-reports",
verbosity=2)
+ except ImportError:
+ testRunner = None
+ unittest.main(testRunner=testRunner, verbosity=2)
diff --git a/python/pyspark/pandas/tests/diff_frames_ops/test_assign_frame.py
b/python/pyspark/pandas/tests/diff_frames_ops/test_assign_frame.py
new file mode 100644
index 000000000000..e6f2e78d7499
--- /dev/null
+++ b/python/pyspark/pandas/tests/diff_frames_ops/test_assign_frame.py
@@ -0,0 +1,243 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import pandas as pd
+
+from pyspark import pandas as ps
+from pyspark.pandas.config import set_option, reset_option
+from pyspark.testing.pandasutils import PandasOnSparkTestCase
+from pyspark.testing.sqlutils import SQLTestUtils
+
+
+class AssignFrameMixin:
+ @property
+ def pdf1(self):
+ return pd.DataFrame(
+ {"a": [1, 2, 3, 4, 5, 6, 7, 8, 9], "b": [4, 5, 6, 3, 2, 1, 0, 0,
0]},
+ index=[0, 1, 3, 5, 6, 8, 9, 10, 11],
+ )
+
+ @property
+ def pdf2(self):
+ return pd.DataFrame(
+ {"a": [9, 8, 7, 6, 5, 4, 3, 2, 1], "b": [0, 0, 0, 4, 5, 6, 1, 2,
3]},
+ index=list(range(9)),
+ )
+
+ @property
+ def pdf3(self):
+ return pd.DataFrame(
+ {"b": [1, 1, 1, 1, 1, 1, 1, 1, 1], "c": [1, 1, 1, 1, 1, 1, 1, 1,
1]},
+ index=list(range(9)),
+ )
+
+ @property
+ def pdf4(self):
+ return pd.DataFrame(
+ {"e": [2, 2, 2, 2, 2, 2, 2, 2, 2], "f": [2, 2, 2, 2, 2, 2, 2, 2,
2]},
+ index=list(range(9)),
+ )
+
+ @property
+ def pdf5(self):
+ return pd.DataFrame(
+ {
+ "a": [1, 2, 3, 4, 5, 6, 7, 8, 9],
+ "b": [4, 5, 6, 3, 2, 1, 0, 0, 0],
+ "c": [4, 5, 6, 3, 2, 1, 0, 0, 0],
+ },
+ index=[0, 1, 3, 5, 6, 8, 9, 10, 11],
+ ).set_index(["a", "b"])
+
+ @property
+ def pdf6(self):
+ return pd.DataFrame(
+ {
+ "a": [9, 8, 7, 6, 5, 4, 3, 2, 1],
+ "b": [0, 0, 0, 4, 5, 6, 1, 2, 3],
+ "c": [9, 8, 7, 6, 5, 4, 3, 2, 1],
+ "e": [4, 5, 6, 3, 2, 1, 0, 0, 0],
+ },
+ index=list(range(9)),
+ ).set_index(["a", "b"])
+
+ @property
+ def pser1(self):
+ midx = pd.MultiIndex(
+ [["lama", "cow", "falcon", "koala"], ["speed", "weight", "length",
"power"]],
+ [[0, 3, 1, 1, 1, 2, 2, 2], [0, 2, 0, 3, 2, 0, 1, 3]],
+ )
+ return pd.Series([45, 200, 1.2, 30, 250, 1.5, 320, 1], index=midx)
+
+ @property
+ def pser2(self):
+ midx = pd.MultiIndex(
+ [["lama", "cow", "falcon"], ["speed", "weight", "length"]],
+ [[0, 0, 0, 1, 1, 1, 2, 2, 2], [0, 1, 2, 0, 1, 2, 0, 1, 2]],
+ )
+ return pd.Series([-45, 200, -1.2, 30, -250, 1.5, 320, 1, -0.3],
index=midx)
+
+ @property
+ def pser3(self):
+ midx = pd.MultiIndex(
+ [["koalas", "cow", "falcon"], ["speed", "weight", "length"]],
+ [[0, 0, 0, 1, 1, 1, 2, 2, 2], [1, 1, 2, 0, 0, 2, 2, 2, 1]],
+ )
+ return pd.Series([45, 200, 1.2, 30, 250, 1.5, 320, 1, 0.3], index=midx)
+
+ @property
+ def psdf1(self):
+ return ps.from_pandas(self.pdf1)
+
+ @property
+ def psdf2(self):
+ return ps.from_pandas(self.pdf2)
+
+ @property
+ def psdf3(self):
+ return ps.from_pandas(self.pdf3)
+
+ @property
+ def psdf4(self):
+ return ps.from_pandas(self.pdf4)
+
+ @property
+ def psdf5(self):
+ return ps.from_pandas(self.pdf5)
+
+ @property
+ def psdf6(self):
+ return ps.from_pandas(self.pdf6)
+
+ @classmethod
+ def setUpClass(cls):
+ super().setUpClass()
+ set_option("compute.ops_on_diff_frames", True)
+
+ @classmethod
+ def tearDownClass(cls):
+ reset_option("compute.ops_on_diff_frames")
+ super().tearDownClass()
+
+ def test_assignment_frame(self):
+ psdf = ps.from_pandas(self.pdf1)
+ pdf = self.pdf1
+ psser = psdf.a
+ pser = pdf.a
+ psdf[["a", "b"]] = self.psdf1
+ pdf[["a", "b"]] = self.pdf1
+
+ self.assert_eq(psdf.sort_index(), pdf.sort_index())
+ self.assert_eq(psser, pser)
+
+ # 'c' does not exist in `psdf`.
+ psdf = ps.from_pandas(self.pdf1)
+ pdf = self.pdf1
+ psser = psdf.a
+ pser = pdf.a
+ psdf[["b", "c"]] = self.psdf1
+ pdf[["b", "c"]] = self.pdf1
+
+ self.assert_eq(psdf.sort_index(), pdf.sort_index())
+ self.assert_eq(psser, pser)
+
+ # 'c' and 'd' do not exist in `psdf`.
+ psdf = ps.from_pandas(self.pdf1)
+ pdf = self.pdf1
+ psdf[["c", "d"]] = self.psdf1
+ pdf[["c", "d"]] = self.pdf1
+
+ self.assert_eq(psdf.sort_index(), pdf.sort_index())
+
+ # Multi-index columns
+ columns = pd.MultiIndex.from_tuples([("x", "a"), ("x", "b")])
+ psdf = ps.from_pandas(self.pdf1)
+ pdf = self.pdf1
+ psdf.columns = columns
+ pdf.columns = columns
+ psdf[[("y", "c"), ("z", "d")]] = self.psdf1
+ pdf[[("y", "c"), ("z", "d")]] = self.pdf1
+
+ self.assert_eq(psdf.sort_index(), pdf.sort_index())
+
+ psdf = ps.from_pandas(self.pdf1)
+ pdf = self.pdf1
+ psdf1 = ps.from_pandas(self.pdf1)
+ pdf1 = self.pdf1
+ psdf1.columns = columns
+ pdf1.columns = columns
+ psdf[["c", "d"]] = psdf1
+ pdf[["c", "d"]] = pdf1
+
+ self.assert_eq(psdf.sort_index(), pdf.sort_index())
+
+ def test_assignment_frame_chain(self):
+ psdf = ps.from_pandas(self.pdf1)
+ pdf = self.pdf1
+ psdf[["a", "b"]] = self.psdf1
+ pdf[["a", "b"]] = self.pdf1
+
+ psdf[["e", "f"]] = self.psdf3
+ pdf[["e", "f"]] = self.pdf3
+
+ psdf[["b", "c"]] = self.psdf2
+ pdf[["b", "c"]] = self.pdf2
+
+ self.assert_eq(psdf.sort_index(), pdf.sort_index())
+
+ def test_multi_index_assignment_frame(self):
+ psdf = ps.from_pandas(self.pdf5)
+ pdf = self.pdf5
+ psdf[["c"]] = self.psdf5
+ pdf[["c"]] = self.pdf5
+
+ self.assert_eq(psdf.sort_index(), pdf.sort_index())
+
+ psdf = ps.from_pandas(self.pdf5)
+ pdf = self.pdf5
+ psdf[["x"]] = self.psdf5
+ pdf[["x"]] = self.pdf5
+
+ self.assert_eq(psdf.sort_index(), pdf.sort_index())
+
+ psdf = ps.from_pandas(self.pdf6)
+ pdf = self.pdf6
+ psdf[["x", "y"]] = self.psdf6
+ pdf[["x", "y"]] = self.pdf6
+
+ self.assert_eq(psdf.sort_index(), pdf.sort_index())
+
+
+class AssignFrameTests(
+ AssignFrameMixin,
+ PandasOnSparkTestCase,
+ SQLTestUtils,
+):
+ pass
+
+
+if __name__ == "__main__":
+ import unittest
+ from pyspark.pandas.tests.diff_frames_ops.test_assign_frame import * #
noqa: F401
+
+ try:
+ import xmlrunner
+
+ testRunner = xmlrunner.XMLTestRunner(output="target/test-reports",
verbosity=2)
+ except ImportError:
+ testRunner = None
+ unittest.main(testRunner=testRunner, verbosity=2)
diff --git a/python/pyspark/pandas/tests/diff_frames_ops/test_assign_series.py
b/python/pyspark/pandas/tests/diff_frames_ops/test_assign_series.py
new file mode 100644
index 000000000000..338214c99e12
--- /dev/null
+++ b/python/pyspark/pandas/tests/diff_frames_ops/test_assign_series.py
@@ -0,0 +1,241 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import pandas as pd
+
+from pyspark import pandas as ps
+from pyspark.pandas.config import set_option, reset_option
+from pyspark.testing.pandasutils import PandasOnSparkTestCase
+from pyspark.testing.sqlutils import SQLTestUtils
+
+
+class AssignSeriesMixin:
+ @property
+ def pdf1(self):
+ return pd.DataFrame(
+ {"a": [1, 2, 3, 4, 5, 6, 7, 8, 9], "b": [4, 5, 6, 3, 2, 1, 0, 0,
0]},
+ index=[0, 1, 3, 5, 6, 8, 9, 10, 11],
+ )
+
+ @property
+ def pdf2(self):
+ return pd.DataFrame(
+ {"a": [9, 8, 7, 6, 5, 4, 3, 2, 1], "b": [0, 0, 0, 4, 5, 6, 1, 2,
3]},
+ index=list(range(9)),
+ )
+
+ @property
+ def pdf3(self):
+ return pd.DataFrame(
+ {"b": [1, 1, 1, 1, 1, 1, 1, 1, 1], "c": [1, 1, 1, 1, 1, 1, 1, 1,
1]},
+ index=list(range(9)),
+ )
+
+ @property
+ def pdf4(self):
+ return pd.DataFrame(
+ {"e": [2, 2, 2, 2, 2, 2, 2, 2, 2], "f": [2, 2, 2, 2, 2, 2, 2, 2,
2]},
+ index=list(range(9)),
+ )
+
+ @property
+ def pdf5(self):
+ return pd.DataFrame(
+ {
+ "a": [1, 2, 3, 4, 5, 6, 7, 8, 9],
+ "b": [4, 5, 6, 3, 2, 1, 0, 0, 0],
+ "c": [4, 5, 6, 3, 2, 1, 0, 0, 0],
+ },
+ index=[0, 1, 3, 5, 6, 8, 9, 10, 11],
+ ).set_index(["a", "b"])
+
+ @property
+ def pdf6(self):
+ return pd.DataFrame(
+ {
+ "a": [9, 8, 7, 6, 5, 4, 3, 2, 1],
+ "b": [0, 0, 0, 4, 5, 6, 1, 2, 3],
+ "c": [9, 8, 7, 6, 5, 4, 3, 2, 1],
+ "e": [4, 5, 6, 3, 2, 1, 0, 0, 0],
+ },
+ index=list(range(9)),
+ ).set_index(["a", "b"])
+
+ @property
+ def pser1(self):
+ midx = pd.MultiIndex(
+ [["lama", "cow", "falcon", "koala"], ["speed", "weight", "length",
"power"]],
+ [[0, 3, 1, 1, 1, 2, 2, 2], [0, 2, 0, 3, 2, 0, 1, 3]],
+ )
+ return pd.Series([45, 200, 1.2, 30, 250, 1.5, 320, 1], index=midx)
+
+ @property
+ def pser2(self):
+ midx = pd.MultiIndex(
+ [["lama", "cow", "falcon"], ["speed", "weight", "length"]],
+ [[0, 0, 0, 1, 1, 1, 2, 2, 2], [0, 1, 2, 0, 1, 2, 0, 1, 2]],
+ )
+ return pd.Series([-45, 200, -1.2, 30, -250, 1.5, 320, 1, -0.3],
index=midx)
+
+ @property
+ def pser3(self):
+ midx = pd.MultiIndex(
+ [["koalas", "cow", "falcon"], ["speed", "weight", "length"]],
+ [[0, 0, 0, 1, 1, 1, 2, 2, 2], [1, 1, 2, 0, 0, 2, 2, 2, 1]],
+ )
+ return pd.Series([45, 200, 1.2, 30, 250, 1.5, 320, 1, 0.3], index=midx)
+
+ @property
+ def psdf1(self):
+ return ps.from_pandas(self.pdf1)
+
+ @property
+ def psdf2(self):
+ return ps.from_pandas(self.pdf2)
+
+ @property
+ def psdf3(self):
+ return ps.from_pandas(self.pdf3)
+
+ @property
+ def psdf4(self):
+ return ps.from_pandas(self.pdf4)
+
+ @property
+ def psdf5(self):
+ return ps.from_pandas(self.pdf5)
+
+ @property
+ def psdf6(self):
+ return ps.from_pandas(self.pdf6)
+
+ @classmethod
+ def setUpClass(cls):
+ super().setUpClass()
+ set_option("compute.ops_on_diff_frames", True)
+
+ @classmethod
+ def tearDownClass(cls):
+ reset_option("compute.ops_on_diff_frames")
+ super().tearDownClass()
+
+ def test_assignment_series(self):
+ psdf = ps.from_pandas(self.pdf1)
+ pdf = self.pdf1
+ psser = psdf.a
+ pser = pdf.a
+ psdf["a"] = self.psdf2.a
+ pdf["a"] = self.pdf2.a
+
+ self.assert_eq(psdf.sort_index(), pdf.sort_index())
+ self.assert_eq(psser, pser)
+
+ psdf = ps.from_pandas(self.pdf1)
+ pdf = self.pdf1
+ psser = psdf.a
+ pser = pdf.a
+ psdf["a"] = self.psdf2.b
+ pdf["a"] = self.pdf2.b
+
+ self.assert_eq(psdf.sort_index(), pdf.sort_index())
+ self.assert_eq(psser, pser)
+
+ psdf = ps.from_pandas(self.pdf1)
+ pdf = self.pdf1
+ psdf["c"] = self.psdf2.a
+ pdf["c"] = self.pdf2.a
+
+ self.assert_eq(psdf.sort_index(), pdf.sort_index())
+
+ # Multi-index columns
+ psdf = ps.from_pandas(self.pdf1)
+ pdf = self.pdf1
+ columns = pd.MultiIndex.from_tuples([("x", "a"), ("x", "b")])
+ psdf.columns = columns
+ pdf.columns = columns
+ psdf[("y", "c")] = self.psdf2.a
+ pdf[("y", "c")] = self.pdf2.a
+
+ self.assert_eq(psdf.sort_index(), pdf.sort_index())
+
+ pdf = pd.DataFrame({"a": [1, 2, 3], "Koalas": [0, 1,
2]}).set_index("Koalas", drop=False)
+ psdf = ps.from_pandas(pdf)
+
+ psdf.index.name = None
+ psdf["NEW"] = ps.Series([100, 200, 300])
+
+ pdf.index.name = None
+ pdf["NEW"] = pd.Series([100, 200, 300])
+
+ self.assert_eq(psdf.sort_index(), pdf.sort_index())
+
+ def test_assignment_series_chain(self):
+ psdf = ps.from_pandas(self.pdf1)
+ pdf = self.pdf1
+ psdf["a"] = self.psdf1.a
+ pdf["a"] = self.pdf1.a
+
+ psdf["a"] = self.psdf2.b
+ pdf["a"] = self.pdf2.b
+
+ psdf["d"] = self.psdf3.c
+ pdf["d"] = self.pdf3.c
+
+ self.assert_eq(psdf.sort_index(), pdf.sort_index())
+
+ def test_multi_index_assignment_series(self):
+ psdf = ps.from_pandas(self.pdf5)
+ pdf = self.pdf5
+ psdf["x"] = self.psdf6.e
+ pdf["x"] = self.pdf6.e
+
+ self.assert_eq(psdf.sort_index(), pdf.sort_index())
+
+ psdf = ps.from_pandas(self.pdf5)
+ pdf = self.pdf5
+ psdf["e"] = self.psdf6.e
+ pdf["e"] = self.pdf6.e
+
+ self.assert_eq(psdf.sort_index(), pdf.sort_index())
+
+ psdf = ps.from_pandas(self.pdf5)
+ pdf = self.pdf5
+ psdf["c"] = self.psdf6.e
+ pdf["c"] = self.pdf6.e
+
+ self.assert_eq(psdf.sort_index(), pdf.sort_index())
+
+
+class AssignSeriesTests(
+ AssignSeriesMixin,
+ PandasOnSparkTestCase,
+ SQLTestUtils,
+):
+ pass
+
+
+if __name__ == "__main__":
+ import unittest
+ from pyspark.pandas.tests.diff_frames_ops.test_assign_series import * #
noqa: F401
+
+ try:
+ import xmlrunner
+
+ testRunner = xmlrunner.XMLTestRunner(output="target/test-reports",
verbosity=2)
+ except ImportError:
+ testRunner = None
+ unittest.main(testRunner=testRunner, verbosity=2)
diff --git a/python/pyspark/pandas/tests/test_ops_on_diff_frames.py
b/python/pyspark/pandas/tests/test_ops_on_diff_frames.py
index 016908f0a9d4..505e96e68752 100644
--- a/python/pyspark/pandas/tests/test_ops_on_diff_frames.py
+++ b/python/pyspark/pandas/tests/test_ops_on_diff_frames.py
@@ -559,136 +559,6 @@ class OpsOnDiffFramesEnabledTestsMixin:
self.assert_eq((psdf1 + psdf4).sort_index(), (pdf1 +
pdf4).sort_index(), almost=True)
- def test_assignment_series(self):
- psdf = ps.from_pandas(self.pdf1)
- pdf = self.pdf1
- psser = psdf.a
- pser = pdf.a
- psdf["a"] = self.psdf2.a
- pdf["a"] = self.pdf2.a
-
- self.assert_eq(psdf.sort_index(), pdf.sort_index())
- self.assert_eq(psser, pser)
-
- psdf = ps.from_pandas(self.pdf1)
- pdf = self.pdf1
- psser = psdf.a
- pser = pdf.a
- psdf["a"] = self.psdf2.b
- pdf["a"] = self.pdf2.b
-
- self.assert_eq(psdf.sort_index(), pdf.sort_index())
- self.assert_eq(psser, pser)
-
- psdf = ps.from_pandas(self.pdf1)
- pdf = self.pdf1
- psdf["c"] = self.psdf2.a
- pdf["c"] = self.pdf2.a
-
- self.assert_eq(psdf.sort_index(), pdf.sort_index())
-
- # Multi-index columns
- psdf = ps.from_pandas(self.pdf1)
- pdf = self.pdf1
- columns = pd.MultiIndex.from_tuples([("x", "a"), ("x", "b")])
- psdf.columns = columns
- pdf.columns = columns
- psdf[("y", "c")] = self.psdf2.a
- pdf[("y", "c")] = self.pdf2.a
-
- self.assert_eq(psdf.sort_index(), pdf.sort_index())
-
- pdf = pd.DataFrame({"a": [1, 2, 3], "Koalas": [0, 1,
2]}).set_index("Koalas", drop=False)
- psdf = ps.from_pandas(pdf)
-
- psdf.index.name = None
- psdf["NEW"] = ps.Series([100, 200, 300])
-
- pdf.index.name = None
- pdf["NEW"] = pd.Series([100, 200, 300])
-
- self.assert_eq(psdf.sort_index(), pdf.sort_index())
-
- def test_assignment_frame(self):
- psdf = ps.from_pandas(self.pdf1)
- pdf = self.pdf1
- psser = psdf.a
- pser = pdf.a
- psdf[["a", "b"]] = self.psdf1
- pdf[["a", "b"]] = self.pdf1
-
- self.assert_eq(psdf.sort_index(), pdf.sort_index())
- self.assert_eq(psser, pser)
-
- # 'c' does not exist in `psdf`.
- psdf = ps.from_pandas(self.pdf1)
- pdf = self.pdf1
- psser = psdf.a
- pser = pdf.a
- psdf[["b", "c"]] = self.psdf1
- pdf[["b", "c"]] = self.pdf1
-
- self.assert_eq(psdf.sort_index(), pdf.sort_index())
- self.assert_eq(psser, pser)
-
- # 'c' and 'd' do not exist in `psdf`.
- psdf = ps.from_pandas(self.pdf1)
- pdf = self.pdf1
- psdf[["c", "d"]] = self.psdf1
- pdf[["c", "d"]] = self.pdf1
-
- self.assert_eq(psdf.sort_index(), pdf.sort_index())
-
- # Multi-index columns
- columns = pd.MultiIndex.from_tuples([("x", "a"), ("x", "b")])
- psdf = ps.from_pandas(self.pdf1)
- pdf = self.pdf1
- psdf.columns = columns
- pdf.columns = columns
- psdf[[("y", "c"), ("z", "d")]] = self.psdf1
- pdf[[("y", "c"), ("z", "d")]] = self.pdf1
-
- self.assert_eq(psdf.sort_index(), pdf.sort_index())
-
- psdf = ps.from_pandas(self.pdf1)
- pdf = self.pdf1
- psdf1 = ps.from_pandas(self.pdf1)
- pdf1 = self.pdf1
- psdf1.columns = columns
- pdf1.columns = columns
- psdf[["c", "d"]] = psdf1
- pdf[["c", "d"]] = pdf1
-
- self.assert_eq(psdf.sort_index(), pdf.sort_index())
-
- def test_assignment_series_chain(self):
- psdf = ps.from_pandas(self.pdf1)
- pdf = self.pdf1
- psdf["a"] = self.psdf1.a
- pdf["a"] = self.pdf1.a
-
- psdf["a"] = self.psdf2.b
- pdf["a"] = self.pdf2.b
-
- psdf["d"] = self.psdf3.c
- pdf["d"] = self.pdf3.c
-
- self.assert_eq(psdf.sort_index(), pdf.sort_index())
-
- def test_assignment_frame_chain(self):
- psdf = ps.from_pandas(self.pdf1)
- pdf = self.pdf1
- psdf[["a", "b"]] = self.psdf1
- pdf[["a", "b"]] = self.pdf1
-
- psdf[["e", "f"]] = self.psdf3
- pdf[["e", "f"]] = self.pdf3
-
- psdf[["b", "c"]] = self.psdf2
- pdf[["b", "c"]] = self.pdf2
-
- self.assert_eq(psdf.sort_index(), pdf.sort_index())
-
def test_multi_index_arithmetic(self):
psdf5 = self.psdf5
psdf6 = self.psdf6
@@ -703,50 +573,6 @@ class OpsOnDiffFramesEnabledTestsMixin:
# DataFrame
self.assert_eq((psdf5 + psdf6).sort_index(), (pdf5 +
pdf6).sort_index(), almost=True)
- def test_multi_index_assignment_series(self):
- psdf = ps.from_pandas(self.pdf5)
- pdf = self.pdf5
- psdf["x"] = self.psdf6.e
- pdf["x"] = self.pdf6.e
-
- self.assert_eq(psdf.sort_index(), pdf.sort_index())
-
- psdf = ps.from_pandas(self.pdf5)
- pdf = self.pdf5
- psdf["e"] = self.psdf6.e
- pdf["e"] = self.pdf6.e
-
- self.assert_eq(psdf.sort_index(), pdf.sort_index())
-
- psdf = ps.from_pandas(self.pdf5)
- pdf = self.pdf5
- psdf["c"] = self.psdf6.e
- pdf["c"] = self.pdf6.e
-
- self.assert_eq(psdf.sort_index(), pdf.sort_index())
-
- def test_multi_index_assignment_frame(self):
- psdf = ps.from_pandas(self.pdf5)
- pdf = self.pdf5
- psdf[["c"]] = self.psdf5
- pdf[["c"]] = self.pdf5
-
- self.assert_eq(psdf.sort_index(), pdf.sort_index())
-
- psdf = ps.from_pandas(self.pdf5)
- pdf = self.pdf5
- psdf[["x"]] = self.psdf5
- pdf[["x"]] = self.pdf5
-
- self.assert_eq(psdf.sort_index(), pdf.sort_index())
-
- psdf = ps.from_pandas(self.pdf6)
- pdf = self.pdf6
- psdf[["x", "y"]] = self.psdf6
- pdf[["x", "y"]] = self.pdf6
-
- self.assert_eq(psdf.sort_index(), pdf.sort_index())
-
class OpsOnDiffFramesEnabledTests(
OpsOnDiffFramesEnabledTestsMixin, PandasOnSparkTestCase, SQLTestUtils
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]