yeandy commented on a change in pull request #17043:
URL: https://github.com/apache/beam/pull/17043#discussion_r837878826
##########
File path: sdks/python/apache_beam/dataframe/frames_test.py
##########
@@ -1295,6 +1295,114 @@ def s_times_shuffled(times, s):
self._run_test(lambda s: s.pipe(s_times, 2), s)
self._run_test(lambda s: s.pipe((s_times_shuffled, 's'), 2), s)
+ def test_pivot_non_categorical(self):
+ df = pd.DataFrame({
+ 'foo': ['one', 'one', 'one', 'two', 'two', 'two'],
+ 'bar': ['A', 'B', 'C', 'A', 'B', 'C'],
+ 'baz': [1, 2, 3, 4, 5, 6],
+ 'zoo': ['x', 'y', 'z', 'q', 'w', 't']
+ })
+ with self.assertRaisesRegex(
+ frame_base.WontImplementError,
+ r"pivot\(\) of non-categorical type is not supported"):
+ self._run_test(
+ lambda df: df.pivot(index='foo', columns='bar', values='baz'), df)
+
+ def test_pivot_pandas_example1(self):
+ # Simple test 1
+ df = pd.DataFrame({
+ 'foo': ['one', 'one', 'one', 'two', 'two', 'two'],
+ 'bar': ['A', 'B', 'C', 'A', 'B', 'C'],
+ 'baz': [1, 2, 3, 4, 5, 6],
+ 'zoo': ['x', 'y', 'z', 'q', 'w', 't']
+ })
+ df['bar'] = df['bar'].astype(
+ pd.CategoricalDtype(categories=['A', 'B', 'C']))
+ self._run_test(
+ lambda df: df.pivot(index='foo', columns='bar', values='baz'), df)
+
+ def test_pivot_pandas_example3(self):
+ # Multiple values
+ df = pd.DataFrame({
+ 'foo': ['one', 'one', 'one', 'two', 'two', 'two'],
+ 'bar': ['A', 'B', 'C', 'A', 'B', 'C'],
+ 'baz': [1, 2, 3, 4, 5, 6],
+ 'zoo': ['x', 'y', 'z', 'q', 'w', 't']
+ })
+ df['bar'] = df['bar'].astype(
+ pd.CategoricalDtype(categories=['A', 'B', 'C']))
+ self._run_test(
+ lambda df: df.pivot(index='foo', columns='bar', values=['baz', 'zoo']),
+ df)
+
+ def test_pivot_pandas_example4(self):
+ # Multiple columns
+ df = pd.DataFrame({
+ "lev1": [1, 1, 1, 2, 2, 2],
+ "lev2": [1, 1, 2, 1, 1, 2],
+ "lev3": [1, 2, 1, 2, 1, 2],
+ "lev4": [1, 2, 3, 4, 5, 6],
+ "values": [0, 1, 2, 3, 4, 5]
+ })
+ df['lev2'] = df['lev2'].astype(pd.CategoricalDtype(categories=[1, 2]))
+ df['lev3'] = df['lev3'].astype(pd.CategoricalDtype(categories=[1, 2]))
+ df['values'] = df['values'].astype('Int64')
+ self._run_test(
+ lambda df: df.pivot(
+ index="lev1", columns=["lev2", "lev3"], values="values"),
+ df)
+
+ @unittest.skipIf(
+ PD_VERSION < (1, 4), "Bug in DF.pivot with MultiIndex for pandas < 1.4")
Review comment:
On pandas < 1.4, this pivot fails with
`NotImplementedError: initializing a Series from a MultiIndex is not supported`.
The bug was fixed upstream in pandas
[here](https://github.com/pandas-dev/pandas/pull/45141/files).
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]