[
https://issues.apache.org/jira/browse/BEAM-13966?focusedWorklogId=752516&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-752516
]
ASF GitHub Bot logged work on BEAM-13966:
-----------------------------------------
Author: ASF GitHub Bot
Created on: 04/Apr/22 21:30
Start Date: 04/Apr/22 21:30
Worklog Time Spent: 10m
Work Description: TheNeuralBit commented on code in PR #17043:
URL: https://github.com/apache/beam/pull/17043#discussion_r842156113
##########
sdks/python/apache_beam/dataframe/frames_test.py:
##########
@@ -1431,6 +1433,165 @@ def test_unstack_series_multiple_index_and_column_levels(self):
self._run_test(lambda df: df.unstack(level=['second', 'third']), df)
self._run_test(lambda df: df.unstack(level=['second']), df)
+ def test_pivot_non_categorical(self):
+ df = pd.DataFrame({
+ 'foo': ['one', 'one', 'one', 'two', 'two', 'two'],
+ 'bar': ['A', 'B', 'C', 'A', 'B', 'C'],
+ 'baz': [1, 2, 3, 4, 5, 6],
+ 'zoo': ['x', 'y', 'z', 'q', 'w', 't']
+ })
+ with self.assertRaisesRegex(
+ frame_base.WontImplementError,
+ r"pivot\(\) of non-categorical type is not supported"):
+ self._run_test(
+ lambda df: df.pivot(index='foo', columns='bar', values='baz'), df)
+
+ def test_pivot_pandas_example1(self):
+ # Simple test 1
+ df = pd.DataFrame({
+ 'foo': ['one', 'one', 'one', 'two', 'two', 'two'],
+ 'bar': ['A', 'B', 'C', 'A', 'B', 'C'],
+ 'baz': [1, 2, 3, 4, 5, 6],
+ 'zoo': ['x', 'y', 'z', 'q', 'w', 't']
+ })
+ df['bar'] = df['bar'].astype(
+ pd.CategoricalDtype(categories=['A', 'B', 'C']))
+ self._run_test(
+ lambda df: df.pivot(index='foo', columns='bar', values='baz'), df)
+ self._run_test(
+ lambda df: df.pivot(index=['foo'], columns='bar', values='baz'), df)
+
+ def test_pivot_pandas_example3(self):
+ # Multiple values
+ df = pd.DataFrame({
+ 'foo': ['one', 'one', 'one', 'two', 'two', 'two'],
+ 'bar': ['A', 'B', 'C', 'A', 'B', 'C'],
+ 'baz': [1, 2, 3, 4, 5, 6],
+ 'zoo': ['x', 'y', 'z', 'q', 'w', 't']
+ })
+ df['bar'] = df['bar'].astype(
+ pd.CategoricalDtype(categories=['A', 'B', 'C']))
+ self._run_test(
+ lambda df: df.pivot(index='foo', columns='bar', values=['baz', 'zoo']),
+ df)
+ self._run_test(
+ lambda df: df.pivot(
+ index='foo', columns=['bar'], values=['baz', 'zoo']),
+ df)
+
+ def test_pivot_pandas_example4(self):
+ # Multiple columns
+ df = pd.DataFrame({
+ "lev1": [1, 1, 1, 2, 2, 2],
+ "lev2": [1, 1, 2, 1, 1, 2],
+ "lev3": [1, 2, 1, 2, 1, 2],
+ "lev4": [1, 2, 3, 4, 5, 6],
+ "values": [0, 1, 2, 3, 4, 5]
+ })
+ df['lev2'] = df['lev2'].astype(pd.CategoricalDtype(categories=[1, 2]))
+ df['lev3'] = df['lev3'].astype(pd.CategoricalDtype(categories=[1, 2]))
+ df['values'] = df['values'].astype('Int64')
+ self._run_test(
+ lambda df: df.pivot(
+ index="lev1", columns=["lev2", "lev3"], values="values"),
+ df)
+
+ def test_pivot_pandas_example5(self):
+ # Multiple index
+ df = pd.DataFrame({
+ "lev1": [1, 1, 1, 2, 2, 2],
+ "lev2": [1, 1, 2, 1, 1, 2],
+ "lev3": [1, 2, 1, 2, 1, 2],
+ "lev4": [1, 2, 3, 4, 5, 6],
+ "values": [0, 1, 2, 3, 4, 5]
+ })
+ df['lev3'] = df['lev3'].astype(pd.CategoricalDtype(categories=[1, 2]))
+ # Cast to nullable Int64 because Beam doesn't do the correct conversion to
+ # float64
+ df['values'] = df['values'].astype('Int64')
+ if PD_VERSION < (1, 4):
+ with self.assertRaisesRegex(
+ frame_base.WontImplementError,
+ r"pivot\(\) is not supported when pandas<1.4 and index is a Multi"):
+ self._run_test(
+ lambda df: df.pivot(
+ index=["lev1", "lev2"], columns=["lev3"], values="values"),
+ df)
+ else:
+ self._run_test(
+ lambda df: df.pivot(
+ index=["lev1", "lev2"], columns=["lev3"], values="values"),
+ df)
+
+ def test_pivot_pandas_example6(self):
+ # Value error when there are duplicates
+ df = pd.DataFrame({
+ "foo": ['one', 'one', 'two', 'two'],
+ "bar": ['A', 'A', 'B', 'C'],
+ "baz": [1, 2, 3, 4]
+ })
+ df['bar'] = df['bar'].astype(
+ pd.CategoricalDtype(categories=['A', 'B', 'C']))
+ self._run_error_test(
+ lambda df: df.pivot(index='foo', columns='bar', values='baz'),
+ df,
+ construction_time=False)
+
+ def test_pivot_no_index_provided_on_single_level_index(self):
+ # Multiple columns, no index value provided
+ df = pd.DataFrame({
+ "lev1": [1, 1, 1, 2, 2, 2],
+ "lev2": [1, 1, 2, 1, 1, 2],
+ "lev3": [1, 2, 1, 2, 1, 2],
+ "lev4": [1, 2, 3, 4, 5, 6],
+ "values": [0, 1, 2, 3, 4, 5]
+ })
+ df['lev2'] = df['lev2'].astype(pd.CategoricalDtype(categories=[1, 2]))
+ df['lev3'] = df['lev3'].astype(pd.CategoricalDtype(categories=[1, 2]))
+ df['values'] = df['values'].astype('Int64')
+ self._run_test(
+ lambda df: df.pivot(columns=["lev2", "lev3"], values="values"), df)
+
+ def test_pivot_no_index_provided_on_multiindex(self):
+ # Multiple columns, no index value provided
+ tuples = list(
+ zip(
+ *[
+ ["bar", "bar", "bar", "baz", "baz", "baz"],
+ [
+ "one",
+ "two",
+ "three",
+ "one",
+ "two",
+ "three",
+ ],
+ ]))
+ index = pd.MultiIndex.from_tuples(tuples, names=["first", "second"])
+ df = pd.DataFrame({
+ "lev1": [1, 1, 1, 2, 2, 2],
+ "lev2": [1, 1, 2, 1, 1, 2],
+ "lev3": [1, 2, 1, 2, 1, 2],
+ "lev4": [1, 2, 3, 4, 5, 6],
+ "values": [0, 1, 2, 3, 4, 5]
+ },
+ index=index)
+ df['lev2'] = df['lev2'].astype(pd.CategoricalDtype(categories=[1, 2]))
+ df['lev3'] = df['lev3'].astype(pd.CategoricalDtype(categories=[1, 2]))
+ df['values'] = df['values'].astype('Float64')
Review Comment:
This is failing only with pandas 1.1; maybe the capitalized dtype aliases
(such as 'Float64') don't work there?
Issue Time Tracking
-------------------
Worklog Id: (was: 752516)
Time Spent: 5.5h (was: 5h 20m)
> Implement DataFrame.pivot() for DataFrame API
> ---------------------------------------------
>
> Key: BEAM-13966
> URL: https://issues.apache.org/jira/browse/BEAM-13966
> Project: Beam
> Issue Type: Sub-task
> Components: dsl-dataframe, sdk-py-core
> Reporter: Andy Ye
> Assignee: Andy Ye
> Priority: P3
> Labels: dataframe-api
> Time Spent: 5.5h
> Remaining Estimate: 0h
>
--
This message was sent by Atlassian Jira
(v8.20.1#820001)