[ 
https://issues.apache.org/jira/browse/BEAM-13966?focusedWorklogId=752516&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-752516
 ]

ASF GitHub Bot logged work on BEAM-13966:
-----------------------------------------

                Author: ASF GitHub Bot
            Created on: 04/Apr/22 21:30
            Start Date: 04/Apr/22 21:30
    Worklog Time Spent: 10m 
      Work Description: TheNeuralBit commented on code in PR #17043:
URL: https://github.com/apache/beam/pull/17043#discussion_r842156113


##########
sdks/python/apache_beam/dataframe/frames_test.py:
##########
@@ -1431,6 +1433,165 @@ def test_unstack_series_multiple_index_and_column_levels(self):
     self._run_test(lambda df: df.unstack(level=['second', 'third']), df)
     self._run_test(lambda df: df.unstack(level=['second']), df)
 
+  def test_pivot_non_categorical(self):
+    df = pd.DataFrame({
+        'foo': ['one', 'one', 'one', 'two', 'two', 'two'],
+        'bar': ['A', 'B', 'C', 'A', 'B', 'C'],
+        'baz': [1, 2, 3, 4, 5, 6],
+        'zoo': ['x', 'y', 'z', 'q', 'w', 't']
+    })
+    with self.assertRaisesRegex(
+        frame_base.WontImplementError,
+        r"pivot\(\) of non-categorical type is not supported"):
+      self._run_test(
+          lambda df: df.pivot(index='foo', columns='bar', values='baz'), df)
+
+  def test_pivot_pandas_example1(self):
+    # Simple test 1
+    df = pd.DataFrame({
+        'foo': ['one', 'one', 'one', 'two', 'two', 'two'],
+        'bar': ['A', 'B', 'C', 'A', 'B', 'C'],
+        'baz': [1, 2, 3, 4, 5, 6],
+        'zoo': ['x', 'y', 'z', 'q', 'w', 't']
+    })
+    df['bar'] = df['bar'].astype(
+        pd.CategoricalDtype(categories=['A', 'B', 'C']))
+    self._run_test(
+        lambda df: df.pivot(index='foo', columns='bar', values='baz'), df)
+    self._run_test(
+        lambda df: df.pivot(index=['foo'], columns='bar', values='baz'), df)
+
+  def test_pivot_pandas_example3(self):
+    # Multiple values
+    df = pd.DataFrame({
+        'foo': ['one', 'one', 'one', 'two', 'two', 'two'],
+        'bar': ['A', 'B', 'C', 'A', 'B', 'C'],
+        'baz': [1, 2, 3, 4, 5, 6],
+        'zoo': ['x', 'y', 'z', 'q', 'w', 't']
+    })
+    df['bar'] = df['bar'].astype(
+        pd.CategoricalDtype(categories=['A', 'B', 'C']))
+    self._run_test(
+        lambda df: df.pivot(index='foo', columns='bar', values=['baz', 'zoo']),
+        df)
+    self._run_test(
+        lambda df: df.pivot(
+            index='foo', columns=['bar'], values=['baz', 'zoo']),
+        df)
+
+  def test_pivot_pandas_example4(self):
+    # Multiple columns
+    df = pd.DataFrame({
+        "lev1": [1, 1, 1, 2, 2, 2],
+        "lev2": [1, 1, 2, 1, 1, 2],
+        "lev3": [1, 2, 1, 2, 1, 2],
+        "lev4": [1, 2, 3, 4, 5, 6],
+        "values": [0, 1, 2, 3, 4, 5]
+    })
+    df['lev2'] = df['lev2'].astype(pd.CategoricalDtype(categories=[1, 2]))
+    df['lev3'] = df['lev3'].astype(pd.CategoricalDtype(categories=[1, 2]))
+    df['values'] = df['values'].astype('Int64')
+    self._run_test(
+        lambda df: df.pivot(
+            index="lev1", columns=["lev2", "lev3"], values="values"),
+        df)
+
+  def test_pivot_pandas_example5(self):
+    # Multiple index
+    df = pd.DataFrame({
+        "lev1": [1, 1, 1, 2, 2, 2],
+        "lev2": [1, 1, 2, 1, 1, 2],
+        "lev3": [1, 2, 1, 2, 1, 2],
+        "lev4": [1, 2, 3, 4, 5, 6],
+        "values": [0, 1, 2, 3, 4, 5]
+    })
+    df['lev3'] = df['lev3'].astype(pd.CategoricalDtype(categories=[1, 2]))
+    # Cast to nullable Int64 because Beam doesn't do the correct conversion to
+    # float64
+    df['values'] = df['values'].astype('Int64')
+    if PD_VERSION < (1, 4):
+      with self.assertRaisesRegex(
+          frame_base.WontImplementError,
+          r"pivot\(\) is not supported when pandas<1.4 and index is a Multi"):
+        self._run_test(
+            lambda df: df.pivot(
+                index=["lev1", "lev2"], columns=["lev3"], values="values"),
+            df)
+    else:
+      self._run_test(
+          lambda df: df.pivot(
+              index=["lev1", "lev2"], columns=["lev3"], values="values"),
+          df)
+
+  def test_pivot_pandas_example6(self):
+    # Value error when there are duplicates
+    df = pd.DataFrame({
+        "foo": ['one', 'one', 'two', 'two'],
+        "bar": ['A', 'A', 'B', 'C'],
+        "baz": [1, 2, 3, 4]
+    })
+    df['bar'] = df['bar'].astype(
+        pd.CategoricalDtype(categories=['A', 'B', 'C']))
+    self._run_error_test(
+        lambda df: df.pivot(index='foo', columns='bar', values='baz'),
+        df,
+        construction_time=False)
+
+  def test_pivot_no_index_provided_on_single_level_index(self):
+    # Multiple columns, no index value provided
+    df = pd.DataFrame({
+        "lev1": [1, 1, 1, 2, 2, 2],
+        "lev2": [1, 1, 2, 1, 1, 2],
+        "lev3": [1, 2, 1, 2, 1, 2],
+        "lev4": [1, 2, 3, 4, 5, 6],
+        "values": [0, 1, 2, 3, 4, 5]
+    })
+    df['lev2'] = df['lev2'].astype(pd.CategoricalDtype(categories=[1, 2]))
+    df['lev3'] = df['lev3'].astype(pd.CategoricalDtype(categories=[1, 2]))
+    df['values'] = df['values'].astype('Int64')
+    self._run_test(
+        lambda df: df.pivot(columns=["lev2", "lev3"], values="values"), df)
+
+  def test_pivot_no_index_provided_on_multiindex(self):
+    # Multiple columns, no index value provided
+    tuples = list(
+        zip(
+            *[
+                ["bar", "bar", "bar", "baz", "baz", "baz"],
+                [
+                    "one",
+                    "two",
+                    "three",
+                    "one",
+                    "two",
+                    "three",
+                ],
+            ]))
+    index = pd.MultiIndex.from_tuples(tuples, names=["first", "second"])
+    df = pd.DataFrame({
+        "lev1": [1, 1, 1, 2, 2, 2],
+        "lev2": [1, 1, 2, 1, 1, 2],
+        "lev3": [1, 2, 1, 2, 1, 2],
+        "lev4": [1, 2, 3, 4, 5, 6],
+        "values": [0, 1, 2, 3, 4, 5]
+    },
+                      index=index)
+    df['lev2'] = df['lev2'].astype(pd.CategoricalDtype(categories=[1, 2]))
+    df['lev3'] = df['lev3'].astype(pd.CategoricalDtype(categories=[1, 2]))
+    df['values'] = df['values'].astype('Float64')

Review Comment:
   This is failing only with pandas 1.1; maybe the capitalized dtype aliases (such as 'Float64') don't work there?





Issue Time Tracking
-------------------

    Worklog Id:     (was: 752516)
    Time Spent: 5.5h  (was: 5h 20m)

> Implement DataFrame.pivot() for DataFrame API
> ---------------------------------------------
>
>                 Key: BEAM-13966
>                 URL: https://issues.apache.org/jira/browse/BEAM-13966
>             Project: Beam
>          Issue Type: Sub-task
>          Components: dsl-dataframe, sdk-py-core
>            Reporter: Andy Ye
>            Assignee: Andy Ye
>            Priority: P3
>              Labels: dataframe-api
>          Time Spent: 5.5h
>  Remaining Estimate: 0h
>




--
This message was sent by Atlassian Jira
(v8.20.1#820001)

Reply via email to