Slight typo, resending..
def pivot_table(arrow_table, pivot_column, pivot_key_column,
                pivot_value_column):
    """
    Pivot the distinct values of one column into separate columns.

    For every distinct value found in ``pivot_column`` a new column named
    after that value is joined onto a table of the distinct keys.

    :param arrow_table: table to pivot
    :param pivot_column: column whose distinct values become the new
        column names
    :param pivot_key_column: key column; the result starts from its
        distinct values and every per-value slice is joined on it
    :param pivot_value_column: column holding the values placed in the
        new columns
    :return: pivot table
    """
    unique_keys = pc.unique(arrow_table[pivot_key_column])
    pivot_values = arrow_table[pivot_column]
    unique_values = pc.unique(pivot_values)
    result = pa.Table.from_arrays([unique_keys], names=[pivot_key_column])
    for value in unique_values:
        # Rows of the input that carry this particular pivot value.
        matching_rows = pc.filter(arrow_table, pc.equal(pivot_values, value))
        # Select key and value by name so the function does not depend on
        # column order or on the table having exactly three columns.
        value_slice = matching_rows.select(
            [pivot_key_column, pivot_value_column]
        ).rename_columns([pivot_key_column, str(value)])
        result = result.join(value_slice, pivot_key_column)
    return result
>>> abc = pivot_table(animals, "body_part", "animal", "number")
>>> abc
pyarrow.Table
animal: string
n_legs: int64
n_wings: int64
n_tails: int64
----
animal: [["dog","ant","bird"]]
n_legs: [[4,6,2]]
n_wings: [[0,0,2]]
n_tails: [[1,0,1]]
-----Original Message-----
From: Lee, David
Sent: Monday, June 27, 2022 11:29 AM
To: '[email protected]' <[email protected]>
Subject: RE: Using pyarrow to pivot rows into columns
I ended up writing a function to handle this..
def pivot_table(arrow_table, pivot_column, pivot_key_column,
                pivot_value_column):
    """
    Pivot the distinct values of one column into separate columns.

    :param arrow_table: table to pivot
    :param pivot_column: column to pivot; its distinct values become the
        new column names
    :param pivot_key_column: key column
    :param pivot_value_column: column with pivot values. NOTE: this version
        accepts the argument but does not read it; the value column is
        whatever column remains next to the key once ``pivot_column`` has
        been removed, so the input must have exactly three columns with
        the key column ahead of the value column.
    :return: pivot table
    """
    unique_keys = pc.unique(arrow_table[pivot_key_column])
    unique_values = pc.unique(arrow_table[pivot_column])
    # Look the pivot column up by name rather than assuming it is column 1.
    pivot_index = arrow_table.column_names.index(pivot_column)
    pivoted = pa.Table.from_arrays([unique_keys], names=[pivot_key_column])
    for value in unique_values:
        # Keep only the rows that carry this pivot value.
        rows = pc.filter(
            arrow_table, pc.equal(arrow_table[pivot_column], value)
        )
        # Drop the pivot column and name the remaining value column after
        # the pivot value before joining it on the key.
        renamed = rows.remove_column(pivot_index).rename_columns(
            [pivot_key_column, str(value)]
        )
        pivoted = pivoted.join(renamed, pivot_key_column)
    return pivoted
>>> abc = pivot_table(animals, "body_part", "animal", "body_part")
>>> abc
pyarrow.Table
animal: string
n_legs: int64
n_wings: int64
n_tails: int64
----
animal: [["dog","ant","bird"]]
n_legs: [[4,6,2]]
n_wings: [[0,0,2]]
n_tails: [[1,0,1]]
-----Original Message-----
From: Lee, David
Sent: Friday, June 24, 2022 2:58 PM
To: '[email protected]' <[email protected]>
Subject: Using pyarrow to pivot rows into columns
I'm trying to convert row values into columns and I can't figure out how to
apply pyarrow.compute.case_when to a .groupby + .aggregation to wrangle data.
Any suggestions?
Original Data Structure: with body_part and number columns
>>> animals
pyarrow.Table
animal: string
body_part: string
number: int64
----
animal: [["dog","ant","bird","dog","ant","bird","dog","ant","bird"]]
body_part:
[["n_legs","n_legs","n_legs","n_wings","n_wings","n_wings","n_tails","n_tails","n_tails"]]
number: [[4,6,2,0,0,2,1,0,1]]
>>> a = pa.array(["dog", "ant", "bird"])
>>> b = pa.array([4, 6, 2])
>>> c = pa.array([0, 0, 2])
>>> d = pa.array([1, 0, 1])
>>> names = ['animal', 'n_legs', 'n_wings', 'n_tails']
Desired Data Structure: with body_part converted into columns
>>> animals_pivot
pyarrow.Table
animal: string
n_legs: int64
n_wings: int64
n_tails: int64
----
animal: [["dog","ant","bird"]]
n_legs: [[4,6,2]]
n_wings: [[0,0,2]]
n_tails: [[1,0,1]]
------------------------------------------------------------------------------
Some code:
import pyarrow as pa
# Long-format input: one row per (animal, body_part) pair.
a = pa.array(["dog", "ant", "bird", "dog", "ant", "bird", "dog", "ant",
              "bird"])
b = pa.array(["n_legs", "n_legs", "n_legs", "n_wings", "n_wings",
              "n_wings", "n_tails", "n_tails", "n_tails"])
c = pa.array([4, 6, 2, 0, 0, 2, 1, 0, 1])
names = ['animal', 'body_part', 'number']
animals = pa.Table.from_arrays([a, b, c], names=names)

# Desired wide-format result: one row per animal, one column per body part.
# The names a, b, c and names are rebound here, as in the original snippet.
a = pa.array(["dog", "ant", "bird"])
b = pa.array([4, 6, 2])
c = pa.array([0, 0, 2])
d = pa.array([1, 0, 1])
names = ['animal', 'n_legs', 'n_wings', 'n_tails']
animals_pivot = pa.Table.from_arrays([a, b, c, d], names=names)
This message may contain information that is confidential or privileged. If you
are not the intended recipient, please advise the sender immediately and delete
this message. See
http://www.blackrock.com/corporate/compliance/email-disclaimers for further
information. Please refer to
http://www.blackrock.com/corporate/compliance/privacy-policy for more
information about BlackRock’s Privacy Policy.
For a list of BlackRock's office addresses worldwide, see
http://www.blackrock.com/corporate/about-us/contacts-locations.
© 2022 BlackRock, Inc. All rights reserved.