westonpace commented on issue #15113:
URL: https://github.com/apache/arrow/issues/15113#issuecomment-1369138029
I think it is possible to do this with extension types, though there may
still be merit in supporting this more properly. CC @jorisvandenbossche
```
import pyarrow as pa
import pyarrow.parquet as pq
class TupleScalar(pa.ExtensionScalar):
    """Scalar for ``TupleType`` that converts to a Python ``tuple``."""

    def as_py(self, **kwargs) -> "tuple | None":
        """Return the stored list as a tuple, or ``None`` for a null.

        Keyword arguments are forwarded unchanged to the storage scalar's
        ``as_py``.
        NOTE(review): newer pyarrow versions appear to pass conversion
        options (e.g. ``maps_as_pydicts``) to ``as_py`` -- confirm against
        the pyarrow version in use.
        """
        storage_scalar = self.value
        # A null extension scalar has no storage value to convert.
        if storage_scalar is None:
            return None
        items = storage_scalar.as_py(**kwargs)
        # Defensive: the storage scalar itself may convert to None.
        if items is None:
            return None
        return tuple(items)
class TupleType(pa.ExtensionType):
    """Extension type whose values convert to Python tuples.

    The storage type is ``pa.list_(item_type)`` and the extension name is
    ``'my_package.tuple'``.
    """

    def __init__(self, item_type):
        """Create a tuple type whose elements have Arrow type ``item_type``."""
        super().__init__(pa.list_(item_type), 'my_package.tuple')

    def __reduce__(self):
        # __init__ requires the item type, so it must be part of the
        # reconstruction arguments; it is recovered from the list storage.
        return type(self), (self.storage_type.value_type,)

    def __arrow_ext_scalar_class__(self):
        """Return the scalar class used for values of this type."""
        return TupleScalar

    def __arrow_ext_serialize__(self):
        # The only parameter (the item type) is already carried by the
        # storage type, so no extra metadata has to be serialized.
        return b''

    @classmethod
    def __arrow_ext_deserialize__(cls, storage_type, serialized):
        """Rebuild the type from its storage type.

        ``serialized`` is the (empty) payload written by
        ``__arrow_ext_serialize__`` and is not needed: the item type is
        read from ``storage_type.value_type``.
        """
        return cls(storage_type.value_type)
# Unfortunately every item type that should be supported has to be
# registered separately; only float32 is registered here.
pa.register_extension_type(TupleType(pa.float32()))

float_tuple_type = TupleType(pa.float32())
list_storage = pa.array(
    [[1, 2, 3], [4, 5, 6]],
    type=pa.list_(pa.float32()),
)
tuple_array = pa.ExtensionArray.from_storage(float_tuple_type, list_storage)
table = pa.Table.from_arrays([tuple_array], names=['f0'])

# Write the table to Parquet and read it back.
parquet_path = '/tmp/foo.parquet'
pq.write_table(table, parquet_path)
restored = pq.read_table(parquet_path)

print(restored.column(0).to_pylist())
# Expected output:
# [(1.0, 2.0, 3.0), (4.0, 5.0, 6.0)]
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]