This is an automated email from the ASF dual-hosted git repository.
apitrou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new 655ae960fc GH-39277: [Python] Fix missing byte_width attribute on
DataType class (#39592)
655ae960fc is described below
commit 655ae960fcbc53061dadb243cd584944b74b140d
Author: Kevin Mingtarja <[email protected]>
AuthorDate: Wed Feb 28 03:24:00 2024 -0800
GH-39277: [Python] Fix missing byte_width attribute on DataType class
(#39592)
### Rationale for this change
As mentioned in the issue, the byte_width attribute was missing on most
data types, which is a small annoyance.
### What changes are included in this PR?
Add the byte_width attribute on the DataType class (which is the base class
of all Arrow data types), instead of on FixedSizeBinaryType (which is a child
class of DataType).
### Are these changes tested?
Yes, tests were added in `python/pyarrow/tests/test_types.py`.
### Are there any user-facing changes?
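Yes, users can now access the byte_width attribute on all fixed width data
types. Accessing it raises ValueError("Non-fixed width type") on types that
are not fixed width (e.g. string, list, struct), and
ValueError("Less than one byte") on fixed width types narrower than one byte
(e.g. bool).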
* Closes: #39277
Authored-by: Kevin Mingtarja <[email protected]>
Signed-off-by: Antoine Pitrou <[email protected]>
---
python/pyarrow/includes/libarrow.pxd | 1 +
python/pyarrow/tests/test_types.py | 39 +++++++++++++++++++++++++++---------
python/pyarrow/types.pxi | 36 ++++++++++++++++++++-------------
3 files changed, 52 insertions(+), 24 deletions(-)
diff --git a/python/pyarrow/includes/libarrow.pxd
b/python/pyarrow/includes/libarrow.pxd
index 935fb4d34b..05d3318020 100644
--- a/python/pyarrow/includes/libarrow.pxd
+++ b/python/pyarrow/includes/libarrow.pxd
@@ -245,6 +245,7 @@ cdef extern from "arrow/api.h" namespace "arrow" nogil:
cdef cppclass CFixedWidthType" arrow::FixedWidthType"(CDataType):
int bit_width()
+ int byte_width()
cdef cppclass CNullArray" arrow::NullArray"(CArray):
CNullArray(int64_t length)
diff --git a/python/pyarrow/tests/test_types.py
b/python/pyarrow/tests/test_types.py
index a79702a8ca..1d132a6af8 100644
--- a/python/pyarrow/tests/test_types.py
+++ b/python/pyarrow/tests/test_types.py
@@ -945,18 +945,37 @@ def test_type_id():
assert isinstance(ty.id, int)
-def test_bit_width():
- for ty, expected in [(pa.bool_(), 1),
- (pa.int8(), 8),
- (pa.uint32(), 32),
- (pa.float16(), 16),
- (pa.decimal128(19, 4), 128),
- (pa.decimal256(76, 38), 256),
- (pa.binary(42), 42 * 8)]:
- assert ty.bit_width == expected
- for ty in [pa.binary(), pa.string(), pa.list_(pa.int16())]:
+def test_bit_and_byte_width():
+ for ty, expected_bit_width, expected_byte_width in [
+ (pa.bool_(), 1, 0),
+ (pa.int8(), 8, 1),
+ (pa.uint32(), 32, 4),
+ (pa.float16(), 16, 2),
+ (pa.timestamp('s'), 64, 8),
+ (pa.date32(), 32, 4),
+ (pa.decimal128(19, 4), 128, 16),
+ (pa.decimal256(76, 38), 256, 32),
+ (pa.binary(42), 42 * 8, 42)
+ ]:
+ assert ty.bit_width == expected_bit_width
+
+ if expected_byte_width == 0:
+ with pytest.raises(ValueError, match="Less than one byte"):
+ ty.byte_width
+ else:
+ assert ty.byte_width == expected_byte_width
+
+ for ty in [
+ pa.binary(),
+ pa.string(),
+ pa.list_(pa.int16()),
+ pa.map_(pa.string(), pa.int32()),
+ pa.struct([('f1', pa.int32())])
+ ]:
with pytest.raises(ValueError, match="fixed width"):
ty.bit_width
+ with pytest.raises(ValueError, match="fixed width"):
+ ty.byte_width
def test_fixed_size_binary_byte_width():
diff --git a/python/pyarrow/types.pxi b/python/pyarrow/types.pxi
index e9bf56c621..fbbf36ae9f 100644
--- a/python/pyarrow/types.pxi
+++ b/python/pyarrow/types.pxi
@@ -257,6 +257,28 @@ cdef class DataType(_Weakrefable):
raise ValueError("Non-fixed width type")
return ty.bit_width()
+ @property
+ def byte_width(self):
+ """
+ Byte width for fixed width type.
+
+ Examples
+ --------
+ >>> import pyarrow as pa
+ >>> pa.int64()
+ DataType(int64)
+ >>> pa.int64().byte_width
+ 8
+ """
+ cdef _CFixedWidthTypePtr ty
+ ty = dynamic_cast[_CFixedWidthTypePtr](self.type)
+ if ty == nullptr:
+ raise ValueError("Non-fixed width type")
+ byte_width = ty.byte_width()
+ if byte_width == 0:
+ raise ValueError("Less than one byte")
+ return byte_width
+
@property
def num_fields(self):
"""
@@ -1342,20 +1364,6 @@ cdef class FixedSizeBinaryType(DataType):
def __reduce__(self):
return binary, (self.byte_width,)
- @property
- def byte_width(self):
- """
- The binary size in bytes.
-
- Examples
- --------
- >>> import pyarrow as pa
- >>> t = pa.binary(3)
- >>> t.byte_width
- 3
- """
- return self.fixed_size_binary_type.byte_width()
-
cdef class Decimal128Type(FixedSizeBinaryType):
"""