This is an automated email from the ASF dual-hosted git repository.
alenka pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new 2949fe82cb GH-47123: [Python] Add Enums to PyArrow Types (#47139)
2949fe82cb is described below
commit 2949fe82cb674aa72bbdf4b6886a9fc370c0a35c
Author: Bogdan Romenskii <[email protected]>
AuthorDate: Fri Jul 25 15:14:26 2025 +0200
GH-47123: [Python] Add Enums to PyArrow Types (#47139)
### Rationale for this change
Please see GitHub issue #47123.
### What changes are included in this PR?
Added public Type Enums that mimic the original private variable groups
used for internal type checking.
### Are these changes tested?
Yes. Partly for now.
### Are there any user-facing changes?
No, just additional features were added: users will now be able to access the
underlying types directly via the Type Enums.
* GitHub Issue: #47123
Lead-authored-by: Bogdan Romenskii <[email protected]>
Co-authored-by: Alenka Frim <[email protected]>
Signed-off-by: AlenkaF <[email protected]>
---
docs/source/python/api/datatypes.rst | 7 ++++
python/pyarrow/includes/libarrow.pxd | 2 +
python/pyarrow/lib.pyx | 2 +
python/pyarrow/tests/test_types.py | 13 ++++++
python/pyarrow/types.py | 81 ++++++++++++++++++++++++++++++++++++
5 files changed, 105 insertions(+)
diff --git a/docs/source/python/api/datatypes.rst b/docs/source/python/api/datatypes.rst
index ddf71cbfa3..ea9e547d32 100644
--- a/docs/source/python/api/datatypes.rst
+++ b/docs/source/python/api/datatypes.rst
@@ -211,3 +211,10 @@ represents a given data type (such as ``int32``) or general category
is_map
is_dictionary
is_primitive
+
+Types enum that can be used for direct type checking.
+
+.. autosummary::
+ :toctree: ../generated/
+
+ TypesEnum
diff --git a/python/pyarrow/includes/libarrow.pxd b/python/pyarrow/includes/libarrow.pxd
index deb86cb1f2..b2f32ac645 100644
--- a/python/pyarrow/includes/libarrow.pxd
+++ b/python/pyarrow/includes/libarrow.pxd
@@ -142,6 +142,8 @@ cdef extern from "arrow/api.h" namespace "arrow" nogil:
_Type_TIME64" arrow::Type::TIME64"
_Type_DURATION" arrow::Type::DURATION"
_Type_INTERVAL_MONTH_DAY_NANO" arrow::Type::INTERVAL_MONTH_DAY_NANO"
+ _Type_INTERVAL_DAY_TIME" arrow::Type::INTERVAL_DAY_TIME"
+ _Type_INTERVAL_MONTHS" arrow::Type::INTERVAL_MONTHS"
_Type_BINARY" arrow::Type::BINARY"
_Type_STRING" arrow::Type::STRING"
diff --git a/python/pyarrow/lib.pyx b/python/pyarrow/lib.pyx
index 2c92ecbfa7..5dca6fd8d2 100644
--- a/python/pyarrow/lib.pyx
+++ b/python/pyarrow/lib.pyx
@@ -138,6 +138,8 @@ Type_SPARSE_UNION = _Type_SPARSE_UNION
Type_DENSE_UNION = _Type_DENSE_UNION
Type_DICTIONARY = _Type_DICTIONARY
Type_RUN_END_ENCODED = _Type_RUN_END_ENCODED
+Type_INTERVAL_MONTHS = _Type_INTERVAL_MONTHS
+Type_INTERVAL_DAY_TIME = _Type_INTERVAL_DAY_TIME
UnionMode_SPARSE = _UnionMode_SPARSE
UnionMode_DENSE = _UnionMode_DENSE
diff --git a/python/pyarrow/tests/test_types.py b/python/pyarrow/tests/test_types.py
index 60a713eb71..e628e559b8 100644
--- a/python/pyarrow/tests/test_types.py
+++ b/python/pyarrow/tests/test_types.py
@@ -1445,3 +1445,16 @@ def test_field_import_c_schema_interface():
assert pa.field(wrapped_field, nullable=False).nullable is False
result = pa.field(wrapped_field, metadata={"other": "meta"})
assert result.metadata == {b"other": b"meta"}
+
+
+def test_types_enum():
+ # GH-47123: [Python] Add Enums to PyArrow Types
+ # Since not all the underlying types are implemented in PyArrow,
+ # test only the ones that were imported specifically for this Enum
+
+ import pyarrow.lib as lib
+
+ types_enum = types.TypesEnum
+
+ assert types_enum.INTERVAL_MONTHS.value == lib.Type_INTERVAL_MONTHS
+ assert types_enum.INTERVAL_DAY_TIME.value == lib.Type_INTERVAL_DAY_TIME
diff --git a/python/pyarrow/types.py b/python/pyarrow/types.py
index 2bb5cfcf8b..ab4e5d1b99 100644
--- a/python/pyarrow/types.py
+++ b/python/pyarrow/types.py
@@ -18,6 +18,8 @@
# Tools for dealing with Arrow type metadata in Python
+from enum import IntEnum
+
from pyarrow.lib import (is_boolean_value, # noqa
is_integer_value,
is_float_value)
@@ -46,6 +48,85 @@ _NESTED_TYPES = {lib.Type_LIST, lib.Type_FIXED_SIZE_LIST,
lib.Type_LARGE_LIST,
lib.Type_STRUCT, lib.Type_MAP} | _UNION_TYPES
+class TypesEnum(IntEnum):
+ """
+ An Enum that maps constant values to data types.
+ Exposes the underlying data types representation for type checking purposes.
+ Note that some of the types listed here are not supported by PyArrow yet:
+ INTERVAL_MONTHS and INTERVAL_DAY_TIME.
+
+ Examples
+ --------
+ >>> import pyarrow as pa
+ >>> from pyarrow.types import TypesEnum
+ >>> int8_field = pa.field('int8_field', pa.int8())
+ >>> int8_field.type.id == TypesEnum.INT8
+ True
+
+ >>> fixed_size_list = pa.list_(pa.uint16(), 3)
+ >>> fixed_size_list.id == TypesEnum.LIST
+ False
+
+ >>> fixed_size_list.id == TypesEnum.FIXED_SIZE_LIST
+ True
+ """
+
+ NA = lib.Type_NA
+ BOOL = lib.Type_BOOL
+
+ INT8 = lib.Type_INT8
+ INT16 = lib.Type_INT16
+ INT32 = lib.Type_INT32
+ INT64 = lib.Type_INT64
+
+ UINT8 = lib.Type_UINT8
+ UINT16 = lib.Type_UINT16
+ UINT32 = lib.Type_UINT32
+ UINT64 = lib.Type_UINT64
+
+ HALF_FLOAT = lib.Type_HALF_FLOAT
+ FLOAT = lib.Type_FLOAT
+ DOUBLE = lib.Type_DOUBLE
+
+ BINARY = lib.Type_BINARY
+ BINARY_VIEW = lib.Type_BINARY_VIEW
+ LARGE_BINARY = lib.Type_LARGE_BINARY
+ STRING = lib.Type_STRING
+ STRING_VIEW = lib.Type_STRING_VIEW
+ LARGE_STRING = lib.Type_LARGE_STRING
+ FIXED_SIZE_BINARY = lib.Type_FIXED_SIZE_BINARY
+
+ DECIMAL32 = lib.Type_DECIMAL32
+ DECIMAL64 = lib.Type_DECIMAL64
+ DECIMAL128 = lib.Type_DECIMAL128
+ DECIMAL256 = lib.Type_DECIMAL256
+
+ LIST = lib.Type_LIST
+ LARGE_LIST = lib.Type_LARGE_LIST
+ LIST_VIEW = lib.Type_LIST_VIEW
+ LARGE_LIST_VIEW = lib.Type_LARGE_LIST_VIEW
+ MAP = lib.Type_MAP
+ FIXED_SIZE_LIST = lib.Type_FIXED_SIZE_LIST
+
+ STRUCT = lib.Type_STRUCT
+ SPARSE_UNION = lib.Type_SPARSE_UNION
+ DENSE_UNION = lib.Type_DENSE_UNION
+ RUN_END_ENCODED = lib.Type_RUN_END_ENCODED
+
+ DATE32 = lib.Type_DATE32
+ DATE64 = lib.Type_DATE64
+ TIME32 = lib.Type_TIME32
+ TIME64 = lib.Type_TIME64
+ TIMESTAMP = lib.Type_TIMESTAMP
+
+ INTERVAL_MONTHS = lib.Type_INTERVAL_MONTHS
+ INTERVAL_DAY_TIME = lib.Type_INTERVAL_DAY_TIME
+ INTERVAL_MONTH_DAY_NANO = lib.Type_INTERVAL_MONTH_DAY_NANO
+
+ DURATION = lib.Type_DURATION
+ DICTIONARY = lib.Type_DICTIONARY
+
+
@doc(datatype="null")
def is_null(t):
"""