dangotbanned commented on code in PR #48622: URL: https://github.com/apache/arrow/pull/48622#discussion_r2937043664
########## python/pyarrow-stubs/pyarrow/_types.pyi: ########## @@ -0,0 +1,658 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import datetime as dt # noqa: F401 + +from collections.abc import Mapping, Sequence, Iterable, Iterator +from decimal import Decimal # noqa: F401 +from typing import Any, Generic, Literal + +import numpy as np +import pandas as pd + +from typing_extensions import Self, TypeVar, deprecated + +from pyarrow._stubs_typing import SupportArrowSchema, TimeUnit +from pyarrow.io import Buffer +from pyarrow.lib import ( # noqa: F401 + Array, + ChunkedArray, + ExtensionArray, + MemoryPool, + MonthDayNano, + Table, +) +from pyarrow.scalar import ExtensionScalar + +class _Weakrefable: ... +class _Metadata(_Weakrefable): ... + +class DataType(_Weakrefable): + def field(self, i: int) -> Field: ... + @property + def id(self) -> int: ... + @property + def bit_width(self) -> int: ... + @property + def byte_width(self) -> int: ... + @property + def num_fields(self) -> int: ... + @property + def num_buffers(self) -> int: ... + @property + def has_variadic_buffers(self) -> bool: ... + + # Properties that exist on specific subtypes but accessed generically + @property + def list_size(self) -> int: ... 
+ def __hash__(self) -> int: ... + def equals( + self, other: DataType | str, *, check_metadata: bool = False + ) -> bool: ... + def to_pandas_dtype(self) -> np.generic: ... + def _export_to_c(self, out_ptr: int) -> None: ... + @classmethod + def _import_from_c(cls, in_ptr: int) -> Self: ... + def __arrow_c_schema__(self) -> Any: ... + @classmethod + def _import_from_c_capsule(cls, schema) -> Self: ... + +_AsPyType = TypeVar("_AsPyType") +_DataTypeT = TypeVar("_DataTypeT", bound=DataType) + +class _BasicDataType(DataType, Generic[_AsPyType]): ... +class NullType(_BasicDataType[None]): ... +class BoolType(_BasicDataType[bool]): ... +class UInt8Type(_BasicDataType[int]): ... +class Int8Type(_BasicDataType[int]): ... +class UInt16Type(_BasicDataType[int]): ... +class Int16Type(_BasicDataType[int]): ... +class UInt32Type(_BasicDataType[int]): ... +class Int32Type(_BasicDataType[int]): ... +class UInt64Type(_BasicDataType[int]): ... +class Int64Type(_BasicDataType[int]): ... +class Float16Type(_BasicDataType[float]): ... +class Float32Type(_BasicDataType[float]): ... +class Float64Type(_BasicDataType[float]): ... +class Date32Type(_BasicDataType[dt.date]): ... +class Date64Type(_BasicDataType[dt.date]): ... +class MonthDayNanoIntervalType(_BasicDataType[MonthDayNano]): ... +class StringType(_BasicDataType[str]): ... +class LargeStringType(_BasicDataType[str]): ... +class StringViewType(_BasicDataType[str]): ... +class BinaryType(_BasicDataType[bytes]): ... +class LargeBinaryType(_BasicDataType[bytes]): ... +class BinaryViewType(_BasicDataType[bytes]): ... + +_Unit = TypeVar("_Unit", bound=TimeUnit, default=Literal["us"]) +_Tz = TypeVar("_Tz", str, None, default=None) + +class TimestampType(_BasicDataType[int], Generic[_Unit, _Tz]): + @property + def unit(self) -> _Unit: ... + @property + def tz(self) -> _Tz: ... 
+ +_Time32Unit = TypeVar("_Time32Unit", bound=Literal["s", "ms"]) + +class Time32Type(_BasicDataType[dt.time], Generic[_Time32Unit]): + @property + def unit(self) -> _Time32Unit: ... + +_Time64Unit = TypeVar("_Time64Unit", bound=Literal["us", "ns"]) + +class Time64Type(_BasicDataType[dt.time], Generic[_Time64Unit]): + @property + def unit(self) -> _Time64Unit: ... + +class DurationType(_BasicDataType[dt.timedelta], Generic[_Unit]): + @property + def unit(self) -> _Unit: ... + +_FixedSizeBinaryAsPyType = TypeVar("_FixedSizeBinaryAsPyType", default=bytes) + +class FixedSizeBinaryType(_BasicDataType[_FixedSizeBinaryAsPyType]): ... + +_Precision = TypeVar("_Precision", default=Any) +_Scale = TypeVar("_Scale", default=Any) + +class Decimal32Type(FixedSizeBinaryType[Decimal], Generic[_Precision, _Scale]): + @property + def precision(self) -> _Precision: ... + @property + def scale(self) -> _Scale: ... + +class Decimal64Type(FixedSizeBinaryType[Decimal], Generic[_Precision, _Scale]): + @property + def precision(self) -> _Precision: ... + @property + def scale(self) -> _Scale: ... + +class Decimal128Type(FixedSizeBinaryType[Decimal], Generic[_Precision, _Scale]): + @property + def precision(self) -> _Precision: ... + @property + def scale(self) -> _Scale: ... + +class Decimal256Type(FixedSizeBinaryType[Decimal], Generic[_Precision, _Scale]): + @property + def precision(self) -> _Precision: ... + @property + def scale(self) -> _Scale: ... + +class ListType(DataType, Generic[_DataTypeT]): + @property + def value_field(self) -> Field[_DataTypeT]: ... + @property + def value_type(self) -> _DataTypeT: ... + +class LargeListType(DataType, Generic[_DataTypeT]): + @property + def value_field(self) -> Field[_DataTypeT]: ... + @property + def value_type(self) -> _DataTypeT: ... + +class ListViewType(DataType, Generic[_DataTypeT]): + @property + def value_field(self) -> Field[_DataTypeT]: ... + @property + def value_type(self) -> _DataTypeT: ... 
+ +class LargeListViewType(DataType, Generic[_DataTypeT]): + @property + def value_field(self) -> Field[_DataTypeT]: ... + @property + def value_type(self) -> _DataTypeT: ... + +class FixedSizeListType(DataType, Generic[_DataTypeT, _Size]): + @property + def value_field(self) -> Field[_DataTypeT]: ... + @property + def value_type(self) -> _DataTypeT: ... + @property + def list_size(self) -> int: ... + +class DictionaryMemo(_Weakrefable): ... + +_IndexT = TypeVar( + "_IndexT", + UInt8Type, + Int8Type, + UInt16Type, + Int16Type, + UInt32Type, + Int32Type, + UInt64Type, + Int64Type, +) +_BasicValueT = TypeVar("_BasicValueT", bound=_BasicDataType) +_ValueT = TypeVar("_ValueT", bound=DataType) +_Ordered = TypeVar("_Ordered", Literal[True], Literal[False], default=Literal[False]) + +class DictionaryType(DataType, Generic[_IndexT, _BasicValueT, _Ordered]): + @property + def ordered(self) -> _Ordered: ... + @property + def index_type(self) -> _IndexT: ... + @property + def value_type(self) -> _BasicValueT: ... + +_K = TypeVar("_K", bound=DataType) + +class MapType(DataType, Generic[_K, _ValueT, _Ordered]): + @property + def key_field(self) -> Field[_K]: ... + @property + def key_type(self) -> _K: ... + @property + def item_field(self) -> Field[_ValueT]: ... + @property + def item_type(self) -> _ValueT: ... + @property + def keys_sorted(self) -> _Ordered: ... + +_Size = TypeVar("_Size", default=int) + +class StructType(DataType): + def get_field_index(self, name: str) -> int: ... + def field(self, i: int | str) -> Field: ... + def get_all_field_indices(self, name: str) -> list[int]: ... + def __len__(self) -> int: ... + def __iter__(self) -> Iterator[Field]: ... + + __getitem__ = field + @property + def names(self) -> list[str]: ... + @property + def fields(self) -> list[Field]: ... + +class UnionType(DataType): + @property + def mode(self) -> Literal["sparse", "dense"]: ... + @property + def type_codes(self) -> list[int]: ... + def __len__(self) -> int: ... 
+ def __iter__(self) -> Iterator[Field]: ... + def field(self, i: int) -> Field: ... + + __getitem__ = field + +class SparseUnionType(UnionType): + @property + def mode(self) -> Literal["sparse"]: ... + +class DenseUnionType(UnionType): + @property + def mode(self) -> Literal["dense"]: ... + +_RunEndType = TypeVar("_RunEndType", Int16Type, Int32Type, Int64Type) + +class RunEndEncodedType(DataType, Generic[_RunEndType, _BasicValueT]): + @property + def run_end_type(self) -> _RunEndType: ... + @property + def value_type(self) -> _BasicValueT: ... + +_StorageT = TypeVar("_StorageT", bound=Array | ChunkedArray) + +class BaseExtensionType(DataType): + def __arrow_ext_class__(self) -> type[ExtensionArray]: ... + def __arrow_ext_scalar_class__(self) -> type[ExtensionScalar]: ... + @property + def extension_name(self) -> str: ... + @property + def storage_type(self) -> DataType: ... + def wrap_array(self, storage: _StorageT) -> _StorageT: ... + +class ExtensionType(BaseExtensionType): + def __init__(self, storage_type: DataType, extension_name: str) -> None: ... + def __arrow_ext_serialize__(self) -> bytes: ... + @classmethod + def __arrow_ext_deserialize__( + cls, storage_type: DataType, serialized: bytes + ) -> Self: ... + +class FixedShapeTensorType(BaseExtensionType, Generic[_ValueT]): + @property + def value_type(self) -> _ValueT: ... + @property + def shape(self) -> list[int]: ... + @property + def dim_names(self) -> list[str] | None: ... + @property + def permutation(self) -> list[int] | None: ... + +class Bool8Type(BaseExtensionType): ... +class UuidType(BaseExtensionType): ... +class JsonType(BaseExtensionType): ... + +class OpaqueType(BaseExtensionType): + @property + def type_name(self) -> str: ... + @property + def vendor_name(self) -> str: ... + +class UnknownExtensionType(ExtensionType): + def __init__(self, storage_type: DataType, serialized: bytes) -> None: ... + +def register_extension_type(ext_type: ExtensionType) -> None: ... 
+def unregister_extension_type(type_name: str) -> None: ... + +class KeyValueMetadata(_Metadata, Mapping[bytes, bytes]): + def __init__( + self, + __arg0__: Mapping[str | bytes, str | bytes] + | Iterable[tuple[str | bytes, str | bytes]] + | KeyValueMetadata + | None = None, + **kwargs: str, + ) -> None: ... + def equals(self, other: KeyValueMetadata) -> bool: ... + def __len__(self) -> int: ... + def __contains__(self, /, __key: object) -> bool: ... + def __getitem__(self, /, __key: Any) -> Any: ... + def __iter__(self) -> Iterator[bytes]: ... + def get_all(self, key: str) -> list[bytes]: ... + def to_dict(self) -> dict[bytes, bytes]: ... + +class Field(_Weakrefable, Generic[_DataTypeT]): + def equals(self, other: Field, check_metadata: bool = False) -> bool: ... + def __hash__(self) -> int: ... + @property + def nullable(self) -> bool: ... + @property + def name(self) -> str: ... + @property + def metadata(self) -> dict[bytes, bytes] | None: ... + @property + def type(self) -> _DataTypeT: ... + def with_metadata( + self, + metadata: dict[bytes | str, bytes | str] + | Mapping[bytes | str, bytes | str] + | Any, + ) -> Self: ... Review Comment: This would be another spot for a `TypeAlias` (see https://github.com/apache/arrow/pull/48622#discussion_r2937040911), IIUC? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected]
