Copilot commented on code in PR #48622: URL: https://github.com/apache/arrow/pull/48622#discussion_r3381543835
########## python/pyarrow-stubs/pyarrow/_types.pyi: ########## @@ -0,0 +1,723 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import datetime as dt # noqa: F401 + +from collections.abc import Iterable, Iterator, Mapping, Sequence +from decimal import Decimal # noqa: F401 +from typing import Any, Generic, Literal, Protocol, TypeAlias + +import numpy as np +import pandas as pd + +from typing_extensions import Self, TypeVar, deprecated + +from pyarrow._stubs_typing import SupportsArrowSchema, TimeUnit +from pyarrow.lib import ( # noqa: F401 + Array, + Buffer, + ChunkedArray, + ExtensionArray, + ExtensionScalar, + MemoryPool, + MonthDayNano, + Table, +) + +class _Weakrefable: ... +class _Metadata(_Weakrefable): ... + +class DataType(_Weakrefable): + def field(self, i: int) -> Field[Any]: ... + @property + def id(self) -> int: ... + @property + def bit_width(self) -> int: ... + @property + def byte_width(self) -> int: ... + @property + def num_fields(self) -> int: ... + @property + def num_buffers(self) -> int: ... + @property + def has_variadic_buffers(self) -> bool: ... + + def __hash__(self) -> int: ... + def equals( + self, other: DataType | str, *, check_metadata: bool = False + ) -> bool: ... 
+ def to_pandas_dtype(self) -> np.generic: ... Review Comment: `DataType.to_pandas_dtype()` returns a NumPy dtype / scalar type and sometimes pandas extension dtypes (e.g. tz-aware). The stub currently annotates the return type as `np.generic`, i.e. an instance of the NumPy scalar base class, which doesn’t match the actual return values. ########## python/pyarrow-stubs/pyarrow/_types.pyi: ########## @@ -0,0 +1,723 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import datetime as dt # noqa: F401 + +from collections.abc import Iterable, Iterator, Mapping, Sequence +from decimal import Decimal # noqa: F401 +from typing import Any, Generic, Literal, Protocol, TypeAlias + +import numpy as np +import pandas as pd Review Comment: `pandas` is an optional runtime dependency in pyarrow (it’s imported lazily in the implementation). Importing it unconditionally in a stub can cause type checking to fail in environments that don’t have pandas installed. Consider marking this import as ignorable for type checkers, for example with a `# type: ignore` comment on the import line. ########## python/pyarrow-stubs/pyarrow/_types.pyi: ########## @@ -0,0 +1,723 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. 
See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import datetime as dt # noqa: F401 + +from collections.abc import Iterable, Iterator, Mapping, Sequence +from decimal import Decimal # noqa: F401 +from typing import Any, Generic, Literal, Protocol, TypeAlias + +import numpy as np +import pandas as pd + +from typing_extensions import Self, TypeVar, deprecated + +from pyarrow._stubs_typing import SupportsArrowSchema, TimeUnit +from pyarrow.lib import ( # noqa: F401 + Array, + Buffer, + ChunkedArray, + ExtensionArray, + ExtensionScalar, + MemoryPool, + MonthDayNano, + Table, +) + +class _Weakrefable: ... +class _Metadata(_Weakrefable): ... + +class DataType(_Weakrefable): + def field(self, i: int) -> Field[Any]: ... + @property + def id(self) -> int: ... + @property + def bit_width(self) -> int: ... + @property + def byte_width(self) -> int: ... + @property + def num_fields(self) -> int: ... + @property + def num_buffers(self) -> int: ... + @property + def has_variadic_buffers(self) -> bool: ... + + def __hash__(self) -> int: ... + def equals( + self, other: DataType | str, *, check_metadata: bool = False + ) -> bool: ... + def to_pandas_dtype(self) -> np.generic: ... + def _export_to_c(self, out_ptr: int) -> None: ... + @classmethod + def _import_from_c(cls, in_ptr: int) -> Self: ... + def __arrow_c_schema__(self) -> Any: ... 
+ @classmethod + def _import_from_c_capsule(cls, schema) -> Self: ... + +_AsPyType = TypeVar("_AsPyType") +_DataTypeT = TypeVar("_DataTypeT", bound=DataType) +_DataTypeT_co = TypeVar("_DataTypeT_co", bound=DataType, covariant=True) + +class _BasicDataType(DataType, Generic[_AsPyType]): ... +class NullType(_BasicDataType[None]): ... +class BoolType(_BasicDataType[bool]): ... +class UInt8Type(_BasicDataType[int]): ... +class Int8Type(_BasicDataType[int]): ... +class UInt16Type(_BasicDataType[int]): ... +class Int16Type(_BasicDataType[int]): ... +class UInt32Type(_BasicDataType[int]): ... +class Int32Type(_BasicDataType[int]): ... +class UInt64Type(_BasicDataType[int]): ... +class Int64Type(_BasicDataType[int]): ... +class Float16Type(_BasicDataType[float]): ... +class Float32Type(_BasicDataType[float]): ... +class Float64Type(_BasicDataType[float]): ... +class Date32Type(_BasicDataType[dt.date]): ... +class Date64Type(_BasicDataType[dt.date]): ... +class MonthDayNanoIntervalType(_BasicDataType[MonthDayNano]): ... +class StringType(_BasicDataType[str]): ... +class LargeStringType(_BasicDataType[str]): ... +class StringViewType(_BasicDataType[str]): ... +class BinaryType(_BasicDataType[bytes]): ... +class LargeBinaryType(_BasicDataType[bytes]): ... +class BinaryViewType(_BasicDataType[bytes]): ... + +_Unit = TypeVar("_Unit", bound=TimeUnit, default=Literal["us"]) +_Tz = TypeVar("_Tz", str, None, default=None) + +class TimestampType(_BasicDataType[int], Generic[_Unit, _Tz]): + @property + def unit(self) -> _Unit: ... + @property + def tz(self) -> _Tz: ... + +_Time32Unit = TypeVar("_Time32Unit", bound=Literal["s", "ms"]) + +class Time32Type(_BasicDataType[dt.time], Generic[_Time32Unit]): + @property + def unit(self) -> _Time32Unit: ... + +_Time64Unit = TypeVar("_Time64Unit", bound=Literal["us", "ns"]) + +class Time64Type(_BasicDataType[dt.time], Generic[_Time64Unit]): + @property + def unit(self) -> _Time64Unit: ... 
+ +class DurationType(_BasicDataType[dt.timedelta], Generic[_Unit]): + @property + def unit(self) -> _Unit: ... + +_FixedSizeBinaryAsPyType = TypeVar("_FixedSizeBinaryAsPyType", default=bytes) + +class FixedSizeBinaryType(_BasicDataType[_FixedSizeBinaryAsPyType]): ... + +_Precision = TypeVar("_Precision", default=Any) +_Scale = TypeVar("_Scale", default=Any) +_Precision_co = TypeVar("_Precision_co", default=Any, covariant=True) +_Scale_co = TypeVar("_Scale_co", default=Any, covariant=True) + +class _HasPrecisionScale(Protocol[_Precision_co, _Scale_co]): + @property + def precision(self) -> _Precision_co: ... + @property + def scale(self) -> _Scale_co: ... + +class Decimal32Type( + FixedSizeBinaryType[Decimal], _HasPrecisionScale[_Precision_co, _Scale_co] +): ... + +class Decimal64Type( + FixedSizeBinaryType[Decimal], _HasPrecisionScale[_Precision_co, _Scale_co] +): ... + +class Decimal128Type( + FixedSizeBinaryType[Decimal], _HasPrecisionScale[_Precision_co, _Scale_co] +): ... + +class Decimal256Type( + FixedSizeBinaryType[Decimal], _HasPrecisionScale[_Precision_co, _Scale_co] +): ... + +class ListType(DataType, Generic[_DataTypeT_co]): + @property + def value_field(self) -> Field[_DataTypeT_co]: ... + @property + def value_type(self) -> _DataTypeT_co: ... + +class LargeListType(DataType, Generic[_DataTypeT_co]): + @property + def value_field(self) -> Field[_DataTypeT_co]: ... + @property + def value_type(self) -> _DataTypeT_co: ... + +class ListViewType(DataType, Generic[_DataTypeT_co]): + @property + def value_field(self) -> Field[_DataTypeT_co]: ... + @property + def value_type(self) -> _DataTypeT_co: ... + +class LargeListViewType(DataType, Generic[_DataTypeT_co]): + @property + def value_field(self) -> Field[_DataTypeT_co]: ... + @property + def value_type(self) -> _DataTypeT_co: ... + +class FixedSizeListType(DataType, Generic[_DataTypeT_co, _Size]): + @property + def value_field(self) -> Field[_DataTypeT_co]: ... 
+ @property + def value_type(self) -> _DataTypeT_co: ... + @property + def list_size(self) -> int: ... + +class DictionaryMemo(_Weakrefable): ... + +_IndexT = TypeVar( + "_IndexT", + UInt8Type, + Int8Type, + UInt16Type, + Int16Type, + UInt32Type, + Int32Type, + UInt64Type, + Int64Type, +) +_BasicValueT = TypeVar("_BasicValueT", bound=_BasicDataType, default=_BasicDataType) +_ValueT = TypeVar("_ValueT", bound=DataType, default=DataType) +_K = TypeVar("_K", bound=DataType, default=DataType) +_Ordered = TypeVar("_Ordered", Literal[True], Literal[False], default=Literal[False]) + +class DictionaryType(DataType, Generic[_IndexT, _BasicValueT, _Ordered]): + @property + def ordered(self) -> _Ordered: ... + @property + def index_type(self) -> _IndexT: ... + @property + def value_type(self) -> _BasicValueT: ... + +class MapType(DataType, Generic[_K, _ValueT, _Ordered]): + @property + def key_field(self) -> Field[_K]: ... + @property + def key_type(self) -> _K: ... + @property + def item_field(self) -> Field[_ValueT]: ... + @property + def item_type(self) -> _ValueT: ... + @property + def keys_sorted(self) -> _Ordered: ... + +_Size = TypeVar("_Size", default=int) + +class StructType(DataType): + def get_field_index(self, name: str) -> int: ... + def field(self, i: int | str) -> Field: ... + def get_all_field_indices(self, name: str) -> list[int]: ... + def __len__(self) -> int: ... + def __iter__(self) -> Iterator[Field]: ... + + __getitem__ = field + @property + def names(self) -> list[str]: ... + @property + def fields(self) -> list[Field]: ... + +class UnionType(DataType): + @property + def mode(self) -> Literal["sparse", "dense"]: ... + @property + def type_codes(self) -> list[int]: ... + def __len__(self) -> int: ... + def __iter__(self) -> Iterator[Field]: ... + def field(self, i: int) -> Field: ... + + __getitem__ = field + +class SparseUnionType(UnionType): + @property + def mode(self) -> Literal["sparse"]: ... 
+ +class DenseUnionType(UnionType): + @property + def mode(self) -> Literal["dense"]: ... + +_RunEndType = TypeVar("_RunEndType", Int16Type, Int32Type, Int64Type) + +class RunEndEncodedType(DataType, Generic[_RunEndType, _BasicValueT]): + @property + def run_end_type(self) -> _RunEndType: ... + @property + def value_type(self) -> _BasicValueT: ... + +_StorageT = TypeVar("_StorageT", bound=Array | ChunkedArray) + +class BaseExtensionType(DataType): + def __arrow_ext_class__(self) -> type[ExtensionArray]: ... + def __arrow_ext_scalar_class__(self) -> type[ExtensionScalar]: ... + @property + def extension_name(self) -> str: ... + @property + def storage_type(self) -> DataType: ... + def wrap_array(self, storage: _StorageT) -> _StorageT: ... + +class ExtensionType(BaseExtensionType): + def __init__(self, storage_type: DataType, extension_name: str) -> None: ... + def __arrow_ext_serialize__(self) -> bytes: ... + @classmethod + def __arrow_ext_deserialize__( + cls, storage_type: DataType, serialized: bytes + ) -> Self: ... + +class FixedShapeTensorType(BaseExtensionType, Generic[_ValueT]): + @property + def value_type(self) -> _ValueT: ... + @property + def shape(self) -> list[int]: ... + @property + def dim_names(self) -> list[str] | None: ... + @property + def permutation(self) -> list[int] | None: ... + +class Bool8Type(BaseExtensionType): ... +class UuidType(BaseExtensionType): ... +class JsonType(BaseExtensionType): ... + +class OpaqueType(BaseExtensionType): + @property + def type_name(self) -> str: ... + @property + def vendor_name(self) -> str: ... + +class UnknownExtensionType(ExtensionType): + def __init__(self, storage_type: DataType, serialized: bytes) -> None: ... + +def register_extension_type(ext_type: ExtensionType) -> None: ... +def unregister_extension_type(type_name: str) -> None: ... 
+ +_StrOrBytes: TypeAlias = str | bytes +_MetadataMapping: TypeAlias = Mapping[_StrOrBytes, _StrOrBytes] +_MetadataIterable: TypeAlias = Iterable[tuple[_StrOrBytes, _StrOrBytes]] +_KeyValueMetadataInput: TypeAlias = _MetadataMapping | _MetadataIterable | None +_DataTypeAlias: TypeAlias = Literal[ + "null", + "bool", + "boolean", + "i1", + "int8", + "i2", + "int16", + "i4", + "int32", + "i8", + "int64", + "u1", + "uint8", + "u2", + "uint16", + "u4", + "uint32", + "u8", + "uint64", + "f2", + "halffloat", + "float16", + "f4", + "float", + "float32", + "f8", + "double", + "float64", + "string", + "str", + "utf8", + "binary", + "large_string", + "large_str", + "large_utf8", + "large_binary", + "binary_view", + "string_view", + "date32", + "date64", + "date32[day]", + "date64[ms]", + "time32[s]", + "time32[ms]", + "time64[us]", + "time64[ns]", + "timestamp[s]", + "timestamp[ms]", + "timestamp[us]", + "timestamp[ns]", + "duration[s]", + "duration[ms]", + "duration[us]", + "duration[ns]", + "month_day_nano_interval", +] +_DataTypeAliasInput: TypeAlias = _DataTypeAlias | str +_DataTypeLike: TypeAlias = DataType | _DataTypeAliasInput +_FieldTypeInput: TypeAlias = _DataTypeLike | None +_SchemaMetadataInput: TypeAlias = ( + Mapping[bytes, bytes] + | Mapping[str, str] + | Mapping[bytes, str] + | Mapping[str, bytes] +) + +class KeyValueMetadata(_Metadata, Mapping[bytes, bytes]): + def __init__( + self, + __arg0__: _KeyValueMetadataInput | KeyValueMetadata = None, + **kwargs: str, + ) -> None: ... + def equals(self, other: KeyValueMetadata) -> bool: ... + def __len__(self) -> int: ... + def __contains__(self, /, __key: object) -> bool: ... + def __getitem__(self, /, __key: Any) -> Any: ... + def __iter__(self) -> Iterator[bytes]: ... + def get_all(self, key: str) -> list[bytes]: ... + def to_dict(self) -> dict[bytes, bytes]: ... + +class Field(_Weakrefable, Generic[_DataTypeT_co]): + def equals(self, other: Field, check_metadata: bool = False) -> bool: ... 
+ def __hash__(self) -> int: ... + @property + def nullable(self) -> bool: ... + @property + def name(self) -> str: ... + @property + def metadata(self) -> dict[bytes, bytes] | None: ... + @property + def type(self) -> _DataTypeT_co: ... + def with_metadata( + self, + metadata: _MetadataMapping | Any, + ) -> Self: ... + def remove_metadata(self) -> Self: ... + def with_type(self, new_type: DataType) -> Field: ... + def with_name(self, name: str) -> Self: ... + def with_nullable(self, nullable: bool) -> Field[_DataTypeT_co]: ... + def flatten(self) -> list[Field]: ... + def _export_to_c(self, out_ptr: int) -> None: ... + @classmethod + def _import_from_c(cls, in_ptr: int) -> Self: ... + def __arrow_c_schema__(self) -> Any: ... + @classmethod + def _import_from_c_capsule(cls, schema) -> Self: ... + +_StructFieldTuple: TypeAlias = ( + tuple[str, Field[Any] | None] | tuple[str, _FieldTypeInput] +) +_StructFieldsInput: TypeAlias = ( + Iterable[Field[Any] | _StructFieldTuple] + | Mapping[str, Field[Any] | _FieldTypeInput] +) + +class Schema(_Weakrefable): + def __len__(self) -> int: ... + def __getitem__(self, key: str | int) -> Field: ... + + _field = __getitem__ + def __iter__(self) -> Iterator[Field]: ... + def __hash__(self) -> int: ... + def __sizeof__(self) -> int: ... + @property + def pandas_metadata(self) -> dict: ... Review Comment: `Schema.pandas_metadata` can be `None` when there is no pandas metadata (implementation returns `None` if missing). The stub currently declares it as always `dict`. Consider widening the annotation to `dict | None`. ########## python/pyarrow-stubs/pyarrow/_types.pyi: ########## @@ -0,0 +1,723 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import datetime as dt # noqa: F401 + +from collections.abc import Iterable, Iterator, Mapping, Sequence +from decimal import Decimal # noqa: F401 +from typing import Any, Generic, Literal, Protocol, TypeAlias + +import numpy as np +import pandas as pd + +from typing_extensions import Self, TypeVar, deprecated + +from pyarrow._stubs_typing import SupportsArrowSchema, TimeUnit +from pyarrow.lib import ( # noqa: F401 + Array, + Buffer, + ChunkedArray, + ExtensionArray, + ExtensionScalar, + MemoryPool, + MonthDayNano, + Table, +) + +class _Weakrefable: ... +class _Metadata(_Weakrefable): ... + +class DataType(_Weakrefable): + def field(self, i: int) -> Field[Any]: ... + @property + def id(self) -> int: ... + @property + def bit_width(self) -> int: ... + @property + def byte_width(self) -> int: ... + @property + def num_fields(self) -> int: ... + @property + def num_buffers(self) -> int: ... + @property + def has_variadic_buffers(self) -> bool: ... + + def __hash__(self) -> int: ... + def equals( + self, other: DataType | str, *, check_metadata: bool = False + ) -> bool: ... + def to_pandas_dtype(self) -> np.generic: ... + def _export_to_c(self, out_ptr: int) -> None: ... + @classmethod + def _import_from_c(cls, in_ptr: int) -> Self: ... + def __arrow_c_schema__(self) -> Any: ... + @classmethod + def _import_from_c_capsule(cls, schema) -> Self: ... 
+ +_AsPyType = TypeVar("_AsPyType") +_DataTypeT = TypeVar("_DataTypeT", bound=DataType) +_DataTypeT_co = TypeVar("_DataTypeT_co", bound=DataType, covariant=True) + +class _BasicDataType(DataType, Generic[_AsPyType]): ... +class NullType(_BasicDataType[None]): ... +class BoolType(_BasicDataType[bool]): ... +class UInt8Type(_BasicDataType[int]): ... +class Int8Type(_BasicDataType[int]): ... +class UInt16Type(_BasicDataType[int]): ... +class Int16Type(_BasicDataType[int]): ... +class UInt32Type(_BasicDataType[int]): ... +class Int32Type(_BasicDataType[int]): ... +class UInt64Type(_BasicDataType[int]): ... +class Int64Type(_BasicDataType[int]): ... +class Float16Type(_BasicDataType[float]): ... +class Float32Type(_BasicDataType[float]): ... +class Float64Type(_BasicDataType[float]): ... +class Date32Type(_BasicDataType[dt.date]): ... +class Date64Type(_BasicDataType[dt.date]): ... +class MonthDayNanoIntervalType(_BasicDataType[MonthDayNano]): ... +class StringType(_BasicDataType[str]): ... +class LargeStringType(_BasicDataType[str]): ... +class StringViewType(_BasicDataType[str]): ... +class BinaryType(_BasicDataType[bytes]): ... +class LargeBinaryType(_BasicDataType[bytes]): ... +class BinaryViewType(_BasicDataType[bytes]): ... + +_Unit = TypeVar("_Unit", bound=TimeUnit, default=Literal["us"]) +_Tz = TypeVar("_Tz", str, None, default=None) + +class TimestampType(_BasicDataType[int], Generic[_Unit, _Tz]): + @property + def unit(self) -> _Unit: ... + @property + def tz(self) -> _Tz: ... + +_Time32Unit = TypeVar("_Time32Unit", bound=Literal["s", "ms"]) + +class Time32Type(_BasicDataType[dt.time], Generic[_Time32Unit]): + @property + def unit(self) -> _Time32Unit: ... + +_Time64Unit = TypeVar("_Time64Unit", bound=Literal["us", "ns"]) + +class Time64Type(_BasicDataType[dt.time], Generic[_Time64Unit]): + @property + def unit(self) -> _Time64Unit: ... + +class DurationType(_BasicDataType[dt.timedelta], Generic[_Unit]): + @property + def unit(self) -> _Unit: ... 
+ +_FixedSizeBinaryAsPyType = TypeVar("_FixedSizeBinaryAsPyType", default=bytes) + +class FixedSizeBinaryType(_BasicDataType[_FixedSizeBinaryAsPyType]): ... + +_Precision = TypeVar("_Precision", default=Any) +_Scale = TypeVar("_Scale", default=Any) +_Precision_co = TypeVar("_Precision_co", default=Any, covariant=True) +_Scale_co = TypeVar("_Scale_co", default=Any, covariant=True) + +class _HasPrecisionScale(Protocol[_Precision_co, _Scale_co]): + @property + def precision(self) -> _Precision_co: ... + @property + def scale(self) -> _Scale_co: ... + +class Decimal32Type( + FixedSizeBinaryType[Decimal], _HasPrecisionScale[_Precision_co, _Scale_co] +): ... + +class Decimal64Type( + FixedSizeBinaryType[Decimal], _HasPrecisionScale[_Precision_co, _Scale_co] +): ... + +class Decimal128Type( + FixedSizeBinaryType[Decimal], _HasPrecisionScale[_Precision_co, _Scale_co] +): ... + +class Decimal256Type( + FixedSizeBinaryType[Decimal], _HasPrecisionScale[_Precision_co, _Scale_co] +): ... + +class ListType(DataType, Generic[_DataTypeT_co]): + @property + def value_field(self) -> Field[_DataTypeT_co]: ... + @property + def value_type(self) -> _DataTypeT_co: ... + +class LargeListType(DataType, Generic[_DataTypeT_co]): + @property + def value_field(self) -> Field[_DataTypeT_co]: ... + @property + def value_type(self) -> _DataTypeT_co: ... + +class ListViewType(DataType, Generic[_DataTypeT_co]): + @property + def value_field(self) -> Field[_DataTypeT_co]: ... + @property + def value_type(self) -> _DataTypeT_co: ... + +class LargeListViewType(DataType, Generic[_DataTypeT_co]): + @property + def value_field(self) -> Field[_DataTypeT_co]: ... + @property + def value_type(self) -> _DataTypeT_co: ... + +class FixedSizeListType(DataType, Generic[_DataTypeT_co, _Size]): + @property + def value_field(self) -> Field[_DataTypeT_co]: ... + @property + def value_type(self) -> _DataTypeT_co: ... + @property + def list_size(self) -> int: ... + +class DictionaryMemo(_Weakrefable): ... 
+ +_IndexT = TypeVar( + "_IndexT", + UInt8Type, + Int8Type, + UInt16Type, + Int16Type, + UInt32Type, + Int32Type, + UInt64Type, + Int64Type, +) +_BasicValueT = TypeVar("_BasicValueT", bound=_BasicDataType, default=_BasicDataType) +_ValueT = TypeVar("_ValueT", bound=DataType, default=DataType) +_K = TypeVar("_K", bound=DataType, default=DataType) +_Ordered = TypeVar("_Ordered", Literal[True], Literal[False], default=Literal[False]) + +class DictionaryType(DataType, Generic[_IndexT, _BasicValueT, _Ordered]): + @property + def ordered(self) -> _Ordered: ... + @property + def index_type(self) -> _IndexT: ... + @property + def value_type(self) -> _BasicValueT: ... + +class MapType(DataType, Generic[_K, _ValueT, _Ordered]): + @property + def key_field(self) -> Field[_K]: ... + @property + def key_type(self) -> _K: ... + @property + def item_field(self) -> Field[_ValueT]: ... + @property + def item_type(self) -> _ValueT: ... + @property + def keys_sorted(self) -> _Ordered: ... + +_Size = TypeVar("_Size", default=int) + +class StructType(DataType): + def get_field_index(self, name: str) -> int: ... + def field(self, i: int | str) -> Field: ... + def get_all_field_indices(self, name: str) -> list[int]: ... + def __len__(self) -> int: ... + def __iter__(self) -> Iterator[Field]: ... + + __getitem__ = field + @property + def names(self) -> list[str]: ... + @property + def fields(self) -> list[Field]: ... + +class UnionType(DataType): + @property + def mode(self) -> Literal["sparse", "dense"]: ... + @property + def type_codes(self) -> list[int]: ... + def __len__(self) -> int: ... + def __iter__(self) -> Iterator[Field]: ... + def field(self, i: int) -> Field: ... + + __getitem__ = field + +class SparseUnionType(UnionType): + @property + def mode(self) -> Literal["sparse"]: ... + +class DenseUnionType(UnionType): + @property + def mode(self) -> Literal["dense"]: ... 
+ +_RunEndType = TypeVar("_RunEndType", Int16Type, Int32Type, Int64Type) + +class RunEndEncodedType(DataType, Generic[_RunEndType, _BasicValueT]): + @property + def run_end_type(self) -> _RunEndType: ... + @property + def value_type(self) -> _BasicValueT: ... + +_StorageT = TypeVar("_StorageT", bound=Array | ChunkedArray) + +class BaseExtensionType(DataType): + def __arrow_ext_class__(self) -> type[ExtensionArray]: ... + def __arrow_ext_scalar_class__(self) -> type[ExtensionScalar]: ... + @property + def extension_name(self) -> str: ... + @property + def storage_type(self) -> DataType: ... + def wrap_array(self, storage: _StorageT) -> _StorageT: ... + +class ExtensionType(BaseExtensionType): + def __init__(self, storage_type: DataType, extension_name: str) -> None: ... + def __arrow_ext_serialize__(self) -> bytes: ... + @classmethod + def __arrow_ext_deserialize__( + cls, storage_type: DataType, serialized: bytes + ) -> Self: ... + +class FixedShapeTensorType(BaseExtensionType, Generic[_ValueT]): + @property + def value_type(self) -> _ValueT: ... + @property + def shape(self) -> list[int]: ... + @property + def dim_names(self) -> list[str] | None: ... + @property + def permutation(self) -> list[int] | None: ... + +class Bool8Type(BaseExtensionType): ... +class UuidType(BaseExtensionType): ... +class JsonType(BaseExtensionType): ... + +class OpaqueType(BaseExtensionType): + @property + def type_name(self) -> str: ... + @property + def vendor_name(self) -> str: ... + +class UnknownExtensionType(ExtensionType): + def __init__(self, storage_type: DataType, serialized: bytes) -> None: ... + +def register_extension_type(ext_type: ExtensionType) -> None: ... +def unregister_extension_type(type_name: str) -> None: ... 
+ +_StrOrBytes: TypeAlias = str | bytes +_MetadataMapping: TypeAlias = Mapping[_StrOrBytes, _StrOrBytes] +_MetadataIterable: TypeAlias = Iterable[tuple[_StrOrBytes, _StrOrBytes]] +_KeyValueMetadataInput: TypeAlias = _MetadataMapping | _MetadataIterable | None +_DataTypeAlias: TypeAlias = Literal[ + "null", + "bool", + "boolean", + "i1", + "int8", + "i2", + "int16", + "i4", + "int32", + "i8", + "int64", + "u1", + "uint8", + "u2", + "uint16", + "u4", + "uint32", + "u8", + "uint64", + "f2", + "halffloat", + "float16", + "f4", + "float", + "float32", + "f8", + "double", + "float64", + "string", + "str", + "utf8", + "binary", + "large_string", + "large_str", + "large_utf8", + "large_binary", + "binary_view", + "string_view", + "date32", + "date64", + "date32[day]", + "date64[ms]", + "time32[s]", + "time32[ms]", + "time64[us]", + "time64[ns]", + "timestamp[s]", + "timestamp[ms]", + "timestamp[us]", + "timestamp[ns]", + "duration[s]", + "duration[ms]", + "duration[us]", + "duration[ns]", + "month_day_nano_interval", +] +_DataTypeAliasInput: TypeAlias = _DataTypeAlias | str +_DataTypeLike: TypeAlias = DataType | _DataTypeAliasInput +_FieldTypeInput: TypeAlias = _DataTypeLike | None +_SchemaMetadataInput: TypeAlias = ( + Mapping[bytes, bytes] + | Mapping[str, str] + | Mapping[bytes, str] + | Mapping[str, bytes] +) + +class KeyValueMetadata(_Metadata, Mapping[bytes, bytes]): + def __init__( + self, + __arg0__: _KeyValueMetadataInput | KeyValueMetadata = None, + **kwargs: str, + ) -> None: ... + def equals(self, other: KeyValueMetadata) -> bool: ... + def __len__(self) -> int: ... + def __contains__(self, /, __key: object) -> bool: ... + def __getitem__(self, /, __key: Any) -> Any: ... + def __iter__(self) -> Iterator[bytes]: ... + def get_all(self, key: str) -> list[bytes]: ... + def to_dict(self) -> dict[bytes, bytes]: ... + +class Field(_Weakrefable, Generic[_DataTypeT_co]): + def equals(self, other: Field, check_metadata: bool = False) -> bool: ... 
+ def __hash__(self) -> int: ... + @property + def nullable(self) -> bool: ... + @property + def name(self) -> str: ... + @property + def metadata(self) -> dict[bytes, bytes] | None: ... + @property + def type(self) -> _DataTypeT_co: ... + def with_metadata( + self, + metadata: _MetadataMapping | Any, + ) -> Self: ... + def remove_metadata(self) -> Self: ... + def with_type(self, new_type: DataType) -> Field: ... + def with_name(self, name: str) -> Self: ... + def with_nullable(self, nullable: bool) -> Field[_DataTypeT_co]: ... + def flatten(self) -> list[Field]: ... + def _export_to_c(self, out_ptr: int) -> None: ... + @classmethod + def _import_from_c(cls, in_ptr: int) -> Self: ... + def __arrow_c_schema__(self) -> Any: ... + @classmethod + def _import_from_c_capsule(cls, schema) -> Self: ... + +_StructFieldTuple: TypeAlias = ( + tuple[str, Field[Any] | None] | tuple[str, _FieldTypeInput] +) +_StructFieldsInput: TypeAlias = ( + Iterable[Field[Any] | _StructFieldTuple] + | Mapping[str, Field[Any] | _FieldTypeInput] +) + +class Schema(_Weakrefable): + def __len__(self) -> int: ... + def __getitem__(self, key: str | int) -> Field: ... + + _field = __getitem__ + def __iter__(self) -> Iterator[Field]: ... + def __hash__(self) -> int: ... + def __sizeof__(self) -> int: ... + @property + def pandas_metadata(self) -> dict: ... + @property + def names(self) -> list[str]: ... + @property + def types(self) -> list[DataType]: ... + @property + def metadata(self) -> dict[bytes, bytes]: ... Review Comment: `Schema.metadata` returns `None` when no metadata is set (see implementation). The stub currently declares it as always `dict[bytes, bytes]`. Consider widening the annotation to `dict[bytes, bytes] | None`, matching how `Field.metadata` is already annotated in this stub. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected]
