AlenkaF commented on code in PR #14613: URL: https://github.com/apache/arrow/pull/14613#discussion_r1017615984
########## python/pyarrow/interchange/dataframe.py: ########## @@ -0,0 +1,132 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from __future__ import annotations +from typing import ( + Any, + Iterable, + Optional, + Sequence, +) + +import pyarrow as pa + +from pyarrow.interchange.column import PyArrowColumn +from pyarrow.interchange.dataframe_protocol import DataFrame as DataFrameXchg + + +class TableXchg(DataFrameXchg): + """ + A data frame class, with only the methods required by the interchange + protocol defined. + Instances of this (private) class are returned from + ``pd.DataFrame.__dataframe__`` as objects with the methods and + attributes defined on this class. + """ + + def __init__( + self, df: pa.Table, nan_as_null: bool = False, allow_copy: bool = True + ) -> None: + """ + Constructor - an instance of this (private) class is returned from + `pa.Table.__dataframe__`. + """ + self._df = df + # ``nan_as_null`` is a keyword intended for the consumer to tell the + # producer to overwrite null values in the data with ``NaN`` (or + # ``NaT``). This currently has no effect; once support for nullable + # extension dtypes is added, this value should be propagated to + # columns. + self._nan_as_null = nan_as_null + self._allow_copy = allow_copy + + def __dataframe__( + self, nan_as_null: bool = False, allow_copy: bool = True + ) -> TableXchg: + return TableXchg(self._df, nan_as_null, allow_copy) + + @property + def metadata(self) -> dict[str, Any]: + # The metadata for the data frame, as a dictionary with string keys. + # Add schema metadata here (pandas metadata, ot custom metadata) + if self._df.schema.metadata: + schema_metadata = {k.decode('utf8'): v.decode('utf8') + for k, v in self._df.schema.metadata.items()} + return schema_metadata + else: + return {} Review Comment: Added this part after testing `pa.Table -> pd.DataFrame` to not get an error for reading the metadata from a NoneType. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected]
