pitrou commented on a change in pull request #11302: URL: https://github.com/apache/arrow/pull/11302#discussion_r722137618
########## File path: cpp/src/arrow/python/arrow_to_python.h ########## @@ -0,0 +1,59 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +// Functions for converting between pandas's NumPy-based data representation +// and Arrow data structures + +#pragma once + +#include "arrow/python/common.h" +#include "arrow/python/platform.h" + +namespace arrow { + +class Array; +struct Scalar; + +namespace py { + +/// \brief Utility class for converting Arrow to Python obects. A class instead Review comment: Trailing unfinished sentence here? ########## File path: python/pyarrow/tests/test_scalars.py ########## @@ -364,6 +367,14 @@ def test_duration_nanos_nopandas(): arr[0].as_py() +def test_month_day_nano_interval(): + triple = pa.MonthDayNano([3600, 3600, 3600]) Review comment: Please use different values for components here. ########## File path: cpp/src/arrow/python/arrow_to_python.cc ########## @@ -0,0 +1,107 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +// Functions for converting between pandas's NumPy-based data representation +// and Arrow data structures + +#include "arrow/python/arrow_to_python.h" + +#include "arrow/python/arrow_to_python_internal.h" +#include "arrow/python/datetime.h" +#include "arrow/python/helpers.h" +#include "arrow/result_internal.h" +#include "arrow/scalar.h" + +namespace arrow { +namespace py { +namespace { + +Status CheckInterval(const DataType& datatype) { + if (datatype.id() != Type::INTERVAL_MONTH_DAY_NANO) { + return Status::NotImplemented( + "Only MonthDayIntervalNanoIntervalType supported. Provided.", + datatype.ToString()); + } + return Status::OK(); +} + +// Wrapper around a Python list object that mimics dereference and assignment +// operations. +struct PyListAssigner { + public: + explicit PyListAssigner(PyObject* list) : list_(list) { DCHECK(PyList_Check(list_)); } + + PyListAssigner& operator*() { return *this; } + + void operator=(PyObject* obj) { + if (ARROW_PREDICT_FALSE(PyList_SetItem(list_, current_index_, obj) == -1)) { + Py_FatalError("list did not have the correct preallocated size."); + } + } + + PyListAssigner& operator++() { + current_index_++; + return *this; + } + + PyListAssigner& operator+=(int64_t offset) { + current_index_ += offset; + return *this; + } + + private: + PyObject* list_; + int64_t current_index_ = 0; +}; + +} // namespace + +Result<PyObject*> ArrowToPython::ToPyList(const Array& array) { + RETURN_NOT_OK(CheckInterval(*array.type())); Review comment: So these methods only work for month_day_nano_interval? That seems like a weird API choice. ########## File path: cpp/src/arrow/python/arrow_to_python.h ########## @@ -0,0 +1,59 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +// Functions for converting between pandas's NumPy-based data representation +// and Arrow data structures + +#pragma once + +#include "arrow/python/common.h" +#include "arrow/python/platform.h" + +namespace arrow { + +class Array; +struct Scalar; + +namespace py { + +/// \brief Utility class for converting Arrow to Python obects. A class instead +/// +/// A class is chosen because in the future some amount of state will be +/// (e.g. imported python classes), doing this one lazily will be helpful +/// and having members present avoids static C++ variables. +class ARROW_PYTHON_EXPORT ArrowToPython { + public: + /// \brief Converts the given Array to a PyList object. Returns NULL if there + /// is an error converting the Array. The list elements are the same ones + /// generated via ToLogical() + /// + /// N.B. This has limited type support. ARROW-12976 tracks extending the + /// implementation. + Result<PyObject*> ToPyList(const Array& array); + + /// \brief Converts the given Scalar the type that is closest to its arrow + /// representation. + /// + /// For instance timestamp would be translated to a integer representing an + // offset from the unix epoch. + /// + /// N.B. This has limited type support. ARROW-12976 tracks full implementation. + Result<PyObject*> ToPrimitive(const Scalar& scalar); Review comment: Why isn't this called "ToPyObject"? Is it only for primitive types? ########## File path: cpp/src/arrow/python/arrow_to_python.h ########## @@ -0,0 +1,59 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +// Functions for converting between pandas's NumPy-based data representation Review comment: Do these functions have anything to do with Pandas? ########## File path: python/pyarrow/array.pxi ########## @@ -163,6 +163,12 @@ def array(object obj, type=None, mask=None, size=None, from_pandas=None, representation). Timezone-naive data will be implicitly interpreted as UTC. + Pandas's DateOffsets and dateutil.relativedelta.relativedetla are by Review comment: "delta" ########## File path: cpp/src/arrow/python/arrow_to_python.h ########## @@ -0,0 +1,59 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +// Functions for converting between pandas's NumPy-based data representation +// and Arrow data structures + +#pragma once + +#include "arrow/python/common.h" +#include "arrow/python/platform.h" + +namespace arrow { + +class Array; +struct Scalar; + +namespace py { + +/// \brief Utility class for converting Arrow to Python obects. A class instead +/// +/// A class is chosen because in the future some amount of state will be +/// (e.g. imported python classes), doing this one lazily will be helpful +/// and having members present avoids static C++ variables. +class ARROW_PYTHON_EXPORT ArrowToPython { + public: + /// \brief Converts the given Array to a PyList object. Returns NULL if there + /// is an error converting the Array. The list elements are the same ones + /// generated via ToLogical() Review comment: What is ToLogical? ########## File path: cpp/src/arrow/python/arrow_to_pandas.cc ########## @@ -1097,6 +1081,40 @@ struct ObjectWriterVisitor { return Status::OK(); } + template <typename Type> + enable_if_t<std::is_same<Type, MonthDayNanoIntervalType>::value, Status> Visit( + const Type& type) { + OwnedRef args(PyTuple_New(0)); + OwnedRef kwargs(PyDict_New()); + RETURN_IF_PYERROR(); + auto to_date_offset = [&](const MonthDayNanoIntervalType::MonthDayNanos& interval, + PyObject** out) { + DCHECK(internal::BorrowPandasDataOffsetType() != nullptr); + // TimeDelta objects do not add nanoseconds component to timestamp. + // so convert microseconds and remainder to preserve data + // but give users more expected results. Review comment: Code looks ok, but I don't understand this comment. What do you mean with "do not add nanoseconds component to timestamp"? ########## File path: cpp/src/arrow/python/arrow_to_python.h ########## @@ -0,0 +1,59 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +// Functions for converting between pandas's NumPy-based data representation +// and Arrow data structures + +#pragma once + +#include "arrow/python/common.h" +#include "arrow/python/platform.h" + +namespace arrow { + +class Array; +struct Scalar; + +namespace py { + +/// \brief Utility class for converting Arrow to Python obects. A class instead +/// +/// A class is chosen because in the future some amount of state will be +/// (e.g. imported python classes), doing this one lazily will be helpful +/// and having members present avoids static C++ variables. +class ARROW_PYTHON_EXPORT ArrowToPython { + public: + /// \brief Converts the given Array to a PyList object. Returns NULL if there + /// is an error converting the Array. The list elements are the same ones + /// generated via ToLogical() + /// + /// N.B. This has limited type support. ARROW-12976 tracks extending the + /// implementation. + Result<PyObject*> ToPyList(const Array& array); + + /// \brief Converts the given Scalar the type that is closest to its arrow + /// representation. + /// + /// For instance timestamp would be translated to a integer representing an + // offset from the unix epoch. Review comment: Would it? I thought a timestamp would be converted to a datetime. ########## File path: python/pyarrow/array.pxi ########## @@ -163,6 +163,12 @@ def array(object obj, type=None, mask=None, size=None, from_pandas=None, representation). Timezone-naive data will be implicitly interpreted as UTC. + Pandas's DateOffsets and dateutil.relativedelta.relativedetla are by + default converted as MonthDayNanoIntervalArray. relativedelta leapday's + are ignored as are all absolute fields on both objects. datetime.timedelta + also be converted to MonthDayNanoIntervalArray but require passing Review comment: "also be"? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected]
