pitrou commented on a change in pull request #11302:
URL: https://github.com/apache/arrow/pull/11302#discussion_r721506360



##########
File path: cpp/src/arrow/python/datetime.cc
##########
@@ -71,6 +71,19 @@ bool MatchFixedOffset(const std::string& tz, 
util::string_view* sign,
   return iter == (tz.data() + tz.size());
 }
 
+static PyTypeObject MonthDayNanoTupleType = {0, 0};
+
+static PyStructSequence_Field MonthDayNanoField[] = {
+    {"months", "The number of months in the interval"},
+    {"days", "The number days in the interval"},
+    {"nanoseconds", "The number of nanoseconds in the interval"},
+    {nullptr, nullptr}};
+
+static PyStructSequence_Desc MonthDayNanoTupleDesc = {
+    "MonthDayNanoTuple", "A interval consistent of months, days and 
nanoseconds.",

Review comment:
       What does "consistent" mean here?

##########
File path: cpp/src/arrow/python/datetime.cc
##########
@@ -270,6 +283,18 @@ static inline Status PyDate_convert_int(int64_t val, const 
DateUnit unit, int64_
   return Status::OK();
 }
 
+PyObject* NewMonthDayNanoTupleType() {
+  if (MonthDayNanoTupleType.tp_name == nullptr) {
+    if (PyStructSequence_InitType2(&MonthDayNanoTupleType, 
&MonthDayNanoTupleDesc) != 0) {
+      Py_FatalError("Could not initialize MonthDayNanoTuple");
+    }
+  }
+  Py_INCREF(&MonthDayNanoTupleType);
+  return (PyObject*)&MonthDayNanoTupleType;
+}
+
+PyTypeObject* BorrowMonthDayNanoTupleType() { return &MonthDayNanoTupleType; }

Review comment:
       This doesn't seem used?

##########
File path: python/pyarrow/scalar.pxi
##########
@@ -514,6 +514,32 @@ cdef class DurationScalar(Scalar):
             return datetime.timedelta(microseconds=sp.value // 1000)
 
 
+cdef class MonthDayNanoIntervalScalar(Scalar):
+    """
+    Concrete class for month, day, nanosecond scalars.
+    """
+
+    @property
+    def value(self):
+        """
+        Returns this value pyarrow.MonthDayNanoTuple.
+        """
+        cdef PyObject* val
+        val = GetResultValue(ARROW_TO_PYTHON.ToPrimitive(
+            (deref(self.wrapped.get()))))
+        return PyObject_to_object(val)
+
+    def as_py(self):
+        """
+        Return this value as a Pandas DateOffset instance if Pandas is present
+        otherwise as a named tuple containing months days and nanoseconds.

Review comment:
       I'm a bit concerned that this makes user code fragile: depending on 
whether Pandas is installed or not, you'll get an object with a different API.
   
   Instead, perhaps always return a `pa.MonthDayNano`, but add a 
`MonthDayNano.to_pandas()` method to produce a Pandas DateOffset?

##########
File path: cpp/src/arrow/python/python_to_arrow.cc
##########
@@ -68,6 +69,95 @@ using internal::MakeConverter;
 
 namespace py {
 
+enum class MonthDayNanoField { kMonths, kWeeksAndDays, kDaysOnly, kNanoseconds 
};
+
+template <MonthDayNanoField field>
+struct MonthDayNanoTraits;
+
+struct MonthDayNanoAttrData {
+  const char* name;
+  const int64_t multiplier;
+};
+
+template <>
+struct MonthDayNanoTraits<MonthDayNanoField::kMonths> {
+  using c_type = int32_t;
+  static constexpr char name[] = "months";
+  static const MonthDayNanoAttrData attrs[];
+};
+
+const MonthDayNanoAttrData 
MonthDayNanoTraits<MonthDayNanoField::kMonths>::attrs[] = {
+    {"years", 1}, {"months", /*months_in_year=*/12}, {nullptr, 0}};
+
+template <>
+struct MonthDayNanoTraits<MonthDayNanoField::kWeeksAndDays> {
+  using c_type = int32_t;
+  static constexpr char name[] = "days";
+  static const MonthDayNanoAttrData attrs[];
+};
+
+const MonthDayNanoAttrData 
MonthDayNanoTraits<MonthDayNanoField::kWeeksAndDays>::attrs[] =
+    {{"weeks", 1}, {"days", /*days_in_week=*/7}, {nullptr, 0}};
+
+template <>
+struct MonthDayNanoTraits<MonthDayNanoField::kDaysOnly> {
+  using c_type = int32_t;
+  static constexpr char name[] = "days";
+  static const MonthDayNanoAttrData attrs[];
+};
+
+const MonthDayNanoAttrData 
MonthDayNanoTraits<MonthDayNanoField::kDaysOnly>::attrs[] = {
+    {"days", 1}, {nullptr, 0}};
+
+template <>
+struct MonthDayNanoTraits<MonthDayNanoField::kNanoseconds> {
+  using c_type = int64_t;
+  static constexpr char name[] = "nanoseconds";
+  static const MonthDayNanoAttrData attrs[];
+};
+
+const MonthDayNanoAttrData 
MonthDayNanoTraits<MonthDayNanoField::kNanoseconds>::attrs[] =
+    {{"hours", 1},
+     {"minutes", /*minutes_in_hours=*/60},
+     {"seconds", /*seconds_in_minute=*/60},
+     {"milliseconds", /*milliseconds_in_seconds*/ 1000},
+     {"microseconds", /*microseconds_in_millseconds=*/1000},
+     {"nanoseconds", /*nanoseconds_in_microseconds=*/1000},
+     {nullptr, 0}};
+
+template <MonthDayNanoField field>
+struct PopulateMonthDayNano {
+  using Traits = MonthDayNanoTraits<field>;
+  inline static Status Field(PyObject* obj,
+                             typename MonthDayNanoTraits<field>::c_type* out,
+                             bool* found_attrs) {
+    *out = 0;
+    for (const MonthDayNanoAttrData* attr = &Traits::attrs[0]; 
attr->multiplier != 0;
+         ++attr) {
+      if (attr->multiplier != 1 &&
+          ::arrow::internal::MultiplyWithOverflow(
+              static_cast<typename Traits::c_type>(attr->multiplier), *out, 
out)) {
+        return Status::Invalid("Overflow on: ", (attr - 1)->name);
+      }
+      if (PyObject_HasAttrString(obj, attr->name)) {
+        OwnedRef field_value(PyObject_GetAttrString(obj, attr->name));

Review comment:
       If you want to avoid a double hash lookup, you can simply call 
`PyObject_GetAttrString` and catch the `AttributeError` on failure.

##########
File path: cpp/src/arrow/python/datetime.cc
##########
@@ -450,6 +475,19 @@ Result<std::string> TzinfoToString(PyObject* tzinfo) {
   return PyTZInfo_utcoffset_hhmm(tzinfo);
 }
 
+Result<PyObject*> MonthDayNanoIntervalToNamedTuple(

Review comment:
       Since NULL is return an error, you don't need to wrap the return value 
in a `Result<>`.

##########
File path: cpp/src/arrow/python/CMakeLists.txt
##########
@@ -28,6 +28,7 @@ add_dependencies(arrow_python-all arrow_python 
arrow_python-tests)
 
 set(ARROW_PYTHON_SRCS
     arrow_to_pandas.cc
+    arrow_to_python.cc

Review comment:
       Looks like you forgot to add this file?

##########
File path: python/pyarrow/tests/test_array.py
##########
@@ -2162,6 +2163,101 @@ def test_array_from_numpy_ascii():
     assert arrow_arr.equals(expected)
 
 
[email protected]

Review comment:
       "nopandas" is for tests that fail if Pandas is installed, I think.

##########
File path: python/pyarrow/array.pxi
##########
@@ -1508,6 +1508,30 @@ cdef class DurationArray(NumericArray):
     Concrete class for Arrow arrays of duration data type.
     """
 
+cdef class MonthDayNanoIntervalArray(Array):

Review comment:
       Nit, but please let's remain consistent about spacing between classes.

##########
File path: python/pyarrow/lib.pyx
##########
@@ -37,6 +37,10 @@ arrow_init_numpy()
 # (used from some of our C++ code, see e.g. ARROW-5260)
 import_pyarrow()
 
+cdef libarrow.ArrowToPython ARROW_TO_PYTHON
+
+MonthDayNanoTuple = NewMonthDayNanoTupleType()

Review comment:
       Nit, but I would simply call it `MonthDayNano`.

##########
File path: python/pyarrow/array.pxi
##########
@@ -1508,6 +1508,30 @@ cdef class DurationArray(NumericArray):
     Concrete class for Arrow arrays of duration data type.
     """
 
+cdef class MonthDayNanoIntervalArray(Array):
+    """
+    Concrete class for Arrow arrays of interval[MonthDayNano] type.
+    """
+
+    def to_pylist(self):
+        """
+        Convert to a list of native Python objects.
+
+        is installed the objects will be
+        pd.tseries.offsets.DateOffset objects.  Otherwise they are
+        pyarrow.MonthDayNanoTuple objects.

Review comment:
       Same remark as for the corresponding scalar class. Also, it seems the 
docstring is a bit garbled?

##########
File path: python/pyarrow/lib.pyx
##########
@@ -37,6 +37,10 @@ arrow_init_numpy()
 # (used from some of our C++ code, see e.g. ARROW-5260)
 import_pyarrow()
 
+cdef libarrow.ArrowToPython ARROW_TO_PYTHON

Review comment:
       This seems rather cryptic. What does this do? Is this a globally defined 
converter?
   (is initialization costly?)

##########
File path: cpp/src/arrow/python/python_to_arrow.cc
##########
@@ -68,6 +69,95 @@ using internal::MakeConverter;
 
 namespace py {
 
+enum class MonthDayNanoField { kMonths, kWeeksAndDays, kDaysOnly, kNanoseconds 
};

Review comment:
       Since we are at it, can you perhaps enclose all private APIs in the 
anonymous namespace?

##########
File path: python/pyarrow/tests/test_array.py
##########
@@ -2162,6 +2163,101 @@ def test_array_from_numpy_ascii():
     assert arrow_arr.equals(expected)
 
 
[email protected]
+def test_interval_array_from_timedelta():
+    data = [
+        None,
+        datetime.timedelta(days=1, seconds=1, microseconds=1,
+                           milliseconds=1, minutes=1, hours=1, weeks=1)]
+
+    # From timedelta (explicit type required)
+    arr = pa.array(data, pa.month_day_nano_interval())
+    assert isinstance(arr, pa.MonthDayNanoIntervalArray)
+    assert arr.type == pa.month_day_nano_interval()
+    expected_list = [
+        None,
+        pa.MonthDayNanoTuple([0, 8,

Review comment:
       Can you test with a non-zero number of months?

##########
File path: cpp/src/arrow/python/python_to_arrow.cc
##########
@@ -68,6 +69,95 @@ using internal::MakeConverter;
 
 namespace py {
 
+enum class MonthDayNanoField { kMonths, kWeeksAndDays, kDaysOnly, kNanoseconds 
};
+
+template <MonthDayNanoField field>
+struct MonthDayNanoTraits;
+
+struct MonthDayNanoAttrData {
+  const char* name;
+  const int64_t multiplier;
+};
+
+template <>
+struct MonthDayNanoTraits<MonthDayNanoField::kMonths> {
+  using c_type = int32_t;
+  static constexpr char name[] = "months";
+  static const MonthDayNanoAttrData attrs[];
+};
+
+const MonthDayNanoAttrData 
MonthDayNanoTraits<MonthDayNanoField::kMonths>::attrs[] = {
+    {"years", 1}, {"months", /*months_in_year=*/12}, {nullptr, 0}};
+
+template <>
+struct MonthDayNanoTraits<MonthDayNanoField::kWeeksAndDays> {
+  using c_type = int32_t;
+  static constexpr char name[] = "days";
+  static const MonthDayNanoAttrData attrs[];
+};
+
+const MonthDayNanoAttrData 
MonthDayNanoTraits<MonthDayNanoField::kWeeksAndDays>::attrs[] =
+    {{"weeks", 1}, {"days", /*days_in_week=*/7}, {nullptr, 0}};
+
+template <>
+struct MonthDayNanoTraits<MonthDayNanoField::kDaysOnly> {
+  using c_type = int32_t;
+  static constexpr char name[] = "days";
+  static const MonthDayNanoAttrData attrs[];
+};
+
+const MonthDayNanoAttrData 
MonthDayNanoTraits<MonthDayNanoField::kDaysOnly>::attrs[] = {
+    {"days", 1}, {nullptr, 0}};
+
+template <>
+struct MonthDayNanoTraits<MonthDayNanoField::kNanoseconds> {
+  using c_type = int64_t;
+  static constexpr char name[] = "nanoseconds";
+  static const MonthDayNanoAttrData attrs[];
+};
+
+const MonthDayNanoAttrData 
MonthDayNanoTraits<MonthDayNanoField::kNanoseconds>::attrs[] =
+    {{"hours", 1},
+     {"minutes", /*minutes_in_hours=*/60},
+     {"seconds", /*seconds_in_minute=*/60},
+     {"milliseconds", /*milliseconds_in_seconds*/ 1000},
+     {"microseconds", /*microseconds_in_millseconds=*/1000},
+     {"nanoseconds", /*nanoseconds_in_microseconds=*/1000},
+     {nullptr, 0}};
+
+template <MonthDayNanoField field>
+struct PopulateMonthDayNano {
+  using Traits = MonthDayNanoTraits<field>;

Review comment:
       Add a `using c_type = ...` as well?

##########
File path: cpp/src/arrow/python/python_to_arrow.cc
##########
@@ -68,6 +69,95 @@ using internal::MakeConverter;
 
 namespace py {
 
+enum class MonthDayNanoField { kMonths, kWeeksAndDays, kDaysOnly, kNanoseconds 
};
+
+template <MonthDayNanoField field>
+struct MonthDayNanoTraits;
+
+struct MonthDayNanoAttrData {
+  const char* name;
+  const int64_t multiplier;
+};
+
+template <>
+struct MonthDayNanoTraits<MonthDayNanoField::kMonths> {
+  using c_type = int32_t;
+  static constexpr char name[] = "months";
+  static const MonthDayNanoAttrData attrs[];
+};
+
+const MonthDayNanoAttrData 
MonthDayNanoTraits<MonthDayNanoField::kMonths>::attrs[] = {
+    {"years", 1}, {"months", /*months_in_year=*/12}, {nullptr, 0}};
+
+template <>
+struct MonthDayNanoTraits<MonthDayNanoField::kWeeksAndDays> {
+  using c_type = int32_t;
+  static constexpr char name[] = "days";
+  static const MonthDayNanoAttrData attrs[];
+};
+
+const MonthDayNanoAttrData 
MonthDayNanoTraits<MonthDayNanoField::kWeeksAndDays>::attrs[] =
+    {{"weeks", 1}, {"days", /*days_in_week=*/7}, {nullptr, 0}};
+
+template <>
+struct MonthDayNanoTraits<MonthDayNanoField::kDaysOnly> {
+  using c_type = int32_t;
+  static constexpr char name[] = "days";
+  static const MonthDayNanoAttrData attrs[];
+};
+
+const MonthDayNanoAttrData 
MonthDayNanoTraits<MonthDayNanoField::kDaysOnly>::attrs[] = {
+    {"days", 1}, {nullptr, 0}};
+
+template <>
+struct MonthDayNanoTraits<MonthDayNanoField::kNanoseconds> {
+  using c_type = int64_t;
+  static constexpr char name[] = "nanoseconds";
+  static const MonthDayNanoAttrData attrs[];
+};
+
+const MonthDayNanoAttrData 
MonthDayNanoTraits<MonthDayNanoField::kNanoseconds>::attrs[] =
+    {{"hours", 1},
+     {"minutes", /*minutes_in_hours=*/60},
+     {"seconds", /*seconds_in_minute=*/60},
+     {"milliseconds", /*milliseconds_in_seconds*/ 1000},
+     {"microseconds", /*microseconds_in_millseconds=*/1000},
+     {"nanoseconds", /*nanoseconds_in_microseconds=*/1000},
+     {nullptr, 0}};
+
+template <MonthDayNanoField field>
+struct PopulateMonthDayNano {
+  using Traits = MonthDayNanoTraits<field>;
+  inline static Status Field(PyObject* obj,
+                             typename MonthDayNanoTraits<field>::c_type* out,
+                             bool* found_attrs) {
+    *out = 0;
+    for (const MonthDayNanoAttrData* attr = &Traits::attrs[0]; 
attr->multiplier != 0;
+         ++attr) {
+      if (attr->multiplier != 1 &&
+          ::arrow::internal::MultiplyWithOverflow(
+              static_cast<typename Traits::c_type>(attr->multiplier), *out, 
out)) {
+        return Status::Invalid("Overflow on: ", (attr - 1)->name);
+      }
+      if (PyObject_HasAttrString(obj, attr->name)) {
+        OwnedRef field_value(PyObject_GetAttrString(obj, attr->name));
+        RETURN_IF_PYERROR();
+        *found_attrs = true;
+        if (field_value.obj() == Py_None) {

Review comment:
       What does `None` mean in this context?




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to