pitrou commented on a change in pull request #11147:
URL: https://github.com/apache/arrow/pull/11147#discussion_r709915241



##########
File path: python/pyarrow/_compute.pyx
##########
@@ -284,18 +283,9 @@ cdef class Function(_Weakrefable):
         The function kind.
         """
         cdef FunctionKind c_kind = self.base_func.kind()
-        if c_kind == FunctionKind_SCALAR:
-            return 'scalar'
-        elif c_kind == FunctionKind_VECTOR:
-            return 'vector'
-        elif c_kind == FunctionKind_SCALAR_AGGREGATE:
-            return 'scalar_aggregate'
-        elif c_kind == FunctionKind_HASH_AGGREGATE:
-            return 'hash_aggregate'
-        elif c_kind == FunctionKind_META:
-            return 'meta'
-        else:
+        if c_kind not in type(self).kind_dict:
             raise NotImplementedError("Unknown Function::Kind")
+        return type(self).kind_dict[c_kind]

Review comment:
       ```python
   try:
       return self.kind_dict[c_kind]
   except KeyError:
       raise NotImplementedError("Unknown Function::Kind")
   ```

##########
File path: python/pyarrow/_compute.pyx
##########
@@ -792,7 +775,7 @@ cdef class _PadOptions(FunctionOptions):
 
 
 class PadOptions(_PadOptions):
-    def __init__(self, width, padding=' '):
+    def __init__(self, width, *, padding=" "):

Review comment:
       Similarly, this doesn't seem desirable. I don't think there's any 
ambiguity as to what the second argument is.

##########
File path: python/pyarrow/_compute.pyx
##########
@@ -690,99 +678,94 @@ class CastOptions(_CastOptions):
 
 
 cdef class _ElementWiseAggregateOptions(FunctionOptions):
-    def _set_options(self, bint skip_nulls):
+    def _set_options(self, skip_nulls):
         self.wrapped.reset(new CElementWiseAggregateOptions(skip_nulls))
 
 
 class ElementWiseAggregateOptions(_ElementWiseAggregateOptions):
-    def __init__(self, bint skip_nulls=True):
+    def __init__(self, *, skip_nulls=True):
         self._set_options(skip_nulls)
 
 
 cdef CRoundMode unwrap_round_mode(round_mode) except *:
-    if round_mode == 'down':
+    # Explicit if-else since Cython does not supports storing C++ enum classes
+    # as dictionary values because they do not resolve to integers.
+    if round_mode == "down":
         return CRoundMode_DOWN
-    elif round_mode == 'up':
+    elif round_mode == "up":
         return CRoundMode_UP
-    elif round_mode == 'towards_zero':
+    elif round_mode == "towards_zero":
         return CRoundMode_TOWARDS_ZERO
-    elif round_mode == 'towards_infinity':
+    elif round_mode == "towards_infinity":
         return CRoundMode_TOWARDS_INFINITY
-    elif round_mode == 'half_down':
+    elif round_mode == "half_down":
         return CRoundMode_HALF_DOWN
-    elif round_mode == 'half_up':
+    elif round_mode == "half_up":
         return CRoundMode_HALF_UP
-    elif round_mode == 'half_towards_zero':
+    elif round_mode == "half_towards_zero":
         return CRoundMode_HALF_TOWARDS_ZERO
-    elif round_mode == 'half_towards_infinity':
+    elif round_mode == "half_towards_infinity":
         return CRoundMode_HALF_TOWARDS_INFINITY
-    elif round_mode == 'half_to_even':
+    elif round_mode == "half_to_even":
         return CRoundMode_HALF_TO_EVEN
-    elif round_mode == 'half_to_odd':
+    elif round_mode == "half_to_odd":
         return CRoundMode_HALF_TO_ODD
     else:
-        raise ValueError('"{}" is not a valid round mode'.format(round_mode))
+        raise ValueError(f"\"{round_mode}\" is not a valid 'round mode'")
 
 
 cdef class _RoundOptions(FunctionOptions):
-    def _set_options(self, int64_t ndigits, round_mode):
-        cdef:
-            CRoundMode c_round_mode = CRoundMode_HALF_TO_EVEN
-        c_round_mode = unwrap_round_mode(round_mode)
-        self.wrapped.reset(new CRoundOptions(ndigits, c_round_mode))
+    def _set_options(self, ndigits, round_mode):
+        self.wrapped.reset(new CRoundOptions(ndigits,
+                                             unwrap_round_mode(round_mode)))
 
 
 class RoundOptions(_RoundOptions):
-    def __init__(self, ndigits=0, round_mode='half_to_even'):
+    def __init__(self, *, ndigits=0, round_mode="half_to_even"):

Review comment:
       This change doesn't look desirable to me. The Python build-in 
[round](https://docs.python.org/3/library/functions.html#round) function takes  
positional arguments.

##########
File path: python/pyarrow/_compute.pyx
##########
@@ -236,10 +226,19 @@ cdef class Function(_Weakrefable):
 
     * "meta" functions dispatch to other functions.
     """
+
     cdef:
         shared_ptr[CFunction] sp_func
         CFunction* base_func
 
+    kind_dict = {

Review comment:
       Call this `_kind_dict`? This isn't a public API.

##########
File path: python/pyarrow/_compute.pyx
##########
@@ -844,53 +823,53 @@ class ExtractRegexOptions(_ExtractRegexOptions):
 
 
 cdef class _SliceOptions(FunctionOptions):
-    def _set_options(self, int64_t start, int64_t stop, int64_t step):
+    def _set_options(self, start, stop, step):
         self.wrapped.reset(new CSliceOptions(start, stop, step))
 
 
 class SliceOptions(_SliceOptions):
-    def __init__(self, int64_t start, int64_t stop=sys.maxsize,
-                 int64_t step=1):
+    def __init__(self, start, *, stop=sys.maxsize, step=1):

Review comment:
       Same here. Why would `stop` be a keyword-only parameter?

##########
File path: python/pyarrow/_compute.pyx
##########
@@ -690,99 +678,94 @@ class CastOptions(_CastOptions):
 
 
 cdef class _ElementWiseAggregateOptions(FunctionOptions):
-    def _set_options(self, bint skip_nulls):
+    def _set_options(self, skip_nulls):
         self.wrapped.reset(new CElementWiseAggregateOptions(skip_nulls))
 
 
 class ElementWiseAggregateOptions(_ElementWiseAggregateOptions):
-    def __init__(self, bint skip_nulls=True):
+    def __init__(self, *, skip_nulls=True):
         self._set_options(skip_nulls)
 
 
 cdef CRoundMode unwrap_round_mode(round_mode) except *:
-    if round_mode == 'down':
+    # Explicit if-else since Cython does not supports storing C++ enum classes
+    # as dictionary values because they do not resolve to integers.
+    if round_mode == "down":
         return CRoundMode_DOWN
-    elif round_mode == 'up':
+    elif round_mode == "up":
         return CRoundMode_UP
-    elif round_mode == 'towards_zero':
+    elif round_mode == "towards_zero":
         return CRoundMode_TOWARDS_ZERO
-    elif round_mode == 'towards_infinity':
+    elif round_mode == "towards_infinity":
         return CRoundMode_TOWARDS_INFINITY
-    elif round_mode == 'half_down':
+    elif round_mode == "half_down":
         return CRoundMode_HALF_DOWN
-    elif round_mode == 'half_up':
+    elif round_mode == "half_up":
         return CRoundMode_HALF_UP
-    elif round_mode == 'half_towards_zero':
+    elif round_mode == "half_towards_zero":
         return CRoundMode_HALF_TOWARDS_ZERO
-    elif round_mode == 'half_towards_infinity':
+    elif round_mode == "half_towards_infinity":
         return CRoundMode_HALF_TOWARDS_INFINITY
-    elif round_mode == 'half_to_even':
+    elif round_mode == "half_to_even":
         return CRoundMode_HALF_TO_EVEN
-    elif round_mode == 'half_to_odd':
+    elif round_mode == "half_to_odd":
         return CRoundMode_HALF_TO_ODD
     else:
-        raise ValueError('"{}" is not a valid round mode'.format(round_mode))
+        raise ValueError(f"\"{round_mode}\" is not a valid 'round mode'")
 
 
 cdef class _RoundOptions(FunctionOptions):
-    def _set_options(self, int64_t ndigits, round_mode):
-        cdef:
-            CRoundMode c_round_mode = CRoundMode_HALF_TO_EVEN
-        c_round_mode = unwrap_round_mode(round_mode)
-        self.wrapped.reset(new CRoundOptions(ndigits, c_round_mode))
+    def _set_options(self, ndigits, round_mode):
+        self.wrapped.reset(new CRoundOptions(ndigits,
+                                             unwrap_round_mode(round_mode)))
 
 
 class RoundOptions(_RoundOptions):
-    def __init__(self, ndigits=0, round_mode='half_to_even'):
+    def __init__(self, *, ndigits=0, round_mode="half_to_even"):
         self._set_options(ndigits, round_mode)
 
 
 cdef class _RoundToMultipleOptions(FunctionOptions):
-    def _set_options(self, double multiple, round_mode):
-        cdef:
-            CRoundMode c_round_mode = CRoundMode_HALF_TO_EVEN
-        c_round_mode = unwrap_round_mode(round_mode)
-        self.wrapped.reset(new CRoundToMultipleOptions(multiple, c_round_mode))
+    def _set_options(self, multiple, round_mode):
+        self.wrapped.reset(
+            new CRoundToMultipleOptions(multiple,
+                                        unwrap_round_mode(round_mode)))
 
 
 class RoundToMultipleOptions(_RoundToMultipleOptions):
-    def __init__(self, multiple=1.0, round_mode='half_to_even'):
+    def __init__(self, *, multiple=1.0, round_mode="half_to_even"):

Review comment:
       Similarly, this change doesn't look desirable.

##########
File path: python/pyarrow/_compute.pyx
##########
@@ -980,51 +958,43 @@ cdef class _ModeOptions(FunctionOptions):
 
 
 class ModeOptions(_ModeOptions):
-    def __init__(self, n=1, skip_nulls=True, min_count=0):
+    def __init__(self, *, n=1, skip_nulls=True, min_count=0):

Review comment:
       `n` could very well be positional.

##########
File path: python/pyarrow/_compute.pyx
##########
@@ -844,53 +823,53 @@ class ExtractRegexOptions(_ExtractRegexOptions):
 
 
 cdef class _SliceOptions(FunctionOptions):
-    def _set_options(self, int64_t start, int64_t stop, int64_t step):
+    def _set_options(self, start, stop, step):
         self.wrapped.reset(new CSliceOptions(start, stop, step))
 
 
 class SliceOptions(_SliceOptions):
-    def __init__(self, int64_t start, int64_t stop=sys.maxsize,
-                 int64_t step=1):
+    def __init__(self, start, *, stop=sys.maxsize, step=1):
         self._set_options(start, stop, step)
 
 
 cdef class _FilterOptions(FunctionOptions):
+    null_selection_dict = {
+        "drop": CFilterNullSelectionBehavior_DROP,
+        "emit_null": CFilterNullSelectionBehavior_EMIT_NULL,
+    }
+
     def _set_options(self, null_selection_behavior):
-        if null_selection_behavior == 'drop':
-            self.wrapped.reset(
-                new CFilterOptions(CFilterNullSelectionBehavior_DROP))
-        elif null_selection_behavior == 'emit_null':
-            self.wrapped.reset(
-                new CFilterOptions(CFilterNullSelectionBehavior_EMIT_NULL))
-        else:
-            raise ValueError(
-                '"{}" is not a valid null_selection_behavior'
-                .format(null_selection_behavior))
+        if null_selection_behavior not in type(self).null_selection_dict:
+            raise ValueError(f"\"{null_selection_behavior}\""
+                             "is not a valid 'null selection behavior'")
+        self.wrapped.reset(
+            new CFilterOptions(
+                type(self).null_selection_dict[null_selection_behavior]))
 
 
 class FilterOptions(_FilterOptions):
-    def __init__(self, null_selection_behavior='drop'):
+    def __init__(self, *, null_selection_behavior="drop"):

Review comment:
       Having to type `null_selection_behavior` is rather annoying... 

##########
File path: python/pyarrow/_compute.pyx
##########
@@ -844,53 +823,53 @@ class ExtractRegexOptions(_ExtractRegexOptions):
 
 
 cdef class _SliceOptions(FunctionOptions):
-    def _set_options(self, int64_t start, int64_t stop, int64_t step):
+    def _set_options(self, start, stop, step):
         self.wrapped.reset(new CSliceOptions(start, stop, step))
 
 
 class SliceOptions(_SliceOptions):
-    def __init__(self, int64_t start, int64_t stop=sys.maxsize,
-                 int64_t step=1):
+    def __init__(self, start, *, stop=sys.maxsize, step=1):
         self._set_options(start, stop, step)
 
 
 cdef class _FilterOptions(FunctionOptions):
+    null_selection_dict = {

Review comment:
       `_null_selection_dict`

##########
File path: python/pyarrow/_compute.pyx
##########
@@ -929,34 +907,34 @@ class MakeStructOptions(_MakeStructOptions):
 
 cdef class _ScalarAggregateOptions(FunctionOptions):
     def _set_options(self, skip_nulls, min_count):
-        self.wrapped.reset(
-            new CScalarAggregateOptions(skip_nulls, min_count))
+        self.wrapped.reset(new CScalarAggregateOptions(skip_nulls, min_count))
 
 
 class ScalarAggregateOptions(_ScalarAggregateOptions):
-    def __init__(self, skip_nulls=True, min_count=1):
+    def __init__(self, *, skip_nulls=True, min_count=1):
         self._set_options(skip_nulls, min_count)
 
 
 cdef class _CountOptions(FunctionOptions):
+    mode_dict = {
+        "only_valid": CCountMode_ONLY_VALID,
+        "only_null": CCountMode_ONLY_NULL,
+        "all": CCountMode_ALL,
+    }
+
     def _set_options(self, mode):
-        if mode == 'only_valid':
-            self.wrapped.reset(new CCountOptions(CCountMode_ONLY_VALID))
-        elif mode == 'only_null':
-            self.wrapped.reset(new CCountOptions(CCountMode_ONLY_NULL))
-        elif mode == 'all':
-            self.wrapped.reset(new CCountOptions(CCountMode_ALL))
-        else:
-            raise ValueError(f'"{mode}" is not a valid mode')
+        if mode not in type(self).mode_dict:
+            raise ValueError(f"\"{mode}\" is not a valid 'count mode'")
+        self.wrapped.reset(new CCountOptions(type(self).mode_dict[mode]))
 
 
 class CountOptions(_CountOptions):
-    def __init__(self, mode='only_valid'):
+    def __init__(self, *, mode="only_valid"):

Review comment:
       Making this keyword-only doesn't seem useful, IMHO.

##########
File path: python/pyarrow/_compute.pyx
##########
@@ -1131,106 +1092,83 @@ cdef class _SplitPatternOptions(FunctionOptions):
 
 
 class SplitPatternOptions(_SplitPatternOptions):
-    def __init__(self, *, pattern, max_splits=-1, reverse=False):
+    def __init__(self, pattern, *, max_splits=-1, reverse=False):
         self._set_options(pattern, max_splits, reverse)
 
 
+cdef CSortOrder unwrap_sort_order(order) except *:
+    # Explicit if-else since Cython does not supports storing C++ enum classes
+    # as dictionary values because they do not resolve to integers.
+    if order == "ascending":
+        return CSortOrder_Ascending
+    elif order == "descending":
+        return CSortOrder_Descending
+    else:
+        raise ValueError(f'"{order}" is not a valid order')
+
+
 cdef class _ArraySortOptions(FunctionOptions):
     def _set_options(self, order):
-        if order == "ascending":
-            self.wrapped.reset(new CArraySortOptions(CSortOrder_Ascending))
-        elif order == "descending":
-            self.wrapped.reset(new CArraySortOptions(CSortOrder_Descending))
-        else:
-            raise ValueError(
-                "{!r} is not a valid order".format(order)
-            )
+        self.wrapped.reset(new CArraySortOptions(unwrap_sort_order(order)))
 
 
 class ArraySortOptions(_ArraySortOptions):
-    def __init__(self, *, order='ascending'):
+    def __init__(self, *, order="ascending"):
         self._set_options(order)
 
 
 cdef class _SortOptions(FunctionOptions):
     def _set_options(self, sort_keys):
-        cdef:
-            vector[CSortKey] c_sort_keys
-            c_string c_name
-            CSortOrder c_order
-
+        cdef vector[CSortKey] c_sort_keys
         for name, order in sort_keys:
-            if order == "ascending":
-                c_order = CSortOrder_Ascending
-            elif order == "descending":
-                c_order = CSortOrder_Descending
-            else:
-                raise ValueError(
-                    "{!r} is not a valid order".format(order)
-                )
-            c_name = tobytes(name)
-            c_sort_keys.push_back(CSortKey(c_name, c_order))
-
+            c_sort_keys.push_back(CSortKey(tobytes(name),
+                                  unwrap_sort_order(order)))
         self.wrapped.reset(new CSortOptions(c_sort_keys))
 
 
 class SortOptions(_SortOptions):
-    def __init__(self, sort_keys=None):
+    def __init__(self, *, sort_keys=None):
         if sort_keys is None:
             sort_keys = []
         self._set_options(sort_keys)
 
 
 cdef class _SelectKOptions(FunctionOptions):
     def _set_options(self, k, sort_keys):
-        cdef:
-            c_string c_name
-            vector[CSortKey] c_sort_keys
-            CSortOrder c_order
-
+        cdef vector[CSortKey] c_sort_keys
         for name, order in sort_keys:
-            if order == "ascending":
-                c_order = CSortOrder_Ascending
-            elif order == "descending":
-                c_order = CSortOrder_Descending
-            else:
-                raise ValueError(
-                    "{!r} is not a valid order".format(order)
-                )
-            c_name = tobytes(name)
-            c_sort_keys.push_back(CSortKey(c_name, c_order))
-
+            c_sort_keys.push_back(CSortKey(tobytes(name),
+                                  unwrap_sort_order(order)))
         self.wrapped.reset(new CSelectKOptions(k, c_sort_keys))
 
 
 class SelectKOptions(_SelectKOptions):
-    def __init__(self, k, sort_keys=None):
+    def __init__(self, *, k=-1, sort_keys=None):

Review comment:
       Well, it should be clear that the first argument to `select_k` is the 
`k` number, no?

##########
File path: python/pyarrow/_compute.pyx
##########
@@ -1131,106 +1092,83 @@ cdef class _SplitPatternOptions(FunctionOptions):
 
 
 class SplitPatternOptions(_SplitPatternOptions):
-    def __init__(self, *, pattern, max_splits=-1, reverse=False):
+    def __init__(self, pattern, *, max_splits=-1, reverse=False):
         self._set_options(pattern, max_splits, reverse)
 
 
+cdef CSortOrder unwrap_sort_order(order) except *:
+    # Explicit if-else since Cython does not supports storing C++ enum classes
+    # as dictionary values because they do not resolve to integers.
+    if order == "ascending":
+        return CSortOrder_Ascending
+    elif order == "descending":
+        return CSortOrder_Descending
+    else:
+        raise ValueError(f'"{order}" is not a valid order')
+
+
 cdef class _ArraySortOptions(FunctionOptions):
     def _set_options(self, order):
-        if order == "ascending":
-            self.wrapped.reset(new CArraySortOptions(CSortOrder_Ascending))
-        elif order == "descending":
-            self.wrapped.reset(new CArraySortOptions(CSortOrder_Descending))
-        else:
-            raise ValueError(
-                "{!r} is not a valid order".format(order)
-            )
+        self.wrapped.reset(new CArraySortOptions(unwrap_sort_order(order)))
 
 
 class ArraySortOptions(_ArraySortOptions):
-    def __init__(self, *, order='ascending'):
+    def __init__(self, *, order="ascending"):
         self._set_options(order)
 
 
 cdef class _SortOptions(FunctionOptions):
     def _set_options(self, sort_keys):
-        cdef:
-            vector[CSortKey] c_sort_keys
-            c_string c_name
-            CSortOrder c_order
-
+        cdef vector[CSortKey] c_sort_keys
         for name, order in sort_keys:
-            if order == "ascending":
-                c_order = CSortOrder_Ascending
-            elif order == "descending":
-                c_order = CSortOrder_Descending
-            else:
-                raise ValueError(
-                    "{!r} is not a valid order".format(order)
-                )
-            c_name = tobytes(name)
-            c_sort_keys.push_back(CSortKey(c_name, c_order))
-
+            c_sort_keys.push_back(CSortKey(tobytes(name),
+                                  unwrap_sort_order(order)))
         self.wrapped.reset(new CSortOptions(c_sort_keys))
 
 
 class SortOptions(_SortOptions):
-    def __init__(self, sort_keys=None):
+    def __init__(self, *, sort_keys=None):

Review comment:
       Similarly, this doesn't seem to increase usability or readability.
   




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to