jorisvandenbossche commented on a change in pull request #11147:
URL: https://github.com/apache/arrow/pull/11147#discussion_r712934181
##########
File path: python/pyarrow/_compute.pyx
##########
@@ -1035,8 +1022,7 @@ class StrptimeOptions(_StrptimeOptions):
cdef class _StrftimeOptions(FunctionOptions):
def _set_options(self, format, locale):
self.wrapped.reset(
- new CStrftimeOptions(tobytes(format), tobytes(locale))
- )
+ new CStrftimeOptions(tobytes(format), tobytes(locale)))
Review comment:
(another styling nit: note that some formatters (eg black) will actually
prefer how it was before)
##########
File path: python/pyarrow/_compute.pyx
##########
@@ -904,59 +885,71 @@ class TakeOptions(_TakeOptions):
cdef class _PartitionNthOptions(FunctionOptions):
- def _set_options(self, int64_t pivot):
+ def _set_options(self, pivot):
self.wrapped.reset(new CPartitionNthOptions(pivot))
class PartitionNthOptions(_PartitionNthOptions):
- def __init__(self, int64_t pivot):
+ def __init__(self, pivot):
self._set_options(pivot)
cdef class _MakeStructOptions(FunctionOptions):
- def _set_options(self, field_names):
+ def _set_options(self, field_names, field_nullability, field_metadata):
cdef:
vector[c_string] c_field_names
- for n in field_names:
- c_field_names.push_back(tobytes(n))
- self.wrapped.reset(new CMakeStructOptions(c_field_names))
+ vector[shared_ptr[const CKeyValueMetadata]] c_field_metadata
+ for name in field_names:
+ c_field_names.push_back(tobytes(name))
+ for metadata in field_metadata:
+ c_field_metadata.push_back(pyarrow_unwrap_metadata(metadata))
+ self.wrapped.reset(
+ new CMakeStructOptions(c_field_names,
+ field_nullability,
+ c_field_metadata))
class MakeStructOptions(_MakeStructOptions):
- def __init__(self, field_names):
- self._set_options(field_names)
+ def __init__(self, field_names, field_nullability=None,
Review comment:
```suggestion
def __init__(self, field_names, *, field_nullability=None,
```
?
##########
File path: python/pyarrow/compute.py
##########
@@ -656,8 +673,13 @@ def top_k_unstable(values, k, sort_keys=None,
memory_pool=None):
Parameters
----------
values : Array, ChunkedArray, RecordBatch, or Table
- k : The number of `k` elements to keep.
- sort_keys : Column key names to order by when input is table-like data.
+ Data to sort and get top indices from.
+ k : int
+ The number of `k` elements to keep.
+ sort_keys : List or tuple
Review comment:
```suggestion
sort_keys : list
```
Or "list-like" in you prefer. Since it supports any sequence, I don't think
it's needed to specifically mention "tuple" in addition to list.
(reason this catched my attention is actually because I first misread it as
"list of tuple", while `sort_keys` in general follow that structure, while here
it should be just be a list of strings)
##########
File path: python/pyarrow/compute.py
##########
@@ -697,8 +719,13 @@ def bottom_k_unstable(values, k, sort_keys=None,
memory_pool=None):
Parameters
----------
values : Array, ChunkedArray, RecordBatch, or Table
- k : The number of `k` elements to keep.
- sort_keys : Column key names to order by when input is table-like data.
+ Data to sort and get bottom indices from.
+ k : int
+ The number of `k` elements to keep.
+ sort_keys : List or tuple
Review comment:
```suggestion
sort_keys : list
```
##########
File path: python/pyarrow/_compute.pyx
##########
@@ -327,23 +317,17 @@ cdef class Function(_Weakrefable):
CExecContext c_exec_ctx = CExecContext(pool)
vector[CDatum] c_args
CDatum result
-
_pack_compute_args(args, &c_args)
-
if options is not None:
c_options = options.get_options()
-
Review comment:
A small nit (and you don't have to change it back, but just noting
because you are doing quite some styling-only changes), but personally I don't
think removing all those whitespace lines necessarily improves readability ..
##########
File path: python/pyarrow/_compute.pyx
##########
@@ -1131,122 +1106,97 @@ cdef class _SplitPatternOptions(FunctionOptions):
class SplitPatternOptions(_SplitPatternOptions):
- def __init__(self, *, pattern, max_splits=-1, reverse=False):
+ def __init__(self, pattern, *, max_splits=-1, reverse=False):
self._set_options(pattern, max_splits, reverse)
+cdef CSortOrder unwrap_sort_order(order) except *:
+ # TODO: Cython 0.29.x does not supports C++ scoped enums as dictionary
+ # values because they do not resolve to integers, so we use if-elses.
+ # Cython 3 supports C++ scoped enums resolving to integers, refer to
+ #
https://cython.readthedocs.io/en/latest/src/userguide/wrapping_CPlusPlus.html#scoped-enumerations
+ if order == "ascending":
+ return CSortOrder_Ascending
+ elif order == "descending":
+ return CSortOrder_Descending
+ _raise_invalid_function_option(order, "sort order")
+
+
cdef class _ArraySortOptions(FunctionOptions):
def _set_options(self, order):
- if order == "ascending":
- self.wrapped.reset(new CArraySortOptions(CSortOrder_Ascending))
- elif order == "descending":
- self.wrapped.reset(new CArraySortOptions(CSortOrder_Descending))
- else:
- raise ValueError(
- "{!r} is not a valid order".format(order)
- )
+ self.wrapped.reset(new CArraySortOptions(unwrap_sort_order(order)))
class ArraySortOptions(_ArraySortOptions):
- def __init__(self, *, order='ascending'):
+ def __init__(self, order="ascending"):
self._set_options(order)
cdef class _SortOptions(FunctionOptions):
def _set_options(self, sort_keys):
- cdef:
- vector[CSortKey] c_sort_keys
- c_string c_name
- CSortOrder c_order
-
+ cdef vector[CSortKey] c_sort_keys
for name, order in sort_keys:
- if order == "ascending":
- c_order = CSortOrder_Ascending
- elif order == "descending":
- c_order = CSortOrder_Descending
- else:
- raise ValueError(
- "{!r} is not a valid order".format(order)
- )
- c_name = tobytes(name)
- c_sort_keys.push_back(CSortKey(c_name, c_order))
-
+ c_sort_keys.push_back(CSortKey(tobytes(name),
+ unwrap_sort_order(order)))
self.wrapped.reset(new CSortOptions(c_sort_keys))
class SortOptions(_SortOptions):
- def __init__(self, sort_keys=None):
- if sort_keys is None:
- sort_keys = []
+ def __init__(self, sort_keys):
self._set_options(sort_keys)
cdef class _SelectKOptions(FunctionOptions):
def _set_options(self, k, sort_keys):
- cdef:
- c_string c_name
- vector[CSortKey] c_sort_keys
- CSortOrder c_order
-
+ cdef vector[CSortKey] c_sort_keys
for name, order in sort_keys:
- if order == "ascending":
- c_order = CSortOrder_Ascending
- elif order == "descending":
- c_order = CSortOrder_Descending
- else:
- raise ValueError(
- "{!r} is not a valid order".format(order)
- )
- c_name = tobytes(name)
- c_sort_keys.push_back(CSortKey(c_name, c_order))
-
+ c_sort_keys.push_back(CSortKey(tobytes(name),
+ unwrap_sort_order(order)))
self.wrapped.reset(new CSelectKOptions(k, c_sort_keys))
class SelectKOptions(_SelectKOptions):
- def __init__(self, k, sort_keys=None):
- if sort_keys is None:
- sort_keys = []
+ def __init__(self, k, sort_keys):
self._set_options(k, sort_keys)
cdef class _QuantileOptions(FunctionOptions):
+ _interp_map = {
+ "linear": CQuantileInterp_LINEAR,
+ "lower": CQuantileInterp_LOWER,
+ "higher": CQuantileInterp_HIGHER,
+ "nearest": CQuantileInterp_NEAREST,
+ "midpoint": CQuantileInterp_MIDPOINT,
+ }
+
def _set_options(self, quantiles, interp, skip_nulls, min_count):
- interp_dict = {
- 'linear': CQuantileInterp_LINEAR,
- 'lower': CQuantileInterp_LOWER,
- 'higher': CQuantileInterp_HIGHER,
- 'nearest': CQuantileInterp_NEAREST,
- 'midpoint': CQuantileInterp_MIDPOINT,
- }
- if interp not in interp_dict:
- raise ValueError(
- '{!r} is not a valid interpolation'
- .format(interp))
- self.wrapped.reset(
- new CQuantileOptions(quantiles, interp_dict[interp],
- skip_nulls, min_count))
+ try:
+ self.wrapped.reset(
+ new CQuantileOptions(quantiles, self._interp_map[interp],
+ skip_nulls, min_count))
+ except KeyError:
+ _raise_invalid_function_option(interp, "quantile interpolation")
class QuantileOptions(_QuantileOptions):
- def __init__(self, *, q=0.5, interpolation='linear',
- skip_nulls=True, min_count=0):
+ def __init__(self, *, q=0.5, interpolation="linear", skip_nulls=True,
Review comment:
```suggestion
def __init__(self, q=0.5, *, interpolation="linear", skip_nulls=True,
```
?
(since `q` is not a very descriptive keyword name anyway, it's not
necessarily useful to have to spell it out)
##########
File path: python/pyarrow/_compute.pyx
##########
@@ -569,6 +543,10 @@ cdef class FunctionOptions(_Weakrefable):
return self.get_options().Equals(deref(other.get_options()))
+def _raise_invalid_function_option(value, description, *, error="keyword"):
Review comment:
Or could also leave out the final word entirely: `"foo" is not a valid
count mode`
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]