westonpace commented on a change in pull request #11405:
URL: https://github.com/apache/arrow/pull/11405#discussion_r728391288
##########
File path: r/R/arrow-datum.R
##########
@@ -232,6 +232,7 @@ is.sliceable <- function(i) {
is.numeric(i) &&
length(i) > 0 &&
all(i > 0) &&
+ i[1] <= i[length(i)] &&
Review comment:
I assume this is just some improvement you are adding on the side but
given it's unrelated I figured I'd point it out just in case you didn't mean to
include it.
##########
File path: r/R/query-engine.R
##########
@@ -194,29 +194,41 @@ ExecPlan <- R6Class("ExecPlan",
},
Run = function(node) {
assert_is(node, "ExecNode")
- # TODO (ARROW-12763): pass head/tail to ExecPlan_run so we can maybe TopK
- out <- ExecPlan_run(self, node, node$sort %||% list())
- if (is.null(node$sort)) {
+ # Sorting and head/tail (if sorted) are handled in the SinkNode,
+ # created in ExecPlan_run
+ sorting <- node$sort %||% list()
+ select_k <- node$head %||% -1L
+ has_sorting <- length(sorting) > 0
+ if (has_sorting) {
+ if (!is.null(node$tail)) {
+ # Reverse the sort order and take the top K, then after we'll reverse
+ # the resulting rows so that it is ordered as expected
+ sorting$orders <- !sorting$orders
+ select_k <- node$tail
+ }
+ sorting$orders <- as.integer(sorting$orders)
+ }
+
+ out <- ExecPlan_run(self, node, sorting, select_k)
+
+ if (!has_sorting) {
# Since ExecPlans don't scan in deterministic order, head/tail are both
# essentially taking a random slice from somewhere in the dataset.
# And since the head() implementation is way more efficient than
tail(),
# just use it to take the random slice
slice_size <- node$head %||% node$tail
if (!is.null(slice_size)) {
+ # TODO (ARROW-14289): make the head methods return RBR not Table
out <- head(out, slice_size)
}
- # TODO (ARROW-12763): delete these else cases because they'll be
handled
- # with SelectK
- } else if (!is.null(node$head)) {
- # These methods are on RecordBatchReader (but return Table)
- # TODO (ARROW-14289): make the head/tail methods return RBR not Table
- out <- head(out, node$head)
- # We can now tell `self` to StopProducing: we already have
- # everything we need for the head
- self$Stop()
+ # Can we now tell `self$Stop()` to StopProducing? We already have
+ # everything we need for the head (but it seems to segfault)
Review comment:
Can we add a JIRA for this? It would be good to not lose track of it.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]