westonpace commented on code in PR #13700:
URL: https://github.com/apache/arrow/pull/13700#discussion_r936075098


##########
cpp/src/arrow/compute/api_vector.h:
##########
@@ -174,6 +174,23 @@ class ARROW_EXPORT SelectKOptions : public FunctionOptions 
{
   std::vector<SortKey> sort_keys;
 };
 
+/// \brief Fetch options
+class ARROW_EXPORT FetchOptions : public FunctionOptions {

Review Comment:
   This shouldn't be a subclass of `FunctionOptions`.  `FunctionOptions` is 
only needed for kernel functions.  Fetch is an exec node.  Just take all these 
fields and put them in `FetchSinkNodeOptions` directly.



##########
cpp/src/arrow/compute/api_vector.cc:
##########
@@ -157,6 +157,11 @@ static auto kPartitionNthOptionsType = 
GetFunctionOptionsType<PartitionNthOption
 static auto kSelectKOptionsType = GetFunctionOptionsType<SelectKOptions>(
     DataMember("k", &SelectKOptions::k),
     DataMember("sort_keys", &SelectKOptions::sort_keys));
+static auto kFetchOptionsType = GetFunctionOptionsType<FetchOptions>(

Review Comment:
   You can remove this whole block once fetch options is no longer a child of 
`FunctionOptions`.



##########
cpp/src/arrow/compute/api_vector.h:
##########
@@ -174,6 +174,23 @@ class ARROW_EXPORT SelectKOptions : public FunctionOptions 
{
   std::vector<SortKey> sort_keys;
 };
 
+/// \brief Fetch options
+class ARROW_EXPORT FetchOptions : public FunctionOptions {
+ public:
+  explicit FetchOptions(int64_t offset = 0, int64_t count = 0,
+                        std::vector<SortKey> sort_keys = {}, bool sort_first = 
true);
+  static constexpr char const kTypeName[] = "FetchOptions";
+
+  /// The number of `offset` elements to skip.
+  int64_t offset;
+  /// The number of `count` elements to select.
+  int64_t count;

Review Comment:
   ```suggestion
     /// The number of rows to skip.
     int64_t offset;
     /// The number of rows to select.
     int64_t count;
   ```



##########
cpp/src/arrow/compute/api_vector.h:
##########
@@ -174,6 +174,23 @@ class ARROW_EXPORT SelectKOptions : public FunctionOptions 
{
   std::vector<SortKey> sort_keys;
 };
 
+/// \brief Fetch options
+class ARROW_EXPORT FetchOptions : public FunctionOptions {
+ public:
+  explicit FetchOptions(int64_t offset = 0, int64_t count = 0,
+                        std::vector<SortKey> sort_keys = {}, bool sort_first = 
true);
+  static constexpr char const kTypeName[] = "FetchOptions";
+
+  /// The number of `offset` elements to skip.
+  int64_t offset;
+  /// The number of `count` elements to select.
+  int64_t count;
+  /// Column key(s) to order by and how to order by these sort keys.

Review Comment:
   Do I have to specify sort keys?



##########
cpp/src/arrow/compute/exec/options.h:
##########
@@ -430,6 +430,23 @@ class ARROW_EXPORT SelectKSinkNodeOptions : public 
SinkNodeOptions {
   SelectKOptions select_k_options;
 };
 
+/// \brief Make a node which selects a range of rows passed through it
+///
+/// All batches pushed to this node will be accumulated, then selected, by the 
given
+/// fields. Then sorted batches will be forwarded to the generator in sorted 
order and
+/// select the rows with an offset and a limit.
+class ARROW_EXPORT FetchSinkNodeOptions : public SinkNodeOptions {

Review Comment:
   Can we specify sort options (e.g. nulls first or last?)



##########
cpp/src/arrow/compute/exec/options.h:
##########
@@ -430,6 +430,23 @@ class ARROW_EXPORT SelectKSinkNodeOptions : public 
SinkNodeOptions {
   SelectKOptions select_k_options;
 };
 
+/// \brief Make a node which selects a range of rows passed through it
+///
+/// All batches pushed to this node will be accumulated, then selected, by the 
given

Review Comment:
   We already have 
[ARROW-14202](https://issues.apache.org/jira/browse/ARROW-14202) which is 
pretty similar to ARROW-17196.  I'm not sure if a new JIRA is needed. CC 
@ArianaVillegas who has done some initial thinking about ARROW-14202 and was 
thinking about implementing it at  some point.



##########
cpp/src/arrow/compute/exec/options.h:
##########
@@ -430,6 +430,23 @@ class ARROW_EXPORT SelectKSinkNodeOptions : public 
SinkNodeOptions {
   SelectKOptions select_k_options;
 };
 
+/// \brief Make a node which selects a range of rows passed through it
+///
+/// All batches pushed to this node will be accumulated, then selected, by the 
given
+/// fields. Then sorted batches will be forwarded to the generator in sorted 
order and

Review Comment:
   @nealrichardson this isn't much different than the hacky end-of-plan-only 
top-k that we have today (and rely on for tpc-h testing).  We're mostly just 
extending that a little so we can adapt that to Substrait.



##########
cpp/src/arrow/compute/exec/options.h:
##########
@@ -430,6 +430,23 @@ class ARROW_EXPORT SelectKSinkNodeOptions : public 
SinkNodeOptions {
   SelectKOptions select_k_options;
 };
 
+/// \brief Make a node which selects a range of rows passed through it
+///
+/// All batches pushed to this node will be accumulated, then selected, by the 
given
+/// fields. Then sorted batches will be forwarded to the generator in sorted 
order and

Review Comment:
   Do we still need TopK now that there is a fetch node?  Couldn't `TopK` just 
be a fetch node with `offset == 0`?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

Reply via email to