pitrou commented on code in PR #14753:
URL: https://github.com/apache/arrow/pull/14753#discussion_r1047661511
##########
cpp/src/arrow/compute/kernels/vector_sort_internal.h:
##########
@@ -456,6 +458,402 @@ Status SortChunkedArray(ExecContext* ctx, uint64_t*
indices_begin, uint64_t* ind
const ChunkedArray& values, SortOrder sort_order,
NullPlacement null_placement);
+// ----------------------------------------------------------------------
+// Helpers for Sort/SelectK implementations
+
+// A sort key once it has been matched against a schema: the column to sort
+// on, identified by position, together with the requested ordering.
+struct SortField {
+  // Index of the matched field within the schema
+  int field_index;
+  // Ordering requested by the originating sort key
+  SortOrder order;
+};
+
+// Validate that `ref` is not a nested field reference.
+// Returns a KeyError when `ref.IsNested()` is true, and OK otherwise.
+inline Status CheckNonNested(const FieldRef& ref) {
+  const bool is_nested = ref.IsNested();
+  return is_nested ? Status::KeyError("Nested keys not supported for SortKeys")
+                   : Status::OK();
+}
+
+// Pass a successful result through unchanged; on failure, return the same
+// status with its message prefixed by "Invalid sort key column: ".
+template <typename T>
+Result<T> PrependInvalidColumn(Result<T> res) {
+  if (!res.ok()) {
+    const Status& failure = res.status();
+    return failure.WithMessage("Invalid sort key column: ", failure.message());
+  }
+  return res;
+}
+
+// Look up each sort key in `schema` and return the matched field indices
+// paired with the key's sort order. When several sort keys resolve to the
+// same field, only the first occurrence is kept.
+//
+// Fails if a key is a nested reference or if its lookup in the schema fails.
+inline Result<std::vector<SortField>> FindSortKeys(
+    const Schema& schema, const std::vector<SortKey>& sort_keys) {
+  const auto num_keys = sort_keys.size();
+  std::unordered_set<int> visited;
+  visited.reserve(num_keys);
+  std::vector<SortField> result;
+  result.reserve(num_keys);
+
+  for (const auto& key : sort_keys) {
+    RETURN_NOT_OK(CheckNonNested(key.target));
+
+    ARROW_ASSIGN_OR_RAISE(auto path,
+                          PrependInvalidColumn(key.target.FindOne(schema)));
+    // Nested references were rejected above, so only the first component of
+    // the matched path is used.
+    const auto index = path[0];
+    const bool first_occurrence = visited.insert(index).second;
+    if (first_occurrence) {
+      result.push_back({index, key.order});
+    }
+  }
+  return result;
+}
+
+// Resolve `sort_keys` against `schema`, then build one ResolvedSortKey per
+// deduplicated SortField by calling `factory` on it, preserving order.
+// Any error from FindSortKeys is propagated.
+template <typename ResolvedSortKey, typename ResolvedSortKeyFactory>
+Result<std::vector<ResolvedSortKey>> ResolveSortKeys(
+    const Schema& schema, const std::vector<SortKey>& sort_keys,
+    ResolvedSortKeyFactory&& factory) {
+  ARROW_ASSIGN_OR_RAISE(const auto sort_fields, FindSortKeys(schema, sort_keys));
+  std::vector<ResolvedSortKey> output;
+  output.reserve(sort_fields.size());
+  for (const auto& sort_field : sort_fields) {
+    output.push_back(factory(sort_field));
+  }
+  return output;
+}
+
+// Convenience overload: resolve `sort_keys` against the schema of
+// `table_or_batch`, constructing each ResolvedSortKey from the matching
+// column (looked up by field index) and the sort order.
+template <typename ResolvedSortKey, typename TableOrBatch>
+Result<std::vector<ResolvedSortKey>> ResolveSortKeys(
+    const TableOrBatch& table_or_batch, const std::vector<SortKey>& sort_keys) {
+  auto make_resolved_key = [&table_or_batch](const SortField& field) {
+    return ResolvedSortKey{table_or_batch.column(field.field_index), field.order};
+  };
+  return ResolveSortKeys<ResolvedSortKey>(*table_or_batch.schema(), sort_keys,
+                                          make_resolved_key);
+}
+
+// Returns nullptr if no column matching `ref` is found, or if the FieldRef is
+// a nested reference.
+inline std::shared_ptr<ChunkedArray> GetTableColumn(const Table& table,
Review Comment:
I think you could just do it in this PR.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]