zhidongqu-db commented on code in PR #55629:
URL: https://github.com/apache/spark/pull/55629#discussion_r3183860835
##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala:
##########
@@ -2420,3 +2420,58 @@ object AsOfJoin {
}
}
}
+
+object NearestByJoin {
+ /** Upper bound on `numResults`. Mirrors the K-overload limit of `MaxMinByK`. */
+ val MaxNumResults: Int = 100000
+}
+
+/**
+ * A logical plan for a nearest-by top-K ranking join. For each row on the left side it returns
+ * up to `numResults` rows from the right side ordered by `rankingExpression`:
+ * - `NearestByDistance`: smallest values of `rankingExpression` first.
+ * - `NearestBySimilarity`: largest values of `rankingExpression` first.
+ *
+ * The `approx` field records the user's APPROX/EXACT choice from the SPIP. Today both modes
+ * use the same brute-force rewrite. The flag is preserved on the logical plan so future
+ * indexed approximate-nearest-neighbor strategies can fire only when `approx = true`,
+ * leaving EXACT queries unaffected. See the SPIP linked from SPARK-56395.
+ */
+case class NearestByJoin(
Review Comment:
+1
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]