srielau commented on code in PR #55717:
URL: https://github.com/apache/spark/pull/55717#discussion_r3196674039
##########
sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/CatalogManager.scala:
##########
@@ -52,6 +53,38 @@ class CatalogManager(
// TODO: create a real SYSTEM catalog to host `TempVariableManager` under
the SESSION namespace.
val tempVariableManager: TempVariableManager = new TempVariableManager
+ // Wire the path-driven kinds provider into SessionCatalog so the
function-resolution loop
+ // and the security check that blocks temp functions from shadowing builtins
read the live
+ // SQL PATH (post-`SET PATH`, with `DEFAULT_PATH` and `defaultPathOrder`
fallbacks already
+ // applied) instead of the legacy
[[SQLConf.SESSION_FUNCTION_RESOLUTION_ORDER]] proxy.
+ // Order is every `system.builtin` / `system.session` entry on the PATH in
path order
+ // (user-catalog segments in between are skipped for this list).
+ v1SessionCatalog.sessionFunctionKindsProvider = () =>
leadingSystemFunctionKinds()
+
+ /**
+ * Walk the effective SQL PATH and collect every system function namespace
entry
+ * (`system.builtin` / `system.session`) in path order, mapped to
+ * [[SessionCatalog.SessionFunctionKind]].
+ *
+ * User-catalog entries (e.g. `spark_catalog.default`) appear on the PATH
before
+ * `system.builtin` / `system.session`; they must not hide later system
entries.
+ * Unqualified builtin/temp resolution follows this ordered kind list via
+ *
[[org.apache.spark.sql.catalyst.catalog.SessionCatalog.lookupFunctionWithShadowing]].
+ *
+ * When no system entries appear on the PATH (user-defined path without
builtins/session),
+ * falls back to builtin-then-session so bare names still resolve like the
legacy default.
+ */
+ private def leadingSystemFunctionKinds():
Seq[SessionCatalog.SessionFunctionKind] = {
+ val path = sqlResolutionPathEntries(currentCatalog.name(),
currentNamespace.toSeq)
+ val kinds = path.flatMap { e =>
Review Comment:
test 79
##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala:
##########
@@ -113,17 +113,34 @@ class SessionCatalog(
identifier.copy(funcName = "") == SESSION_NAMESPACE_TEMPLATE
/**
- * Session function kinds in resolution order for unqualified lookups.
- * Matches [[SQLConf.sessionFunctionResolutionOrder]]: "first" (session
first),
- * "second" (default), "last" (builtin only; session tried after persistent).
+ * Provider for the ordered `system.builtin` / `system.session` entries on
the effective SQL
+ * PATH (mapped to [[SessionFunctionKind]]). Wired by
+ * [[org.apache.spark.sql.connector.catalog.CatalogManager]] after
construction so unqualified
+ * function lookups and the security check that blocks temp functions from
shadowing builtins
+ * read the live PATH (post-`SET PATH`, with [[SQLConf.DEFAULT_PATH]] and
+ * [[SQLConf.defaultPathOrder]] fallbacks already applied) instead of the
legacy
+ * [[SQLConf.SESSION_FUNCTION_RESOLUTION_ORDER]] proxy.
+ *
+ * The default reproduces the previous "second" behavior
(builtin-then-session) for tests
+ * that construct a [[SessionCatalog]] directly without going through
[[CatalogManager]].
*/
- private def sessionFunctionKindsInResolutionOrder: Seq[SessionFunctionKind]
= {
- conf.sessionFunctionResolutionOrder match {
- case "first" => Seq(Temp, Builtin)
- case "last" => Seq(Builtin)
- case _ => Seq(Builtin, Temp) // "second" (default)
- }
- }
+ private[sql] var sessionFunctionKindsProvider: () =>
Seq[SessionFunctionKind] =
Review Comment:
test 127
##########
sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/CatalogManager.scala:
##########
@@ -138,8 +171,39 @@ class CatalogManager(
private var _sessionPath: Option[Seq[SessionPathEntry]] = None
- /** Returns the raw stored session path entries, or None if no path is set.
*/
- def sessionPathEntries: Option[Seq[SessionPathEntry]] = synchronized {
_sessionPath }
+ /**
+ * Returns the effective session path entries: the explicit `SET PATH` value
if stored,
+ * else the parsed [[SQLConf.DEFAULT_PATH]] conf if non-empty (mirroring how
+ * [[currentCatalog]] falls back to [[SQLConf.DEFAULT_CATALOG]]). Returns
`None` when
+ * [[SQLConf.PATH_ENABLED]] is false or both sources are empty.
+ */
+ def sessionPathEntries: Option[Seq[SessionPathEntry]] = synchronized {
+ if (!conf.pathEnabled) None
+ else _sessionPath.orElse(workspaceDefaultPathEntries)
+ }
+
+ /** Raw `_sessionPath` (post-`SET PATH`), without the
[[SQLConf.DEFAULT_PATH]] fallback. */
+ def storedSessionPathEntries: Option[Seq[SessionPathEntry]] = synchronized {
_sessionPath }
+
+ /**
+ * Parsed [[SQLConf.DEFAULT_PATH]] value, or `None` when the conf is empty /
blank. Reuses
+ * the SET PATH grammar via [[CatalystSqlParser.parseSqlPathElements]] so
the workspace
+ * default accepts the same syntax as `SET PATH = ...`.
+ *
+ * An inner `DEFAULT_PATH` token inside the conf value resolves to the
spark-builtin
+ * default ordering rather than recursing back into this method (cycle
break).
+ */
+ def workspaceDefaultPathEntries: Option[Seq[SessionPathEntry]] = {
+ val confValue = conf.defaultPath
+ if (confValue == null || confValue.trim.isEmpty) {
+ None
+ } else {
+ val elements = CatalystSqlParser.parseSqlPathElements(confValue)
Review Comment:
test 201
##########
sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/CatalogManager.scala:
##########
@@ -52,6 +53,38 @@ class CatalogManager(
// TODO: create a real SYSTEM catalog to host `TempVariableManager` under
the SESSION namespace.
val tempVariableManager: TempVariableManager = new TempVariableManager
+ // Wire the path-driven kinds provider into SessionCatalog so the
function-resolution loop
+ // and the security check that blocks temp functions from shadowing builtins
read the live
+ // SQL PATH (post-`SET PATH`, with `DEFAULT_PATH` and `defaultPathOrder`
fallbacks already
+ // applied) instead of the legacy
[[SQLConf.SESSION_FUNCTION_RESOLUTION_ORDER]] proxy.
+ // Order is every `system.builtin` / `system.session` entry on the PATH in
path order
+ // (user-catalog segments in between are skipped for this list).
+ v1SessionCatalog.sessionFunctionKindsProvider = () =>
leadingSystemFunctionKinds()
+
+ /**
+ * Walk the effective SQL PATH and collect every system function namespace
entry
+ * (`system.builtin` / `system.session`) in path order, mapped to
+ * [[SessionCatalog.SessionFunctionKind]].
+ *
+ * User-catalog entries (e.g. `spark_catalog.default`) appear on the PATH
before
+ * `system.builtin` / `system.session`; they must not hide later system
entries.
+ * Unqualified builtin/temp resolution follows this ordered kind list via
+ *
[[org.apache.spark.sql.catalyst.catalog.SessionCatalog.lookupFunctionWithShadowing]].
+ *
+ * When no system entries appear on the PATH (user-defined path without
builtins/session),
+ * falls back to builtin-then-session so bare names still resolve like the
legacy default.
+ */
+ private def leadingSystemFunctionKinds():
Seq[SessionCatalog.SessionFunctionKind] = {
Review Comment:
test 77
##########
sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/CatalogManager.scala:
##########
@@ -138,8 +171,39 @@ class CatalogManager(
private var _sessionPath: Option[Seq[SessionPathEntry]] = None
- /** Returns the raw stored session path entries, or None if no path is set.
*/
- def sessionPathEntries: Option[Seq[SessionPathEntry]] = synchronized {
_sessionPath }
+ /**
+ * Returns the effective session path entries: the explicit `SET PATH` value
if stored,
+ * else the parsed [[SQLConf.DEFAULT_PATH]] conf if non-empty (mirroring how
+ * [[currentCatalog]] falls back to [[SQLConf.DEFAULT_CATALOG]]). Returns
`None` when
+ * [[SQLConf.PATH_ENABLED]] is false or both sources are empty.
+ */
+ def sessionPathEntries: Option[Seq[SessionPathEntry]] = synchronized {
+ if (!conf.pathEnabled) None
+ else _sessionPath.orElse(workspaceDefaultPathEntries)
+ }
+
+ /** Raw `_sessionPath` (post-`SET PATH`), without the
[[SQLConf.DEFAULT_PATH]] fallback. */
+ def storedSessionPathEntries: Option[Seq[SessionPathEntry]] = synchronized {
_sessionPath }
+
+ /**
+ * Parsed [[SQLConf.DEFAULT_PATH]] value, or `None` when the conf is empty /
blank. Reuses
+ * the SET PATH grammar via [[CatalystSqlParser.parseSqlPathElements]] so
the workspace
+ * default accepts the same syntax as `SET PATH = ...`.
+ *
+ * An inner `DEFAULT_PATH` token inside the conf value resolves to the
spark-builtin
+ * default ordering rather than recursing back into this method (cycle
break).
+ */
+ def workspaceDefaultPathEntries: Option[Seq[SessionPathEntry]] = {
Review Comment:
test 196
##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala:
##########
@@ -113,17 +113,34 @@ class SessionCatalog(
identifier.copy(funcName = "") == SESSION_NAMESPACE_TEMPLATE
/**
- * Session function kinds in resolution order for unqualified lookups.
- * Matches [[SQLConf.sessionFunctionResolutionOrder]]: "first" (session
first),
- * "second" (default), "last" (builtin only; session tried after persistent).
+ * Provider for the ordered `system.builtin` / `system.session` entries on
the effective SQL
+ * PATH (mapped to [[SessionFunctionKind]]). Wired by
+ * [[org.apache.spark.sql.connector.catalog.CatalogManager]] after
construction so unqualified
+ * function lookups and the security check that blocks temp functions from
shadowing builtins
+ * read the live PATH (post-`SET PATH`, with [[SQLConf.DEFAULT_PATH]] and
+ * [[SQLConf.defaultPathOrder]] fallbacks already applied) instead of the
legacy
+ * [[SQLConf.SESSION_FUNCTION_RESOLUTION_ORDER]] proxy.
+ *
+ * The default reproduces the previous "second" behavior
(builtin-then-session) for tests
+ * that construct a [[SessionCatalog]] directly without going through
[[CatalogManager]].
*/
- private def sessionFunctionKindsInResolutionOrder: Seq[SessionFunctionKind]
= {
- conf.sessionFunctionResolutionOrder match {
- case "first" => Seq(Temp, Builtin)
- case "last" => Seq(Builtin)
- case _ => Seq(Builtin, Temp) // "second" (default)
- }
- }
+ private[sql] var sessionFunctionKindsProvider: () =>
Seq[SessionFunctionKind] =
Review Comment:
test 127
##########
sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/CatalogManager.scala:
##########
@@ -52,6 +53,38 @@ class CatalogManager(
// TODO: create a real SYSTEM catalog to host `TempVariableManager` under
the SESSION namespace.
val tempVariableManager: TempVariableManager = new TempVariableManager
+ // Wire the path-driven kinds provider into SessionCatalog so the
function-resolution loop
+ // and the security check that blocks temp functions from shadowing builtins
read the live
+ // SQL PATH (post-`SET PATH`, with `DEFAULT_PATH` and `defaultPathOrder`
fallbacks already
+ // applied) instead of the legacy
[[SQLConf.SESSION_FUNCTION_RESOLUTION_ORDER]] proxy.
+ // Order is every `system.builtin` / `system.session` entry on the PATH in
path order
+ // (user-catalog segments in between are skipped for this list).
+ v1SessionCatalog.sessionFunctionKindsProvider = () =>
leadingSystemFunctionKinds()
+
+ /**
+ * Walk the effective SQL PATH and collect every system function namespace
entry
+ * (`system.builtin` / `system.session`) in path order, mapped to
+ * [[SessionCatalog.SessionFunctionKind]].
+ *
+ * User-catalog entries (e.g. `spark_catalog.default`) appear on the PATH
before
+ * `system.builtin` / `system.session`; they must not hide later system
entries.
+ * Unqualified builtin/temp resolution follows this ordered kind list via
+ *
[[org.apache.spark.sql.catalyst.catalog.SessionCatalog.lookupFunctionWithShadowing]].
+ *
+ * When no system entries appear on the PATH (user-defined path without
builtins/session),
+ * falls back to builtin-then-session so bare names still resolve like the
legacy default.
+ */
+ private def leadingSystemFunctionKinds():
Seq[SessionCatalog.SessionFunctionKind] = {
+ val path = sqlResolutionPathEntries(currentCatalog.name(),
currentNamespace.toSeq)
+ val kinds = path.flatMap { e =>
Review Comment:
test 79
##########
sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/CatalogManager.scala:
##########
@@ -138,8 +171,39 @@ class CatalogManager(
private var _sessionPath: Option[Seq[SessionPathEntry]] = None
- /** Returns the raw stored session path entries, or None if no path is set.
*/
- def sessionPathEntries: Option[Seq[SessionPathEntry]] = synchronized {
_sessionPath }
+ /**
+ * Returns the effective session path entries: the explicit `SET PATH` value
if stored,
+ * else the parsed [[SQLConf.DEFAULT_PATH]] conf if non-empty (mirroring how
+ * [[currentCatalog]] falls back to [[SQLConf.DEFAULT_CATALOG]]). Returns
`None` when
+ * [[SQLConf.PATH_ENABLED]] is false or both sources are empty.
+ */
+ def sessionPathEntries: Option[Seq[SessionPathEntry]] = synchronized {
+ if (!conf.pathEnabled) None
+ else _sessionPath.orElse(workspaceDefaultPathEntries)
+ }
+
+ /** Raw `_sessionPath` (post-`SET PATH`), without the
[[SQLConf.DEFAULT_PATH]] fallback. */
+ def storedSessionPathEntries: Option[Seq[SessionPathEntry]] = synchronized {
_sessionPath }
+
+ /**
+ * Parsed [[SQLConf.DEFAULT_PATH]] value, or `None` when the conf is empty /
blank. Reuses
+ * the SET PATH grammar via [[CatalystSqlParser.parseSqlPathElements]] so
the workspace
+ * default accepts the same syntax as `SET PATH = ...`.
+ *
+ * An inner `DEFAULT_PATH` token inside the conf value resolves to the
spark-builtin
+ * default ordering rather than recursing back into this method (cycle
break).
+ */
+ def workspaceDefaultPathEntries: Option[Seq[SessionPathEntry]] = {
+ val confValue = conf.defaultPath
+ if (confValue == null || confValue.trim.isEmpty) {
+ None
+ } else {
+ val elements = CatalystSqlParser.parseSqlPathElements(confValue)
Review Comment:
test 201
##########
sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/CatalogManager.scala:
##########
@@ -52,6 +53,38 @@ class CatalogManager(
// TODO: create a real SYSTEM catalog to host `TempVariableManager` under
the SESSION namespace.
val tempVariableManager: TempVariableManager = new TempVariableManager
+ // Wire the path-driven kinds provider into SessionCatalog so the
function-resolution loop
+ // and the security check that blocks temp functions from shadowing builtins
read the live
+ // SQL PATH (post-`SET PATH`, with `DEFAULT_PATH` and `defaultPathOrder`
fallbacks already
+ // applied) instead of the legacy
[[SQLConf.SESSION_FUNCTION_RESOLUTION_ORDER]] proxy.
+ // Order is every `system.builtin` / `system.session` entry on the PATH in
path order
+ // (user-catalog segments in between are skipped for this list).
+ v1SessionCatalog.sessionFunctionKindsProvider = () =>
leadingSystemFunctionKinds()
+
+ /**
+ * Walk the effective SQL PATH and collect every system function namespace
entry
+ * (`system.builtin` / `system.session`) in path order, mapped to
+ * [[SessionCatalog.SessionFunctionKind]].
+ *
+ * User-catalog entries (e.g. `spark_catalog.default`) appear on the PATH
before
+ * `system.builtin` / `system.session`; they must not hide later system
entries.
+ * Unqualified builtin/temp resolution follows this ordered kind list via
+ *
[[org.apache.spark.sql.catalyst.catalog.SessionCatalog.lookupFunctionWithShadowing]].
+ *
+ * When no system entries appear on the PATH (user-defined path without
builtins/session),
+ * falls back to builtin-then-session so bare names still resolve like the
legacy default.
+ */
+ private def leadingSystemFunctionKinds():
Seq[SessionCatalog.SessionFunctionKind] = {
Review Comment:
test 77
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]