Re: [PR] [SPARK-56853] Improve PATH Tests [spark]

via GitHub Thu, 14 May 2026 09:05:04 -0700


cloud-fan commented on code in PR #55866:
URL: https://github.com/apache/spark/pull/55866#discussion_r3242643197



##########
sql/core/src/test/scala/org/apache/spark/sql/connector/SqlPathV2CatalogSuite.scala:
##########
@@ -0,0 +1,142 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.connector
+
+import java.util.Collections
+
+import org.apache.spark.sql.{AnalysisException, Row}
+import org.apache.spark.sql.connector.catalog.{Identifier, InMemoryCatalog, 
SupportsNamespaces}
+import org.apache.spark.sql.connector.catalog.functions.UnboundFunction
+import org.apache.spark.sql.internal.SQLConf
+import org.apache.spark.sql.test.SharedSparkSession
+
+/**
+ * End-to-end coverage of [[SQLConf.PATH_ENABLED]] resolution through 
non-session V2 catalogs.
+ *
+ * Other path tests live in `SetPathSuite` (session catalog) and 
`ProcedureSuite`
+ * (procedures via CALL). This suite specifically exercises:
+ *   - unqualified table resolution across two V2 catalogs in SET PATH,
+ *   - first-match ordering when both catalogs hold the same name,
+ *   - unqualified V2 function resolution across two V2 catalogs in SET PATH,
+ *   - the negative case where the unqualified name only lives in a catalog
+ *     that is NOT on the path.
+ */
+class SqlPathV2CatalogSuite extends SharedSparkSession {
+
+  private val emptyProps: java.util.Map[String, String] = 
Collections.emptyMap()
+
+  override def beforeAll(): Unit = {
+    super.beforeAll()
+    spark.conf.set("spark.sql.catalog.pathcat", 
classOf[InMemoryCatalog].getName)
+    spark.conf.set("spark.sql.catalog.pathcat2", 
classOf[InMemoryCatalog].getName)
+  }
+
+  override def afterAll(): Unit = {
+    try {
+      spark.sessionState.catalogManager.reset()
+      spark.sessionState.conf.unsetConf("spark.sql.catalog.pathcat")
+      spark.sessionState.conf.unsetConf("spark.sql.catalog.pathcat2")
+    } finally {
+      super.afterAll()
+    }
+  }
+
+  private def v2Catalog(name: String): InMemoryCatalog =
+    
spark.sessionState.catalogManager.catalog(name).asInstanceOf[InMemoryCatalog]
+
+  private def createV2Namespace(catalog: String, ns: String): Unit = {
+    v2Catalog(catalog).asInstanceOf[SupportsNamespaces]
+      .createNamespace(Array(ns), emptyProps)
+  }
+
+  private def addV2Function(
+      catalog: String,
+      ns: String,
+      name: String,
+      fn: UnboundFunction): Unit = {
+    v2Catalog(catalog).createFunction(Identifier.of(Array(ns), name), fn)
+  }
+
+  test("V2 catalogs on SET PATH: unqualified table follows first match") {
+    withSQLConf(SQLConf.PATH_ENABLED.key -> "true") {
+      // pathcat and pathcat2 each have a namespace `ns` and a table 
`path_v2_t` with
+      // different contents, so we can tell which catalog supplied the row.
+      createV2Namespace("pathcat", "ns")
+      createV2Namespace("pathcat2", "ns")
+      sql("CREATE TABLE pathcat.ns.path_v2_t (id INT) USING foo")
+      sql("INSERT INTO pathcat.ns.path_v2_t VALUES (10)")
+      sql("CREATE TABLE pathcat2.ns.path_v2_t (id INT) USING foo")
+      sql("INSERT INTO pathcat2.ns.path_v2_t VALUES (20)")
+
+      try {
+        sql("SET PATH = pathcat.ns, pathcat2.ns, system.builtin")
+        checkAnswer(sql("SELECT id FROM path_v2_t"), Row(10))
+
+        sql("SET PATH = pathcat2.ns, pathcat.ns, system.builtin")
+        checkAnswer(sql("SELECT id FROM path_v2_t"), Row(20))
+      } finally {
+        sql("SET PATH = DEFAULT_PATH")
+        sql("DROP TABLE IF EXISTS pathcat.ns.path_v2_t")
+        sql("DROP TABLE IF EXISTS pathcat2.ns.path_v2_t")
+      }
+    }
+  }
+
+  test("V2 catalogs on SET PATH: unqualified table only in a non-path catalog 
is not found") {
+    withSQLConf(SQLConf.PATH_ENABLED.key -> "true") {
+      createV2Namespace("pathcat", "ns_only_here")
+      sql("CREATE TABLE pathcat.ns_only_here.hidden_t (id INT) USING foo")
+      try {
+        // Path does not include pathcat.ns_only_here; bare `hidden_t` must 
not resolve.
+        sql("SET PATH = pathcat2.ns, system.builtin")
+        val e = intercept[AnalysisException] {
+          sql("SELECT id FROM hidden_t").collect()
+        }
+        assert(e.getCondition == "TABLE_OR_VIEW_NOT_FOUND" ||
+            e.getMessage.contains("TABLE_OR_VIEW_NOT_FOUND"),
+          s"Expected TABLE_OR_VIEW_NOT_FOUND; got: ${e.getCondition}: 
${e.getMessage}")
+      } finally {
+        sql("SET PATH = DEFAULT_PATH")
+        sql("DROP TABLE IF EXISTS pathcat.ns_only_here.hidden_t")
+      }
+    }
+  }
+
+  test("V2 catalogs on SET PATH: unqualified function follows first match") {

Review Comment:
   Test name claims "follows first match", but the body can't verify that: both 
`StrLenDefault` (`DataSourceV2FunctionSuite.scala:743`) and `StrLenMagic` 
(`DataSourceV2FunctionSuite.scala:754`) return `s.length`. Swapping path order 
yields identical results — the comment at lines 129-130 admits this and 
downgrades the assertion to "neither catalog raised not found". The parallel 
table test above (lines 75-98) properly uses distinguishable contents (`id=10` 
vs `id=20`).
   
   Either rename to reflect what's actually verified, or pick function impls 
that return different values — e.g., a tiny `ScalarFunction` registered against 
`pathcat2` that returns `s.length * 100`. Then `strlen('abc')` returns 3 or 300 
depending on which catalog supplied the function, and the test would prove 
first-match for V2 functions.



##########
sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/CatalogManagerSuite.scala:
##########
@@ -150,6 +152,115 @@ class CatalogManagerSuite extends SparkFunSuite with 
SQLHelper {
       assert(CatalogManager.deserializePathEntries(payload).isEmpty, 
s"payload=$payload")
     }
   }
+
+  test("serializePathEntries round-trips through deserialize for typical 
inputs") {
+    val cases = Seq(
+      Seq(Seq("spark_catalog", "default"), Seq("system", "builtin")),
+      Seq(Seq("system", "session")),
+      Seq.empty[Seq[String]])
+    cases.foreach { entries =>
+      val payload = CatalogManager.serializePathEntries(entries)
+      val parsed = CatalogManager.deserializePathEntries(payload)
+        .getOrElse(fail(s"Expected payload to round-trip: $payload"))
+      assert(parsed === entries, s"Round-trip mismatch for $entries; got 
$parsed")
+    }
+  }
+
+  test("serializePathEntries round-trips multi-level and quoted identifiers") {
+    val entries = Seq(
+      Seq("cat", "ns1", "ns2"),
+      Seq("spark_catalog", "sch.with.dots"),
+      Seq("spark_catalog", "schema with spaces"))
+    val payload = CatalogManager.serializePathEntries(entries)
+    val parsed = CatalogManager.deserializePathEntries(payload)
+      .getOrElse(fail(s"Expected payload to round-trip: $payload"))
+    assert(parsed === entries)
+  }
+
+  test("deserializePathEntriesOrFail raises a clear AnalysisException for bad 
payloads") {
+    val e = intercept[AnalysisException] {
+      CatalogManager.deserializePathEntriesOrFail(
+        storedPathStr = "{bad-json",
+        objectType = "view",
+        objectName = "default.v_broken")
+    }
+    assert(e.getMessage.contains("Invalid stored SQL path metadata for view"))
+    assert(e.getMessage.contains("default.v_broken"))
+  }
+
+  // 
---------------------------------------------------------------------------
+  // Direct unit tests for [[PathElement.validateNoStaticDuplicates]]. The 
end-to-end

Review Comment:
   These 6 tests (lines 192-263) target 
`PathElement.validateNoStaticDuplicates` rather than `CatalogManager`. The PR 
description even says "new `PathElementSuite`", but no such file is in the 
patch — the tests went here instead.
   
   `PathElement.scala` is its own self-contained class with no dedicated suite 
today; moving these into a sibling `PathElementSuite.scala` would mirror 
exactly the pattern this PR already uses for `SqlPathFormat.scala` / 
`SqlPathFormatSuite.scala`. The `serializePathEntries` / 
`deserializePathEntriesOrFail` cases would stay here since they genuinely test 
`CatalogManager`.
   
   Alternatively, just update the PR description to match where the tests 
actually live.



##########
sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala:
##########
@@ -1453,6 +1453,106 @@ abstract class SQLViewSuite extends QueryTest {
     }
   }
 
+  test("stored view path is ignored when PATH is disabled at read time") {
+    // A view created with PATH enabled persists two things in metadata: the 
frozen
+    // resolution path AND the creator session's current catalog+namespace at 
CREATE
+    // VIEW time (the view's `viewCatalogAndNamespace` property). If the 
reader's
+    // session has `spark.sql.path.enabled=false`, the pinned entries are 
intentionally
+    // dropped (`CatalogManager.resolutionPathEntriesForAnalysis`); the view 
body's
+    // unqualified references fall back to that captured catalog+namespace, 
which is
+    // the creator's USE state at CREATE time -- NOT the schema the view 
physically
+    // lives in (the two coincide below only because the test runs
+    // `USE spark_catalog.compat_view_b` before CREATE VIEW). Verify both 
directions:
+    //   - fully-qualified bodies keep working (qualification doesn't depend 
on PATH),
+    //   - unqualified bodies that relied on the frozen path now resolve via 
the
+    //     captured viewCatalogAndNamespace.
+    withDatabase("compat_view_a", "compat_view_b") {
+      sql("CREATE DATABASE compat_view_a")
+      sql("CREATE DATABASE compat_view_b")
+      withTable(
+          "compat_view_a.compat_t",
+          "compat_view_b.compat_t") {
+        sql("CREATE TABLE compat_view_a.compat_t USING parquet AS SELECT 1 AS 
id")
+        sql("CREATE TABLE compat_view_b.compat_t USING parquet AS SELECT 2 AS 
id")
+        withView(
+            "compat_view_b.v_unq_path",
+            "compat_view_b.v_fq_path") {
+          // Create both views with USE compat_view_b in effect so the stored
+          // viewCatalogAndNamespace points at compat_view_b, then SET PATH=a 
so the
+          // frozen path pins compat_view_a.
+          withSQLConf(PATH_ENABLED.key -> "true") {
+            try {
+              sql("USE spark_catalog.compat_view_b")
+              sql("SET PATH = spark_catalog.compat_view_a, system.builtin")
+              sql(
+                """
+                  |CREATE VIEW compat_view_b.v_unq_path AS
+                  |SELECT id FROM compat_t
+                  |""".stripMargin)
+              sql(
+                """
+                  |CREATE VIEW compat_view_b.v_fq_path AS
+                  |SELECT id FROM spark_catalog.compat_view_a.compat_t
+                  |""".stripMargin)
+            } finally {
+              sql("SET PATH = DEFAULT_PATH")
+              sql("USE spark_catalog.default")
+            }
+          }
+
+          // Now read with PATH disabled. The fully-qualified view body is 
independent of
+          // PATH and must keep returning rows from compat_view_a. The 
unqualified-body view
+          // drops its frozen-path pin and falls back to 
viewCatalogAndNamespace
+          // (compat_view_b), so unqualified `compat_t` resolves to 
compat_view_b.compat_t.
+          withSQLConf(PATH_ENABLED.key -> "false") {
+            checkAnswer(sql("SELECT id FROM compat_view_b.v_fq_path"), Row(1))
+            checkAnswer(sql("SELECT id FROM compat_view_b.v_unq_path"), Row(2))
+          }
+        }
+      }
+    }
+  }
+
+  test("stored view path with no fallback target fails clearly when PATH is 
off") {

Review Comment:
   Same convention as the other test added in this file.
   
   ```suggestion
     test("SPARK-56853: stored view path with no fallback target fails clearly 
when PATH is off") {
   ```



##########
sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala:
##########
@@ -1453,6 +1453,106 @@ abstract class SQLViewSuite extends QueryTest {
     }
   }
 
+  test("stored view path is ignored when PATH is disabled at read time") {

Review Comment:
   Every other test in this file uses a `SPARK-xxxx:` prefix (32374, 23519, 
33141, 37932, 52521, 56639, ...). The PR description's own command `build/sbt 
"sql/testOnly *SimpleSQLViewSuite -- -z SPARK-56853"` won't match this test as 
a result.
   
   ```suggestion
     test("SPARK-56853: stored view path is ignored when PATH is disabled at 
read time") {
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Re: [PR] [SPARK-56853] Improve PATH Tests [spark]

Reply via email to