cloud-fan commented on code in PR #55866: URL: https://github.com/apache/spark/pull/55866#discussion_r3242643197
########## sql/core/src/test/scala/org/apache/spark/sql/connector/SqlPathV2CatalogSuite.scala: ########## @@ -0,0 +1,142 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.connector + +import java.util.Collections + +import org.apache.spark.sql.{AnalysisException, Row} +import org.apache.spark.sql.connector.catalog.{Identifier, InMemoryCatalog, SupportsNamespaces} +import org.apache.spark.sql.connector.catalog.functions.UnboundFunction +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.test.SharedSparkSession + +/** + * End-to-end coverage of [[SQLConf.PATH_ENABLED]] resolution through non-session V2 catalogs. + * + * Other path tests live in `SetPathSuite` (session catalog) and `ProcedureSuite` + * (procedures via CALL). This suite specifically exercises: + * - unqualified table resolution across two V2 catalogs in SET PATH, + * - first-match ordering when both catalogs hold the same name, + * - unqualified V2 function resolution across two V2 catalogs in SET PATH, + * - the negative case where the unqualified name only lives in a catalog + * that is NOT on the path. + */ +class SqlPathV2CatalogSuite extends SharedSparkSession { + + private val emptyProps: java.util.Map[String, String] = Collections.emptyMap() + + override def beforeAll(): Unit = { + super.beforeAll() + spark.conf.set("spark.sql.catalog.pathcat", classOf[InMemoryCatalog].getName) + spark.conf.set("spark.sql.catalog.pathcat2", classOf[InMemoryCatalog].getName) + } + + override def afterAll(): Unit = { + try { + spark.sessionState.catalogManager.reset() + spark.sessionState.conf.unsetConf("spark.sql.catalog.pathcat") + spark.sessionState.conf.unsetConf("spark.sql.catalog.pathcat2") + } finally { + super.afterAll() + } + } + + private def v2Catalog(name: String): InMemoryCatalog = + spark.sessionState.catalogManager.catalog(name).asInstanceOf[InMemoryCatalog] + + private def createV2Namespace(catalog: String, ns: String): Unit = { + v2Catalog(catalog).asInstanceOf[SupportsNamespaces] + .createNamespace(Array(ns), emptyProps) + } + + private def addV2Function( + catalog: String, + ns: String, + name: String, + fn: UnboundFunction): Unit = { + v2Catalog(catalog).createFunction(Identifier.of(Array(ns), name), fn) + } + + test("V2 catalogs on SET PATH: unqualified table follows first match") { + withSQLConf(SQLConf.PATH_ENABLED.key -> "true") { + // pathcat and pathcat2 each have a namespace `ns` and a table `path_v2_t` with + // different contents, so we can tell which catalog supplied the row. + createV2Namespace("pathcat", "ns") + createV2Namespace("pathcat2", "ns") + sql("CREATE TABLE pathcat.ns.path_v2_t (id INT) USING foo") + sql("INSERT INTO pathcat.ns.path_v2_t VALUES (10)") + sql("CREATE TABLE pathcat2.ns.path_v2_t (id INT) USING foo") + sql("INSERT INTO pathcat2.ns.path_v2_t VALUES (20)") + + try { + sql("SET PATH = pathcat.ns, pathcat2.ns, system.builtin") + checkAnswer(sql("SELECT id FROM path_v2_t"), Row(10)) + + sql("SET PATH = pathcat2.ns, pathcat.ns, system.builtin") + checkAnswer(sql("SELECT id FROM path_v2_t"), Row(20)) + } finally { + sql("SET PATH = DEFAULT_PATH") + sql("DROP TABLE IF EXISTS pathcat.ns.path_v2_t") + sql("DROP TABLE IF EXISTS pathcat2.ns.path_v2_t") + } + } + } + + test("V2 catalogs on SET PATH: unqualified table only in a non-path catalog is not found") { + withSQLConf(SQLConf.PATH_ENABLED.key -> "true") { + createV2Namespace("pathcat", "ns_only_here") + sql("CREATE TABLE pathcat.ns_only_here.hidden_t (id INT) USING foo") + try { + // Path does not include pathcat.ns_only_here; bare `hidden_t` must not resolve. + sql("SET PATH = pathcat2.ns, system.builtin") + val e = intercept[AnalysisException] { + sql("SELECT id FROM hidden_t").collect() + } + assert(e.getCondition == "TABLE_OR_VIEW_NOT_FOUND" || + e.getMessage.contains("TABLE_OR_VIEW_NOT_FOUND"), + s"Expected TABLE_OR_VIEW_NOT_FOUND; got: ${e.getCondition}: ${e.getMessage}") + } finally { + sql("SET PATH = DEFAULT_PATH") + sql("DROP TABLE IF EXISTS pathcat.ns_only_here.hidden_t") + } + } + } + + test("V2 catalogs on SET PATH: unqualified function follows first match") { Review Comment: Test name claims "follows first match", but the body can't verify that: both `StrLenDefault` (`DataSourceV2FunctionSuite.scala:743`) and `StrLenMagic` (`DataSourceV2FunctionSuite.scala:754`) return `s.length`. Swapping path order yields identical results — the comment at lines 129-130 admits this and downgrades the assertion to "neither catalog raised not found". The parallel table test above (lines 75-98) properly uses distinguishable contents (`id=10` vs `id=20`). Either rename to reflect what's actually verified, or pick function impls that return different values — e.g., a tiny `ScalarFunction` registered against `pathcat2` that returns `s.length * 100`. Then `strlen('abc')` returns 3 or 300 depending on which catalog supplied the function, and the test would prove first-match for V2 functions. ########## sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/CatalogManagerSuite.scala: ########## @@ -150,6 +152,115 @@ class CatalogManagerSuite extends SparkFunSuite with SQLHelper { assert(CatalogManager.deserializePathEntries(payload).isEmpty, s"payload=$payload") } } + + test("serializePathEntries round-trips through deserialize for typical inputs") { + val cases = Seq( + Seq(Seq("spark_catalog", "default"), Seq("system", "builtin")), + Seq(Seq("system", "session")), + Seq.empty[Seq[String]]) + cases.foreach { entries => + val payload = CatalogManager.serializePathEntries(entries) + val parsed = CatalogManager.deserializePathEntries(payload) + .getOrElse(fail(s"Expected payload to round-trip: $payload")) + assert(parsed === entries, s"Round-trip mismatch for $entries; got $parsed") + } + } + + test("serializePathEntries round-trips multi-level and quoted identifiers") { + val entries = Seq( + Seq("cat", "ns1", "ns2"), + Seq("spark_catalog", "sch.with.dots"), + Seq("spark_catalog", "schema with spaces")) + val payload = CatalogManager.serializePathEntries(entries) + val parsed = CatalogManager.deserializePathEntries(payload) + .getOrElse(fail(s"Expected payload to round-trip: $payload")) + assert(parsed === entries) + } + + test("deserializePathEntriesOrFail raises a clear AnalysisException for bad payloads") { + val e = intercept[AnalysisException] { + CatalogManager.deserializePathEntriesOrFail( + storedPathStr = "{bad-json", + objectType = "view", + objectName = "default.v_broken") + } + assert(e.getMessage.contains("Invalid stored SQL path metadata for view")) + assert(e.getMessage.contains("default.v_broken")) + } + + // --------------------------------------------------------------------------- + // Direct unit tests for [[PathElement.validateNoStaticDuplicates]]. The end-to-end Review Comment: These 6 tests (lines 192-263) target `PathElement.validateNoStaticDuplicates` rather than `CatalogManager`. The PR description even says "new `PathElementSuite`", but no such file is in the patch — the tests went here instead. `PathElement.scala` is its own self-contained class with no dedicated suite today; moving these into a sibling `PathElementSuite.scala` would mirror exactly the pattern this PR already uses for `SqlPathFormat.scala` / `SqlPathFormatSuite.scala`. The `serializePathEntries` / `deserializePathEntriesOrFail` cases would stay here since they genuinely test `CatalogManager`. Alternatively, just update the PR description to match where the tests actually live. ########## sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala: ########## @@ -1453,6 +1453,106 @@ abstract class SQLViewSuite extends QueryTest { } } + test("stored view path is ignored when PATH is disabled at read time") { + // A view created with PATH enabled persists two things in metadata: the frozen + // resolution path AND the creator session's current catalog+namespace at CREATE + // VIEW time (the view's `viewCatalogAndNamespace` property). If the reader's + // session has `spark.sql.path.enabled=false`, the pinned entries are intentionally + // dropped (`CatalogManager.resolutionPathEntriesForAnalysis`); the view body's + // unqualified references fall back to that captured catalog+namespace, which is + // the creator's USE state at CREATE time -- NOT the schema the view physically + // lives in (the two coincide below only because the test runs + // `USE spark_catalog.compat_view_b` before CREATE VIEW). Verify both directions: + // - fully-qualified bodies keep working (qualification doesn't depend on PATH), + // - unqualified bodies that relied on the frozen path now resolve via the + // captured viewCatalogAndNamespace. + withDatabase("compat_view_a", "compat_view_b") { + sql("CREATE DATABASE compat_view_a") + sql("CREATE DATABASE compat_view_b") + withTable( + "compat_view_a.compat_t", + "compat_view_b.compat_t") { + sql("CREATE TABLE compat_view_a.compat_t USING parquet AS SELECT 1 AS id") + sql("CREATE TABLE compat_view_b.compat_t USING parquet AS SELECT 2 AS id") + withView( + "compat_view_b.v_unq_path", + "compat_view_b.v_fq_path") { + // Create both views with USE compat_view_b in effect so the stored + // viewCatalogAndNamespace points at compat_view_b, then SET PATH=a so the + // frozen path pins compat_view_a. + withSQLConf(PATH_ENABLED.key -> "true") { + try { + sql("USE spark_catalog.compat_view_b") + sql("SET PATH = spark_catalog.compat_view_a, system.builtin") + sql( + """ + |CREATE VIEW compat_view_b.v_unq_path AS + |SELECT id FROM compat_t + |""".stripMargin) + sql( + """ + |CREATE VIEW compat_view_b.v_fq_path AS + |SELECT id FROM spark_catalog.compat_view_a.compat_t + |""".stripMargin) + } finally { + sql("SET PATH = DEFAULT_PATH") + sql("USE spark_catalog.default") + } + } + + // Now read with PATH disabled. The fully-qualified view body is independent of + // PATH and must keep returning rows from compat_view_a. The unqualified-body view + // drops its frozen-path pin and falls back to viewCatalogAndNamespace + // (compat_view_b), so unqualified `compat_t` resolves to compat_view_b.compat_t. + withSQLConf(PATH_ENABLED.key -> "false") { + checkAnswer(sql("SELECT id FROM compat_view_b.v_fq_path"), Row(1)) + checkAnswer(sql("SELECT id FROM compat_view_b.v_unq_path"), Row(2)) + } + } + } + } + } + + test("stored view path with no fallback target fails clearly when PATH is off") { Review Comment: Same convention as the other test added in this file. ```suggestion test("SPARK-56853: stored view path with no fallback target fails clearly when PATH is off") { ``` ########## sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala: ########## @@ -1453,6 +1453,106 @@ abstract class SQLViewSuite extends QueryTest { } } + test("stored view path is ignored when PATH is disabled at read time") { Review Comment: Every other test in this file uses a `SPARK-xxxx:` prefix (32374, 23519, 33141, 37932, 52521, 56639, ...). The PR description's own command `build/sbt "sql/testOnly *SimpleSQLViewSuite -- -z SPARK-56853"` won't match this test as a result. ```suggestion test("SPARK-56853: stored view path is ignored when PATH is disabled at read time") { ``` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected] --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
