uros-b commented on code in PR #56617:
URL: https://github.com/apache/spark/pull/56617#discussion_r3466210579
##########
sql/core/src/test/scala/org/apache/spark/sql/connector/SupportsCatalogOptionsSuite.scala:
##########
@@ -370,6 +372,55 @@ class SupportsCatalogOptionsSuite extends
SharedSparkSession with BeforeAndAfter
.contains("Cannot specify both version and timestamp when time
travelling the table."))
}
+ test("useCatalogResolution=false: read is resolved via the TableProvider
path, not the catalog") {
+ // The provider opts out of catalog resolution, so load() goes through
getTable instead of
+ // extractIdentifier/extractCatalog. The resulting relation therefore has
no catalog/identifier.
+ val df = spark.read.format(optOutFormat).option("name", "t1").load()
+ val relation = df.logicalPlan.collectFirst {
+ case r: DataSourceV2Relation => r
+ }.getOrElse(fail("Expected a DataSourceV2Relation"))
+ assert(relation.catalog.isEmpty && relation.identifier.isEmpty,
+ "Opting out of catalog resolution should bypass the catalog")
+ }
+
+ test("useCatalogResolution=false: a user-specified schema is allowed (no
catalog check)") {
+ // The schema check only fires on the catalog-resolution path; opting out
skips it.
+ val df = spark.read.format(optOutFormat).option("name", "t1").schema("i
int, j int").load()
+ assert(df.schema.fieldNames === Array("i", "j"))
+ }
+
+ test("failWriteIfTableDoesNotExist=false: append creates a missing table
(create-on-write)") {
+ val df = spark.range(10)
+ // t1 does not exist yet; append should create it from the query instead
of failing.
+ df.write.format(createOnWriteFormat).option("name",
"t1").option("catalog", catalogName)
+ .mode(SaveMode.Append).save()
+ assert(catalog(catalogName).tableExists("t1"), "append should have created
the table")
+ checkAnswer(load("t1", Some(catalogName)), df.toDF())
+ }
+
+ test("failWriteIfTableDoesNotExist=false: overwrite creates a missing table
(create-on-write)") {
+ val df = spark.range(10, 20)
+ df.write.format(createOnWriteFormat).option("name",
"t1").option("catalog", catalogName)
+ .mode(SaveMode.Overwrite).save()
+ assert(catalog(catalogName).tableExists("t1"), "overwrite should have
created the table")
+ checkAnswer(load("t1", Some(catalogName)), df.toDF())
+ }
+
+ test("failWriteIfTableDoesNotExist=false: append to an existing table still
appends") {
+ sql(s"create table $catalogName.t1 (id bigint) using $createOnWriteFormat")
+ spark.range(10).write.format(createOnWriteFormat).option("name", "t1")
+ .option("catalog", catalogName).mode(SaveMode.Append).save()
+ checkAnswer(load("t1", Some(catalogName)), spark.range(10).toDF())
+ }
+
+ test("append to a missing table fails by default
(failWriteIfTableDoesNotExist=true)") {
+ // The default provider keeps the prior behavior: append/overwrite to a
missing table fails.
+ intercept[NoSuchTableException] {
+ spark.range(10).write.format(format).option("name",
"t1").option("catalog", catalogName)
+ .mode(SaveMode.Append).save()
+ }
+ }
+
Review Comment:
There appear to be some gaps in the test matrix. Coverage is missing for:
- The `useCatalogResolution=false` write path, which falls through to
`case _: TableProvider` and then to either the V1 fallback or
`writeWithSaveModeUnsupportedBySourceError`; this behavior is currently
unspecified.
- `SaveMode.Overwrite` to a missing table failing by default (only the Append
default-throw case is tested).
- The schema-evolution + create-on-write case raised in the earlier review
comment (https://github.com/apache/spark/pull/56617/changes#r3466203964).
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]