cloud-fan commented on code in PR #52731:
URL: https://github.com/apache/spark/pull/52731#discussion_r2472540054
##########
sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalogSuite.scala:
##########
@@ -1951,4 +1951,80 @@ abstract class SessionCatalogSuite extends AnalysisTest
with Eventually {
assert(catalog.getCachedTable(qualifiedName2) != null)
}
}
+
+ test("CatalogTable partitionSchema provides detailed error for corrupted
metadata") {
+ // Test case 1: Partition columns don't match schema (wrong names)
+ val corruptedTable1 = CatalogTable(
+ identifier = TableIdentifier("corrupted_table1", Some("test_db")),
+ tableType = CatalogTableType.MANAGED,
+ storage = CatalogStorageFormat.empty,
+ schema = StructType(Seq(
+ StructField("id", IntegerType),
+ StructField("name", StringType),
+ StructField("year", IntegerType),
+ StructField("month", IntegerType)
+ )),
+ partitionColumnNames = Seq("year", "day") // "day" doesn't exist in
schema
+ )
+
+ val exception1 = intercept[AssertionError] {
+ corruptedTable1.partitionSchema
+ }
+
+ val expectedMessage1 = "assertion failed: Corrupted table metadata
detected " +
+ "for table test_db.corrupted_table1. " +
+ "The partition column names in the table schema " +
+ "do not match the declared partition columns. " +
+ "Table schema columns: [id, name, year, month] " +
+ "Declared partition columns: [year, day] " +
+ "Actual partition columns from schema: [year, month]. " +
+ "This indicates corrupted table metadata that needs to be repaired."
+ assert(exception1.getMessage === expectedMessage1)
+
+ // Test case 2: Wrong number of partition columns
+ val corruptedTable2 = CatalogTable(
+ identifier = TableIdentifier("corrupted_table2", Some("test_db")),
+ tableType = CatalogTableType.MANAGED,
+ storage = CatalogStorageFormat.empty,
+ schema = StructType(Seq(
+ StructField("id", IntegerType),
+ StructField("data", StringType),
+ StructField("partition_col", StringType)
+ )),
+ partitionColumnNames = Seq("partition_col", "extra_col") // Too many
partition columns
Review Comment:
`extra_col` does not exist in the schema, so this case is effectively the same
as test case 1 above. Let's instead test a case where the partition columns are
not at the end of the schema.
```suggestion
partitionColumnNames = Seq("id", "partition_col") // partition cols not at the end of schema
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]