MaxGekk commented on a change in pull request #30979:
URL: https://github.com/apache/spark/pull/30979#discussion_r550327967
##########
File path:
sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/AlterTableDropPartitionSuite.scala
##########
@@ -42,6 +42,18 @@ trait AlterTableDropPartitionSuiteBase extends
command.AlterTableDropPartitionSu
checkPartitions(t) // no partitions
}
}
+
+ test("SPARK-33941: invalidate cache after partition dropping") {
+ withNamespaceAndTable("ns", "tbl") { t =>
+ sql(s"CREATE TABLE $t (id int, part int) $defaultUsing PARTITIONED BY (part)")
+ sql(s"INSERT INTO $t PARTITION (part=0) SELECT 0")
+ val df = spark.table(t)
+ df.cache()
+ assert(!df.isEmpty)
+ sql(s"ALTER TABLE $t DROP PARTITION (part=0)")
+ assert(df.isEmpty)
Review comment:
Hmm, an even simpler test without caching fails:
```scala
test("SPARK-33941: refresh cache after partition dropping") {
withNamespaceAndTable("ns", "tbl") { t =>
sql(s"CREATE TABLE $t (id int, part int) $defaultUsing PARTITIONED BY (part)")
sql(s"INSERT INTO $t PARTITION (part=0) SELECT 0")
sql(s"INSERT INTO $t PARTITION (part=1) SELECT 1")
val df = spark.table(t)
// df.cache()
df.collect()
sql(s"ALTER TABLE $t DROP PARTITION (part=0)")
sql(s"ALTER TABLE $t RECOVER PARTITIONS")
sql(s"MSCK REPAIR TABLE $t")
df.collect()
}
}
```
with:
```java
Input path does not exist:
file:/private/var/folders/p3/dfs6mf655d7fnjrsjvldh0tc0000gn/T/warehouse-a3f5be00-8e30-43be-a14d-a17983f2cd2d/ns.db/tbl/part=0
org.apache.hadoop.mapred.InvalidInputException: Input path does not exist:
file:/private/var/folders/p3/dfs6mf655d7fnjrsjvldh0tc0000gn/T/warehouse-a3f5be00-8e30-43be-a14d-a17983f2cd2d/ns.db/tbl/part=0
at
org.apache.hadoop.mapred.FileInputFormat.singleThreadedListStatus(FileInputFormat.java:297)
at
org.apache.hadoop.mapred.FileInputFormat.listStatus(FileInputFormat.java:239)
```
on the v1 In-Memory and Hive external catalogs. The issue looks much more
serious than I expected. cc @cloud-fan @HyukjinKwon
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]