sunchao commented on a change in pull request #31131:
URL: https://github.com/apache/spark/pull/31131#discussion_r555241674
##########
File path:
sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/AlterTableAddPartitionSuite.scala
##########
@@ -46,6 +47,24 @@ trait AlterTableAddPartitionSuiteBase extends command.AlterTableAddPartitionSuit
}
}
+ private def copyPartition(t: String, from: String, to: String): String = {
Review comment:
ditto: rename `t` (and perhaps `from` and `to` as well) to something
more meaningful
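
For concreteness, one possible renaming could look like this (the names are only a suggestion, and the body is elided since it isn't shown in the diff):

```scala
private def copyPartition(tableName: String, fromSpec: String, toSpec: String): String = {
  ??? // same body as before, just with more descriptive parameter names
}
```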
##########
File path:
sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLCommandTestUtils.scala
##########
@@ -91,4 +91,20 @@ trait DDLCommandTestUtils extends SQLTestUtils {
}
protected def checkLocation(t: String, spec: TablePartitionSpec, expected: String): Unit
+
+ // Getting the total table size in the filesystem in bytes
+ def getTableSize(t: String): Int = {
Review comment:
nit: Should we use `Long` as the return type? Yeah, I know it's only used in
tests, but just to be on the safe side.
Also, perhaps rename `t` to `tableName` or something so it is easier to read.
:)
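
Something like the following is what I had in mind; the body is just a sketch that recursively sums file lengths under the table location, and `getTablePath` is a hypothetical stand-in for however the utils resolve the table's location:

```scala
import java.io.File

// Returns the total table size in the filesystem in bytes.
def getTableSize(tableName: String): Long = {
  def sizeOf(file: File): Long =
    if (file.isDirectory) file.listFiles().map(sizeOf).sum else file.length()
  // `getTablePath` is hypothetical; use whatever resolves the table location here.
  sizeOf(new File(getTablePath(tableName)))
}
```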
##########
File path:
sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/AlterTableRenamePartitionSuite.scala
##########
@@ -46,6 +47,27 @@ trait AlterTableRenamePartitionSuiteBase extends command.AlterTableRenamePartiti
checkAnswer(sql(s"SELECT id, data FROM $t WHERE id = 3"), Row(3, "def"))
}
}
+
+ test("SPARK-34060, SPARK-34071: update stats of cached table") {
+ withSQLConf(SQLConf.AUTO_SIZE_UPDATE_ENABLED.key -> "true") {
+ withNamespaceAndTable("ns", "tbl") { t =>
+ sql(s"CREATE TABLE $t (id int, part int) $defaultUsing PARTITIONED BY
(part)")
+ sql(s"INSERT INTO $t PARTITION (part=0) SELECT 0")
+ sql(s"INSERT INTO $t PARTITION (part=1) SELECT 1")
+ assert(!spark.catalog.isCached(t))
+ sql(s"CACHE TABLE $t")
+ assert(spark.catalog.isCached(t))
+ QueryTest.checkAnswer(sql(s"SELECT * FROM $t"), Seq(Row(0, 0), Row(1, 1)))
+ val tableSize = getTableSize(t)
+ assert(tableSize > 0)
+
+ sql(s"ALTER TABLE $t PARTITION (part=0) RENAME TO PARTITION (part=2)")
+ assert(spark.catalog.isCached(t))
+ assert(tableSize == getTableSize(t))
+ QueryTest.checkAnswer(sql(s"SELECT * FROM $t"), Seq(Row(0, 2), Row(1, 1)))
Review comment:
nit: not sure why we need the `QueryTest.` prefix here; I thought the trait
already extends it?
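
If the suite does mix in `QueryTest` (directly or via `DDLCommandTestUtils`), the unqualified form should work, the same way the other tests here call it:

```scala
checkAnswer(sql(s"SELECT * FROM $t"), Seq(Row(0, 2), Row(1, 1)))
```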
##########
File path:
sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/AlterTableDropPartitionSuite.scala
##########
@@ -43,6 +44,28 @@ trait AlterTableDropPartitionSuiteBase extends command.AlterTableDropPartitionSu
checkPartitions(t) // no partitions
}
}
+
+ test("SPARK-34060, SPARK-34071: update stats of cached table") {
+ withSQLConf(SQLConf.AUTO_SIZE_UPDATE_ENABLED.key -> "true") {
+ withNamespaceAndTable("ns", "tbl") { t =>
+ sql(s"CREATE TABLE $t (id int, part int) $defaultUsing PARTITIONED BY
(part)")
+ sql(s"INSERT INTO $t PARTITION (part=0) SELECT 0")
+ sql(s"INSERT INTO $t PARTITION (part=1) SELECT 1")
+ assert(!spark.catalog.isCached(t))
+ sql(s"CACHE TABLE $t")
+ assert(spark.catalog.isCached(t))
+ checkAnswer(sql(s"SELECT * FROM $t"), Seq(Row(0, 0), Row(1, 1)))
+ val twoPartSize = getTableSize(t)
+ assert(twoPartSize > 0)
+
+ sql(s"ALTER TABLE $t DROP PARTITION (part=0)")
+ assert(spark.catalog.isCached(t))
+ val onePartSize = getTableSize(t)
+ assert(0 < onePartSize && onePartSize < twoPartSize)
Review comment:
hmm, I'm not sure this check is useful; it seems unrelated to
caching (does `getTableSize` look at the cached table for stats?)
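
If the goal is to verify the stats the cached relation actually sees, one alternative (just a sketch, assuming the standard `queryExecution` accessors) would be to assert on the optimized plan's statistics instead of the raw filesystem size:

```scala
// Stats of the relation Spark plans against, which is what caching would use.
val stats = spark.table(t).queryExecution.optimizedPlan.stats
assert(stats.sizeInBytes > 0)
```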