Udbhav30 commented on a change in pull request #29387:
URL: https://github.com/apache/spark/pull/29387#discussion_r475911449
##########
File path: sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala
##########
@@ -3101,6 +3101,78 @@ abstract class DDLSuite extends QueryTest with SQLTestUtils {
       assert(spark.sessionState.catalog.isRegisteredFunction(rand))
     }
   }
+
+ test("SPARK-32481 Move data to trash on truncate table if enabled") {
+ val trashIntervalKey = "fs.trash.interval"
+ withTable("tab1") {
+ withSQLConf(SQLConf.TRUNCATE_TRASH_ENABLED.key -> "true") {
+ sql("CREATE TABLE tab1 (col INT) USING parquet")
+ sql("INSERT INTO tab1 SELECT 1")
+ // scalastyle:off hadoopconfiguration
+ val hadoopConf = spark.sparkContext.hadoopConfiguration
+ // scalastyle:on hadoopconfiguration
+ val originalValue = hadoopConf.get(trashIntervalKey, "0")
+ val tablePath = new Path(spark.sessionState.catalog
+ .getTableMetadata(TableIdentifier("tab1")).storage.locationUri.get)
+
+ val fs = tablePath.getFileSystem(hadoopConf)
+ val trashRoot = fs.getTrashRoot(tablePath)
+ assert(!fs.exists(trashRoot))
+ try {
+ hadoopConf.set(trashIntervalKey, "5")
+ sql("TRUNCATE TABLE tab1")
+ } finally {
+ hadoopConf.set(trashIntervalKey, originalValue)
+ }
+ assert(fs.exists(trashRoot))
+ fs.delete(trashRoot, true)
+ }
+ }
+ }
+
+ test("SPARK-32481 delete data permanently on truncate table if trash
interval is non-positive") {
+ val trashIntervalKey = "fs.trash.interval"
+ withTable("tab1") {
+ withSQLConf(SQLConf.TRUNCATE_TRASH_ENABLED.key -> "true") {
+ sql("CREATE TABLE tab1 (col INT) USING parquet")
+ sql("INSERT INTO tab1 SELECT 1")
+ // scalastyle:off hadoopconfiguration
+ val hadoopConf = spark.sparkContext.hadoopConfiguration
+ // scalastyle:on hadoopconfiguration
+ val originalValue = hadoopConf.get(trashIntervalKey, "0")
+ val tablePath = new Path(spark.sessionState.catalog
+ .getTableMetadata(TableIdentifier("tab1")).storage.locationUri.get)
+
+ val fs = tablePath.getFileSystem(hadoopConf)
+ val trashRoot = fs.getTrashRoot(tablePath)
+ assert(!fs.exists(trashRoot))
+ try {
+ hadoopConf.set(trashIntervalKey, "0")
+ sql("TRUNCATE TABLE tab1")
+ } finally {
+ hadoopConf.set(trashIntervalKey, originalValue)
+ }
+ assert(!fs.exists(trashRoot))
+ }
+ }
+ }
+
+ test("SPARK-32481 Donot move data to trash on truncate table if disabled") {
+ withTable("tab1") {
+ withSQLConf(SQLConf.TRUNCATE_TRASH_ENABLED.key -> "false") {
+ sql("CREATE TABLE tab1 (col INT) USING parquet")
+ sql("INSERT INTO tab1 SELECT 1")
+ val hadoopConf = spark.sessionState.newHadoopConf()
Review comment:
@dongjoon-hyun See
[here](https://github.com/apache/hadoop/blob/64f36b9543c011ce2f1f7d1e10da0eab88a0759d/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/TrashPolicyDefault.java#L125).
If `fs.trash.interval` is non-positive, the `moveToAppropriateTrash` function
returns false. So to test the trash-move path I have to set a positive value
for `fs.trash.interval`, but setting it on `spark.sessionState.newHadoopConf()`
does not update the shared `hadoopConf`, so the other test cases would fail.
In this test case the trash move is a no-op, so updating the shared
`hadoopConf` is not required, which is why I used
`spark.sessionState.newHadoopConf()` here.
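
For illustration, here is a minimal standalone sketch (my illustration, not
code from the PR) of both behaviors, assuming only `hadoop-common` on the
classpath; the object name is hypothetical:

```scala
import java.nio.file.Files

import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{Path, Trash}

// Hypothetical demo object, not part of the PR.
object TrashIntervalSketch {
  def main(args: Array[String]): Unit = {
    // Stand-ins for spark.sparkContext.hadoopConfiguration (shared) and
    // spark.sessionState.newHadoopConf() (a fresh copy).
    val sharedConf = new Configuration()
    val copiedConf = new Configuration(sharedConf)

    // Mutating the copy never reaches the shared configuration, which is
    // why the first two tests set the interval on the shared instance.
    copiedConf.set("fs.trash.interval", "5")
    assert(sharedConf.get("fs.trash.interval", "0") == "0")

    // With a non-positive interval the trash is treated as disabled and
    // moveToAppropriateTrash returns false without moving anything.
    sharedConf.set("fs.trash.interval", "0")
    val file = new Path(Files.createTempFile("truncate-demo", ".txt").toUri)
    val moved = Trash.moveToAppropriateTrash(
      file.getFileSystem(sharedConf), file, sharedConf)
    println(s"moved to trash: $moved") // prints: moved to trash: false
  }
}
```

With a positive interval the same call would return `true` and move the file
under the filesystem's trash root, which is what the first test asserts on
via `fs.getTrashRoot(tablePath)`.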
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]