This is an automated email from the ASF dual-hosted git repository.

felixybw pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git


The following commit(s) were added to refs/heads/main by this push:
     new 14cde7e92b [GLUTEN] Delete temporary files generated by dbgen (#10783)
14cde7e92b is described below

commit 14cde7e92bb89f6f41212df52f6e08f7e8dacb19
Author: xinghuayu007 <[email protected]>
AuthorDate: Wed Oct 1 03:14:54 2025 +0800

    [GLUTEN] Delete temporary files generated by dbgen (#10783)
    
    When using the TPCH shell script to generate test data, the script gets stuck 
when it is executed again.
    The dbGen tool generates a lot of temporary files, which are not deleted 
after the first run. When the tool is run again, it asks whether to 
overwrite the temporary files, which makes the script hang.
    
    The patch adds a function to delete these temporary files before running 
the dbGen tool.
---
 .../gen_data/parquet_dataset/tpch_datagen_parquet.scala  | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git 
a/tools/workload/tpch/gen_data/parquet_dataset/tpch_datagen_parquet.scala 
b/tools/workload/tpch/gen_data/parquet_dataset/tpch_datagen_parquet.scala
index 2eb834a482..4539050491 100644
--- a/tools/workload/tpch/gen_data/parquet_dataset/tpch_datagen_parquet.scala
+++ b/tools/workload/tpch/gen_data/parquet_dataset/tpch_datagen_parquet.scala
@@ -16,6 +16,8 @@
  */
 import com.databricks.spark.sql.perf.tpch._
 
+import java.io.File
+
 
 val scaleFactor = "100" // scaleFactor defines the size of the dataset to 
generate (in GB).
 val numPartitions = 200  // how many dsdgen partitions to run - number of 
input tasks.
@@ -30,6 +32,20 @@ val tables = new TPCHTables(spark.sqlContext,
     useDoubleForDecimal = false, // true to replace DecimalType with DoubleType
     useStringForDate = false) // true to replace DateType with StringType
 
+object FileUtils {
+  /** Deletes the regular files directly under `path` whose name contains `keyword`. */
+  def deleteFilesWithKeyword(path: String, keyword: String): Unit = {
+    // listFiles() yields null for a missing/non-directory path or an I/O error.
+    Option(new File(path).listFiles()).getOrElse(Array.empty[File])
+      .filter(f => f.isFile && f.getName.contains(keyword))
+      .foreach { f =>
+        // Report failures: a leftover file makes dbgen prompt again and hang.
+        if (!f.delete()) Console.err.println(s"Failed to delete ${f.getPath}")
+      }
+  }
+}
+
+FileUtils.deleteFilesWithKeyword(dbgenDir, "tbl")
 
 tables.genData(
     location = rootDir,


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to