This is an automated email from the ASF dual-hosted git repository.

bowenliang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kyuubi.git


The following commit(s) were added to refs/heads/master by this push:
     new bef3d5590 [KYUUBI #6645] Size based eviction for server-side temp 
files cleanup
bef3d5590 is described below

commit bef3d5590fc4f7f59461fcb6a9191f131c095bfb
Author: Bowen Liang <[email protected]>
AuthorDate: Wed Sep 4 23:15:31 2024 +0800

    [KYUUBI #6645] Size based eviction for server-side temp files cleanup
    
    # :mag: Description
    ## Issue References ๐Ÿ”—
    
    This pull request fixes #
    
    ## Describe Your Solution ๐Ÿ”ง
    
    - adding `maximumSize` to support size based eviction for server-side temp 
files cleanup in `TempFileService`
    - size-based eviction is disabled by default , with `maximumSize` set to 
optional by default
    - time-based eviction time is now extended from 14 days to 30 days by 
default
    
    ## Types of changes :bookmark:
    
    - [ ] Bugfix (non-breaking change which fixes an issue)
    - [ ] New feature (non-breaking change which adds functionality)
    - [ ] Breaking change (fix or feature that would cause existing 
functionality to change)
    
    ## Test Plan ๐Ÿงช
    
    #### Behavior Without This Pull Request :coffin:
    
    #### Behavior With This Pull Request :tada:
    
    #### Related Unit Tests
    
    ---
    
    # Checklist ๐Ÿ“
    
    - [ ] This patch was not authored or co-authored using [Generative 
Tooling](https://www.apache.org/legal/generative-tooling.html)
    
    **Be nice. Be informative.**
    
    Closes #6645 from bowenliang123/temp-file-size-evict.
    
    Closes #6645
    
    e1f166b6a [liangbowen] docs
    0b2d5aa6e [liangbowen] increase default SERVER_TEMP_FILE_EXPIRE_TIME to 30 
days
    ee87da56a [liangbowen] make config optional
    0607efcd7 [Bowen Liang] import
    9cc777660 [liangbowen] update
    f9e4de00e [Bowen Liang] docs
    55bf238d3 [liangbowen] size
    
    Lead-authored-by: Bowen Liang <[email protected]>
    Co-authored-by: liangbowen <[email protected]>
    Signed-off-by: Bowen Liang <[email protected]>
---
 docs/configuration/settings.md                     |  3 ++-
 .../org/apache/kyuubi/config/KyuubiConf.scala      | 10 +++++++++-
 .../apache/kyuubi/service/TempFileService.scala    | 23 ++++++++++++++--------
 .../kyuubi/server/TempFileServiceSuite.scala       | 20 ++++++++++++++++++-
 4 files changed, 45 insertions(+), 11 deletions(-)

diff --git a/docs/configuration/settings.md b/docs/configuration/settings.md
index 525316b8e..8cfa2659f 100644
--- a/docs/configuration/settings.md
+++ b/docs/configuration/settings.md
@@ -450,7 +450,8 @@ You can configure the Kyuubi properties in 
`$KYUUBI_HOME/conf/kyuubi-defaults.co
 | kyuubi.server.name                                       | &lt;undefined&gt; 
| The name of Kyuubi Server.                                                    
                                                                                
                                                                                
                          | string   | 1.5.0  |
 | kyuubi.server.periodicGC.interval                        | PT30M             
| How often to trigger a garbage collection.                                    
                                                                                
                                                                                
                          | duration | 1.7.0  |
 | kyuubi.server.redaction.regex                            | &lt;undefined&gt; 
| Regex to decide which Kyuubi contain sensitive information. When this regex 
matches a property key or value, the value is redacted from the various logs.   
                                                                                
                                      || 1.6.0  |
-| kyuubi.server.tempFile.expireTime                        | P14D              
| Expiration timout for cleanup server-side temporary files, e.g. operation 
logs.                                                                           
                                                                                
                              | duration | 1.10.0 |
+| kyuubi.server.tempFile.expireTime                        | P30D              
| Expiration timout for cleanup server-side temporary files, e.g. operation 
logs.                                                                           
                                                                                
                              | duration | 1.10.0 |
+| kyuubi.server.tempFile.maxCount                          | &lt;undefined&gt; 
| The upper threshold size of server-side temporary file paths to cleanup       
                                                                                
                                                                                
                          | int      | 1.10.0 |
 | kyuubi.server.thrift.resultset.default.fetch.size        | 1000              
| The number of rows sent in one Fetch RPC call by the server to the client, if 
not specified by the client. Respect 
`hive.server2.thrift.resultset.default.fetch.size` hive conf.                   
                                                                     | int      
| 1.9.1  |
 
 ### Session
diff --git 
a/kyuubi-common/src/main/scala/org/apache/kyuubi/config/KyuubiConf.scala 
b/kyuubi-common/src/main/scala/org/apache/kyuubi/config/KyuubiConf.scala
index ebb28d415..999f1844e 100644
--- a/kyuubi-common/src/main/scala/org/apache/kyuubi/config/KyuubiConf.scala
+++ b/kyuubi-common/src/main/scala/org/apache/kyuubi/config/KyuubiConf.scala
@@ -3119,7 +3119,15 @@ object KyuubiConf {
       .version("1.10.0")
       .serverOnly
       .timeConf
-      .createWithDefaultString("P14D")
+      .createWithDefaultString("P30D")
+
+  val SERVER_TEMP_FILE_EXPIRE_MAX_COUNT: OptionalConfigEntry[Int] =
+    buildConf("kyuubi.server.tempFile.maxCount")
+      .doc("The upper threshold size of server-side temporary file paths to 
cleanup")
+      .version("1.10.0")
+      .serverOnly
+      .intConf
+      .createOptional
 
   val SERVER_ADMINISTRATORS: ConfigEntry[Set[String]] =
     buildConf("kyuubi.server.administrators")
diff --git 
a/kyuubi-common/src/main/scala/org/apache/kyuubi/service/TempFileService.scala 
b/kyuubi-common/src/main/scala/org/apache/kyuubi/service/TempFileService.scala
index a53259e7a..6b83c8e4f 100644
--- 
a/kyuubi-common/src/main/scala/org/apache/kyuubi/service/TempFileService.scala
+++ 
b/kyuubi-common/src/main/scala/org/apache/kyuubi/service/TempFileService.scala
@@ -35,17 +35,23 @@ class TempFileService(name: String) extends 
AbstractService(name) {
   private lazy val cleanupScheduler =
     
ThreadUtils.newDaemonSingleThreadScheduledExecutor(s"$name-cleanup-scheduler")
 
+  def remainedExpiringFilesCount(): Long = expiringFiles.size()
+
   override def initialize(conf: KyuubiConf): Unit = {
     super.initialize(conf)
     val expireTimeInMs = conf.get(KyuubiConf.SERVER_TEMP_FILE_EXPIRE_TIME)
-    expiringFiles = CacheBuilder.newBuilder()
-      .expireAfterWrite(expireTimeInMs, TimeUnit.MILLISECONDS)
-      .removalListener((notification: RemovalNotification[String, String]) => {
-        val pathStr = notification.getValue
-        debug(s"Remove expired temp file: $pathStr")
-        cleanupFilePath(pathStr)
-      })
-      .build[String, String]()
+    val maxCountOpt = conf.get(KyuubiConf.SERVER_TEMP_FILE_EXPIRE_MAX_COUNT)
+    expiringFiles = {
+      val builder = CacheBuilder.newBuilder()
+        .expireAfterWrite(expireTimeInMs, TimeUnit.MILLISECONDS)
+        .removalListener((notification: RemovalNotification[String, String]) 
=> {
+          val pathStr = notification.getValue
+          debug(s"Remove expired temp file: $pathStr")
+          cleanupFilePath(pathStr)
+        })
+      maxCountOpt.foreach(builder.maximumSize(_))
+      builder.build()
+    }
 
     cleanupScheduler.scheduleAtFixedRate(
       () => expiringFiles.cleanUp(),
@@ -84,6 +90,7 @@ class TempFileService(name: String) extends 
AbstractService(name) {
       path.toString)
     TempFileCleanupUtils.deleteOnExit(path)
   }
+
 }
 
 object TempFileService {
diff --git 
a/kyuubi-server/src/test/scala/org/apache/kyuubi/server/TempFileServiceSuite.scala
 
b/kyuubi-server/src/test/scala/org/apache/kyuubi/server/TempFileServiceSuite.scala
index 656aef5ad..897c65983 100644
--- 
a/kyuubi-server/src/test/scala/org/apache/kyuubi/server/TempFileServiceSuite.scala
+++ 
b/kyuubi-server/src/test/scala/org/apache/kyuubi/server/TempFileServiceSuite.scala
@@ -26,14 +26,16 @@ import org.scalatest.time.SpanSugar.convertIntToGrainOfTime
 import org.apache.kyuubi.{Utils, WithKyuubiServer}
 import org.apache.kyuubi.Utils.writeToTempFile
 import org.apache.kyuubi.config.KyuubiConf
-import org.apache.kyuubi.config.KyuubiConf.SERVER_TEMP_FILE_EXPIRE_TIME
+import org.apache.kyuubi.config.KyuubiConf.{SERVER_TEMP_FILE_EXPIRE_MAX_COUNT, 
SERVER_TEMP_FILE_EXPIRE_TIME}
 import org.apache.kyuubi.session.KyuubiSessionManager
 
 class TempFileServiceSuite extends WithKyuubiServer {
   private val expirationInMs = 100
+  private val tempFilesMaxCount = 3
 
   override protected val conf: KyuubiConf = KyuubiConf()
     .set(SERVER_TEMP_FILE_EXPIRE_TIME, 
Duration.ofMillis(expirationInMs).toMillis)
+    .set(SERVER_TEMP_FILE_EXPIRE_MAX_COUNT, tempFilesMaxCount)
 
   test("file cleaned up after expiration") {
     val tempFileService =
@@ -52,4 +54,20 @@ class TempFileServiceSuite extends WithKyuubiServer {
       }
     }
   }
+
+  test("temp files cleaned up when exceeding max count") {
+    val tempFileService =
+      
server.backendService.sessionManager.asInstanceOf[KyuubiSessionManager].tempFileService
+    (0 until tempFilesMaxCount * 2).map { i =>
+      val dir = Utils.createTempDir()
+      writeToTempFile(new ByteArrayInputStream(s"$i".getBytes()), dir, 
s"$i.txt")
+      dir.toFile
+    }.foreach { dirFile =>
+      assert(dirFile.exists())
+      tempFileService.addPathToExpiration(dirFile.toPath)
+      dirFile
+    }
+
+    
assertResult(tempFilesMaxCount)(tempFileService.remainedExpiringFilesCount())
+  }
 }

Reply via email to