This is an automated email from the ASF dual-hosted git repository.
bowenliang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kyuubi.git
The following commit(s) were added to refs/heads/master by this push:
new bef3d5590 [KYUUBI #6645] Size based eviction for server-side temp
files cleanup
bef3d5590 is described below
commit bef3d5590fc4f7f59461fcb6a9191f131c095bfb
Author: Bowen Liang <[email protected]>
AuthorDate: Wed Sep 4 23:15:31 2024 +0800
[KYUUBI #6645] Size based eviction for server-side temp files cleanup
# :mag: Description
## Issue References ๐
This pull request fixes #
## Describe Your Solution ๐ง
- adding `maximumSize` to support size based eviction for server-side temp
files cleanup in `TempFileService`
- size-based eviction is disabled by default , with `maximumSize` set to
optional by default
- time-based eviction time is now extended from 14 days to 30 days by
default
## Types of changes :bookmark:
- [ ] Bugfix (non-breaking change which fixes an issue)
- [ ] New feature (non-breaking change which adds functionality)
- [ ] Breaking change (fix or feature that would cause existing
functionality to change)
## Test Plan ๐งช
#### Behavior Without This Pull Request :coffin:
#### Behavior With This Pull Request :tada:
#### Related Unit Tests
---
# Checklist ๐
- [ ] This patch was not authored or co-authored using [Generative
Tooling](https://www.apache.org/legal/generative-tooling.html)
**Be nice. Be informative.**
Closes #6645 from bowenliang123/temp-file-size-evict.
Closes #6645
e1f166b6a [liangbowen] docs
0b2d5aa6e [liangbowen] increase default SERVER_TEMP_FILE_EXPIRE_TIME to 30
days
ee87da56a [liangbowen] make config optional
0607efcd7 [Bowen Liang] import
9cc777660 [liangbowen] update
f9e4de00e [Bowen Liang] docs
55bf238d3 [liangbowen] size
Lead-authored-by: Bowen Liang <[email protected]>
Co-authored-by: liangbowen <[email protected]>
Signed-off-by: Bowen Liang <[email protected]>
---
docs/configuration/settings.md | 3 ++-
.../org/apache/kyuubi/config/KyuubiConf.scala | 10 +++++++++-
.../apache/kyuubi/service/TempFileService.scala | 23 ++++++++++++++--------
.../kyuubi/server/TempFileServiceSuite.scala | 20 ++++++++++++++++++-
4 files changed, 45 insertions(+), 11 deletions(-)
diff --git a/docs/configuration/settings.md b/docs/configuration/settings.md
index 525316b8e..8cfa2659f 100644
--- a/docs/configuration/settings.md
+++ b/docs/configuration/settings.md
@@ -450,7 +450,8 @@ You can configure the Kyuubi properties in
`$KYUUBI_HOME/conf/kyuubi-defaults.co
| kyuubi.server.name | <undefined>
| The name of Kyuubi Server.
| string | 1.5.0 |
| kyuubi.server.periodicGC.interval | PT30M
| How often to trigger a garbage collection.
| duration | 1.7.0 |
| kyuubi.server.redaction.regex | <undefined>
| Regex to decide which Kyuubi contain sensitive information. When this regex
matches a property key or value, the value is redacted from the various logs.
|| 1.6.0 |
-| kyuubi.server.tempFile.expireTime | P14D
| Expiration timout for cleanup server-side temporary files, e.g. operation
logs.
| duration | 1.10.0 |
+| kyuubi.server.tempFile.expireTime | P30D
| Expiration timout for cleanup server-side temporary files, e.g. operation
logs.
| duration | 1.10.0 |
+| kyuubi.server.tempFile.maxCount | <undefined>
| The upper threshold size of server-side temporary file paths to cleanup
| int | 1.10.0 |
| kyuubi.server.thrift.resultset.default.fetch.size | 1000
| The number of rows sent in one Fetch RPC call by the server to the client, if
not specified by the client. Respect
`hive.server2.thrift.resultset.default.fetch.size` hive conf.
| int
| 1.9.1 |
### Session
diff --git
a/kyuubi-common/src/main/scala/org/apache/kyuubi/config/KyuubiConf.scala
b/kyuubi-common/src/main/scala/org/apache/kyuubi/config/KyuubiConf.scala
index ebb28d415..999f1844e 100644
--- a/kyuubi-common/src/main/scala/org/apache/kyuubi/config/KyuubiConf.scala
+++ b/kyuubi-common/src/main/scala/org/apache/kyuubi/config/KyuubiConf.scala
@@ -3119,7 +3119,15 @@ object KyuubiConf {
.version("1.10.0")
.serverOnly
.timeConf
- .createWithDefaultString("P14D")
+ .createWithDefaultString("P30D")
+
+ val SERVER_TEMP_FILE_EXPIRE_MAX_COUNT: OptionalConfigEntry[Int] =
+ buildConf("kyuubi.server.tempFile.maxCount")
+ .doc("The upper threshold size of server-side temporary file paths to
cleanup")
+ .version("1.10.0")
+ .serverOnly
+ .intConf
+ .createOptional
val SERVER_ADMINISTRATORS: ConfigEntry[Set[String]] =
buildConf("kyuubi.server.administrators")
diff --git
a/kyuubi-common/src/main/scala/org/apache/kyuubi/service/TempFileService.scala
b/kyuubi-common/src/main/scala/org/apache/kyuubi/service/TempFileService.scala
index a53259e7a..6b83c8e4f 100644
---
a/kyuubi-common/src/main/scala/org/apache/kyuubi/service/TempFileService.scala
+++
b/kyuubi-common/src/main/scala/org/apache/kyuubi/service/TempFileService.scala
@@ -35,17 +35,23 @@ class TempFileService(name: String) extends
AbstractService(name) {
private lazy val cleanupScheduler =
ThreadUtils.newDaemonSingleThreadScheduledExecutor(s"$name-cleanup-scheduler")
+ def remainedExpiringFilesCount(): Long = expiringFiles.size()
+
override def initialize(conf: KyuubiConf): Unit = {
super.initialize(conf)
val expireTimeInMs = conf.get(KyuubiConf.SERVER_TEMP_FILE_EXPIRE_TIME)
- expiringFiles = CacheBuilder.newBuilder()
- .expireAfterWrite(expireTimeInMs, TimeUnit.MILLISECONDS)
- .removalListener((notification: RemovalNotification[String, String]) => {
- val pathStr = notification.getValue
- debug(s"Remove expired temp file: $pathStr")
- cleanupFilePath(pathStr)
- })
- .build[String, String]()
+ val maxCountOpt = conf.get(KyuubiConf.SERVER_TEMP_FILE_EXPIRE_MAX_COUNT)
+ expiringFiles = {
+ val builder = CacheBuilder.newBuilder()
+ .expireAfterWrite(expireTimeInMs, TimeUnit.MILLISECONDS)
+ .removalListener((notification: RemovalNotification[String, String])
=> {
+ val pathStr = notification.getValue
+ debug(s"Remove expired temp file: $pathStr")
+ cleanupFilePath(pathStr)
+ })
+ maxCountOpt.foreach(builder.maximumSize(_))
+ builder.build()
+ }
cleanupScheduler.scheduleAtFixedRate(
() => expiringFiles.cleanUp(),
@@ -84,6 +90,7 @@ class TempFileService(name: String) extends
AbstractService(name) {
path.toString)
TempFileCleanupUtils.deleteOnExit(path)
}
+
}
object TempFileService {
diff --git
a/kyuubi-server/src/test/scala/org/apache/kyuubi/server/TempFileServiceSuite.scala
b/kyuubi-server/src/test/scala/org/apache/kyuubi/server/TempFileServiceSuite.scala
index 656aef5ad..897c65983 100644
---
a/kyuubi-server/src/test/scala/org/apache/kyuubi/server/TempFileServiceSuite.scala
+++
b/kyuubi-server/src/test/scala/org/apache/kyuubi/server/TempFileServiceSuite.scala
@@ -26,14 +26,16 @@ import org.scalatest.time.SpanSugar.convertIntToGrainOfTime
import org.apache.kyuubi.{Utils, WithKyuubiServer}
import org.apache.kyuubi.Utils.writeToTempFile
import org.apache.kyuubi.config.KyuubiConf
-import org.apache.kyuubi.config.KyuubiConf.SERVER_TEMP_FILE_EXPIRE_TIME
+import org.apache.kyuubi.config.KyuubiConf.{SERVER_TEMP_FILE_EXPIRE_MAX_COUNT,
SERVER_TEMP_FILE_EXPIRE_TIME}
import org.apache.kyuubi.session.KyuubiSessionManager
class TempFileServiceSuite extends WithKyuubiServer {
private val expirationInMs = 100
+ private val tempFilesMaxCount = 3
override protected val conf: KyuubiConf = KyuubiConf()
.set(SERVER_TEMP_FILE_EXPIRE_TIME,
Duration.ofMillis(expirationInMs).toMillis)
+ .set(SERVER_TEMP_FILE_EXPIRE_MAX_COUNT, tempFilesMaxCount)
test("file cleaned up after expiration") {
val tempFileService =
@@ -52,4 +54,20 @@ class TempFileServiceSuite extends WithKyuubiServer {
}
}
}
+
+ test("temp files cleaned up when exceeding max count") {
+ val tempFileService =
+
server.backendService.sessionManager.asInstanceOf[KyuubiSessionManager].tempFileService
+ (0 until tempFilesMaxCount * 2).map { i =>
+ val dir = Utils.createTempDir()
+ writeToTempFile(new ByteArrayInputStream(s"$i".getBytes()), dir,
s"$i.txt")
+ dir.toFile
+ }.foreach { dirFile =>
+ assert(dirFile.exists())
+ tempFileService.addPathToExpiration(dirFile.toPath)
+ dirFile
+ }
+
+
assertResult(tempFilesMaxCount)(tempFileService.remainedExpiringFilesCount())
+ }
}