This is an automated email from the ASF dual-hosted git repository.
mani pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/yunikorn-core.git
The following commit(s) were added to refs/heads/master by this push:
new 21874e57 [YUNIKORN-3250] Fix UserGroupCache entries never expiring
(#1081)
21874e57 is described below
commit 21874e57be999907dd0279404994e73336442a15
Author: Mit Desai <[email protected]>
AuthorDate: Mon Apr 6 13:35:38 2026 +0530
[YUNIKORN-3250] Fix UserGroupCache entries never expiring (#1081)
- Remove uninitialized `var now time.Time` whose zero value caused
now.Unix() to always return -62135596800, making cleanUpCache()
eviction conditions permanently false; replace all now.Unix() calls
with time.Now().Unix()
- Add inline TTL check in GetUserGroup so positive cache hits older
than poscache (300s) trigger immediate re-resolution rather than
waiting up to one cleaner interval (60s); reset ug before
re-resolution to prevent group duplication
- Add regression tests TestCleanUpCacheUsesRealTime and
TestPositiveCacheHitExpiryTriggersRefresh that fail against the
unfixed code
Closes: #1081
Signed-off-by: mani <[email protected]>
---
pkg/common/security/usergroup.go | 17 ++++++------
pkg/common/security/usergroup_test.go | 51 +++++++++++++++++++++++++++++++++++
2 files changed, 59 insertions(+), 9 deletions(-)
diff --git a/pkg/common/security/usergroup.go b/pkg/common/security/usergroup.go
index eae10a44..956492e3 100644
--- a/pkg/common/security/usergroup.go
+++ b/pkg/common/security/usergroup.go
@@ -41,7 +41,6 @@ const (
)
// global variables
-var now time.Time // One clock to access
var instance *UserGroupCache // The instance of the cache
var once = &sync.Once{} // Make sure we can only create the cache once
var stopped atomic.Bool // whether UserGroupCache is stopped (needed for
multiple partitions)
@@ -123,8 +122,8 @@ func (c *UserGroupCache) run() {
// Do the real work for the cache cleanup
func (c *UserGroupCache) cleanUpCache() {
- oldest := now.Unix() - poscache
- oldestFailed := now.Unix() - negcache
+ oldest := time.Now().Unix() - poscache
+ oldestFailed := time.Now().Unix() - negcache
// clean up the cache so we do not grow out of bounds
instance.lock.Lock()
defer instance.lock.Unlock()
@@ -173,7 +172,7 @@ func (c *UserGroupCache) ConvertUGI(ugi
*si.UserGroupInformation, force bool) (U
// If groups are already present we should just convert
newUG := UserGroup{User: ugi.User}
newUG.Groups = append(newUG.Groups, ugi.Groups...)
- newUG.resolved = now.Unix()
+ newUG.resolved = time.Now().Unix()
c.lock.Lock()
defer c.lock.Unlock()
c.ugs[ugi.User] = &newUG
@@ -192,12 +191,12 @@ func (c *UserGroupCache) GetUserGroup(userName string)
(UserGroup, error) {
c.lock.RLock()
ug, ok := c.ugs[userName]
c.lock.RUnlock()
- // return if this was not a negative cache that has not timed out
- if ok && !ug.failed {
+ // return if valid positive cache hit (not failed and not expired)
+ if ok && !ug.failed && (time.Now().Unix()-ug.resolved) < poscache {
return *ug, nil
}
- // nothing returned so create a new one
- if ug == nil {
+ // create a fresh entry for re-resolution unless we have a fresh
negative cache hit
+ if ug == nil || !ug.failed {
ug = &UserGroup{
User: userName,
}
@@ -228,7 +227,7 @@ func (c *UserGroupCache) GetUserGroup(userName string)
(UserGroup, error) {
}
}
// all resolved (or not) but use this time stamp
- ug.resolved = now.Unix()
+ ug.resolved = time.Now().Unix()
// add it to the cache, even if we fail negative cache is also good to
know
c.lock.Lock()
diff --git a/pkg/common/security/usergroup_test.go
b/pkg/common/security/usergroup_test.go
index 2a96fab8..1749ccd9 100644
--- a/pkg/common/security/usergroup_test.go
+++ b/pkg/common/security/usergroup_test.go
@@ -542,3 +542,54 @@ func TestConvertUGI(t *testing.T) {
})
}
}
+
+// TestCleanUpCacheUsesRealTime verifies that cleanUpCache uses real
wall-clock time for eviction.
+// Without the Bug 1 fix (var now time.Time never assigned), now.Unix() always
returns a
+// large negative constant so the eviction condition is never true and entries
live forever.
+func TestCleanUpCacheUsesRealTime(t *testing.T) {
+ testCache := GetUserGroupCache(testResolver, &ConfigReaderMock{},
&LdapAccessMock{})
+ testCache.resetCache()
+
+ _, err := testCache.GetUserGroup("testuser1")
+ assert.NilError(t, err)
+ assert.Equal(t, 1, testCache.getUGsize())
+
+ // Stamp the entry as resolved far in the past (2× TTL ago).
+ testCache.lock.Lock()
+ testCache.ugs["testuser1"].resolved = time.Now().Unix() - 2*poscache
+ testCache.lock.Unlock()
+
+ testCache.cleanUpCache()
+
+ // The entry must be evicted. With the broken var-now bug this
assertion fails
+ // because cleanUpCache's threshold is computed from the zero time.Time
value.
+ assert.Equal(t, 0, testCache.getUGsize(), "stale entry was not evicted
by cleanUpCache — Bug 1 not fixed")
+}
+
+// TestPositiveCacheHitExpiryTriggersRefresh verifies that GetUserGroup
re-resolves a user
+// whose positive cache entry has exceeded poscache seconds.
+// Without the Bug 2 fix, the stale entry is returned blindly regardless of
age.
+func TestPositiveCacheHitExpiryTriggersRefresh(t *testing.T) {
+ testCache := GetUserGroupCache(testResolver, &ConfigReaderMock{},
&LdapAccessMock{})
+ testCache.resetCache()
+
+ _, err := testCache.GetUserGroup("testuser1")
+ assert.NilError(t, err)
+
+ // Age the cached entry beyond poscache.
+ testCache.lock.Lock()
+ testCache.ugs["testuser1"].resolved = time.Now().Unix() - 2*poscache
+ staleResolved := testCache.ugs["testuser1"].resolved
+ testCache.lock.Unlock()
+
+ // A fresh lookup must re-resolve and stamp a new resolved timestamp.
+ ug, err := testCache.GetUserGroup("testuser1")
+ assert.NilError(t, err)
+ if ug.resolved == staleResolved {
+ t.Error("GetUserGroup returned stale cached entry after TTL
expiry — Bug 2 not fixed")
+ }
+
+ // Re-resolution must not duplicate groups (groups are appended during
resolveGroups;
+ // if the stale entry's Groups slice is reused the count doubles).
+ assert.Equal(t, 2, len(ug.Groups), "re-resolved groups are duplicated —
stale ug.Groups was not reset before re-resolution")
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]