This is an automated email from the ASF dual-hosted git repository.

mani pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/yunikorn-core.git


The following commit(s) were added to refs/heads/master by this push:
     new 21874e57 [YUNIKORN-3250] Fix UserGroupCache entries never expiring 
(#1081)
21874e57 is described below

commit 21874e57be999907dd0279404994e73336442a15
Author: Mit Desai <[email protected]>
AuthorDate: Mon Apr 6 13:35:38 2026 +0530

    [YUNIKORN-3250] Fix UserGroupCache entries never expiring (#1081)
    
    - Remove uninitialized `var now time.Time` whose zero value caused
      now.Unix() to always return -62135596800, making cleanUpCache()
      eviction conditions permanently false; replace all now.Unix() calls
      with time.Now().Unix()
    - Add inline TTL check in GetUserGroup so positive cache hits older
      than poscache (300s) trigger immediate re-resolution rather than
      waiting up to one cleaner interval (60s); reset ug before
      re-resolution to prevent group duplication
    - Add regression tests TestCleanUpCacheUsesRealTime and
      TestPositiveCacheHitExpiryTriggersRefresh that fail against the
      unfixed code
    
    Closes: #1081
    
    Signed-off-by: mani <[email protected]>
---
 pkg/common/security/usergroup.go      | 17 ++++++------
 pkg/common/security/usergroup_test.go | 51 +++++++++++++++++++++++++++++++++++
 2 files changed, 59 insertions(+), 9 deletions(-)

diff --git a/pkg/common/security/usergroup.go b/pkg/common/security/usergroup.go
index eae10a44..956492e3 100644
--- a/pkg/common/security/usergroup.go
+++ b/pkg/common/security/usergroup.go
@@ -41,7 +41,6 @@ const (
 )
 
 // global variables
-var now time.Time            // One clock to access
 var instance *UserGroupCache // The instance of the cache
 var once = &sync.Once{}      // Make sure we can only create the cache once
 var stopped atomic.Bool      // whether UserGroupCache is stopped (needed for 
multiple partitions)
@@ -123,8 +122,8 @@ func (c *UserGroupCache) run() {
 
 // Do the real work for the cache cleanup
 func (c *UserGroupCache) cleanUpCache() {
-       oldest := now.Unix() - poscache
-       oldestFailed := now.Unix() - negcache
+       oldest := time.Now().Unix() - poscache
+       oldestFailed := time.Now().Unix() - negcache
        // clean up the cache so we do not grow out of bounds
        instance.lock.Lock()
        defer instance.lock.Unlock()
@@ -173,7 +172,7 @@ func (c *UserGroupCache) ConvertUGI(ugi 
*si.UserGroupInformation, force bool) (U
        // If groups are already present we should just convert
        newUG := UserGroup{User: ugi.User}
        newUG.Groups = append(newUG.Groups, ugi.Groups...)
-       newUG.resolved = now.Unix()
+       newUG.resolved = time.Now().Unix()
        c.lock.Lock()
        defer c.lock.Unlock()
        c.ugs[ugi.User] = &newUG
@@ -192,12 +191,12 @@ func (c *UserGroupCache) GetUserGroup(userName string) 
(UserGroup, error) {
        c.lock.RLock()
        ug, ok := c.ugs[userName]
        c.lock.RUnlock()
-       // return if this was not a negative cache that has not timed out
-       if ok && !ug.failed {
+       // return if valid positive cache hit (not failed and not expired)
+       if ok && !ug.failed && (time.Now().Unix()-ug.resolved) < poscache {
                return *ug, nil
        }
-       // nothing returned so create a new one
-       if ug == nil {
+       // create a fresh entry for re-resolution unless we have a fresh 
negative cache hit
+       if ug == nil || !ug.failed {
                ug = &UserGroup{
                        User: userName,
                }
@@ -228,7 +227,7 @@ func (c *UserGroupCache) GetUserGroup(userName string) 
(UserGroup, error) {
                }
        }
        // all resolved (or not) but use this time stamp
-       ug.resolved = now.Unix()
+       ug.resolved = time.Now().Unix()
 
        // add it to the cache, even if we fail negative cache is also good to 
know
        c.lock.Lock()
diff --git a/pkg/common/security/usergroup_test.go 
b/pkg/common/security/usergroup_test.go
index 2a96fab8..1749ccd9 100644
--- a/pkg/common/security/usergroup_test.go
+++ b/pkg/common/security/usergroup_test.go
@@ -542,3 +542,54 @@ func TestConvertUGI(t *testing.T) {
                })
        }
 }
+
+// TestCleanUpCacheUsesRealTime verifies that cleanUpCache uses real 
wall-clock time for eviction.
+// Without the Bug 1 fix (var now time.Time never assigned), now.Unix() always 
returns a
+// large negative constant so the eviction condition is never true and entries 
live forever.
+func TestCleanUpCacheUsesRealTime(t *testing.T) {
+       testCache := GetUserGroupCache(testResolver, &ConfigReaderMock{}, 
&LdapAccessMock{})
+       testCache.resetCache()
+
+       _, err := testCache.GetUserGroup("testuser1")
+       assert.NilError(t, err)
+       assert.Equal(t, 1, testCache.getUGsize())
+
+       // Stamp the entry as resolved far in the past (2× TTL ago).
+       testCache.lock.Lock()
+       testCache.ugs["testuser1"].resolved = time.Now().Unix() - 2*poscache
+       testCache.lock.Unlock()
+
+       testCache.cleanUpCache()
+
+       // The entry must be evicted. With the broken var-now bug this 
assertion fails
+       // because cleanUpCache's threshold is computed from the zero time.Time 
value.
+       assert.Equal(t, 0, testCache.getUGsize(), "stale entry was not evicted 
by cleanUpCache — Bug 1 not fixed")
+}
+
+// TestPositiveCacheHitExpiryTriggersRefresh verifies that GetUserGroup 
re-resolves a user
+// whose positive cache entry has exceeded poscache seconds.
+// Without the Bug 2 fix, the stale entry is returned blindly regardless of 
age.
+func TestPositiveCacheHitExpiryTriggersRefresh(t *testing.T) {
+       testCache := GetUserGroupCache(testResolver, &ConfigReaderMock{}, 
&LdapAccessMock{})
+       testCache.resetCache()
+
+       _, err := testCache.GetUserGroup("testuser1")
+       assert.NilError(t, err)
+
+       // Age the cached entry beyond poscache.
+       testCache.lock.Lock()
+       testCache.ugs["testuser1"].resolved = time.Now().Unix() - 2*poscache
+       staleResolved := testCache.ugs["testuser1"].resolved
+       testCache.lock.Unlock()
+
+       // A fresh lookup must re-resolve and stamp a new resolved timestamp.
+       ug, err := testCache.GetUserGroup("testuser1")
+       assert.NilError(t, err)
+       if ug.resolved == staleResolved {
+               t.Error("GetUserGroup returned stale cached entry after TTL 
expiry — Bug 2 not fixed")
+       }
+
+       // Re-resolution must not duplicate groups (groups are appended during 
resolveGroups;
+       // if the stale entry's Groups slice is reused the count doubles).
+       assert.Equal(t, 2, len(ug.Groups), "re-resolved groups are duplicated — 
stale ug.Groups was not reset before re-resolution")
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to