alitheg commented on code in PR #1244:
URL: 
https://github.com/apache/tooling-trusted-releases/pull/1244#discussion_r3246987835


##########
atr/admin/__init__.py:
##########
@@ -1293,7 +1294,7 @@ async def validate_jwt_post(
 
 
 async def _check_keys(fix: bool = False) -> str:
-    email_to_uid = await util.email_to_uid_map()
+    await cache.email_uid_view_or_live()

Review Comment:
   I see two patterns - "ask for the cache to make sure it's populated then 
call a method later that I know accesses the cache" (in this file) or "ask for 
the cache and return it so I can use it" (in keys.py and tabulate.py). Both 
work obviously - do you think we should have both or should we standardise on 
one?



##########
atr/cache.py:
##########
@@ -95,6 +134,153 @@ async def admins_startup_load() -> None:
         log.warning(f"Failed to fetch admin users from LDAP at startup: {e}")
 
 
+def email_uid_apply_delta(uid: str, old_emails: Iterable[str], new_emails: 
Iterable[str]) -> bool:
+    if asfquart.APP is None:
+        return False
+    hashes = asfquart.APP.extensions.get("email_uid_hashes")
+    reverse = asfquart.APP.extensions.get("email_uid_reverse")
+    if (not isinstance(hashes, dict)) or (not isinstance(reverse, dict)):
+        return False
+    uid_lower = uid.lower()
+    old_set = {e.lower() for e in old_emails if e}
+    new_set = {e.lower() for e in new_emails if e}
+    removed = old_set - new_set
+    added = new_set - old_set
+    if (not removed) and (not added):
+        return False
+    existing = set(reverse.get(uid_lower, []))
+    for email in removed:
+        h = _email_uid_hash(email)
+        if hashes.get(h) == uid_lower:
+            del hashes[h]
+        existing.discard(h)
+    for email in added:
+        h = _email_uid_hash(email)
+        hashes[h] = uid_lower
+        existing.add(h)
+    if existing:
+        reverse[uid_lower] = sorted(existing)
+    elif uid_lower in reverse:
+        del reverse[uid_lower]
+    asfquart.APP.extensions["email_uid_refreshed"] = 
datetime.datetime.now(datetime.UTC)
+    return True
+
+
+def email_uid_erase() -> None:
+    cache_path = _email_uid_path()
+    try:
+        cache_path.unlink(missing_ok=True)
+    except OSError as e:
+        log.warning(f"Failed to erase email-to-UID cache: {e}")
+
+
+def email_uid_lookup(email: str) -> str | None:
+    return _email_uid_view().get(email)
+
+
+def email_uid_purge_uid(uid: str) -> bool:
+    if asfquart.APP is None:
+        return False
+    hashes = asfquart.APP.extensions.get("email_uid_hashes")
+    reverse = asfquart.APP.extensions.get("email_uid_reverse")
+    if (not isinstance(hashes, dict)) or (not isinstance(reverse, dict)):
+        return False
+    uid_lower = uid.lower()
+    uid_hashes = reverse.pop(uid_lower, [])
+    if not uid_hashes:
+        return False
+    for h in uid_hashes:
+        if hashes.get(h) == uid_lower:
+            del hashes[h]
+    asfquart.APP.extensions["email_uid_refreshed"] = 
datetime.datetime.now(datetime.UTC)
+    return True
+
+
+async def email_uid_refresh() -> None:
+    import atr.util as util
+
+    email_to_uid = await util.email_to_uid_map()
+    hashes: dict[str, str] = {}
+    reverse: dict[str, list[str]] = {}
+    for email, uid in email_to_uid.items():
+        if (not email) or (not uid):
+            continue
+        h = _email_uid_hash(email)
+        hashes[h] = uid
+        reverse.setdefault(uid, []).append(h)
+    if (not hashes) or (not reverse):
+        log.warning(
+            "Email-to-UID cache refresh produced no usable LDAP email 
mappings; not writing cache: "
+            f"ldap_email_values={len(email_to_uid)}"
+        )
+        raise RuntimeError("Email-to-UID cache refresh produced no usable LDAP 
email mappings")
+    await email_uid_save_to_file(hashes, reverse)
+    _email_uid_update_app_extensions(hashes, reverse)
+    log.info(f"Email-to-UID cache refreshed: {len(hashes)} hashes for 
{len(reverse)} users")
+
+
+async def email_uid_save_current_to_file() -> None:
+    if asfquart.APP is None:
+        return
+    hashes = asfquart.APP.extensions.get("email_uid_hashes")
+    reverse = asfquart.APP.extensions.get("email_uid_reverse")
+    if (not isinstance(hashes, dict)) or (not isinstance(reverse, dict)):
+        return
+    hashes_copy = dict(hashes)
+    reverse_copy = {uid: list(hs) for uid, hs in reverse.items()}
+    await email_uid_save_to_file(hashes_copy, reverse_copy)
+    _email_uid_update_app_extensions(hashes_copy, reverse_copy)
+
+
+async def email_uid_save_to_file(hashes: dict[str, str], reverse: dict[str, 
list[str]]) -> None:
+    cache_path = _email_uid_path()
+    cache_path.parent.mkdir(parents=True, exist_ok=True)
+    cache_data = EmailUidCache(
+        refreshed=datetime.datetime.now(datetime.UTC),
+        hashes=hashes,
+        reverse=reverse,
+    )
+    content = cache_data.model_dump_json()
+    temp_path = cache_path.parent / f".{cache_path.name}.{uuid.uuid4()}.tmp"
+    try:
+        async with aiofiles.open(temp_path, "w") as f:
+            await f.write(content)
+            await f.flush()
+            await asyncio.to_thread(os.fsync, f.fileno())
+        await asyncio.to_thread(os.chmod, temp_path, 0o400)
+        await aiofiles.os.rename(temp_path, cache_path)
+    except Exception:
+        with contextlib.suppress(FileNotFoundError):
+            await aiofiles.os.remove(temp_path)
+        raise
+
+
+async def email_uid_startup_load() -> None:
+    cache_data = await _email_uid_read_from_file_async()

Review Comment:
   Since we erase them on shutdown, surely there's no point trying to load them 
again? In fact, if we erase on startup is there any point in saving to a file 
at all?



##########
atr/util.py:
##########


Review Comment:
   I wonder if `asf_uid_from_email` should be renamed or made internal so that 
the cache version is the main place you can call to map an email?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to