This is an automated email from the ASF dual-hosted git repository.

sbp pushed a commit to branch sbp
in repository https://gitbox.apache.org/repos/asf/tooling-trusted-releases.git

commit 192c98816afb8f0610b96e9e8f078af9c64b232f
Author: Sean B. Palmer <[email protected]>
AuthorDate: Thu Mar 5 15:05:01 2026 +0000

    Use a strict structured subset of LDAP results only
---
 atr/admin/__init__.py                |  1 -
 atr/admin/templates/ldap-lookup.html |  8 ++--
 atr/ldap.py                          | 82 +++++++++++++++++++++---------------
 atr/principal.py                     | 31 +++++---------
 atr/util.py                          | 25 +++--------
 tests/unit/test_message.py           | 16 ++++++-
 6 files changed, 82 insertions(+), 81 deletions(-)

diff --git a/atr/admin/__init__.py b/atr/admin/__init__.py
index 6a759d93..b877005d 100644
--- a/atr/admin/__init__.py
+++ b/atr/admin/__init__.py
@@ -543,7 +543,6 @@ async def ldap_post(session: web.Committer, lookup_form: 
LdapLookupForm) -> str:
             email_query=email_query,
             bind_dn_from_config=bind_dn,
             bind_password_from_config=bind_password,
-            email_only=False,
         )
         await asyncio.to_thread(ldap.search, ldap_params)
         end = time.perf_counter_ns()
diff --git a/atr/admin/templates/ldap-lookup.html 
b/atr/admin/templates/ldap-lookup.html
index 7278e96f..a2482ccf 100644
--- a/atr/admin/templates/ldap-lookup.html
+++ b/atr/admin/templates/ldap-lookup.html
@@ -48,9 +48,9 @@
               <tbody>
                 {% for result in ldap_params.results_list %}
                   <tr>
-                    <td>{{ result.get('uid', [''])[0] }}</td>
-                    <td>{{ result.get('cn', [''])[0] }}</td>
-                    <td>{{ result.get('mail', [''])[0] }}</td>
+                    <td>{{ (result.uid or [''])[0] }}</td>
+                    <td>{{ (result.cn or [''])[0] }}</td>
+                    <td>{{ (result.mail or [''])[0] }}</td>
                   </tr>
                 {% endfor %}
               </tbody>
@@ -65,7 +65,7 @@
                   </tr>
                 </thead>
                 <tbody>
-                  {% for key, value in result.items()|sort %}
+                  {% for key, value in 
result.model_dump(by_alias=True).items()|sort %}
                     <tr>
                       <td><strong>{{ key }}</strong></td>
                       <td>
diff --git a/atr/ldap.py b/atr/ldap.py
index eddd112e..e05fe99b 100644
--- a/atr/ldap.py
+++ b/atr/ldap.py
@@ -19,17 +19,46 @@ import asyncio
 import collections
 import dataclasses
 import ssl
-from typing import Any, Final, Literal
+from typing import Final, Literal
 
 import ldap3
 import ldap3.utils.conv as conv
 import ldap3.utils.dn as dn
 
+import atr.models.schema as schema
+
 LDAP_ROOT_BASE: Final[str] = 
"cn=infrastructure-root,ou=groups,ou=services,dc=apache,dc=org"
 LDAP_SEARCH_BASE: Final[str] = "ou=people,dc=apache,dc=org"
 LDAP_SERVER_HOST: Final[str] = "ldap-eu.apache.org"
 LDAP_TOOLING_BASE: Final[str] = 
"cn=tooling,ou=groups,ou=services,dc=apache,dc=org"
 
+RESULT_ATTRIBUTES: Final[list[str]] = [
+    "asf-altEmail",
+    "asf-banned",
+    "asf-committer-email",
+    "cn",
+    "mail",
+    "member",
+    "memberUid",
+    "uid",
+]
+
+
+class Result(schema.Strict):
+    model_config = schema.pydantic.ConfigDict(
+        extra="forbid", strict=True, validate_assignment=True, 
populate_by_name=True
+    )
+
+    dn: str
+    asf_alt_email: list[str] = schema.Field(default_factory=list, 
alias="asf-altEmail")
+    asf_banned: list[str] = schema.Field(default_factory=list, 
alias="asf-banned")
+    asf_committer_email: list[str] = schema.Field(default_factory=list, 
alias="asf-committer-email")
+    cn: list[str] = schema.Field(default_factory=list)
+    mail: list[str] = schema.Field(default_factory=list)
+    member: list[str] = schema.Field(default_factory=list)
+    member_uid: list[str] = schema.Field(default_factory=list, 
alias="memberUid")
+    uid: list[str] = schema.Field(default_factory=list)
+
 
 _tls_config = ldap3.Tls(
     validate=ssl.CERT_REQUIRED,
@@ -63,22 +92,20 @@ class Search:
         ldap_scope: Literal["BASE", "LEVEL", "SUBTREE"],
         ldap_query: str = "(objectClass=*)",
         ldap_attrs: list[str] | None = None,
-    ) -> list[dict[str, Any]]:
+    ) -> list[Result]:
         if not self._conn:
             raise RuntimeError("LDAP connection not available")
 
-        attributes = ldap_attrs if ldap_attrs else ldap3.ALL_ATTRIBUTES
+        attributes = ldap_attrs if ldap_attrs else RESULT_ATTRIBUTES
         self._conn.search(
             search_base=ldap_base,
             search_filter=ldap_query,
             search_scope=ldap_scope,
             attributes=attributes,
         )
-        results = []
+        results: list[Result] = []
         for entry in self._conn.entries:
-            result_item: dict[str, str | list[str]] = {"dn": entry.entry_dn}
-            result_item.update(entry.entry_attributes_as_dict)
-            results.append(result_item)
+            results.append(Result.model_validate({"dn": entry.entry_dn, 
**entry.entry_attributes_as_dict}))
         return results
 
 
@@ -95,19 +122,18 @@ class SearchParameters:
     github_nid_query: int | None = None
     bind_dn_from_config: str | None = None
     bind_password_from_config: str | None = None
-    results_list: list[dict[str, str | list[str]]] = 
dataclasses.field(default_factory=list)
+    results_list: list[Result] = dataclasses.field(default_factory=list)
     err_msg: str | None = None
     srv_info: str | None = None
     detail_err: str | None = None
     connection: ldap3.Connection | None = None
-    email_only: bool = False
 
 
-async def account_lookup(asf_uid: str) -> dict[str, str | list[str]] | None:
+async def account_lookup(asf_uid: str) -> Result | None:
     """
     Look up an account in LDAP by ASF UID.
 
-    Returns the account details dict if found, None if the account does not 
exist.
+    Returns the account details if found, None if the account does not exist.
     If LDAP is not configured, returns None to avoid breaking functionality.
     """
     credentials = get_bind_credentials()
@@ -146,10 +172,7 @@ async def fetch_admin_users() -> frozenset[str]:
                     result = ldap_search.search(ldap_base=base, 
ldap_scope="BASE")
                     if (not result) or (len(result) != 1):
                         continue
-                    members = result[0].get("member", [])
-                    if not isinstance(members, list):
-                        continue
-                    for member_dn in members:
+                    for member_dn in result[0].member:
                         parsed = parse_dn(member_dn)
                         uids = parsed.get("uid", [])
                         if uids:
@@ -179,10 +202,7 @@ async def fetch_tooling_users(extra: set[str]) -> set[str]:
                     result = ldap_search.search(ldap_base=base, 
ldap_scope="BASE")
                     if (not result) or (len(result) != 1):
                         continue
-                    members = result[0].get("member", [])
-                    if not isinstance(members, list):
-                        continue
-                    for member_dn in members:
+                    for member_dn in result[0].member:
                         parsed = parse_dn(member_dn)
                         uids = parsed.get("uid", [])
                         if uids:
@@ -217,10 +237,9 @@ async def github_to_apache(github_numeric_uid: int) -> str:
         github_nid_query=github_numeric_uid,
     )
     await asyncio.to_thread(search, ldap_params)
-    if not (ldap_params.results_list and ("uid" in 
ldap_params.results_list[0])):
+    if not (ldap_params.results_list and ldap_params.results_list[0].uid):
         raise LookupError(f"GitHub NID {github_numeric_uid} not registered 
with the ATR")
-    ldap_uid_val = ldap_params.results_list[0]["uid"]
-    return ldap_uid_val[0] if isinstance(ldap_uid_val, list) else ldap_uid_val
+    return ldap_params.results_list[0].uid[0]
 
 
 async def is_active(asf_uid: str) -> bool:
@@ -236,13 +255,10 @@ async def is_active(asf_uid: str) -> bool:
     return not is_banned(account)
 
 
-def is_banned(account: dict[str, str | list[str]]) -> bool:
-    banned_attr = account.get("asf-banned", "no")
-    # This is mostly for the type checker, but since asf-banned is missing 
from non-banned accounts,
-    # it should be safe to say if it has any value then the account is banned.
-    if not isinstance(banned_attr, str):
-        return True
-    return banned_attr.lower() == "yes"
+def is_banned(account: Result) -> bool:
+    # In ASF LDAP, non banned accounts do not carry this attribute
+    # Therefore, we treat any present value as banned
+    return bool(account.asf_banned)
 
 
 def parse_dn(dn_string: str) -> dict[str, list[str]]:
@@ -322,17 +338,13 @@ def _search_core_2(params: SearchParameters, filters: 
list[str]) -> None:
         params.err_msg = "LDAP Connection object not established or auto_bind 
failed."
         return
 
-    email_attributes = ["uid", "mail", "asf-altEmail", "asf-committer-email"]
-    attributes = email_attributes if params.email_only else 
ldap3.ALL_ATTRIBUTES
     params.connection.search(
         search_base=LDAP_SEARCH_BASE,
         search_filter=search_filter,
-        attributes=attributes,
+        attributes=RESULT_ATTRIBUTES,
     )
     for entry in params.connection.entries:
-        result_item: dict[str, str | list[str]] = {"dn": entry.entry_dn}
-        result_item.update(entry.entry_attributes_as_dict)
-        params.results_list.append(result_item)
+        params.results_list.append(Result.model_validate({"dn": 
entry.entry_dn, **entry.entry_attributes_as_dict}))
 
     if (not params.results_list) and (not params.err_msg):
         params.err_msg = "No results found for the given criteria."
diff --git a/atr/principal.py b/atr/principal.py
index 2e602b33..d6e4d190 100644
--- a/atr/principal.py
+++ b/atr/principal.py
@@ -92,7 +92,7 @@ class Committer:
             log.info(f"Took {finish - start:,} ns to get committer details")
 
             start = time.perf_counter_ns()
-            member_list = self._get_group_membership(ldap_search, 
LDAP_MEMBER_BASE, "memberUid", 100)
+            member_list = self._get_group_membership(ldap_search, 
LDAP_MEMBER_BASE, "member_uid", 100)
             self.isMember = self.user in member_list
             finish = time.perf_counter_ns()
             log.info(f"Took {finish - start:,} ns to get member list")
@@ -140,17 +140,16 @@ class Committer:
             raise CommitterError("An unknown error occurred while fetching 
user details.") from ex
 
         data = result[0]
-        if data.get("asf-banned"):
+        if data.asf_banned:
             raise CommitterError(
                 "This account has been administratively locked. Please contact 
[email protected] for further details."
             )
 
-        fn = data.get("cn")
-        if not (isinstance(fn, list) and (len(fn) == 1)):
+        if not (len(data.cn) == 1):
             raise CommitterError("Common backend assertions failed, LDAP 
corruption?")
-        self.fullname = fn[0]
-        self.emails = attr_to_list(data.get("mail"))
-        self.altemails = attr_to_list(data.get("asf-altEmail"))
+        self.fullname = data.cn[0]
+        self.emails = list(set(data.mail))
+        self.altemails = list(set(data.asf_alt_email))
 
     def _get_group_membership(
         self, ldap_search: ldap.Search, ldap_base: str, attribute: str, 
min_members: int = 0
@@ -170,9 +169,7 @@ class Committer:
                 f"An unknown error occurred while fetching group memberships 
from {ldap_base}."
             ) from ex
 
-        members = result[0].get(attribute)
-        if not isinstance(members, list):
-            raise CommitterError("Common backend assertions failed, LDAP 
corruption?")
+        members = getattr(result[0], attribute)
         if len(members) < min_members:
             raise CommitterError("Common backend assertions failed, LDAP 
corruption?")
         return members
@@ -191,13 +188,10 @@ class Committer:
 
         committees_or_projects = []
         for hit in result:
-            if not isinstance(hit, dict):
-                raise CommitterError("Common backend assertions failed, LDAP 
corruption?")
-            cn = hit.get("cn")
-            if not (isinstance(cn, list) and (len(cn) == 1)):
+            if not (len(hit.cn) == 1):
                 raise CommitterError("Common backend assertions failed, LDAP 
corruption?")
-            committee_or_project_name = cn[0]
-            if not (committee_or_project_name and 
isinstance(committee_or_project_name, str)):
+            committee_or_project_name = hit.cn[0]
+            if not committee_or_project_name:
                 raise CommitterError("Common backend assertions failed, LDAP 
corruption?")
             committees_or_projects.append(committee_or_project_name)
         return committees_or_projects
@@ -404,11 +398,6 @@ class Authorisation(AsyncObject):
         return self.__authoriser.member_of(self.__asf_uid)
 
 
-def attr_to_list(attr):
-    """Converts a list of bytestring attribute values to a unique list of 
strings."""
-    return list(set([value for value in attr or []]))
-
-
 def get_ldap_bind_dn_and_password() -> tuple[str, str]:
     conf = config.get()
     bind_dn = conf.LDAP_BIND_DN
diff --git a/atr/util.py b/atr/util.py
index 4c6ff5d7..254512e1 100644
--- a/atr/util.py
+++ b/atr/util.py
@@ -144,10 +144,9 @@ def as_url(func: Callable, **kwargs: Any) -> str:
 def asf_uid_from_email(email: str) -> str | None:
     ldap_params = ldap.SearchParameters(email_query=email)
     ldap.search(ldap_params)
-    if not (ldap_params.results_list and ("uid" in 
ldap_params.results_list[0])):
+    if not (ldap_params.results_list and ldap_params.results_list[0].uid):
         return None
-    ldap_uid_val = ldap_params.results_list[0]["uid"]
-    return ldap_uid_val[0] if isinstance(ldap_uid_val, list) else ldap_uid_val
+    return ldap_params.results_list[0].uid[0]
 
 
 async def asf_uid_from_uids(
@@ -402,14 +401,6 @@ async def email_mid_from_thread_id(thread_id: str) -> 
tuple[str, str]:
 
 
 async def email_to_uid_map() -> dict[str, str]:
-    def values(entry: dict, prop: str) -> list[str]:
-        raw_values = entry.get(prop, [])
-        if isinstance(raw_values, list):
-            return [v for v in raw_values if v]
-        if raw_values:
-            return [raw_values]
-        return []
-
     # Get all email addresses in LDAP
     conf = config.AppConfig()
     bind_dn = conf.LDAP_BIND_DN
@@ -418,20 +409,18 @@ async def email_to_uid_map() -> dict[str, str]:
         uid_query="*",
         bind_dn_from_config=bind_dn,
         bind_password_from_config=bind_password,
-        email_only=True,
     )
     await asyncio.to_thread(ldap.search, ldap_params)
 
     # Map the LDAP addresses to Apache UIDs
-    email_to_uid = {}
+    email_to_uid: dict[str, str] = {}
     for entry in ldap_params.results_list:
-        uid = entry.get("uid", [""])[0]
-        uid_lower = uid.lower()
-        for mail in values(entry, "mail"):
+        uid_lower = (entry.uid[0] if entry.uid else "").lower()
+        for mail in entry.mail:
             email_to_uid[mail.lower()] = uid_lower
-        for alt_email in values(entry, "asf-altEmail"):
+        for alt_email in entry.asf_alt_email:
             email_to_uid[alt_email.lower()] = uid_lower
-        for committer_email in values(entry, "asf-committer-email"):
+        for committer_email in entry.asf_committer_email:
             email_to_uid[committer_email.lower()] = uid_lower
     return email_to_uid
 
diff --git a/tests/unit/test_message.py b/tests/unit/test_message.py
index 1194061a..048d4538 100644
--- a/tests/unit/test_message.py
+++ b/tests/unit/test_message.py
@@ -23,6 +23,7 @@ from typing import TYPE_CHECKING
 
 import pytest
 
+import atr.ldap as ldap
 import atr.tasks.message as message
 
 if TYPE_CHECKING:
@@ -34,7 +35,14 @@ async def test_send_rejects_banned_asf_account(monkeypatch: 
"MonkeyPatch") -> No
     """Test that a banned ASF account raises SendError."""
     monkeypatch.setattr(
         "atr.tasks.message.ldap.account_lookup",
-        mock.AsyncMock(return_value={"uid": "banneduser", "cn": "Banned User", 
"asf-banned": "yes"}),
+        mock.AsyncMock(
+            return_value=ldap.Result(
+                dn="uid=banneduser,ou=people,dc=apache,dc=org",
+                uid=["banneduser"],
+                cn=["Banned User"],
+                asf_banned=["yes"],
+            )
+        ),
     )
 
     with pytest.raises(message.SendError, match=r"banned"):
@@ -66,7 +74,11 @@ async def test_send_succeeds_with_valid_asf_id(monkeypatch: 
"MonkeyPatch") -> No
     # ldap.account_lookup returns a dict for a known UID
     monkeypatch.setattr(
         "atr.tasks.message.ldap.account_lookup",
-        mock.AsyncMock(return_value={"uid": "validuser", "cn": "Valid User"}),
+        mock.AsyncMock(
+            return_value=ldap.Result(
+                dn="uid=validuser,ou=people,dc=apache,dc=org", 
uid=["validuser"], cn=["Valid User"]
+            )
+        ),
     )
 
     # Mock the storage.write async context manager chain:


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to