hartmannathan commented on code in PR #16962:
URL: https://github.com/apache/nuttx/pull/16962#discussion_r2323902595


##########
tools/codeowners/codeowners.py:
##########
@@ -0,0 +1,179 @@
+#!/usr/bin/env python3
+
+import re
+import subprocess
+import sys
+from multiprocessing import Pool
+from typing import TypeAlias
+
+# Number of processes to use
+N_PROCESSES: int = 20
+
+# Top `n` authors to list as code owners in the CODEOWNERS file
+TOP_N_AUTHORS: int = 5
+
+# Command to get author email and number of changes
+CHANGE_STATS_CMD: list[str] = [
+    "git",
+    "--no-pager",
+    "log",
+    '--pretty="%ae"',
+    "--shortstat",
+]
+
+# Command to list all files tracked by git
+GIT_FILE_CMD: list[str] = ["git", "ls-files"]
+
+# Locations to ignore for code owner generation
+IGNORE_LOCS: list[str] = [
+    # Generated release notes
+    "Documentation/ReleaseNotes/*",
+    # Boiler plate files
+    "**/__init__.py",
+    # Files that aren't really necessary to have owners
+    "**/Make.defs",
+    "**/CMakeLists.txt",
+    # Image files from documentation
+    "*.png",
+    "*.jpg",
+]
+
+# Represents a code owner by email
+CodeOwner: TypeAlias = str
+
+
+class Ownership:
+    """Represents an ownership relationship for a path."""
+
+    def __init__(self, owner: CodeOwner, path: str, changes: int) -> None:
+        """Creates a new ownership relationship."""
+        self.owner: CodeOwner = owner
+        self.path: str = path
+        self.changes: int = changes
+
+    def __str__(self) -> str:
+        return f"Ownership(path='{self.path}', owner='{self.owner}', changes={self.changes})"
+
+    def __repr__(self) -> str:
+        return self.__str__()
+
+
+# Represents all contributors to a file
+Roster: TypeAlias = dict[str, list[Ownership]]
+
+
+def parse_change_count(changestr: str) -> int:
+    """Parses a string summary of changes to the file into an integer number of changes."""
+
+    change_types = changestr.split(",")
+
+    # Skip the number of files changed, which is always the first index
+    count = 0
+    for change in change_types[1:]:
+        value = re.search(r"\d+", change)
+        if value is not None:
+            count += int(value.group())
+
+    return count
+
+
+def get_owners(path: str) -> list[Ownership]:
+    """Gets ownership relationships for `path`."""
+
+    cmd = CHANGE_STATS_CMD.copy() + [path]
+    raw_logs = subprocess.run(cmd, capture_output=True, text=True).stdout
+    logs = [e for e in raw_logs.split("\n") if e != ""]
+
+    ownerships: dict[CodeOwner, Ownership] = dict()
+    for raw_email, raw_changes in zip(logs[::2], logs[1::2]):
+        owner = raw_email.replace('"', "")
+        change_count = parse_change_count(raw_changes)
+
+        if ownerships.get(owner) is None:
+            ownerships[owner] = Ownership(
+                path=path,
+                owner=owner,
+                changes=change_count,
+            )
+        else:
+            ownerships[owner].changes += change_count
+
+    return list(ownerships.values())
+
+
+def sort_by_changes(owners: list[Ownership]) -> None:
+    """Sorts `owners` by the number of changes each owner has made, from most to least changes."""
+    owners.sort(key=lambda o: o.changes, reverse=True)
+
+
+def filter_owners(owners: list[Ownership], ignore: list[CodeOwner]) -> list[Ownership]:
+    """Returns a list composing of the original `owners` list, but without any of the owners in the `ignore` list."""
+    return [o for o in owners if o.owner not in ignore]
+
+
+def get_files() -> list[str]:
+    """Returns a list of currently tracked files in the git repository."""
+
+    ignored_dirs = []
+    for loc in IGNORE_LOCS:
+        ignored_dirs.append(f":!:{loc}")
+    cmd = GIT_FILE_CMD.copy() + ["--"] + ignored_dirs
+    return (
+        subprocess.run(cmd, text=True, capture_output=True).stdout.strip().split("\n")
+    )
+
+
+def parse_ignore_list(path: str) -> list[CodeOwner]:
+    """Parses a file of code owners to ignore into a list."""
+
+    owners = []
+    with open(path, "r") as file:
+        for line in file:
+            owners.append(line.strip())
+    return owners
+
+
+def generate_codeowners(roster: Roster) -> None:
+    """Generates the output code-owner file in the console from the `roster`."""
+
+    for file, ownerships in roster.items():
+        print(file, end=" ")
+        for o in ownerships:
+            print(o.owner, end=" ")
+        print()
+
+
+def main() -> None:

Review Comment:
   If the script is not invoked from the repository's top-level directory, it
   should print a descriptive message telling the user to run it from there.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@nuttx.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

Reply via email to