On Fri, Aug 08, 2025 at 04:44:02PM +0000, Marat Khalili wrote:
> Thank you for doing this! Very cool script; see a couple of nits below.
> 
> > -----Original Message-----
> > From: Bruce Richardson <bruce.richard...@intel.com>
> > Sent: Friday 8 August 2025 15:27
> > To: dev@dpdk.org
> > Cc: Bruce Richardson <bruce.richard...@intel.com>
> > Subject: [PATCH 1/2] devtools/mailmap_ctl: script to work with mailmap
> > 
> > Add a script to easily add entries to, check and sort the mailmap file.
> > 
> > Signed-off-by: Bruce Richardson <bruce.richard...@intel.com>
> > ---
> >  devtools/mailmap_ctl.py | 211 ++++++++++++++++++++++++++++++++++++++++
> >  1 file changed, 211 insertions(+)
> >  create mode 100755 devtools/mailmap_ctl.py
> > 
> > diff --git a/devtools/mailmap_ctl.py b/devtools/mailmap_ctl.py
> > new file mode 100755
> > index 0000000000..ffb7bcd69b
> > --- /dev/null
> > +++ b/devtools/mailmap_ctl.py
> > @@ -0,0 +1,211 @@
> > +#!/usr/bin/env python3
> > +# SPDX-License-Identifier: BSD-3-Clause
> > +# Copyright(c) 2025 Intel Corporation
> > +
> > +"""
> > +A tool for manipulating the .mailmap file in DPDK repository.
> > +
> > +This script supports three operations:
> > +- add: adds a new entry to the mailmap file in the correct position
> > +- check: validates mailmap entries are sorted and correctly formatted
> > +- sort: sorts the mailmap entries alphabetically by name
> > +"""
> > +
> > +import sys
> > +import os
> > +import re
> > +import argparse
> > +import unicodedata
> > +from pathlib import Path
> > +from dataclasses import dataclass
> > +from typing import List, Optional
> > +
> > +
> > +@dataclass
> > +class MailmapEntry:
> > +    """Represents a single mailmap entry."""
> > +
> > +    name: str
> > +    name_for_sorting: str
> > +    email1: str
> > +    email2: Optional[str]
> > +    line_number: int
> > +
> > +    def __str__(self) -> str:
> > +        """Format the entry back to mailmap string format."""
> > +        return f"{self.name} <{self.email1}>" + (f" <{self.email2}>" if 
> > self.email2 else "")
> > +
> > +    @staticmethod
> > +    def _get_name_for_sorting(name):
> > +        """Normalize a name for sorting purposes."""
> > +        # Remove accents/diacritics. Separate accented chars into two - so 
> > accent is separate,
> > +        # then remove the accent.
> > +        normalized = unicodedata.normalize("NFD", name)
> > +        normalized = "".join(c for c in normalized if 
> > unicodedata.category(c) != "Mn")
> > +
> > +        return normalized.lower()
> > +
> > +    @classmethod
> > +    def parse(cls, line: str, line_number: int) -> 
> > Optional["MailmapEntry"]:
> > +        """
> > +        Parse a mailmap line and create a MailmapEntry instance.
> > +
> > +        Valid formats:
> > +        - Name <email>
> > +        - Name <primary_email> <secondary_email>
> > +        """
> > +        line = line.strip()
> > +        if not line or line.startswith("#"):
> > +            return None
> > +
> > +        # Pattern to match mailmap entries
> > +        # Group 1: Name, Group 2: first email, Group 3: optional second 
> > email
> > +        pattern = r"^([^<]+?)\s*<([^>]+)>(?:\s*<([^>]+)>)?$"
> > +        match = re.match(pattern, line)
> > +        if not match:
> > +            return None
> > +
> > +        name = match.group(1).strip()
> > +        primary_email = match.group(2).strip()
> > +        secondary_email = match.group(3).strip() if match.group(3) else 
> > None
> > +
> > +        return cls(
> > +            name=name,
> > +            name_for_sorting=cls._get_name_for_sorting(name),
> > +            email1=primary_email,
> > +            email2=secondary_email,
> > +            line_number=line_number,
> > +        )
> > +
> > +
> > +def read_and_parse_mailmap(mailmap_path: Path) -> List[MailmapEntry]:
> > +    """Read and parse a mailmap file, returning entries."""
> > +    try:
> > +        with open(mailmap_path, "r", encoding="utf-8") as f:
> > +            lines = f.readlines()
> > +    except IOError as e:
> > +        print(f"Error reading {mailmap_path}: {e}", file=sys.stderr)
> > +        sys.exit(1)
> > +
> > +    entries = []
> > +    line_num = 0
> > +
> > +    for line in lines:
> > +        line_num += 1
> 
> nit: could use `for line_num, line in enumerate(lines, 1)`.
> 
Ack. Will change in V2.

> > +        stripped_line = line.strip()
> > +
> > +        # Skip empty lines and comments
> > +        if not stripped_line or stripped_line.startswith("#"):
> > +            continue
> > +
> > +        entry = MailmapEntry.parse(stripped_line, line_num)
> > +        if entry is None:
> > +            print(f"Line {line_num}: Invalid format - {stripped_line}", 
> > file=sys.stderr)
> > +            continue
> 
> Should we fail here instead of continuing? If the operation is check, the 
> check should not pass. If the operation is add, we probably don't want to 
> simply remove everything we couldn't parse.
> 
I'll add a fail-on-error parameter to the function to handle the two cases:
for the "check" op we continue; in all other cases we exit(1).

> > +
> > +        # Check for more than two email addresses
> > +        if stripped_line.count("<") > 2:
> > +            print(f"Line {line_num}: Too many email addresses - 
> > {stripped_line}", file=sys.stderr)
> 
> If this is invalid should we perhaps modify regex to disallow it in 
> MailmapEntry.parse so that it affects new records as well?
> 
Good point - the regex should already enforce this, because it checks the
full entry up to end of line, and only supports an optional second address.
A quick test proves this out - attempting to add a line with 3 email
addresses we get a failure before we reach this point.

Therefore, I'll remove this check completely in V2.

> > +
> > +        entries.append(entry)
> > +    return entries
> > +
> > +
> > +def write_entries_to_file(mailmap_path: Path, entries: List[MailmapEntry]):
> > +    """Write entries to mailmap file."""
> > +    try:
> > +        with open(mailmap_path, "w", encoding="utf-8") as f:
> > +            for entry in entries:
> > +                f.write(str(entry) + "\n")
> > +    except IOError as e:
> > +        print(f"Error writing {mailmap_path}: {e}", file=sys.stderr)
> > +        sys.exit(1)
> > +
> > +
> > +def check_mailmap(mailmap_path, _):
> > +    """Check that mailmap entries are correctly sorted and formatted."""
> 
> As noted above, it will not fail if some entries are incorrectly formatted.
> 
> Also, we could probably check for duplicates.
> 
Yes, but I will leave this for future work, as I don't believe it's a
problem we currently have with our mailmap file.

> > +    entries = read_and_parse_mailmap(mailmap_path)
> > +
> > +    errors = 0
> > +    for i in range(1, len(entries)):
> > +        if entries[i].name_for_sorting < entries[i - 1].name_for_sorting:
> 
> nit: could use `for entry1, entry2 in itertools.pairwise(entries):`
> 
Interesting. Will test this option out.

> > +            print(
> > +                f"Line {entries[i].line_number}: Entry '{entries[i].name}' 
> > is incorrectly sorted",
> > +                file=sys.stderr,
> > +            )
> > +            errors += 1
> > +
> > +    if errors:
> > +        sys.exit(1)
> > +
> > +
> > +def sort_mailmap(mailmap_path, _):
> > +    """Sort the mailmap entries alphabetically by name."""
> 
> Should we warn user somewhere that all comments are going to be deleted?
> Should we allow comments at all if this is what we do?
> 
Again, in the DPDK case, we don't have comments, so this is not an issue.
However, I'll add a note to the usage details.

> > +    entries = read_and_parse_mailmap(mailmap_path)
> > +
> > +    entries.sort(key=lambda x: x.name_for_sorting)
> > +    write_entries_to_file(mailmap_path, entries)
> > +
> > +
> > +def add_entry(mailmap_path, args):
> > +    """Add a new entry to the mailmap file in the correct alphabetical 
> > position."""
> > +    if not args.entry:
> 
> nit: it is possible to make argparse check it using subparsers or groups.
> 
> > +        print("Error: 'add' operation requires an entry argument", 
> > file=sys.stderr)
> > +        sys.exit(1)
> > +
> > +    new_entry = MailmapEntry.parse(args.entry, 0)
> > +    if new_entry is None:
> 
> nit: it is possible to make argparse convert argument to MailmapEntry and 
> report error to the user in a standard way if it fails, but it will require 
> some redesign of MailmapEntry so maybe not worth it.
> 

Something to investigate. May not make V2 of this patch.

> > +        print(f"Error: Invalid entry format: {args.entry}", 
> > file=sys.stderr)
> > +        sys.exit(1)
> > +
> > +    entries = read_and_parse_mailmap(mailmap_path)
> > +
> > +    # Check if entry already exists, checking email2 only if it's specified
> > +    if (
> > +        not new_entry.email2
> > +        and any(e.name == new_entry.name and e.email1 == new_entry.email1 
> > for e in entries)
> > +    ) or any(
> 
> This will usually trigger even when `not new_entry.email2`. 
> 
Can you clarify this comment? Is there something I need to fix here?

> > +        e.name == new_entry.name and e.email1 == new_entry.email1 and 
> > e.email2 == new_entry.email2
> > +        for e in entries
> > +    ):
> > +        print(
> > +            f"Warning: Duplicate entry for '{new_entry.name} 
> > <{new_entry.email1}>' already exists",
> 
> Probably not a "Warning" if we exit with error code right after.
> 
Good point, I'll change it to an error.

> Also the error message is slightly misleading when the second any returns 
> true. I'd split this into two independent checks each with own error message, 
> and select between them depending on the presence of new_entry.email2.

Only very slightly misleading, IMHO, so I don't think it's worth adding a
different error message for the second case.

> 
> > +            file=sys.stderr,
> > +        )
> > +        sys.exit(1)
> > +
> > +    entries.append(new_entry)
> > +    entries.sort(key=lambda x: x.name_for_sorting)
> > +    write_entries_to_file(mailmap_path, entries)
> > +
> > +
> > +def main():
> > +    """Main function."""
> > +    parser = argparse.ArgumentParser(
> > +        description=__doc__, 
> > formatter_class=argparse.RawDescriptionHelpFormatter
> > +    )
> > +    parser.add_argument("operation", choices=["check", "add", "sort"], 
> > help="Operation to perform")
> 
> Can we build choices from keys of operations dict?
> 
Yes, we can. Nice design change, which makes it easier to add new ops in
the future.

> > +    parser.add_argument("--mailmap", help="Path to .mailmap file (default: 
> > search up tree)")
> > +    parser.add_argument("entry", nargs="?", help='Entry to add. Format: 
> > "Name <em...@domain.com>"')
> 
> Secondary email is not mentioned. Actually, if I want to add a secondary 
> email when I already have primary, what do I do?
> 
You hand-edit for now! :-)
That is something that I think we should add in future, but I'm keeping it
simple for now. [For most of the patch application that I do to my tree,
it's only adding completely new entries to mailmap, so getting that working
was my primary concern]

> > +
> > +    args = parser.parse_args()
> > +
> > +    if args.mailmap:
> > +        mailmap_path = Path(args.mailmap)
> > +    else:
> > +        # Find mailmap file
> > +        mailmap_path = Path(".").resolve()
> > +        while not (mailmap_path / ".mailmap").exists():
> > +            if mailmap_path == mailmap_path.parent:
> > +                print("Error: No .mailmap file found", file=sys.stderr)
> > +                sys.exit(1)
> > +            mailmap_path = mailmap_path.parent
> > +        mailmap_path = mailmap_path / ".mailmap"
> > +
> > +    # Handle operations
> > +    operations = {"add": add_entry, "check": check_mailmap, "sort": 
> > sort_mailmap}
> > +    operations[args.operation](mailmap_path, args)
> > +
> > +
> > +if __name__ == "__main__":
> > +    main()
> > --
> > 2.48.1
> 

Reply via email to