#!/usr/bin/env python3
#
# ===-----------------------------------------------------------------------===#
#
# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#
# ===-----------------------------------------------------------------------===#

"""

Clang-Tidy Alphabetical Order Checker
=====================================

Normalize Clang-Tidy documentation with deterministic sorting for linting/tests.

Behavior:
- Sort entries in docs/clang-tidy/checks/list.rst csv-table.
- Sort key sections in docs/ReleaseNotes.rst.
- Detect duplicated entries in 'Changes in existing checks'.

Flags:
  -o/--output  Write normalized content to this path instead of updating docs.
"""

import argparse
import io
import os
import re
import sys
from typing import Dict, List, Optional, Sequence, Tuple, NamedTuple
from operator import itemgetter

# Matches a :doc:`label <path>` or :doc:`label` reference anywhere in text and
# captures the label. Used as the sort key for bullet items in ReleaseNotes.
# The label group is lazy so that the whitespace separating the label from an
# optional <path> target is consumed by \s* instead of ending up in the
# captured label; `name <path>` and plain `name` therefore yield the same key.
DOC_LABEL_RN_RE = re.compile(r":doc:`(?P<label>[^`<]+?)\s*(?:<[^>]+>)?`")

# Matches a single csv-table row line in list.rst that begins with a :doc:
# reference, capturing the label. Used to extract the sort key per row.
DOC_LINE_RE = re.compile(r"^\s*:doc:`(?P<label>[^`<]+?)\s*<[^>]+>`.*$")


# Documentation paths, all derived from the directory containing this script.
EXTRA_DIR = os.path.join(os.path.dirname(__file__), "../..")
DOCS_DIR = os.path.join(EXTRA_DIR, "docs")
CLANG_TIDY_DOCS_DIR = os.path.join(DOCS_DIR, "clang-tidy")
CHECKS_DOCS_DIR = os.path.join(CLANG_TIDY_DOCS_DIR, "checks")
LIST_DOC = os.path.join(CHECKS_DOCS_DIR, "list.rst")
RELEASE_NOTES_DOC = os.path.join(DOCS_DIR, "ReleaseNotes.rst")


# Type aliases used by the bullet-list helpers below.
CheckLabel = str
Lines = List[str]
BulletBlock = List[str]
BulletItem = Tuple[CheckLabel, BulletBlock]
BulletStart = int


class BulletBlocks(NamedTuple):
    """Structured result of parsing a bullet-list section.

    - prefix: lines before the first bullet within the section range.
    - blocks: list of (label, block-lines) pairs for each bullet block.
    - suffix: lines after the last bullet within the section range.
    """

    prefix: Lines
    blocks: List[BulletItem]
    suffix: Lines


class ScannedBlocks(NamedTuple):
    """Result of scanning bullet blocks within a section range.

    - blocks_with_pos: list of (start_index, block_lines) for each bullet block.
    - next_index: index where scanning stopped; start of the suffix region.
    """

    blocks_with_pos: List[Tuple[BulletStart, BulletBlock]]
    next_index: int


def _scan_bullet_blocks(lines: Sequence[str], start: int, end: int) -> ScannedBlocks:
    """Scan consecutive bullet blocks and return (blocks_with_pos, next_index).

    Scanning begins at `start`, never reads at or beyond `end`, and stops at
    the first line that neither starts a bullet nor continues one.

    Each entry in blocks_with_pos is a tuple of (start_index, block_lines).
    next_index is the index where scanning stopped (start of suffix).
    """
    i = start
    n = end
    blocks_with_pos: List[Tuple[BulletStart, BulletBlock]] = []
    while i < n and _is_bullet_start(lines[i]):
        bstart = i
        i += 1
        while i < n and not _is_bullet_start(lines[i]):
            # A non-empty line followed by a line made only of '^' is the
            # title of the next subsection, so the current block ends here.
            # Both "\n" and "\r\n" terminators are stripped so that CRLF
            # input is recognised as well.
            if (
                i + 1 < n
                and set(lines[i + 1].rstrip("\r\n")) == {"^"}
                and lines[i].strip()
            ):
                break
            i += 1
        blocks_with_pos.append((bstart, list(lines[bstart:i])))
    return ScannedBlocks(blocks_with_pos, i)


def read_text(path: str) -> List[str]:
    """Read the UTF-8 file at `path` and return its lines, endings kept."""
    with open(path, "r", encoding="utf-8") as f:
        return f.read().splitlines(True)


def write_text(path: str, content: str) -> None:
    """Write `content` to `path` as UTF-8 without translating newlines."""
    with open(path, "w", encoding="utf-8", newline="") as f:
        f.write(content)


def _normalize_list_rst_lines(lines: Sequence[str]) -> List[str]:
    """Return normalized content of checks list.rst as a list of lines.

    For every `.. csv-table::` directive the run of indented lines starting at
    the first :doc: row is sorted by label; all other lines are copied through
    in their original order.
    """

    def check_name(line: str) -> Tuple[int, str]:
        m = DOC_LINE_RE.match(line)
        # Rows without a :doc: reference sort after every recognised row.
        return (0, m.group("label")) if m else (1, "")

    out: List[str] = []
    i = 0
    n = len(lines)
    while i < n:
        line = lines[i]
        out.append(line)
        i += 1
        if not line.lstrip().startswith(".. csv-table::"):
            continue

        # Copy the directive options and blank lines that precede the first
        # :doc: row unchanged.
        while i < n and (lines[i].startswith(" ") or lines[i].strip() == ""):
            if DOC_LINE_RE.match(lines[i]):
                break
            out.append(lines[i])
            i += 1

        # Collect the indented rows and emit them sorted (stable sort).
        entries: List[str] = []
        while i < n and lines[i].startswith(" "):
            entries.append(lines[i])
            i += 1
        out.extend(sorted(entries, key=check_name))

    return out


def normalize_list_rst(data: str) -> str:
    """Normalize list.rst content and return a string."""
    return "".join(_normalize_list_rst_lines(data.splitlines(True)))


def find_heading(lines: Sequence[str], title: str) -> Optional[int]:
    """Find heading start index for a section underlined with ^ characters.

    The function looks for a line equal to `title` followed by a line that
    consists solely of ^ and has the same length as `title`, which matches the
    ReleaseNotes style for subsection headings used here. Trailing "\\n" and
    "\\r\\n" terminators are ignored when comparing.

    Returns index of the title line, or None if not found.
    """
    for i in range(len(lines) - 1):
        if lines[i].rstrip("\r\n") != title:
            continue
        underline = lines[i + 1].rstrip("\r\n")
        if underline and set(underline) == {"^"} and len(underline) == len(title):
            return i
    return None


def extract_label(text: str) -> str:
    """Return the label captured by DOC_LABEL_RN_RE in `text`.

    When `text` contains no :doc: reference, `text` itself is returned.
    """
    m = DOC_LABEL_RN_RE.search(text)
    return m.group("label") if m else text


def _is_bullet_start(line: str) -> bool:
    """Return True if `line` begins a top-level bullet item ("- ")."""
    return line.startswith("- ")


def _parse_bullet_blocks(lines: Sequence[str], start: int, end: int) -> BulletBlocks:
    """Split lines[start:end] into a prefix, labelled bullet blocks and a suffix.

    Each block is keyed by the label extracted from its first line.
    """
    first_bullet = start
    while first_bullet < end and not _is_bullet_start(lines[first_bullet]):
        first_bullet += 1
    prefix: Lines = list(lines[start:first_bullet])

    res = _scan_bullet_blocks(lines, first_bullet, end)
    blocks: List[BulletItem] = [
        (extract_label(block[0]), block) for _, block in res.blocks_with_pos
    ]

    suffix: Lines = list(lines[res.next_index : end])
    return BulletBlocks(prefix, blocks, suffix)


def sort_blocks(blocks: List[BulletItem]) -> List[BulletBlock]:
    """Return blocks sorted deterministically by their extracted label.

    The sort is stable, so blocks sharing a label keep their relative order.
    Duplicates are preserved; merging is left to authors to handle manually.
    """
    return [block for _, block in sorted(blocks, key=itemgetter(0))]


def find_duplicate_entries(
    lines: Sequence[str], title: str
) -> List[Tuple[str, List[Tuple[int, List[str]]]]]:
    """Return detailed duplicate info as (key, [(start_idx, block_lines), ...]).

    start_idx is the 0-based index of the first line of the bullet block in
    the original lines list. Only keys with more than one occurrence are
    returned, and occurrences are listed in the order they appear.
    """
    bounds = _find_section_bounds(lines, title, None)
    if bounds is None:
        return []
    _, sec_start, sec_end = bounds

    i = sec_start
    n = sec_end

    while i < n and not _is_bullet_start(lines[i]):
        i += 1

    blocks_with_pos: List[Tuple[str, int, List[str]]] = []
    res = _scan_bullet_blocks(lines, i, n)
    for bstart, block in res.blocks_with_pos:
        key = extract_label(block[0])
        blocks_with_pos.append((key, bstart, block))

    grouped: Dict[str, List[Tuple[int, List[str]]]] = {}
    # NOTE(review): the quoted excerpt stops at the line above; the remaining
    # statements of this function are not visible here and are not reproduced.


# ---------------- vbvictor wrote:
ditto https://github.com/llvm/llvm-project/pull/166072 _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
