https://github.com/python/cpython/commit/1a7824a927f0706300af7bfc182884a43e2f587a
commit: 1a7824a927f0706300af7bfc182884a43e2f587a
branch: main
author: Peter Bierma <[email protected]>
committer: ZeroIntensity <[email protected]>
date: 2025-12-04T03:14:25Z
summary:

gh-141004: Add a CI job ensuring that new C APIs include documentation (GH-142102)

Co-authored-by: Hugo van Kemenade <[email protected]>

files:
A Tools/check-c-api-docs/ignored_c_api.txt
A Tools/check-c-api-docs/main.py
M .github/CODEOWNERS
M .github/workflows/build.yml
M Makefile.pre.in

diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS
index 1086b42620479d..6acc156ebff713 100644
--- a/.github/CODEOWNERS
+++ b/.github/CODEOWNERS
@@ -126,6 +126,9 @@ Doc/howto/clinic.rst          @erlend-aasland @AA-Turner
 # C Analyser
 Tools/c-analyzer/             @ericsnowcurrently
 
+# C API Documentation Checks
+Tools/check-c-api-docs/       @ZeroIntensity
+
 # Fuzzing
 Modules/_xxtestfuzz/          @ammaraskar
 
diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index 8e15400e4978eb..3d889fa128e261 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -142,6 +142,9 @@ jobs:
       - name: Check for unsupported C global variables
         if: github.event_name == 'pull_request'  # $GITHUB_EVENT_NAME
         run: make check-c-globals
+      - name: Check for undocumented C APIs
+        run: make check-c-api-docs
+
 
   build-windows:
     name: >-
diff --git a/Makefile.pre.in b/Makefile.pre.in
index 086adbdf262c48..f3086ec1462b6b 100644
--- a/Makefile.pre.in
+++ b/Makefile.pre.in
@@ -3322,6 +3322,11 @@ check-c-globals:
                --format summary \
                --traceback
 
+# Check for undocumented C APIs.
+.PHONY: check-c-api-docs
+check-c-api-docs:
+       $(PYTHON_FOR_REGEN) $(srcdir)/Tools/check-c-api-docs/main.py
+
 # Find files with funny names
 .PHONY: funny
 funny:
diff --git a/Tools/check-c-api-docs/ignored_c_api.txt b/Tools/check-c-api-docs/ignored_c_api.txt
new file mode 100644
index 00000000000000..e81ffd51e193b2
--- /dev/null
+++ b/Tools/check-c-api-docs/ignored_c_api.txt
@@ -0,0 +1,93 @@
+# pydtrace_probes.h
+PyDTrace_AUDIT
+PyDTrace_FUNCTION_ENTRY
+PyDTrace_FUNCTION_RETURN
+PyDTrace_GC_DONE
+PyDTrace_GC_START
+PyDTrace_IMPORT_FIND_LOAD_DONE
+PyDTrace_IMPORT_FIND_LOAD_START
+PyDTrace_INSTANCE_DELETE_DONE
+PyDTrace_INSTANCE_DELETE_START
+PyDTrace_INSTANCE_NEW_DONE
+PyDTrace_INSTANCE_NEW_START
+PyDTrace_LINE
+# fileobject.h
+Py_FileSystemDefaultEncodeErrors
+Py_FileSystemDefaultEncoding
+Py_HasFileSystemDefaultEncoding
+Py_UTF8Mode
+# pyhash.h
+Py_HASH_EXTERNAL
+# exports.h
+PyAPI_DATA
+Py_EXPORTED_SYMBOL
+Py_IMPORTED_SYMBOL
+Py_LOCAL_SYMBOL
+# modsupport.h
+PyABIInfo_FREETHREADING_AGNOSTIC
+# moduleobject.h
+PyModuleDef_Type
+# object.h
+Py_INVALID_SIZE
+Py_TPFLAGS_HAVE_VERSION_TAG
+Py_TPFLAGS_INLINE_VALUES
+Py_TPFLAGS_IS_ABSTRACT
+# pyexpat.h
+PyExpat_CAPI_MAGIC
+PyExpat_CAPSULE_NAME
+# pyport.h
+Py_ALIGNED
+Py_ARITHMETIC_RIGHT_SHIFT
+Py_CAN_START_THREADS
+Py_FORCE_EXPANSION
+Py_GCC_ATTRIBUTE
+Py_LL
+Py_SAFE_DOWNCAST
+Py_ULL
+Py_VA_COPY
+# unicodeobject.h
+Py_UNICODE_SIZE
+# cpython/methodobject.h
+PyCFunction_GET_CLASS
+# cpython/compile.h
+PyCF_ALLOW_INCOMPLETE_INPUT
+PyCF_COMPILE_MASK
+PyCF_DONT_IMPLY_DEDENT
+PyCF_IGNORE_COOKIE
+PyCF_MASK
+PyCF_MASK_OBSOLETE
+PyCF_SOURCE_IS_UTF8
+# cpython/descrobject.h
+PyDescr_COMMON
+PyDescr_NAME
+PyDescr_TYPE
+PyWrapperFlag_KEYWORDS
+# cpython/fileobject.h
+PyFile_NewStdPrinter
+PyStdPrinter_Type
+Py_UniversalNewlineFgets
+# cpython/setobject.h
+PySet_MINSIZE
+# cpython/ceval.h
+PyUnstable_CopyPerfMapFile
+PyUnstable_PerfTrampoline_CompileCode
+PyUnstable_PerfTrampoline_SetPersistAfterFork
+# cpython/genobject.h
+PyAsyncGenASend_CheckExact
+# cpython/longintrepr.h
+PyLong_BASE
+PyLong_MASK
+PyLong_SHIFT
+# cpython/pyerrors.h
+PyException_HEAD
+# cpython/pyframe.h
+PyUnstable_EXECUTABLE_KINDS
+PyUnstable_EXECUTABLE_KIND_BUILTIN_FUNCTION
+PyUnstable_EXECUTABLE_KIND_METHOD_DESCRIPTOR
+PyUnstable_EXECUTABLE_KIND_PY_FUNCTION
+PyUnstable_EXECUTABLE_KIND_SKIP
+# cpython/pylifecycle.h
+Py_FrozenMain
+# cpython/unicodeobject.h
+PyUnicode_IS_COMPACT
+PyUnicode_IS_COMPACT_ASCII
diff --git a/Tools/check-c-api-docs/main.py b/Tools/check-c-api-docs/main.py
new file mode 100644
index 00000000000000..6bdf80a9ae8985
--- /dev/null
+++ b/Tools/check-c-api-docs/main.py
@@ -0,0 +1,193 @@
+import re
+from pathlib import Path
+import sys
+import _colorize
+import textwrap
+
+# Heuristic patterns used to spot public names in C header text.  They are
+# plain regular expressions, not a C parser.
+#
+# Exported function: "PyAPI_FUNC" + something + " " + name + "(".  The
+# parentheses after PyAPI_FUNC are not escaped, so they form capture group 1
+# (which matches the parenthesised return type via ".+"); the function name
+# is capture group 2.
+SIMPLE_FUNCTION_REGEX = re.compile(r"PyAPI_FUNC(.+) (\w+)\(")
+# Macro: "#define" (optional spaces after "#" and after "define"), the macro
+# name in group 1, an optional parenthesised parameter list in group 2, and
+# then a literal space.  A "#define NAME" with nothing after the name does
+# not match because of that required trailing space.
+SIMPLE_MACRO_REGEX = re.compile(r"# *define *(\w+)(\(.+\))? ")
+# Static inline function: "static inline ", one or more characters on the
+# same line, a space or newline (group 1), then a word (group 2).
+# NOTE(review): ".+" is greedy, so for a definition written on one line the
+# captured word can come from the parameter list instead of the function
+# name -- confirm that this is acceptable for the headers being scanned.
+SIMPLE_INLINE_REGEX = re.compile(r"static inline .+( |\n)(\w+)")
+# Exported data: "PyAPI_DATA(<type>) <name>"; the name is capture group 1.
+SIMPLE_DATA_REGEX = re.compile(r"PyAPI_DATA\(.+\) (\w+)")
+
+# Three levels above this file; with the script at
+# Tools/check-c-api-docs/main.py this is the root of the source checkout.
+CPYTHON = Path(__file__).parent.parent.parent
+# Directory whose header files are scanned for names.
+INCLUDE = CPYTHON / "Include"
+# Directory whose .rst files are searched for those names.
+C_API_DOCS = CPYTHON / "Doc" / "c-api"
+# Names exempted from the check, read from ignored_c_api.txt (one name per
+# line; lines starting with "#" are comments and are not names).
+#
+# The list is built in a single filtering pass: removing items from a list
+# while enumerating it skips the element that follows each removal.
+# splitlines() is used instead of split("\n") so that a checkout with CRLF
+# line endings does not leave "\r" glued to every name, and the encoding is
+# given explicitly like for every other file this script reads.
+_IGNORED_PATH = CPYTHON / "Tools" / "check-c-api-docs" / "ignored_c_api.txt"
+_IGNORED_LINES = _IGNORED_PATH.read_text(encoding="utf-8").splitlines()
+IGNORED = [
+    line.strip()
+    for line in _IGNORED_LINES
+    if line.strip() and not line.lstrip().startswith("#")
+]
+
+# Footer appended to the failure messages built below, telling contributors
+# whom to contact if the check itself looks wrong.  The backslash at the end
+# of the last text line keeps the literal from ending with a newline.
+MISTAKE = """
+If this is a mistake and this script should not be failing, create an
+issue and tag Peter (@ZeroIntensity) on it.\
+"""
+
+
+def found_undocumented(singular: bool) -> str:
+    """
+    Build the help text shown when undocumented C API names were found.
+
+    *singular* selects the wording for exactly one name (true) or for
+    several names (false).  The MISTAKE footer is appended to the result.
+    """
+    if singular:
+        some, s, these, them, were = "an", "", "this", "it", "was"
+    else:
+        some, s, these, them, were = "some", "s", "these", "them", "were"
+
+    # The placeholders are filled in before dedent() strips the common
+    # indentation of the template.
+    explanation = textwrap.dedent(
+        f"""
+    Found {some} undocumented C API{s}!
+
+    Python requires documentation on all public C API symbols, macros, and types.
+    If {these} API{s} {were} not meant to be public, prefix {them} with a
+    leading underscore (_PySomething_API) or move {them} to the internal C API
+    (pycore_*.h files).
+
+    In exceptional cases, certain APIs can be ignored by adding them to
+    Tools/check-c-api-docs/ignored_c_api.txt
+    """
+    )
+    return explanation + MISTAKE
+
+
+def found_ignored_documented(singular: bool) -> str:
+    """
+    Build the help text shown when names listed in ignored_c_api.txt turn
+    out to be documented.
+
+    *singular* selects the wording for exactly one name (true) or for
+    several names (false).  The MISTAKE footer is appended to the result.
+    """
+    some = "a" if singular else "some"
+    s = "" if singular else "s"
+    them = "it" if singular else "them"
+    were = "was" if singular else "were"
+    they = "it" if singular else "they"
+
+    # The path below must name the directory this script lives in
+    # (Tools/check-c-api-docs), so that contributors can find the file.
+    return (
+        textwrap.dedent(
+            f"""
+    Found {some} C API{s} listed in Tools/check-c-api-docs/ignored_c_api.txt, but
+    {they} {were} found in the documentation. To fix this, remove {them} from
+    ignored_c_api.txt.
+    """
+        )
+        + MISTAKE
+    )
+
+
+# Contents of the documentation files, filled in on first use so that the
+# files are read once rather than once per name being checked.
+_DOC_TEXTS = None
+
+
+def _load_doc_texts() -> list[str]:
+    """
+    Return the text of every .rst file directly inside C_API_DOCS.
+
+    Sub-directories are not descended into.  The result is cached in
+    _DOC_TEXTS after the first call.
+    """
+    global _DOC_TEXTS
+    if _DOC_TEXTS is None:
+        _DOC_TEXTS = [
+            path.read_text(encoding="utf-8")
+            for path in C_API_DOCS.iterdir()
+            if path.suffix == ".rst" and not path.is_dir()
+        ]
+    return _DOC_TEXTS
+
+
+def is_documented(name: str) -> bool:
+    """
+    Is a name present in the C API documentation?
+
+    This is a plain substring test against the text of each .rst file, so
+    a name also counts as documented when it only occurs as part of a
+    longer word.
+    """
+    return any(name in text for text in _load_doc_texts())
+
+
+def scan_file_for_docs(filename: str, text: str) -> tuple[list[str], list[str]]:
+    """
+    Scan the text of one header file for C API names and report on them.
+
+    Names are collected with the SIMPLE_*_REGEX patterns (functions, macros,
+    static inline functions and data); only names starting with "Py" are
+    checked.  A status line for *filename* is printed, followed by one line
+    per offending name.
+
+    Returns a tuple ``(undocumented, documented_ignored)``: the names that
+    are neither documented nor listed in IGNORED, and the names that are
+    listed in IGNORED although they are documented.  Both lists are sorted
+    and contain no duplicates.
+    """
+    colors = _colorize.get_colors()
+
+    # Each pattern paired with the capture group that holds the name.
+    patterns = (
+        (SIMPLE_FUNCTION_REGEX, 2),
+        (SIMPLE_MACRO_REGEX, 1),
+        (SIMPLE_INLINE_REGEX, 2),
+        (SIMPLE_DATA_REGEX, 1),
+    )
+
+    # Collect into a set first: the same name can be matched by more than
+    # one pattern or more than once in a file, and it only needs to be
+    # looked up and reported once.
+    names: set[str] = set()
+    for pattern, group in patterns:
+        for match in pattern.finditer(text):
+            name = match.group(group)
+            if name.startswith("Py"):
+                names.add(name)
+
+    undocumented: list[str] = []
+    documented_ignored: list[str] = []
+    # Iterate in sorted order to keep the output deterministic.
+    for name in sorted(names):
+        documented = is_documented(name)
+        ignored = name in IGNORED
+        if documented and ignored:
+            documented_ignored.append(name)
+        elif not documented and not ignored:
+            undocumented.append(name)
+
+    if undocumented or documented_ignored:
+        print(f"{filename} {colors.RED}BAD{colors.RESET}")
+        for name in undocumented:
+            print(f"{colors.BOLD_RED}UNDOCUMENTED:{colors.RESET} {name}")
+        for name in documented_ignored:
+            print(f"{colors.BOLD_YELLOW}DOCUMENTED BUT IGNORED:{colors.RESET} {name}")
+    else:
+        print(f"{filename} {colors.GREEN}OK{colors.RESET}")
+
+    return undocumented, documented_ignored
+
+
+def main() -> None:
+    """
+    Scan every file directly inside Include/ and Include/cpython/, print a
+    report, and exit with status 1 if any undocumented or wrongly ignored
+    name was found (status 0 otherwise).
+    """
+    print("Scanning for undocumented C API functions...")
+    # iterdir() yields entries in arbitrary order; sort each directory so
+    # that the report looks the same on every machine.
+    files = [
+        *sorted(INCLUDE.iterdir()),
+        *sorted((INCLUDE / "cpython").iterdir()),
+    ]
+    all_missing: list[str] = []
+    all_found_ignored: list[str] = []
+
+    for file in files:
+        if file.is_dir():
+            continue
+        text = file.read_text(encoding="utf-8")
+        missing, ignored = scan_file_for_docs(str(file.relative_to(INCLUDE)), text)
+        all_found_ignored += ignored
+        all_missing += missing
+
+    fail = False
+    to_check = [
+        (all_missing, "missing", found_undocumented(len(all_missing) == 1)),
+        (
+            all_found_ignored,
+            "documented but ignored",
+            found_ignored_documented(len(all_found_ignored) == 1),
+        ),
+    ]
+    for name_list, what, message in to_check:
+        if not name_list:
+            continue
+
+        s = "s" if len(name_list) != 1 else ""
+        print(f"-- {len(name_list)} {what} C API{s} --")
+        for name in name_list:
+            print(f" - {name}")
+        print(message)
+        fail = True
+
+    sys.exit(1 if fail else 0)
+
+
+if __name__ == "__main__":
+    main()

_______________________________________________
Python-checkins mailing list -- [email protected]
To unsubscribe send an email to [email protected]
https://mail.python.org/mailman3//lists/python-checkins.python.org
Member address: [email protected]

Reply via email to