https://github.com/python/cpython/commit/1a7824a927f0706300af7bfc182884a43e2f587a commit: 1a7824a927f0706300af7bfc182884a43e2f587a branch: main author: Peter Bierma <[email protected]> committer: ZeroIntensity <[email protected]> date: 2025-12-04T03:14:25Z summary:
gh-141004: Add a CI job ensuring that new C APIs include documentation (GH-142102) Co-authored-by: Hugo van Kemenade <[email protected]> files: A Tools/check-c-api-docs/ignored_c_api.txt A Tools/check-c-api-docs/main.py M .github/CODEOWNERS M .github/workflows/build.yml M Makefile.pre.in diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 1086b42620479d..6acc156ebff713 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -126,6 +126,9 @@ Doc/howto/clinic.rst @erlend-aasland @AA-Turner # C Analyser Tools/c-analyzer/ @ericsnowcurrently +# C API Documentation Checks +Tools/check-c-api-docs/ @ZeroIntensity + # Fuzzing Modules/_xxtestfuzz/ @ammaraskar diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 8e15400e4978eb..3d889fa128e261 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -142,6 +142,9 @@ jobs: - name: Check for unsupported C global variables if: github.event_name == 'pull_request' # $GITHUB_EVENT_NAME run: make check-c-globals + - name: Check for undocumented C APIs + run: make check-c-api-docs + build-windows: name: >- diff --git a/Makefile.pre.in b/Makefile.pre.in index 086adbdf262c48..f3086ec1462b6b 100644 --- a/Makefile.pre.in +++ b/Makefile.pre.in @@ -3322,6 +3322,11 @@ check-c-globals: --format summary \ --traceback +# Check for undocumented C APIs. +.PHONY: check-c-api-docs +check-c-api-docs: + $(PYTHON_FOR_REGEN) $(srcdir)/Tools/check-c-api-docs/main.py + # Find files with funny names .PHONY: funny funny: diff --git a/Tools/check-c-api-docs/ignored_c_api.txt b/Tools/check-c-api-docs/ignored_c_api.txt new file mode 100644 index 00000000000000..e81ffd51e193b2 --- /dev/null +++ b/Tools/check-c-api-docs/ignored_c_api.txt @@ -0,0 +1,93 @@ +# pydtrace_probes.h +PyDTrace_AUDIT +PyDTrace_FUNCTION_ENTRY +PyDTrace_FUNCTION_RETURN +PyDTrace_GC_DONE +PyDTrace_GC_START +PyDTrace_IMPORT_FIND_LOAD_DONE +PyDTrace_IMPORT_FIND_LOAD_START +PyDTrace_INSTANCE_DELETE_DONE +PyDTrace_INSTANCE_DELETE_START +PyDTrace_INSTANCE_NEW_DONE +PyDTrace_INSTANCE_NEW_START +PyDTrace_LINE +# fileobject.h +Py_FileSystemDefaultEncodeErrors +Py_FileSystemDefaultEncoding +Py_HasFileSystemDefaultEncoding +Py_UTF8Mode +# pyhash.h +Py_HASH_EXTERNAL +# exports.h +PyAPI_DATA +Py_EXPORTED_SYMBOL +Py_IMPORTED_SYMBOL +Py_LOCAL_SYMBOL +# modsupport.h +PyABIInfo_FREETHREADING_AGNOSTIC +# moduleobject.h +PyModuleDef_Type +# object.h +Py_INVALID_SIZE +Py_TPFLAGS_HAVE_VERSION_TAG +Py_TPFLAGS_INLINE_VALUES +Py_TPFLAGS_IS_ABSTRACT +# pyexpat.h +PyExpat_CAPI_MAGIC +PyExpat_CAPSULE_NAME +# pyport.h +Py_ALIGNED +Py_ARITHMETIC_RIGHT_SHIFT +Py_CAN_START_THREADS +Py_FORCE_EXPANSION +Py_GCC_ATTRIBUTE +Py_LL +Py_SAFE_DOWNCAST +Py_ULL +Py_VA_COPY +# unicodeobject.h +Py_UNICODE_SIZE +# cpython/methodobject.h +PyCFunction_GET_CLASS +# cpython/compile.h +PyCF_ALLOW_INCOMPLETE_INPUT +PyCF_COMPILE_MASK +PyCF_DONT_IMPLY_DEDENT +PyCF_IGNORE_COOKIE +PyCF_MASK +PyCF_MASK_OBSOLETE +PyCF_SOURCE_IS_UTF8 +# cpython/descrobject.h +PyDescr_COMMON +PyDescr_NAME +PyDescr_TYPE +PyWrapperFlag_KEYWORDS +# cpython/fileobject.h +PyFile_NewStdPrinter +PyStdPrinter_Type +Py_UniversalNewlineFgets +# cpython/setobject.h +PySet_MINSIZE +# cpython/ceval.h +PyUnstable_CopyPerfMapFile +PyUnstable_PerfTrampoline_CompileCode +PyUnstable_PerfTrampoline_SetPersistAfterFork +# cpython/genobject.h +PyAsyncGenASend_CheckExact +# cpython/longintrepr.h +PyLong_BASE +PyLong_MASK +PyLong_SHIFT +# cpython/pyerrors.h +PyException_HEAD +# cpython/pyframe.h +PyUnstable_EXECUTABLE_KINDS +PyUnstable_EXECUTABLE_KIND_BUILTIN_FUNCTION +PyUnstable_EXECUTABLE_KIND_METHOD_DESCRIPTOR +PyUnstable_EXECUTABLE_KIND_PY_FUNCTION +PyUnstable_EXECUTABLE_KIND_SKIP +# cpython/pylifecycle.h +Py_FrozenMain +# cpython/unicodeobject.h +PyUnicode_IS_COMPACT +PyUnicode_IS_COMPACT_ASCII diff --git a/Tools/check-c-api-docs/main.py b/Tools/check-c-api-docs/main.py new file mode 100644 index 00000000000000..6bdf80a9ae8985 --- /dev/null +++ b/Tools/check-c-api-docs/main.py @@ -0,0 +1,193 @@ +import re +from pathlib import Path +import sys +import _colorize +import textwrap + +SIMPLE_FUNCTION_REGEX = re.compile(r"PyAPI_FUNC(.+) (\w+)\(") +SIMPLE_MACRO_REGEX = re.compile(r"# *define *(\w+)(\(.+\))? ") +SIMPLE_INLINE_REGEX = re.compile(r"static inline .+( |\n)(\w+)") +SIMPLE_DATA_REGEX = re.compile(r"PyAPI_DATA\(.+\) (\w+)") + +CPYTHON = Path(__file__).parent.parent.parent +INCLUDE = CPYTHON / "Include" +C_API_DOCS = CPYTHON / "Doc" / "c-api" +IGNORED = ( + (CPYTHON / "Tools" / "check-c-api-docs" / "ignored_c_api.txt") + .read_text() + .split("\n") +) + +for index, line in enumerate(IGNORED): + if line.startswith("#"): + IGNORED.pop(index) + +MISTAKE = """ +If this is a mistake and this script should not be failing, create an +issue and tag Peter (@ZeroIntensity) on it.\ +""" + + +def found_undocumented(singular: bool) -> str: + some = "an" if singular else "some" + s = "" if singular else "s" + these = "this" if singular else "these" + them = "it" if singular else "them" + were = "was" if singular else "were" + + return ( + textwrap.dedent( + f""" + Found {some} undocumented C API{s}! + + Python requires documentation on all public C API symbols, macros, and types. + If {these} API{s} {were} not meant to be public, prefix {them} with a + leading underscore (_PySomething_API) or move {them} to the internal C API + (pycore_*.h files). + + In exceptional cases, certain APIs can be ignored by adding them to + Tools/check-c-api-docs/ignored_c_api.txt + """ + ) + + MISTAKE + ) + + +def found_ignored_documented(singular: bool) -> str: + some = "a" if singular else "some" + s = "" if singular else "s" + them = "it" if singular else "them" + were = "was" if singular else "were" + they = "it" if singular else "they" + + return ( + textwrap.dedent( + f""" + Found {some} C API{s} listed in Tools/c-api-docs-check/ignored_c_api.txt, but + {they} {were} found in the documentation. To fix this, remove {them} from + ignored_c_api.txt. + """ + ) + + MISTAKE + ) + + +def is_documented(name: str) -> bool: + """ + Is a name present in the C API documentation? + """ + for path in C_API_DOCS.iterdir(): + if path.is_dir(): + continue + if path.suffix != ".rst": + continue + + text = path.read_text(encoding="utf-8") + if name in text: + return True + + return False + + +def scan_file_for_docs(filename: str, text: str) -> tuple[list[str], list[str]]: + """ + Scan a header file for C API functions. + """ + undocumented: list[str] = [] + documented_ignored: list[str] = [] + colors = _colorize.get_colors() + + def check_for_name(name: str) -> None: + documented = is_documented(name) + if documented and (name in IGNORED): + documented_ignored.append(name) + elif not documented and (name not in IGNORED): + undocumented.append(name) + + for function in SIMPLE_FUNCTION_REGEX.finditer(text): + name = function.group(2) + if not name.startswith("Py"): + continue + + check_for_name(name) + + for macro in SIMPLE_MACRO_REGEX.finditer(text): + name = macro.group(1) + if not name.startswith("Py"): + continue + + if "(" in name: + name = name[: name.index("(")] + + check_for_name(name) + + for inline in SIMPLE_INLINE_REGEX.finditer(text): + name = inline.group(2) + if not name.startswith("Py"): + continue + + check_for_name(name) + + for data in SIMPLE_DATA_REGEX.finditer(text): + name = data.group(1) + if not name.startswith("Py"): + continue + + check_for_name(name) + + # Remove duplicates and sort alphabetically to keep the output deterministic + undocumented = list(set(undocumented)) + undocumented.sort() + + if undocumented or documented_ignored: + print(f"{filename} {colors.RED}BAD{colors.RESET}") + for name in undocumented: + print(f"{colors.BOLD_RED}UNDOCUMENTED:{colors.RESET} {name}") + for name in documented_ignored: + print(f"{colors.BOLD_YELLOW}DOCUMENTED BUT IGNORED:{colors.RESET} {name}") + else: + print(f"{filename} {colors.GREEN}OK{colors.RESET}") + + return undocumented, documented_ignored + + +def main() -> None: + print("Scanning for undocumented C API functions...") + files = [*INCLUDE.iterdir(), *(INCLUDE / "cpython").iterdir()] + all_missing: list[str] = [] + all_found_ignored: list[str] = [] + + for file in files: + if file.is_dir(): + continue + assert file.exists() + text = file.read_text(encoding="utf-8") + missing, ignored = scan_file_for_docs(str(file.relative_to(INCLUDE)), text) + all_found_ignored += ignored + all_missing += missing + + fail = False + to_check = [ + (all_missing, "missing", found_undocumented(len(all_missing) == 1)), + ( + all_found_ignored, + "documented but ignored", + found_ignored_documented(len(all_found_ignored) == 1), + ), + ] + for name_list, what, message in to_check: + if not name_list: + continue + + s = "s" if len(name_list) != 1 else "" + print(f"-- {len(name_list)} {what} C API{s} --") + for name in name_list: + print(f" - {name}") + print(message) + fail = True + + sys.exit(1 if fail else 0) + + +if __name__ == "__main__": + main() _______________________________________________ Python-checkins mailing list -- [email protected] To unsubscribe send an email to [email protected] https://mail.python.org/mailman3//lists/python-checkins.python.org Member address: [email protected]
