================
@@ -1,19 +1,86 @@
 #!/usr/bin/env python3
 
-from sphinx.application import Sphinx
+"""Script to link #GH123 to our github issue tracker.
+
+We use MyST and reST, so this Sphinx plugin operates on the shared doctree
+representation of the documentation, which is effectively a documentation-AST.
+
+Docutils doctree node reference:
+https://docutils.sourceforge.io/docs/ref/doctree.html
+"""
+
 import re
+from docutils import nodes
+from sphinx.application import Sphinx
 
 __version__ = "1.0"
 
+GH_LINK_RE = re.compile("#GH([0-9]+)")
+GH_LINK_URL = "https://github.com/llvm/llvm-project/issues/{}";
+SKIP_NODES: tuple[type[nodes.Node], ...] = (
+    nodes.FixedTextElement,
+    nodes.literal,
+    nodes.raw,
+    nodes.reference,
+)
+
+
+def make_gh_link(issue: str) -> nodes.reference:
+    """Create the docutils node that writers render as an external link."""
+    return nodes.reference("", "#" + issue, refuri=GH_LINK_URL.format(issue))
+
+
+def replace_gh_links(node: nodes.Text) -> None:
+    """Replace one text node with text fragments and GitHub issue links.
+
+    docutils text nodes cannot contain child nodes, so a single string like
+    "See #GH123." has to become a sibling list: Text("See "), reference(...),
+    Text("."). The parent node owns that sibling list.
+    """
+    remaining = str(node)
+    replacements = []
+
+    while GH_LINK_RE.search(remaining):
+        before, issue, remaining = GH_LINK_RE.split(remaining, maxsplit=1)
+        if before:
+            replacements.append(nodes.Text(before))
+        replacements.append(make_gh_link(issue))
+
+    # If we found matches, do the replacement.
+    if replacements:
+        if remaining:
+            replacements.append(nodes.Text(remaining))
+        node.parent.replace(node, replacements)
+
+
+def replace_gh_links_in_subtree(node: nodes.Node) -> None:
+    """Rewrite linkable text nodes under node, pruning ignored subtrees.
+
+    The doctree is the markup-language neutral documentation AST, so it
+    handles both reST and MyST (markdown). It helps us avoid rewriting #GH123
+    in code and literal blocks.
+    """
+    # DFS cutoff for code blocks etc.
+    if isinstance(node, SKIP_NODES):
+        return
+
+    # Find #GH123 links in text blocks and linkify them.
+    if isinstance(node, nodes.Text):
+        if GH_LINK_RE.search(str(node)):
+            replace_gh_links(node)
+        return
+
+    # Recursive DFS traversal of children.
+    if isinstance(node, nodes.Element):
+        for child in list(node.children):
+            replace_gh_links_in_subtree(child)
+
 
-def subst_gh_links(app: Sphinx, docname, source):
-    regex = re.compile("#GH([0-9]+)")
-    out_pattern = r"`#\1 <https://github.com/llvm/llvm-project/issues/\1>`_"
-    result = source[0]
-    result = regex.sub(out_pattern, result)
-    source[0] = result
+def subst_gh_links(_app: Sphinx, doctree: nodes.document) -> None:
----------------
nigham wrote:

It would be nice to add tests, even basic ones that an LLM can add.

Also: how are we testing that this works? Is it by building the docs and
verifying that the links are substituted correctly?

https://github.com/llvm/llvm-project/pull/199076
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to