d18c3d0acab0e7469c3284c897afcb61f9dd1fea

brondsem Fri, 21 Nov 2025 11:04:43 -0800

This is an automated email from the ASF dual-hosted git repository.

brondsem pushed a commit to branch db/8588
in repository https://gitbox.apache.org/repos/asf/allura.git


commit 596f00968fca6a05d3ce873a5596378544449ed9
Author: Dave Brondsema <[email protected]>
AuthorDate: Thu Nov 20 18:24:21 2025 -0500

    [#8588] convert all markdown.inlinepatterns.Pattern to 
markdown.inlinepatterns.InlineProcessor based on 
https://github.com/Python-Markdown/markdown/commit/d18c3d0acab0e7469c3284c897afcb61f9dd1fea
---
 Allura/allura/lib/markdown_extensions.py | 68 +++++++++++++++-----------------
 1 file changed, 31 insertions(+), 37 deletions(-)

diff --git a/Allura/allura/lib/markdown_extensions.py 
b/Allura/allura/lib/markdown_extensions.py
index 4579e1970..52eb10b10 100644
--- a/Allura/allura/lib/markdown_extensions.py
+++ b/Allura/allura/lib/markdown_extensions.py
@@ -47,7 +47,7 @@
 SHORT_REF_RE = markdown.inlinepatterns.NOIMG + r'\[([^\]]+)\]'
 
 # FORGE_LINK_RE copied from markdown pre 3.0's LINK_RE
-# TODO: replace these with newer approach, see ForgeLinkPattern
+# TODO: possibly simplify further now that InlineProcessor is used (see 
ForgeLinkPattern); may not be needed since possessive quantifiers are used
 NOBRACKET = r'[^\]\[]*+'
 BRK = (
     r'\[(' +
@@ -56,7 +56,7 @@
     NOBRACKET + r')\]'
 )
 FORGE_LINK_RE = markdown.inlinepatterns.NOIMG + BRK + \
-    r'''\(\s*(<.*?>|((?:(?:\(.*?\))|[^\(\)]))*?)\s*((['"])(.*?)\12\s*)?\)'''
+    r'''\(\s*(<.*?>|((?:(?:\(.*?\))|[^\(\)]))*?)\s*((['"])(.*?)\11\s*)?\)'''
 
 
 def clear_markdown_registry(reg: markdown.util.Registry, keep: list[str] = []):
@@ -286,7 +286,7 @@ def extendMarkdown(self, md):
         # this has to be before the 'escape' processor, otherwise weird
         # placeholders are inserted for escaped chars within urls, and then the
         # autolink can't match the whole url
-        
md.inlinePatterns.register(AutolinkPattern(r'(http(?:s?)://[a-zA-Z0-9./\-\\_%?&=+#;~:!@]+)',
 md),
+        
md.inlinePatterns.register(AutolinkPattern(r'(?:(?<=\s)|^)(http(?:s?)://[a-zA-Z0-9./\-\\_%?&=+#;~:!@]+)',
 md),
                                    'autolink_without_brackets',
                                    185)  # was '<escape' and 'escape' is 
priority 180; greater number runs first, so: 185
         # replace the link pattern with our extended version
@@ -318,11 +318,11 @@ def extendMarkdown(self, md):
         md.inlinePatterns.register(EmojiInlinePattern(self.EMOJI_RE), 'emoji', 
0)
 
 
-class EmojiInlinePattern(markdown.inlinepatterns.Pattern):
+class EmojiInlinePattern(markdown.inlinepatterns.InlineProcessor):
 
-    def handleMatch(self, m):
-        emoji_code = m.group(2)
-        return emoji.emojize(emoji_code, language="alias")
+    def handleMatch(self, m: re.Match[str], data: str) -> tuple[etree.Element 
| None, int | None, int | None]:
+        emoji_code = m.group(1)
+        return emoji.emojize(emoji_code, language="alias"), m.start(0), 
m.end(0)
 
 
 class UserMentionExtension(markdown.Extension):
@@ -334,10 +334,10 @@ def extendMarkdown(self, md):
         md.inlinePatterns.register(UserMentionInlinePattern(self.UM_RE), 
'user_mentions', 0)
 
 
-class UserMentionInlinePattern(markdown.inlinepatterns.Pattern):
+class UserMentionInlinePattern(markdown.inlinepatterns.InlineProcessor):
 
-    def handleMatch(self, m):
-        user_name = m.group(2).replace("@", "")
+    def handleMatch(self, m: re.Match[str], data: str) -> tuple[etree.Element 
| None, int | None, int | None]:
+        user_name = m.group(1).replace("@", "")
         user = M.User.by_username(user_name)
         result = None
 
@@ -348,13 +348,14 @@ def handleMatch(self, m):
             result.set('class', 'user-mention')
         else:
             result = "@%s" % user_name
-        return result
+        return result, m.start(0), m.end(0)
 
 
-class ForgeLinkPattern(markdown.inlinepatterns.Pattern):
-    # TODO: convert from extending Pattern to extending InlineProcessor
-    #  which is how core Markdown library in 3.0 made its base link parsing 
much faster.
+class ForgeLinkPattern(markdown.inlinepatterns.InlineProcessor):
+    # TODO: consider further changes to align with InlineProcessor patterns
+    # but maybe isn't needed since NOBRACKET and BRK are fast now (see their 
comments)
     # 
https://github.com/Python-Markdown/markdown/commit/d18c3d0acab0e7469c3284c897afcb61f9dd1fea
+    # https://github.com/brondsem/allura/pull/1 maybe a starting point
 
     artifact_re = re.compile(r'((.*?):)?((.*?):)?(.+)')
 
@@ -362,26 +363,26 @@ def __init__(self, *args, **kwargs):
         self.ext = kwargs.pop('ext')
         super().__init__(*args, **kwargs)
 
-    def handleMatch(self, m):
+    def handleMatch(self, m: re.Match[str], data: str) -> tuple[etree.Element 
| None, int | None, int | None]:
         el = etree.Element('a')
-        el.text = m.group(2)
+        el.text = m.group(1)
         is_link_with_brackets = False
         try:
-            href = m.group(9)
+            href = m.group(8)
         except IndexError:
-            href = m.group(2)
+            href = m.group(1)
             is_link_with_brackets = True
             if el.text == 'x' or el.text == ' ':  # skip [ ] and [x] for 
markdown checklist
-                return '[' + el.text + ']'
+                return None, None, None
         try:
-            title = m.group(13)
+            title = m.group(12)
         except IndexError:
             title = None
 
         classes = ''
         if href:
             if href == 'TOC':
-                return '[TOC]'  # skip TOC
+                return None, None, None
             if self.artifact_re.match(href):
                 href, classes = self._expand_alink(href, is_link_with_brackets)
             el.set('href', self.unescape(href.strip()))
@@ -397,7 +398,7 @@ def handleMatch(self, m):
             text = el.text
             el = etree.Element('span')
             el.text = '[%s]' % text
-        return el
+        return el, m.start(0), m.end(0)
 
     def _expand_alink(self, link, is_link_with_brackets):
         '''Return (href, classes) for an artifact link'''
@@ -426,17 +427,17 @@ def _expand_alink(self, link, is_link_with_brackets):
         return href, classes
 
 
-class ForgeMacroPattern(markdown.inlinepatterns.Pattern):
+class ForgeMacroPattern(markdown.inlinepatterns.InlineProcessor):
 
     def __init__(self, *args, **kwargs):
         self.ext = kwargs.pop('ext')
         self.macro = macro.parse(self.ext._macro_context)
         super().__init__(*args, **kwargs)
 
-    def handleMatch(self, m):
-        html = self.macro(m.group(2))
+    def handleMatch(self, m: re.Match[str], data: str) -> tuple[etree.Element 
| None, int | None, int | None]:
+        html = self.macro(m.group(1))
         placeholder = self.md.htmlStash.store(html)
-        return placeholder
+        return placeholder, m.start(0), m.end(0)
 
 
 class ForgeLinkTreeProcessor(markdown.treeprocessors.Treeprocessor):
@@ -546,17 +547,10 @@ def run(self, text):
         return s.render(walker)
 
 
-class AutolinkPattern(markdown.inlinepatterns.Pattern):
-
-    def __init__(self, pattern, markdown_instance=None):
-        super().__init__(pattern, markdown_instance)
-        # override the complete regex, requiring the preceding text (.*?) to 
end
-        # with whitespace or beginning of line "\s|^"
-        self.compiled_re = re.compile(r"^(.*?\s|^)%s(.*?)$" % pattern,
-                                      re.DOTALL | re.UNICODE)
+class AutolinkPattern(markdown.inlinepatterns.InlineProcessor):
 
-    def handleMatch(self, mo):
-        old_link = mo.group(2)
+    def handleMatch(self, m: re.Match[str], data: str) -> tuple[etree.Element 
| None, int | None, int | None]:
+        old_link = m.group(1)
         result = etree.Element('a')
         result.text = old_link
         # since this is run before the builtin 'escape' processor, we have to
@@ -564,7 +558,7 @@ def handleMatch(self, mo):
         for char in self.md.ESCAPED_CHARS:
             old_link = old_link.replace('\\' + char, char)
         result.set('href', old_link)
-        return result
+        return result, m.start(0), m.end(0)
 
 
 class ForgeMacroIncludePreprocessor(markdown.preprocessors.Preprocessor):

(allura) 04/04: [#8588] convert all markdown.inlinepatterns.Pattern to markdown.inlinepatterns.InlineProcessor based on https://github.com/Python-Markdown/markdown/commit/d18c3d0acab0e7469c3284c897afcb61f9dd1fea

Reply via email to