[allura] 12/12: [#8410] markdown regex improvement

kentontaylor Fri, 11 Feb 2022 09:36:56 -0800

This is an automated email from the ASF dual-hosted git repository.

kentontaylor pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/allura.git


commit 406871119348e7b05d1190b58120178106431a79
Author: Dave Brondsema <[email protected]>
AuthorDate: Wed Feb 9 13:50:20 2022 -0500

    [#8410] markdown regex improvement
---
 Allura/allura/lib/markdown_extensions.py |  6 +++-
 Allura/allura/tests/test_globals.py      | 51 ++++++++++++++++++++++++++++++++
 2 files changed, 56 insertions(+), 1 deletion(-)

diff --git a/Allura/allura/lib/markdown_extensions.py b/Allura/allura/lib/markdown_extensions.py
index 27303c0..e3844f8 100644
--- a/Allura/allura/lib/markdown_extensions.py
+++ b/Allura/allura/lib/markdown_extensions.py
@@ -50,7 +50,8 @@ MACRO_PATTERN = r'\[\[([^\]\[]+)\]\]'
 SHORT_REF_RE = markdown.inlinepatterns.NOIMG + r'\[([^\]]+)\]'
 
 # FORGE_LINK_RE copied from markdown pre 3.0's LINK_RE
-NOBRACKET = r'[^\]\[]*'
+# TODO: replace these with newer approach, see ForgeLinkPattern
+NOBRACKET = r'[^\]\[]{0,50}'  # "*" changed to {0,50} for performance mitigation
 BRK = (
     r'\[(' +
     (NOBRACKET + r'(\[')*6 +
@@ -344,6 +345,9 @@ class UserMentionInlinePattern(markdown.inlinepatterns.Pattern):
 
 
 class ForgeLinkPattern(markdown.inlinepatterns.Pattern):
+    # TODO: convert from extending Pattern to extending InlineProcessor
+    #  which is how core Markdown library in 3.0 made its base link parsing much faster.
+    # https://github.com/Python-Markdown/markdown/commit/d18c3d0acab0e7469c3284c897afcb61f9dd1fea
 
     artifact_re = re.compile(r'((.*?):)?((.*?):)?(.+)')
 
diff --git a/Allura/allura/tests/test_globals.py b/Allura/allura/tests/test_globals.py
index fa9572f..3974764 100644
--- a/Allura/allura/tests/test_globals.py
+++ b/Allura/allura/tests/test_globals.py
@@ -608,6 +608,57 @@ def test_markdown_invalid_script_in_link2():
                  'rel="nofollow">xss</a></p></div>', r)
 
 
+def test_markdown_extremely_slow():
+    r = g.markdown.convert('''bonjour, voila ce que j'obtient en voulant ajouter un utilisateur a un groupe de sécurite, que ce soit sur un groupe pre-existant, ou sur un groupe crée.
+message d'erreur:
+
+ERROR: Could not complete the Add UserLogin To SecurityGroup 
[file:/C:/neogia/ofbizNeogia/applications/securityext/script/org/ofbiz/securityext/securitygroup/SecurityGroupServices.xml#addUserLoginToSecurityGroup]
 process [problem creating the newEntity value: Exception while inserting the 
following entity: 
[GenericEntity:UserLoginSecurityGroup][createdStamp,2006-01-23 
17:42:39.312(java.sql.Timestamp)][createdTxStamp,2006-01-23 
17:42:38.875(java.sql.Timestamp)][fromDate,2006-01-23 17:42:3 [...]
+
+à priori les données du formulaire ne sont pas traitées : VALUES (?, ?, ?, ?, 
?, ?, ?, ?) ce qui entraine l'echec du traitement SQL.
+
+
+Si une idée vous vient à l'esprit, merci de me tenir au courant.
+
+cordialement, julien.''')
+    assert True   # finished!
+
+
+@td.with_tool('test', 'Wiki', 'wiki-len')
+def test_markdown_link_length_limits():
+    with h.push_context('test', 'wiki-len', neighborhood='Projects'):
+        # these are always ok, no matter the NOBRACKET length
+        WM.Page.upsert(title='12345678901').commit()
+        text = g.markdown.convert('See [12345678901]')
+        assert 'href="/p/test/wiki-len/12345678901/">[12345678901]</a>' in text, text
+        WM.Page.upsert(title='this is 26 characters long').commit()
+        text = g.markdown.convert('See [this is 26 characters long]')
+        assert 'href="/p/test/wiki-len/this%20is%2026%20characters%20long/">[this is 26 characters long]</a>' in text, text
+
+        # NOBRACKET regex length impacts standard markdown links
+        text = g.markdown.convert('See [short](http://a.de)')
+        assert 'href="http://a.de" rel="nofollow">short</a>' in text, text
+        text = g.markdown.convert('See [this is 26 characters long](http://a.de)')
+        assert 'href="http://a.de" rel="nofollow">this is 26 characters long</a>' in text, text  # {0,12} fails {0,13} ok
+
+        # NOBRACKET regex length impacts our custom artifact links
+        text = g.markdown.convert('See [short](Home)')
+        assert 'href="/p/test/wiki-len/Home/">short</a>' in text, text
+        text = g.markdown.convert('See [123456789](Home)')
+        assert 'href="/p/test/wiki-len/Home/">123456789</a>' in text, text
+        text = g.markdown.convert('See [12345678901](Home)')
+        assert 'href="/p/test/wiki-len/Home/">12345678901</a>' in text, text  # {0,5} fails, {0,6} ok
+        text = g.markdown.convert('See [this is 16 chars](Home)')
+        assert 'href="/p/test/wiki-len/Home/">this is 16 chars</a>' in text, text  # {0,7} fails {0,8} ok
+        text = g.markdown.convert('See [this is 26 characters long](Home)')
+        assert 'href="/p/test/wiki-len/Home/">this is 26 characters long</a>' in text, text  # {0,12} fails {0,13} ok
+
+        # breaking point, currently.  Would be nice if this worked and made a real link:
+        char110long = '1234567890'*11
+        text = g.markdown.convert(f'See [{char110long}](Home)')
+        assert f'<span>[{char110long}]</span>(Home)' in text, text  # current limitation, not a link
+        # assert f'href="/p/test/wiki-len/Home/">{char110long}</a>' in text, text  # ideal output
+
+
 @td.with_wiki
 def test_macro_include():
     r = g.markdown.convert('[[include ref=Home id=foo]]')

[allura] 12/12: [#8410] markdown regex improvement

Reply via email to