C. Scott Ananian has uploaded a new change for review. ( https://gerrit.wikimedia.org/r/393908 )
Change subject: Properly handle short headings ...................................................................... Properly handle short headings This is a minor issue, but it causes some test case failures which would otherwise get blamed on I12b2a148f7170d20bd9aacd3b5b8ee1965859592. Bug: T21910 Change-Id: I11926f2d2365755794d8f8f6647b1f0b02b827ab --- M lib/wt2html/pegTokenizer.pegjs M tests/parserTests-blacklist.js M tests/parserTests.txt 3 files changed, 64 insertions(+), 10 deletions(-) git pull ssh://gerrit.wikimedia.org:29418/mediawiki/services/parsoid refs/changes/08/393908/1 diff --git a/lib/wt2html/pegTokenizer.pegjs b/lib/wt2html/pegTokenizer.pegjs index 3dfa339..3f7dfcd 100644 --- a/lib/wt2html/pegTokenizer.pegjs +++ b/lib/wt2html/pegTokenizer.pegjs @@ -489,14 +489,25 @@ r:( s:$'='+ // moved in here to make s accessible to inner action & { return stops.inc('h'); } - c:nested_block_line - e:$'='+ + ce:( + nested_block_line + $'='+ + )? endTPos:("" { return endOffset(); }) spc:(spaces / comment)* + & { stops.dec('h'); return ce || s.length > 2; } &eolf { - stops.dec('h'); - var level = Math.min(s.length, e.length); + var c = ce ? ce[0] : ''; + var e = ce ? ce[1] : ''; + var level; + if (!ce) { + // split up heading + level = (s.length - 1) >>> 1; + c = '='.repeat(s.length - 2*level); + s = e = '='.repeat(level); + } + level = Math.min(s.length, e.length); level = Math.min(6, level); // convert surplus equals into text if (s.length > level) { diff --git a/tests/parserTests-blacklist.js b/tests/parserTests-blacklist.js index 9b7e84c..e01d540 100644 --- a/tests/parserTests-blacklist.js +++ b/tests/parserTests-blacklist.js @@ -146,7 +146,6 @@ add("wt2html", "TOC with wgMaxTocLevel=3 (T8204)", "<h2 id=\"title_1\" data-parsoid='{\"dsr\":[0,13,2,2]}'> title 1 </h2>\n<h3 id=\"title_1.1\" data-parsoid='{\"dsr\":[14,31,3,3]}'> title 1.1 </h3>\n<h4 id=\"title_1.1.1\" data-parsoid='{\"dsr\":[32,53,4,4]}'> title 1.1.1 </h4>\n<h3 id=\"title_1.2\" data-parsoid='{\"dsr\":[54,71,3,3]}'> title 1.2 </h3>\n<h2 id=\"title_2\" data-parsoid='{\"dsr\":[72,85,2,2]}'> title 2 </h2>\n<h3 id=\"title_2.1\" data-parsoid='{\"dsr\":[86,103,3,3]}'> title 2.1 </h3>"); add("wt2html", "TOC with wgMaxTocLevel=3 and two level four headings (T8204)", "<h2 id=\"Section_1\" data-parsoid='{\"dsr\":[0,13,2,2]}'>Section 1</h2>\n<h3 id=\"Section_1.1\" data-parsoid='{\"dsr\":[14,31,3,3]}'>Section 1.1</h3>\n<h4 id=\"Section_1.1.1\" data-parsoid='{\"dsr\":[32,53,4,4]}'>Section 1.1.1</h4>\n<h4 id=\"Section_1.1.1.1\" data-parsoid='{\"dsr\":[54,77,4,4]}'>Section 1.1.1.1</h4>\n<h2 id=\"Section_2\" data-parsoid='{\"dsr\":[78,91,2,2]}'>Section 2</h2>"); add("wt2html", "TOC regression (T14077)", "<meta property=\"mw:PageProp/toc\" data-parsoid='{\"src\":\"__TOC__\",\"magicSrc\":\"__TOC__\",\"dsr\":[0,7,null,null]}'/>\n<h2 id=\"title_1\" data-parsoid='{\"dsr\":[8,21,2,2]}'> title 1 </h2>\n<h3 id=\"title_1.1\" data-parsoid='{\"dsr\":[22,39,3,3]}'> title 1.1 </h3>\n<h2 id=\"title_2\" data-parsoid='{\"dsr\":[40,53,2,2]}'> title 2 </h2>"); -add("wt2html", "Short headings with trailing space should match behavior of Parser::doHeadings (T21910)", "<p data-parsoid='{\"dsr\":[0,100,0,0]}'>=== \nThe line above must have a trailing space!\n=== <!--\n--> <!-- -->\nBut just in case it doesn't...</p>"); add("wt2html", "Header with special characters (T27462)", "<p data-parsoid='{\"dsr\":[0,72,0,0]}'>The tooltips shall not show entities to the user (ie. be double escaped)</p>\n\n<h2 id=\"text_.3E_text\" data-parsoid='{\"dsr\":[74,91,2,2]}'> text > text </h2>\n<p data-parsoid='{\"dsr\":[92,101,0,0]}'>section 1</p>\n\n<h2 id=\"text_.3C_text\" data-parsoid='{\"dsr\":[103,120,2,2]}'> text < text </h2>\n<p data-parsoid='{\"dsr\":[121,130,0,0]}'>section 2</p>\n\n<h2 id=\"text_.26_text\" data-parsoid='{\"dsr\":[132,149,2,2]}'> text & text </h2>\n<p data-parsoid='{\"dsr\":[150,159,0,0]}'>section 3</p>\n\n<h2 id=\"text_.27_text\" data-parsoid='{\"dsr\":[161,178,2,2]}'> text ' text </h2>\n<p data-parsoid='{\"dsr\":[179,188,0,0]}'>section 4</p>\n\n<h2 id=\"text_.22_text\" data-parsoid='{\"dsr\":[190,207,2,2]}'> text \" text </h2>\n<p data-parsoid='{\"dsr\":[208,217,0,0]}'>section 5</p>"); add("wt2html", "Header with space, plus and underscore as entity", "<p data-parsoid='{\"dsr\":[0,34,0,0]}'>Id should not contain + for spaces</p>\n\n<h2 id=\"Space_between_Text\" data-parsoid='{\"dsr\":[36,60,2,2]}'> Space between Text </h2>\n<p data-parsoid='{\"dsr\":[61,70,0,0]}'>section 1</p>\n\n<h2 id=\"Space-Entity_between_Text\" data-parsoid='{\"dsr\":[72,111,2,2]}'> Space-Entity<span typeof=\"mw:Entity\" data-parsoid='{\"src\":\"&#32;\",\"srcContent\":\" \",\"dsr\":[87,92,null,null]}'> </span>between<span typeof=\"mw:Entity\" data-parsoid='{\"src\":\"&#32;\",\"srcContent\":\" \",\"dsr\":[99,104,null,null]}'> </span>Text </h2>\n<p data-parsoid='{\"dsr\":[112,121,0,0]}'>section 2</p>\n\n<h2 id=\"Plus.2Bbetween.2BText\" data-parsoid='{\"dsr\":[123,146,2,2]}'> Plus+between+Text </h2>\n<p data-parsoid='{\"dsr\":[147,156,0,0]}'>section 3</p>\n\n<h2 id=\"Plus-Entity.2Bbetween.2BText\" data-parsoid='{\"dsr\":[158,196,2,2]}'> Plus-Entity<span typeof=\"mw:Entity\" data-parsoid='{\"src\":\"&#43;\",\"srcContent\":\"+\",\"dsr\":[172,177,null,null]}'>+</span>between<span typeof=\"mw:Entity\" data-parsoid='{\"src\":\"&#43;\",\"srcContent\":\"+\",\"dsr\":[184,189,null,null]}'>+</span>Text </h2>\n<p data-parsoid='{\"dsr\":[197,206,0,0]}'>section 4</p>\n\n<h2 id=\"Underscore_between_Text\" data-parsoid='{\"dsr\":[208,237,2,2]}'> Underscore_between_Text </h2>\n<p data-parsoid='{\"dsr\":[238,247,0,0]}'>section 5</p>\n\n<h2 id=\"Underscore-Entity_between_Text\" data-parsoid='{\"dsr\":[249,293,2,2]}'> Underscore-Entity<span typeof=\"mw:Entity\" data-parsoid='{\"src\":\"&#95;\",\"srcContent\":\"_\",\"dsr\":[269,274,null,null]}'>_</span>between<span typeof=\"mw:Entity\" data-parsoid='{\"src\":\"&#95;\",\"srcContent\":\"_\",\"dsr\":[281,286,null,null]}'>_</span>Text </h2>\n<p data-parsoid='{\"dsr\":[294,303,0,0]}'>section 6</p>\n\n<p data-parsoid='{\"dsr\":[305,501,0,0]}'><a rel=\"mw:WikiLink\" href=\"./Main_Page#Space_between_Text\" data-parsoid='{\"stx\":\"simple\",\"a\":{\"href\":\"./Main_Page#Space_between_Text\"},\"sa\":{\"href\":\"#Space between Text\"},\"dsr\":[305,328,2,2]}'>#Space between Text</a>\n<a rel=\"mw:WikiLink\" href=\"./Main_Page#Space-Entity_between_Text\" data-parsoid='{\"stx\":\"simple\",\"a\":{\"href\":\"./Main_Page#Space-Entity_between_Text\"},\"sa\":{\"href\":\"#Space-Entity&#32;between&#32;Text\"},\"dsr\":[329,367,2,2]}'>#Space-Entity between Text</a>\n<a rel=\"mw:WikiLink\" href=\"./Main_Page#Plus.2Bbetween.2BText\" data-parsoid='{\"stx\":\"simple\",\"a\":{\"href\":\"./Main_Page#Plus.2Bbetween.2BText\"},\"sa\":{\"href\":\"#Plus+between+Text\"},\"dsr\":[368,390,2,2]}'>#Plus+between+Text</a>\n<a rel=\"mw:WikiLink\" href=\"./Main_Page#Plus-Entity.2Bbetween.2BText\" data-parsoid='{\"stx\":\"simple\",\"a\":{\"href\":\"./Main_Page#Plus-Entity.2Bbetween.2BText\"},\"sa\":{\"href\":\"#Plus-Entity&#43;between&#43;Text\"},\"dsr\":[391,428,2,2]}'>#Plus-Entity+between+Text</a>\n<a rel=\"mw:WikiLink\" href=\"./Main_Page#Underscore_between_Text\" data-parsoid='{\"stx\":\"simple\",\"a\":{\"href\":\"./Main_Page#Underscore_between_Text\"},\"sa\":{\"href\":\"#Underscore_between_Text\"},\"dsr\":[429,457,2,2]}'>#Underscore_between_Text</a>\n<a rel=\"mw:WikiLink\" href=\"./Main_Page#Underscore-Entity_between_Text\" data-parsoid='{\"stx\":\"simple\",\"a\":{\"href\":\"./Main_Page#Underscore-Entity_between_Text\"},\"sa\":{\"href\":\"#Underscore-Entity&#95;between&#95;Text\"},\"dsr\":[458,501,2,2]}'>#Underscore-Entity_between_Text</a></p>"); add("wt2html", "Headers with excess '=' characters\n(Are similar tests necessary beyond the 1st level?)", "<h1 id=\"foo.3D\" data-parsoid='{\"dsr\":[0,6,1,1]}'>foo=</h1>\n<h1 id=\".3Dfoo\" data-parsoid='{\"dsr\":[7,13,1,1]}'>=foo</h1>\n<h1 id=\"italic_heading.3D\" data-parsoid='{\"dsr\":[14,35,1,1]}'><i data-parsoid='{\"dsr\":[15,25,2,2]}'>italic</i> heading=</h1>\n<h1 id=\".3Ditalic_heading\" data-parsoid='{\"dsr\":[36,57,1,1]}'>=<i data-parsoid='{\"dsr\":[38,48,2,2]}'>italic</i> heading</h1>"); @@ -862,7 +861,6 @@ add("html2wt", "__NOEDITSECTION__ keyword", "== Section 1 ==\n\n== Section 2 ==\n"); add("html2wt", "Link inside a section heading", "== Section with a [[wiki/Main Page|link]] in it ==\n"); add("html2wt", "TOC regression (T14077)", "<div id=\"toc\" class=\"toc\"><div class=\"toctitle\">\n== Contents ==\n</div>\n\n* [[#title_1|<span class=\"tocnumber\">1</span> <span class=\"toctext\">title 1</span>]]\n\n** [[#title_1.1|<span class=\"tocnumber\">1.1</span> <span class=\"toctext\">title 1.1</span>]]\n* [[#title_2|<span class=\"tocnumber\">2</span> <span class=\"toctext\">title 2</span>]]\n\n</div>\n\n== title 1 ==\n\n=== title 1.1 ===\n\n== title 2 ==\n"); -add("html2wt", "Short headings with trailing space should match behavior of Parser::doHeadings (T21910)", "= = =\nThe line above must have a trailing space!\n\n= = =\nBut just in case it doesn't...\n"); add("html2wt", "Header with special characters (T27462)", "The tooltips shall not show entities to the user (ie. be double escaped)\n\n<div id=\"toc\" class=\"toc\"><div class=\"toctitle\">\n== Contents ==\n</div>\n\n* [[#text_.3E_text|<span class=\"tocnumber\">1</span> <span class=\"toctext\">text > text</span>]]\n* [[#text_.3C_text|<span class=\"tocnumber\">2</span> <span class=\"toctext\">text < text</span>]]\n* [[#text_.26_text|<span class=\"tocnumber\">3</span> <span class=\"toctext\">text & text</span>]]\n* [[#text_.27_text|<span class=\"tocnumber\">4</span> <span class=\"toctext\">text ' text</span>]]\n* [[#text_.22_text|<span class=\"tocnumber\">5</span> <span class=\"toctext\">text \" text</span>]]\n\n</div>\n\n== text > text ==\nsection 1\n\n== text < text ==\nsection 2\n\n== text & text ==\nsection 3\n\n== text ' text ==\nsection 4\n\n== text \" text ==\nsection 5\n"); add("html2wt", "Header with space, plus and underscore as entity", "Id should not contain + for spaces\n\n<div id=\"toc\" class=\"toc\"><div class=\"toctitle\">\n== Contents ==\n</div>\n\n* [[#Space_between_Text|<span class=\"tocnumber\">1</span> <span class=\"toctext\">Space between Text</span>]]\n* [[#Space-Entity_between_Text|<span class=\"tocnumber\">2</span> <span class=\"toctext\">Space-Entity between Text</span>]]\n* [[#Plus.2Bbetween.2BText|<span class=\"tocnumber\">3</span> <span class=\"toctext\">Plus+between+Text</span>]]\n* [[#Plus-Entity.2Bbetween.2BText|<span class=\"tocnumber\">4</span> <span class=\"toctext\">Plus-Entity+between+Text</span>]]\n* [[#Underscore_between_Text|<span class=\"tocnumber\">5</span> <span class=\"toctext\">Underscore_between_Text</span>]]\n* [[#Underscore-Entity_between_Text|<span class=\"tocnumber\">6</span> <span class=\"toctext\">Underscore-Entity_between_Text</span>]]\n\n</div>\n\n== Space between Text ==\nsection 1\n\n== Space-Entity between Text ==\nsection 2\n\n== Plus+between+Text ==\nsection 3\n\n== Plus-Entity+between+Text ==\nsection 4\n\n== Underscore_between_Text ==\nsection 5\n\n== Underscore-Entity_between_Text ==\nsection 6\n\n[[#Space_between_Text|#Space between Text]]\n[[#Space-Entity_between_Text|#Space-Entity between Text]]\n[[#Plus.2Bbetween.2BText|#Plus+between+Text]]\n[[#Plus-Entity.2Bbetween.2BText|#Plus-Entity+between+Text]]\n[[#Underscore_between_Text|#Underscore_between_Text]]\n[[#Underscore-Entity_between_Text|#Underscore-Entity_between_Text]]\n"); add("html2wt", "Headers with excess '=' characters\n(Are similar tests necessary beyond the 1st level?)", "<div id=\"toc\" class=\"toc\"><div class=\"toctitle\">\n== Contents ==\n</div>\n\n* [[#foo.3D|<span class=\"tocnumber\">1</span> <span class=\"toctext\">foo=</span>]]\n* [[#.3Dfoo|<span class=\"tocnumber\">2</span> <span class=\"toctext\">=foo</span>]]\n* [[#italic_heading.3D|<span class=\"tocnumber\">3</span> <span class=\"toctext\">''italic'' heading=</span>]]\n* [[#.3Ditalic_heading|<span class=\"tocnumber\">4</span> <span class=\"toctext\">=''italic'' heading</span>]]\n\n</div>\n\n= foo= =\n\n= =foo =\n\n= ''italic'' heading= =\n\n= =''italic'' heading =\n"); diff --git a/tests/parserTests.txt b/tests/parserTests.txt index b1faa82..3d6e6a9 100644 --- a/tests/parserTests.txt +++ b/tests/parserTests.txt @@ -16866,21 +16866,30 @@ <p><a rel="mw:ExtLink" href="http://example.com">http://example.com</a> <figure-inline class="mw-default-size" typeof="mw:Image"><a href="./File:Foobar.jpg"><img resource="./File:Foobar.jpg" src="//example.com/images/3/3a/Foobar.jpg" data-file-width="1941" data-file-height="220" data-file-type="bitmap" height="220" width="1941"/></a></figure-inline></p> !!end +# Parsoid doesn't wt2wt this cleanly because it adds <nowiki>s. !! test Short headings with trailing space should match behavior of Parser::doHeadings (T21910) +!! options +parsoid=wt2html,html2html !! wikitext === The line above must have a trailing space! === <!-- --> <!-- --> But just in case it doesn't... -!! html +!! html/php <h1><span class="mw-headline" id=".3D">=</span><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/index.php?title=Parser_test&action=edit&section=1" title="Edit section: =">edit</a><span class="mw-editsection-bracket">]</span></span></h1> <p>The line above must have a trailing space! </p> <h1><span class="mw-headline" id=".3D_2">=</span><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/index.php?title=Parser_test&action=edit&section=2" title="Edit section: =">edit</a><span class="mw-editsection-bracket">]</span></span></h1> <p>But just in case it doesn't... </p> +!! html/parsoid +<h1 id=".3D">=</h1> +<p>The line above must have a trailing space!</p> +<h1 id=".3D_2">=</h1> <!-- +--> <!-- --> +<p>But just in case it doesn't...</p> !! end !! test @@ -24845,17 +24854,53 @@ !! options parsoid=html2wt !! html/parsoid -<p>=== -=foo= x +<p>=foo= x =foo= <s></s> </p> !! wikitext -=== =foo= x =foo= <s></s> +!! html/php +<p>=foo= x +=foo= <s></s> +</p> !!end !! test +Headings: 4c. Short headings (1) +!! options +parsoid=html2wt +!! html/parsoid +<p>=== +</p> +!! wikitext +<nowiki>===</nowiki> +!! html/php +<p>=== +</p> +!! end + +# in the html2wt direction we emit '= = =' or '=<nowiki>=</nowiki>=' +!! test +Headings: 4d. Short headings (2) +!! options +parsoid=wt2html,html2html +!! wikitext +=== +==== +===== +!! html/php +<h1><span class="mw-headline" id=".3D">=</span><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/index.php?title=Parser_test&action=edit&section=1" title="Edit section: =">edit</a><span class="mw-editsection-bracket">]</span></span></h1> +<h1><span class="mw-headline" id=".3D.3D">==</span><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/index.php?title=Parser_test&action=edit&section=2" title="Edit section: ==">edit</a><span class="mw-editsection-bracket">]</span></span></h1> +<h2><span class="mw-headline" id=".3D_2">=</span><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/index.php?title=Parser_test&action=edit&section=3" title="Edit section: =">edit</a><span class="mw-editsection-bracket">]</span></span></h2> + +!! html/parsoid +<h1 id=".3D">=</h1> +<h1 id=".3D.3D">==</h1> +<h2 id=".3D_2">=</h2> +!! end + +!! test Headings: 5. Empty headings !! options parsoid=html2wt -- To view, visit https://gerrit.wikimedia.org/r/393908 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: newchange Gerrit-Change-Id: I11926f2d2365755794d8f8f6647b1f0b02b827ab Gerrit-PatchSet: 1 Gerrit-Project: mediawiki/services/parsoid Gerrit-Branch: master Gerrit-Owner: C. Scott Ananian <canan...@wikimedia.org> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits