jenkins-bot has submitted this change and it was merged.

Change subject: T71950: Output <nowiki> closer to quotes
......................................................................


T71950: Output <nowiki> closer to quotes

Previously, if text contained non-quote escapable chars, it was
fully wrapped in <nowiki>s if necessary, making the tags appear
away from the actual reason for it. With this patch, the code
tries to put them closer to the causing quotes.

Also added some changes for the heuristics to calculate when
two <nowikis> can be merged:
* Ignore display hacks.
* Increase the amount of text allowed inside the same nowiki to 40.

Blacklist changed to reflect new results' outputs. A new test for
nowiki positioning after a ' :' (space followed by colon) was
blacklisted due to parserTests.js detecting some difference when
there shouldn't be.

Change-Id: I576aaf4bcad8931a6cc15d89fb4447125d5f539f
---
M lib/pegTokenizer.pegjs.txt
M lib/wts.escapeWikitext.js
M tests/parserTests-blacklist.js
M tests/parserTests.txt
4 files changed, 47 insertions(+), 11 deletions(-)

Approvals:
  Subramanya Sastry: Looks good to me, approved
  jenkins-bot: Verified



diff --git a/lib/pegTokenizer.pegjs.txt b/lib/pegTokenizer.pegjs.txt
index 5eb50ae..4edf061 100644
--- a/lib/pegTokenizer.pegjs.txt
+++ b/lib/pegTokenizer.pegjs.txt
@@ -1898,7 +1898,7 @@
               return [
                   new TagTk( 'span', [ new KV( 'typeof', 'mw:Placeholder' ) ], 
{ src: ' ', tsr: [peg$reportedPos, peg$reportedPos], isDisplayHack: true } ),
                   "\u00a0",
-                  new EndTagTk( 'span', [], { tsr: [peg$currPos, peg$currPos] 
} )
+                  new EndTagTk( 'span', [], { tsr: [peg$currPos, peg$currPos], 
isDisplayHack: true } )
               ];
           }
           / & ('__') bs:behavior_switch { return bs; }
diff --git a/lib/wts.escapeWikitext.js b/lib/wts.escapeWikitext.js
index 367007c..119fdf9 100644
--- a/lib/wts.escapeWikitext.js
+++ b/lib/wts.escapeWikitext.js
@@ -389,8 +389,8 @@
                        // Add nowikis intelligently
                        var smartNowikier = function(open, close, str, i, 
numToks) {
                                // Max length of string that gets 
"unnecessarily"
-                               // sucked into a nowiki (15 is an arbitrary 
number)
-                               var maxExcessWrapLength = 15;
+                               // sucked into a nowiki (40 is an arbitrary 
number)
+                               var maxExcessWrapLength = 40;
 
                                // If we are being asked to close a nowiki
                                // without opening one, we open a nowiki.
@@ -421,6 +421,12 @@
                                        tsr = (t.dataAttribs || {}).tsr;
 
                                // console.warn("SOL: " + sol + "; T[" + i + 
"]=" + JSON.stringify(t));
+
+                               // Ignore display hacks, so text like "A : B" 
doesn't produce
+                               // an unnecessary nowiki.
+                               if (t.dataAttribs && 
t.dataAttribs.isDisplayHack) {
+                                       continue;
+                               }
 
                                switch (t.constructor) {
                                case String:
@@ -576,8 +582,11 @@
                        || (hasNonQuoteEscapableChars &&
                                this.hasWikitextTokens(state, sol, 
this.serializer.options, text)))
                {
-                       state.env.log("trace/wt-escape", "---quotes: full 
nowiki wrap---");
-                       return this.escapedText(state, false, text, true);
+                       state.env.log("trace/wt-escape", "---quotes: escaping 
text---");
+                       // If the reason for full wrap is that the text 
contains non-quote
+                       // escapable chars, it's still possible to minimize the 
contents
+                       // of the <nowiki> (T71950).
+                       return this.escapedText(state, sol, text, 
!hasNonQuoteEscapableChars);
                } else {
                        var quoteEscapedText = escapedIBSiblingNodeText(state, 
text, opts);
                        if (quoteEscapedText) {
diff --git a/tests/parserTests-blacklist.js b/tests/parserTests-blacklist.js
index 51c92de..280b3e2 100644
--- a/tests/parserTests-blacklist.js
+++ b/tests/parserTests-blacklist.js
@@ -550,7 +550,7 @@
 add("html2html", "3c. Indent-Pre and block tags (pre-content on separate 
line)", "\n<p data-parsoid='{\"dsr\":[1,22,0,0]}'><span typeof=\"mw:Nowiki\" 
data-parsoid='{\"dsr\":[1,19,8,9]}'> </span>foo</p>\n\n<div 
data-parsoid='{\"stx\":\"html\",\"dsr\":[24,42,5,6]}'>\n<pre 
data-parsoid='{\"dsr\":[30,34,1,0]}'>foo</pre>\n\n</div>\n<center 
data-parsoid='{\"stx\":\"html\",\"dsr\":[43,67,8,9]}'>\n<pre 
data-parsoid='{\"dsr\":[52,56,1,0]}'>foo</pre>\n\n</center>\n<blockquote 
data-parsoid='{\"stx\":\"html\",\"dsr\":[68,100,12,13]}'>\n<p 
data-parsoid='{\"dsr\":[81,85,0,0]}'> foo</p>\n\n</blockquote>\n<blockquote 
data-parsoid='{\"stx\":\"html\",\"dsr\":[101,133,12,13]}'>\n<p 
data-parsoid='{\"dsr\":[114,118,0,0]}'> foo</p>\n\n</blockquote>\n<table 
data-parsoid='{\"dsr\":[134,147,2,2]}'>\n<tbody 
data-parsoid='{\"dsr\":[137,145,0,0]}'><tr 
data-parsoid='{\"autoInsertedEnd\":true,\"autoInsertedStart\":true,\"dsr\":[137,143,0,0]}'><td
 data-parsoid='{\"autoInsertedEnd\":true,\"dsr\":[137,143,1,0]}'>\n<pre 
data-parsoid='{\"dsr\":[139,143,1,0]}'>foo</pre></td></tr>\n\n</tbody></table>\n<ul
 data-parsoid='{\"dsr\":[148,154,0,0]}'><li 
data-parsoid='{\"dsr\":[148,154,1,0]}'>  foo</li></ul>\n");
 add("html2html", "4. Indent-Pre and extension tags", "<p 
data-parsoid='{\"dsr\":[0,1,0,0]}'>a</p>\n\n<ul 
data-parsoid='{\"dsr\":[3,30,0,0]}'><li 
data-parsoid='{\"dsr\":[3,30,1,0]}'><div style=\"width: 155px\" 
data-parsoid='{\"stx\":\"html\",\"autoInsertedEnd\":true,\"dsr\":[4,30,26,0]}'></div></li></ul>\n<div
 class=\"thumb\" style=\"width: 150px;\" 
data-parsoid='{\"stx\":\"html\",\"dsr\":[31,181,41,6]}'>\n<div 
style=\"margin:68px auto;\" 
data-parsoid='{\"stx\":\"html\",\"dsr\":[73,174,31,6]}'><img 
src=\"http://example.com/images/thumb/3/3a/Foobar.jpg/120px-Foobar.jpg\"; 
alt=\"120px-Foobar.jpg\" rel=\"mw:externalImage\" 
data-parsoid='{\"dsr\":[104,168,null,null]}'/></div>\n</div>\n<div 
class=\"gallerytext\" 
data-parsoid='{\"stx\":\"html\",\"dsr\":[182,213,25,6]}'></div>\n\n");
 add("html2html", "5a. White-space in indent-pre", "<pre 
data-parsoid='{\"dsr\":[0,13,1,0]}'>a<br 
data-parsoid='{\"stx\":\"html\",\"noClose\":true,\"dsr\":[2,8,4,null]}'/>\n\n\nb</pre>\n");
-add("html2html", "HTML-pre: 3: other wikitext", "<pre 
data-parsoid='{\"dsr\":[0,71,1,0]}'><span typeof=\"mw:Nowiki\" 
data-parsoid='{\"dsr\":[1,71,8,9]}'>* foo\n # bar\n = no-h =\n '' no-italic 
''\n [[ NoLink ]]</span></pre>\n");
+add("html2html", "HTML-pre: 3: other wikitext", "<pre 
data-parsoid='{\"dsr\":[0,71,1,0]}'>* foo\n<span typeof=\"mw:Nowiki\" 
data-parsoid='{\"dsr\":[8,71,8,9]}'># bar\n = no-h =\n '' no-italic ''\n [[ 
NoLink ]]</span></pre>\n");
 add("html2html", "Definition list with wikilink containing colon", "<dl 
data-parsoid='{\"dsr\":[0,95,0,0]}'><dt data-parsoid='{\"dsr\":[0,24,1,0]}'> 
[/index.php?title=Help</dt><dd 
data-parsoid='{\"stx\":\"row\",\"dsr\":[24,60,1,0]}'>FAQ&amp;action=edit&amp;redlink=1
 Help:FAQ]</dd>\n<dd data-parsoid='{\"dsr\":[61,95,1,0]}'> The least-read page 
on Wikipedia</dd></dl>\n");
 add("html2html", "Definition lists: colon in HTML attribute", "<dl 
data-parsoid='{\"dsr\":[0,12,0,0]}'><dt data-parsoid='{\"dsr\":[0,12,1,0]}'> <b 
data-parsoid='{\"dsr\":[2,12,3,3]}'>bold</b></dt></dl>\n");
 add("html2html", "Definition Lists: Nesting: Multi-level (Parsoid only)", 
"\n<dl data-parsoid='{\"dsr\":[1,116,0,0]}'><dt 
data-parsoid='{\"dsr\":[1,6,1,0]}'> t1 </dt>\n<dd 
data-parsoid='{\"dsr\":[7,17,1,0]}'> d1      </dd>\n<dt 
data-parsoid='{\"dsr\":[18,116,1,0]}'><dl 
data-parsoid='{\"dsr\":[19,116,0,0]}'><dt data-parsoid='{\"dsr\":[19,24,1,0]}'> 
t2 </dt>\n<dd data-parsoid='{\"dsr\":[25,62,1,0]}'> <span typeof=\"mw:Nowiki\" 
data-parsoid='{\"dsr\":[28,48,8,9]}'>:d2</span>              </dd>\n<dt 
data-parsoid='{\"dsr\":[63,116,2,0]}'><dl 
data-parsoid='{\"dsr\":[65,116,0,0]}'><dt data-parsoid='{\"dsr\":[65,70,1,0]}'> 
t3 </dt>\n<dd data-parsoid='{\"dsr\":[71,116,1,0]}'> <span typeof=\"mw:Nowiki\" 
data-parsoid='{\"dsr\":[75,96,8,9]}'>::d3</span>                    
</dd></dl></dt></dl></dt></dl>\n");
@@ -769,7 +769,7 @@
 add("html2html", "Section replacement test (section 8)", "<p 
data-parsoid='{\"dsr\":[0,105,0,0]}'>start\n<span typeof=\"mw:Nowiki\" 
data-parsoid='{\"dsr\":[6,105,8,9]}'>==a==\n===aa===\n====aaa====\n==b==\n===ba===\n===bb===\n====bba====\nxxx\n\n==c==\n===ca===</span></p>");
 add("html2html", "Section replacement test (section 9)", "<p 
data-parsoid='{\"dsr\":[0,98,0,0]}'>start\n<span typeof=\"mw:Nowiki\" 
data-parsoid='{\"dsr\":[6,98,8,9]}'>==a==\n===aa===\n====aaa====\n==b==\n===ba===\n===bb===\n====bba====\n===bc===\nxxx</span></p>");
 add("html2html", "Section replacement test (section 10)", "<p 
data-parsoid='{\"dsr\":[0,104,0,0]}'>start\n<span typeof=\"mw:Nowiki\" 
data-parsoid='{\"dsr\":[6,104,8,9]}'>==a==\n===aa===\n====aaa====\n==b==\n===ba===\n===bb===\n====bba====\n===bc===\n==c==\nxxx</span></p>");
-add("html2html", "Section replacement test with initial whitespace (bug 
13728)", "<p data-parsoid='{\"dsr\":[0,70,0,0]}'><span typeof=\"mw:Nowiki\" 
data-parsoid='{\"dsr\":[0,18,8,9]}'> </span>Preformatted initial line\n<span 
typeof=\"mw:Nowiki\" 
data-parsoid='{\"dsr\":[44,70,8,9]}'>==a==\nxxx</span></p>");
+add("html2html", "Section replacement test with initial whitespace (bug 
13728)", "<p data-parsoid='{\"dsr\":[0,53,0,0]}'><span typeof=\"mw:Nowiki\" 
data-parsoid='{\"dsr\":[0,53,8,9]}'> Preformatted initial 
line\n==a==\nxxx</span></p>");
 add("html2html", "Section extraction, heading followed by pre with 20 spaces 
(bug 6398)", "<p data-parsoid='{\"dsr\":[0,44,0,0]}'><span typeof=\"mw:Nowiki\" 
data-parsoid='{\"dsr\":[0,24,8,9]}'>==a==\n </span>                   a</p>");
 add("html2html", "Section extraction, heading followed by pre with 19 spaces 
(bug 6398 sanity check)", "<p data-parsoid='{\"dsr\":[0,43,0,0]}'><span 
typeof=\"mw:Nowiki\" data-parsoid='{\"dsr\":[0,24,8,9]}'>==a==\n </span>        
          a</p>");
 add("html2html", "Section extraction, <pre> around bogus header (bug 10309)", 
"<p data-parsoid='{\"dsr\":[0,40,0,0]}'><span typeof=\"mw:Nowiki\" 
data-parsoid='{\"dsr\":[0,40,8,9]}'>== Section Two ==\nstuff</span></p>");
@@ -843,7 +843,7 @@
 add("html2html", "preload: check <noinclude> and <includeonly>", "<p 
data-parsoid='{\"dsr\":[0,17,0,0]}'>Hello kind world.</p>");
 add("html2html", "preload: check <onlyinclude>", "<p 
data-parsoid='{\"dsr\":[0,11,0,0]}'>Hello world</p>");
 add("html2html", "preload: can pass tags through if we want to", "<meta 
typeof=\"mw:Includes/IncludeOnly\" 
data-parsoid='{\"src\":\"&lt;includeonly>Hello 
world&lt;/includeonly>\",\"dsr\":[0,38,null,null]}'/><meta 
typeof=\"mw:Includes/IncludeOnly/End\" 
data-parsoid='{\"src\":\"\",\"dsr\":[38,38,null,null]}'/>");
-add("html2html", "preload: check that it doesn't try to do tricks", "<p 
data-parsoid='{\"dsr\":[0,118,0,0]}'><span typeof=\"mw:Nowiki\" 
data-parsoid='{\"dsr\":[0,18,8,9]}'>*</span> <!-- Hello --><span 
typeof=\"mw:Nowiki\" data-parsoid='{\"dsr\":[33,118,8,9]}'> ''{{world}}'' 
{{subst:How are you}}{{ {{{|safesubst:}}} #if:1|2|3}}</span></p>");
+add("html2html", "preload: check that it doesn't try to do tricks", "<p 
data-parsoid='{\"dsr\":[0,118,0,0]}'><span typeof=\"mw:Nowiki\" 
data-parsoid='{\"dsr\":[0,18,8,9]}'>*</span> <!-- Hello --> <span 
typeof=\"mw:Nowiki\" data-parsoid='{\"dsr\":[34,118,8,9]}'>''{{world}}'' 
{{subst:How are you}}{{ {{{|safesubst:}}} #if:1|2|3}}</span></p>");
 add("html2html", "HTML5 data attributes", "<p 
data-parsoid='{\"dsr\":[0,31,0,0]}'><span data-foo=\"bar\" 
data-parsoid='{\"stx\":\"html\",\"dsr\":[0,31,21,7]}'>Baz</span></p>\n\n<p 
data-parsoid='{\"dsr\":[33,37,0,0]}'>Quuz</p>\n");
 add("html2html", "Bug 31098 Template which includes system messages which 
includes the template", "<p data-parsoid='{\"dsr\":[0,195,0,0]}'><span 
class=\"error\" data-parsoid='{\"stx\":\"html\",\"dsr\":[0,97,20,7]}'>Template 
loop detected: <a rel=\"mw:WikiLink\" href=\"./Wiki/Template:Identical\" 
title=\"Wiki/Template:Identical\" 
data-parsoid='{\"stx\":\"piped\",\"a\":{\"href\":\"./Wiki/Template:Identical\"},\"sa\":{\"href\":\"wiki/Template:Identical\"},\"dsr\":[44,90,26,2]}'>Template:Identical</a></span>\n<span
 class=\"error\" 
data-parsoid='{\"stx\":\"html\",\"dsr\":[98,195,20,7]}'>Template loop detected: 
<a rel=\"mw:WikiLink\" href=\"./Wiki/Template:Identical\" 
title=\"Wiki/Template:Identical\" 
data-parsoid='{\"stx\":\"piped\",\"a\":{\"href\":\"./Wiki/Template:Identical\"},\"sa\":{\"href\":\"wiki/Template:Identical\"},\"dsr\":[142,188,26,2]}'>Template:Identical</a></span></p>\n");
 add("html2html", "Bug31490 Turkish: ucfırst (with a dotless i)", "<p 
data-parsoid='{\"dsr\":[0,90,0,0]}'>[/index.php?title=%C5%9Eablon:Ucf%C4%B1rst:blah&amp;action=edit&amp;redlink=1
 Şablon:Ucfırst:blah]</p>\n");
@@ -899,6 +899,7 @@
 add("html2wt", "<nowiki> spacing", "Lorem ipsum dolor\nsed abit.\n<nowiki>  
sed nullum.\n:</nowiki>and a colon\n");
 add("html2wt", "nowiki 3", ": There is not nowiki.\n: There is nowiki.\n# 
There is not nowiki.\n# There is nowiki.\n* There is not nowiki.\n* There is 
nowiki.\n");
 add("html2wt", "Entities inside <nowiki>", "<\n");
+add("html2wt", "T71950: 2. Put nowiki as close to cause as possible, after ' 
:'", "This text : L<nowiki>''</nowiki>[[Foo]]\n");
 add("html2wt", "Comments and Indent-Pre", " asdf\n\n asdf\n\n asdf\n\nxyz\n\n 
asdf\n xyz\n");
 add("html2wt", "Comment test 2a", "asdf\njkl\n");
 add("html2wt", "Comment test 2b", "asdf\n\njkl\n");
@@ -962,7 +963,7 @@
 add("html2wt", "5a. White-space in indent-pre", " a<br>\n \n \n b\n");
 add("html2wt", "6. Pre-blocks should extend across lines with leading WS even 
when there is no wrappable content", " a\n \n b\n\n c\n \n\nd\n");
 add("html2wt", "HTML-pre: 2: indented text", "  foo\n");
-add("html2wt", "HTML-pre: 3: other wikitext", " <nowiki>* foo\n # bar\n = no-h 
=\n '' no-italic ''\n [[ NoLink ]]</nowiki>\n");
+add("html2wt", "HTML-pre: 3: other wikitext", " * foo\n <nowiki># bar\n = no-h 
=\n '' no-italic ''\n [[ NoLink ]]</nowiki>\n");
 add("html2wt", "Simple definition", "; name \n: Definition\n");
 add("html2wt", "Definition list with no space", "; name\n: Definition\n");
 add("html2wt", "Definition list with URL link", "; http://example.com/ \n: 
definition\n");
@@ -1513,7 +1514,7 @@
 add("html2wt", "Section replacement test (section 8)", 
"start\n<nowiki>==a==\n===aa===\n====aaa====\n==b==\n===ba===\n===bb===\n====bba====\nxxx\n\n==c==\n===ca===</nowiki>");
 add("html2wt", "Section replacement test (section 9)", 
"start\n<nowiki>==a==\n===aa===\n====aaa====\n==b==\n===ba===\n===bb===\n====bba====\n===bc===\nxxx</nowiki>");
 add("html2wt", "Section replacement test (section 10)", 
"start\n<nowiki>==a==\n===aa===\n====aaa====\n==b==\n===ba===\n===bb===\n====bba====\n===bc===\n==c==\nxxx</nowiki>");
-add("html2wt", "Section replacement test with initial whitespace (bug 13728)", 
"<nowiki> </nowiki>Preformatted initial line\n<nowiki>==a==\nxxx</nowiki>");
+add("html2wt", "Section replacement test with initial whitespace (bug 13728)", 
"<nowiki> Preformatted initial line\n==a==\nxxx</nowiki>");
 add("html2wt", "Section extraction, heading followed by pre with 20 spaces 
(bug 6398)", "<nowiki>==a==\n </nowiki>                   a");
 add("html2wt", "Section extraction, heading followed by pre with 19 spaces 
(bug 6398 sanity check)", "<nowiki>==a==\n </nowiki>                  a");
 add("html2wt", "Section extraction, <pre> around bogus header (bug 10309)", 
"<nowiki>== Section Two ==\nstuff</nowiki>");
@@ -1649,7 +1650,7 @@
 add("html2wt", "preload: check <noinclude> and <includeonly>", "Hello kind 
world.");
 add("html2wt", "preload: check <onlyinclude>", "Hello world");
 add("html2wt", "preload: can pass tags through if we want to", 
"<includeonly>Hello world</includeonly>");
-add("html2wt", "preload: check that it doesn't try to do tricks", 
"<nowiki>*</nowiki> <!-- Hello --><nowiki> ''{{world}}'' {{subst:How are 
you}}{{ {{{|safesubst:}}} #if:1|2|3}}</nowiki>");
+add("html2wt", "preload: check that it doesn't try to do tricks", 
"<nowiki>*</nowiki> <!-- Hello --> <nowiki>''{{world}}'' {{subst:How are 
you}}{{ {{{|safesubst:}}} #if:1|2|3}}</nowiki>");
 add("html2wt", "HTML5 data attributes", "<span 
data-foo=\"bar\">Baz</span>\n\nQuuz\n");
 add("html2wt", "percent-encoding and + signs in internal links (Bug 26410)", 
"[[User:+%]] [[Page+title%]] [[%+]] [[%+|%20]] [[%+ ]] [[%+r]] [[%]] [[+]] 
[[File:%25+abc9|link=File:%+abc9|[[bar]]]] [[3E]] [[3E+]]\n");
 add("html2wt", "Special characters in embedded file links (bug 27679)", 
"[[File:Contains_&_ampersand.jpg]]\n[[File:Does_not_exist.jpg|Title with & 
ampersand]]\n");
diff --git a/tests/parserTests.txt b/tests/parserTests.txt
index ee42cd6..df3f1e2 100644
--- a/tests/parserTests.txt
+++ b/tests/parserTests.txt
@@ -1373,6 +1373,32 @@
 <nowiki>* &lt;/nowiki&gt;</nowiki> tag
 !! end
 
+!! test
+T71950: 1. Put nowiki as close to cause as possible, even with non-quote 
escapable chars
+!! options
+parsoid=html2wt
+!! html
+<p>This text: L'<a rel="mw:WikiLink" href="./Foo">Foo</a>
+This text: L''<a rel="mw:WikiLink" href="./Foo">Foo</a>
+This text: L'''<a rel="mw:WikiLink" href="./Foo">Foo</a>''</p>
+!! wikitext
+This text: L'[[Foo]]
+This text: L<nowiki>''</nowiki>[[Foo]]
+This text: L<nowiki>'''</nowiki>[[Foo]]<nowiki>''</nowiki>
+!! end
+
+# This test fails because wikitext whitespace is not normalized before 
comparing.
+!! test
+T71950: 2. Put nowiki as close to cause as possible, after ' :'
+!! options
+parsoid=html2wt
+!! html
+<p>This text : L''<a rel="mw:WikiLink" href="./Foo">Foo</a>
+</p>
+!! wikitext
+This text : L<nowiki>''</nowiki>[[Foo]]
+!! end
+
 ###
 ### Comments
 ###

-- 
To view, visit https://gerrit.wikimedia.org/r/193842
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: I576aaf4bcad8931a6cc15d89fb4447125d5f539f
Gerrit-PatchSet: 8
Gerrit-Project: mediawiki/services/parsoid
Gerrit-Branch: master
Gerrit-Owner: Marcoil <[email protected]>
Gerrit-Reviewer: Arlolra <[email protected]>
Gerrit-Reviewer: Cscott <[email protected]>
Gerrit-Reviewer: Marcoil <[email protected]>
Gerrit-Reviewer: Subramanya Sastry <[email protected]>
Gerrit-Reviewer: jenkins-bot <>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to