Subramanya Sastry has uploaded a new change for review. ( 
https://gerrit.wikimedia.org/r/371068 )

Change subject: Linter: Add tidy-whitespace-bug linter category
......................................................................

Linter: Add tidy-whitespace-bug linter category

* 
https://it.wikipedia.org/wiki/Template:Campioni_NBA?action=parsermigration-edit

  Expand the navbox to see the problem.

* The problem is that Tidy hoists trailing whitespace out of tags.

  Normally, no big deal. But, in scenarios where adjacent inline tags
  have no whitespace, and the left tag has trailing whitespace as well
  as 'white-space:nowrap' CSS property, this can be a problem.

  In Tidy, the hoisted whitespace gives the browser an opportunity to
  break content. But, in non-Tidy scenarios, there is no hoisting and
  this causes content to run horizontally indefinitely.

* The detection is for the common case scenario and I don't look at
  the trailing whitespace deep in the tree. But, this is good enough
  for now since this handles the common case adequately enough.

* Added a bunch of tests clarifying expectations of this behavior.

Change-Id: Ie5e888089c8b665d948438f030221c8c58689f5f
---
M lib/wt2html/pp/handlers/linter.js
M tests/mocha/linter.js
2 files changed, 164 insertions(+), 10 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/mediawiki/services/parsoid 
refs/changes/68/371068/1

diff --git a/lib/wt2html/pp/handlers/linter.js 
b/lib/wt2html/pp/handlers/linter.js
index cd3ce4c..d4e2e7d 100644
--- a/lib/wt2html/pp/handlers/linter.js
+++ b/lib/wt2html/pp/handlers/linter.js
@@ -395,19 +395,24 @@
        return c;
 }
 
+function hasNoWrapCSS(node) {
+       // In the general case, this CSS can come from a class,
+       // or from a <style> tag or a stylesheet or even from JS code.
+       // But, for now, we are restricting this inspection to inline CSS
+       // since the intent is to aid editors in fixing patterns that
+       // can be automatically detected.
+       //
+       // Special case for enwiki that has Template:nowrap which
+       // assigns class='nowrap' with CSS white-space:nowrap in
+       // MediaWiki:Common.css
+       return /nowrap/.test(node.getAttribute('style')) ||
+               /(^|\s)nowrap($|\s)/.test(node.getAttribute('class'));
+}
+
 function logBadPWrapping(env, node, dp, tplInfo) {
        var findP = function(e) { return e.nodeName === 'P'; };
        if (!DU.isBlockNode(node) && DU.isBlockNode(node.parentNode)) {
-               // In the general case, this CSS can come from a class,
-               // or from a <style> tag or a stylesheet or even from JS code.
-               // But, for now, we are restricting this inspection to inline 
CSS
-               // since the intent is to aid editors in fixing patterns that
-               // can be automatically detected.
-               if (/nowrap/.test(node.getAttribute('style')) ||
-                       // Special case for enwiki that has Template:nowrap 
which
-                       // assigns class='nowrap' with CSS white-space:nowrap in
-                       // MediaWiki:Common.css
-                       /(^|\s)nowrap($|\s)/.test(node.getAttribute('class'))) {
+               if (hasNoWrapCSS(node)) {
                        var p = findMatchingChild(node, findP);
                        if (p) {
                                var dsr, templateInfo;
@@ -426,6 +431,68 @@
                        }
                }
        }
+}
+
+function logTidyWhitespaceBug(env, node, dp, tplInfo) {
+       if (DU.isBlockNode(node)) {
+               // Nothing to worry
+               return;
+       }
+
+       if (!hasNoWrapCSS(node)) {
+               // no CSS property that affects whitespcae
+               return;
+       }
+
+       // Find next non-comment sibling of 'node'
+       var next = node.nextSibling;
+       while (next && DU.isComment(next)) {
+               next = next.nextSibling;
+       }
+
+       if (!next || DU.isBlockNode(next) || (DU.isText(next) && 
/^\s/.test(next.data))) {
+               // All good! No text/inline-node sibling
+               return;
+       }
+
+       // Find last non-comment child of 'node'
+       var last = node.lastChild;
+       while (last && DU.isComment(last)) {
+               last = last.previousSibling;
+       }
+
+       if (!last || !DU.isText(last) || !/\s$/.test(last.data)) {
+               // All good! No whitespace for Tidy to hoist out
+               return;
+       }
+
+       // So, we have:
+       // * two inline siblings without whitespace between then,
+       // * the left sibling has nowrap CSS
+       // * the left sibling has trailing whitespace
+       //
+       // In this scenario, when Tidy hoists the whitespace to
+       // after the left sibling, that whitespace is not subject
+       // to the nowrap CSS => browsers can break content there.
+       //
+       // But, non-Tidy libraries won't hoist the whitespace.
+       // So, the content will extend indefinitely.
+       // Flag this pattern for editors to fix.
+
+
+       var dsr, templateInfo;
+       if (tplInfo) {
+               templateInfo = findEnclosingTemplateName(env, tplInfo);
+               dsr = tplInfo.dsr;
+       } else {
+               dsr = dp.dsr;
+       }
+       var lintObj = {
+               dsr: dsr,
+               templateInfo: templateInfo,
+               params: { node: node.nodeName, sibling: next.nodeName },
+       };
+       env.log('lint/tidy-whitespace-bug', lintObj);
 }
 
 function logWikitextFixups(node, env, atTopLevel, tplInfo) {
@@ -453,6 +520,7 @@
        logBadPWrapping(env, node, dp, tplInfo);    // For T161306
        logObsoleteHTMLTags(env, node, dp, tplInfo);
        logBogusMediaOptions(env, node, dp, tplInfo);
+       logTidyWhitespaceBug(env, node, dp, tplInfo);
 
        // Log fostered content, but skip rendering-transparent nodes
        //
diff --git a/tests/mocha/linter.js b/tests/mocha/linter.js
index fdd74e3..e91c4a4 100644
--- a/tests/mocha/linter.js
+++ b/tests/mocha/linter.js
@@ -448,4 +448,90 @@
                });
        });
 
+       describe('TIDY WHITESPACE BUG', function() {
+               it('should detect problematic whitespace hoisting' , function() 
{
+                       var wt = [
+                               // Basic with inline CSS + text sibling
+                               "<span style='white-space:nowrap'>",
+                               "a ",
+                               "</span>",
+                               "x",
+                               // Basic with inline CSS + span sibling
+                               "<span style='white-space:nowrap'>",
+                               "a ",
+                               "</span>",
+                               "<span>x</span>",
+                               // Basic with class CSS + text sibling
+                               "<span class='nowrap'>",
+                               "a ",
+                               "</span>",
+                               "x",
+                               // Basic with class CSS + span sibling
+                               "<span class='nowrap'>",
+                               "a ",
+                               "</span>",
+                               "<span>x</span>",
+                               // Comments shouldn't trip it up
+                               "<span style='white-space:nowrap'>",
+                               "a<!--boo--> <!--boo-->",
+                               "</span>",
+                               "<!--boo-->",
+                               "<span>x</span>",
+                       ].join('');
+                       return parseWT(wt).then(function(result) {
+                               result.should.have.length(5);
+                               result.forEach(function(r) {
+                                       r.should.have.a.property('type', 
'tidy-whitespace-bug');
+                                       r.params.should.have.a.property('node', 
'SPAN');
+                               });
+                               
result[0].params.should.have.a.property("sibling", "#text");
+                               
result[1].params.should.have.a.property("sibling", "SPAN");
+                               
result[2].params.should.have.a.property("sibling", "#text");
+                               
result[3].params.should.have.a.property("sibling", "SPAN");
+                               
result[4].params.should.have.a.property("sibling", "SPAN");
+                               // skipping dsr tests
+                       });
+               });
+               it('should not flag tidy whitespace bug (1)' , function() {
+                       var wt = [
+                               // No CSS
+                               "<span>",
+                               "a ",
+                               "</span>",
+                               "<span>x</span>",
+                               // No trailing white-space
+                               "<span class='nowrap'>",
+                               "a",
+                               "</span>",
+                               "x",
+                               // White-space follows
+                               "<span class='nowrap'>",
+                               "a ",
+                               "</span>",
+                               " ",
+                               "<span>x</span>",
+                               // White-space follows
+                               "<span style='white-space:nowrap'>",
+                               "a ",
+                               "</span>",
+                               "<!--boo--> boo",
+                               "<span>x</span>",
+                               // Block tag
+                               "<div class='nowrap'>",
+                               "a ",
+                               "</div>",
+                               "<span>x</span>",
+                               // Block tag sibling
+                               "<span class='nowrap'>",
+                               "a ",
+                               "</span>",
+                               "<div>x</div>",
+                               // No next sibling
+                               "<span class='nowrap'>",
+                               "a ",
+                               "</span>",
+                       ].join('');
+                       return expectEmptyResults(wt);
+               });
+       });
 });

-- 
To view, visit https://gerrit.wikimedia.org/r/371068
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: Ie5e888089c8b665d948438f030221c8c58689f5f
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/services/parsoid
Gerrit-Branch: master
Gerrit-Owner: Subramanya Sastry <ssas...@wikimedia.org>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to