jenkins-bot has submitted this change and it was merged. ( 
https://gerrit.wikimedia.org/r/372896 )

Change subject: Linter: Tweak tidy-whitespace-bug output
......................................................................


Linter: Tweak tidy-whitespace-bug output

* Before this patch, we were reporting all instances of affected
  whitespace. However, with short strings, there is no real issue
  with rendering unless the viewport is small. So, in this patch,
  I am implementing a heuristic that triggers this linter issue
  only if the run of text content exceeds a configurable length.

* Updated mocha tests.

Change-Id: I15d401e9a9b649723ae3bfc3598d02e8a619a9ec
---
M lib/config/ParsoidConfig.js
M lib/wt2html/pp/handlers/linter.js
M tests/mocha/linter.js
3 files changed, 242 insertions(+), 84 deletions(-)

Approvals:
  Subramanya Sastry: Looks good to me, approved
  jenkins-bot: Verified



diff --git a/lib/config/ParsoidConfig.js b/lib/config/ParsoidConfig.js
index 7addddd..359dc2e 100644
--- a/lib/config/ParsoidConfig.js
+++ b/lib/config/ParsoidConfig.js
@@ -249,6 +249,19 @@
  */
 ParsoidConfig.prototype.linterAPISampling = 1;
 
+// FIXME: Fold other linter config options into this.
+ParsoidConfig.prototype.linter = {
+       /**
+        * @property {number} tidyWhiteSpaceBugMaxLength
+        *
+        * Max length of content covered by 'white-space:nowrap' CSS
+        * that we consider "safe" when Tidy is replaced. Beyond that,
+        * wikitext will have to be fixed up to manually insert whitespace
+        * at the right places.
+        */
+       tidyWhitespaceBugMaxLength: 100,
+};
+
 /**
  * @property {Function} loggerBackend
  * The logger output function.
diff --git a/lib/wt2html/pp/handlers/linter.js 
b/lib/wt2html/pp/handlers/linter.js
index d4e2e7d..8e95440 100644
--- a/lib/wt2html/pp/handlers/linter.js
+++ b/lib/wt2html/pp/handlers/linter.js
@@ -434,65 +434,144 @@
 }
 
 function logTidyWhitespaceBug(env, node, dp, tplInfo) {
-       if (DU.isBlockNode(node)) {
-               // Nothing to worry
+       // We handle a run of nodes in one shot.
+       // No need to reprocess repeatedly.
+       if (dp && dp.tmp.processedTidyWSBug) {
                return;
        }
 
-       if (!hasNoWrapCSS(node)) {
-               // no CSS property that affects whitespcae
-               return;
-       }
-
-       // Find next non-comment sibling of 'node'
-       var next = node.nextSibling;
-       while (next && DU.isComment(next)) {
-               next = next.nextSibling;
-       }
-
-       if (!next || DU.isBlockNode(next) || (DU.isText(next) && 
/^\s/.test(next.data))) {
-               // All good! No text/inline-node sibling
-               return;
-       }
-
-       // Find last non-comment child of 'node'
-       var last = node.lastChild;
-       while (last && DU.isComment(last)) {
-               last = last.previousSibling;
-       }
-
-       if (!last || !DU.isText(last) || !/\s$/.test(last.data)) {
-               // All good! No whitespace for Tidy to hoist out
-               return;
-       }
-
-       // So, we have:
-       // * two inline siblings without whitespace between then,
-       // * the left sibling has nowrap CSS
-       // * the left sibling has trailing whitespace
+       // Find the longest run of nodes that are affected by 
white-space:nowrap CSS
+       // in a way that leads to unsightly rendering in HTML5 compliant 
browsers.
        //
-       // In this scenario, when Tidy hoists the whitespace to
-       // after the left sibling, that whitespace is not subject
-       // to the nowrap CSS => browsers can break content there.
+       // Check if Tidy does buggy whitespace hoisting there to provide the 
browser
+       // opportunities to split the content in short segments.
        //
-       // But, non-Tidy libraries won't hoist the whitespace.
-       // So, the content will extend indefinitely.
-       // Flag this pattern for editors to fix.
+       // If so, editors would need to edit this run of nodes to introduce
+       // whitespace breaks as necessary so that HTML5 browsers get that
+       // same opportunity when Tidy is removed.
+       var s, ws;
+       var nowrapNodes = [];
+       var startNode = node;
+       var haveTidyBug = false;
+       var runLength = 0;
+       while (node && !DU.isBlockNode(node)) {
+               if (DU.isText(node) || !hasNoWrapCSS(node)) {
+                       // No CSS property that affects whitespace.
+                       s = node.textContent;
+                       ws = s.match(/\s/);
+                       if (ws) {
+                               runLength += ws.index;
+                               nowrapNodes.push({ node: node, tidybug: false, 
hasLeadingWS: /^\s/.test(s) });
+                               break;
+                       } else {
+                               nowrapNodes.push({ node: node, tidybug: false 
});
+                               runLength += s.length;
+                       }
+               } else {
+                       // Find last non-comment child of node
+                       var last = node.lastChild;
+                       while (last && DU.isComment(last)) {
+                               last = last.previousSibling;
+                       }
 
+                       var bug = false; // Set this explicitly always (because 
vars aren't block-scoped)
+                       if (last && DU.isText(last) && /\s$/.test(last.data)) {
+                               // In this scenario, when Tidy hoists the 
whitespace to
+                               // after the node, that whitespace is not 
subject to the
+                               // nowrap CSS => browsers can break content 
there.
+                               //
+                               // But, non-Tidy libraries won't hoist the 
whitespace.
+                               // So, browsers don't have a place to break 
content.
+                               bug = true;
+                               haveTidyBug = true;
+                       }
 
+                       nowrapNodes.push({ node: node, tidybug: bug });
+                       runLength += node.textContent.length;
+               }
+
+               // Don't cross template boundaries at the top-level
+               if (tplInfo && tplInfo.last === node) {
+                       // Exiting a top-level template
+                       break;
+               } else if (!tplInfo && 
DU.findFirstEncapsulationWrapperNode(node)) {
+                       // Entering a top-level template
+                       break;
+               }
+
+               // Move to the next non-comment sibling
+               node = node.nextSibling;
+               while (node && DU.isComment(node)) {
+                       node = node.nextSibling;
+               }
+       }
+
+       var markProcessedNodes = function() { // Helper
+               nowrapNodes.forEach(function(o) {
+                       if (DU.isElt(o.node)) {
+                               
DU.getDataParsoid(o.node).tmp.processedTidyWSBug = true;
+                       }
+               });
+       };
+
+       if (!haveTidyBug) {
+               // Mark processed nodes and bail
+               markProcessedNodes();
+               return;
+       }
+
+       // Find run before startNode that doesn't have a whitespace break
+       var prev = startNode.previousSibling;
+       while (prev && !DU.isBlockNode(prev)) {
+               if (!DU.isComment(prev)) {
+                       s = prev.textContent;
+                       // Find the last \s in the string
+                       ws = s.match(/\s[^\s]*$/);
+                       if (ws) {
+                               runLength += (s.length - ws.index - 1); // -1 
for the \s
+                               break;
+                       } else {
+                               runLength += s.length;
+                       }
+               }
+               prev = prev.previousSibling;
+       }
+
+       if (runLength < env.conf.parsoid.linter.tidyWhitespaceBugMaxLength) {
+               // Mark processed nodes and bail
+               markProcessedNodes();
+               return;
+       }
+
+       // For every node where Tidy hoists whitespace,
+       // emit an event to flag a whitespace fixup opportunity.
        var dsr, templateInfo;
        if (tplInfo) {
                templateInfo = findEnclosingTemplateName(env, tplInfo);
                dsr = tplInfo.dsr;
-       } else {
-               dsr = dp.dsr;
        }
-       var lintObj = {
-               dsr: dsr,
-               templateInfo: templateInfo,
-               params: { node: node.nodeName, sibling: next.nodeName },
-       };
-       env.log('lint/tidy-whitespace-bug', lintObj);
+       var n = nowrapNodes.length - 1;
+       nowrapNodes.forEach(function(o, i) {
+               if (o.tidybug && i < n) {
+                       if (!tplInfo) {
+                               dsr = DU.getDataParsoid(o.node).dsr;
+                       }
+                       if (!nowrapNodes[i + 1].hasLeadingWS) {
+                               var lintObj = {
+                                       dsr: dsr,
+                                       templateInfo: templateInfo,
+                                       params: {
+                                               node: o.node.nodeName,
+                                               sibling: 
o.node.nextSibling.nodeName,
+                                       },
+                               };
+
+                               env.log('lint/tidy-whitespace-bug', lintObj);
+                       }
+               }
+       });
+
+       markProcessedNodes();
 }
 
 function logWikitextFixups(node, env, atTopLevel, tplInfo) {
diff --git a/tests/mocha/linter.js b/tests/mocha/linter.js
index 81c9ef8..f1ad362 100644
--- a/tests/mocha/linter.js
+++ b/tests/mocha/linter.js
@@ -17,14 +17,14 @@
        // Parsing the `[[file:...]]` tags below may also require running the
        // mock API to answer imageinfo queries.
        var parsoidConfig = new ParsoidConfig(null, { defaultWiki: 'enwiki', 
loadWMF: true, linting: true });
-       var parseWT = function(wt) {
-               return helpers.parse(parsoidConfig, wt).then(function(ret) {
+       var parseWT = function(wt, opts) {
+               return helpers.parse(parsoidConfig, wt, 
opts).then(function(ret) {
                        return ret.env.lintLogger.buffer;
                });
        };
 
-       var expectEmptyResults = function(wt) {
-               return parseWT(wt).then(function(result) {
+       var expectEmptyResults = function(wt, opts) {
+               return parseWT(wt, opts).then(function(result) {
                        return result.should.be.empty;
                });
        };
@@ -449,36 +449,30 @@
        });
 
        describe('TIDY WHITESPACE BUG', function() {
-               it('should detect problematic whitespace hoisting' , function() 
{
-                       var wt = [
-                               // Basic with inline CSS + text sibling
-                               "<span style='white-space:nowrap'>",
-                               "a ",
-                               "</span>",
-                               "x",
-                               // Basic with inline CSS + span sibling
-                               "<span style='white-space:nowrap'>",
-                               "a ",
-                               "</span>",
-                               "<span>x</span>",
-                               // Basic with class CSS + text sibling
-                               "<span class='nowrap'>",
-                               "a ",
-                               "</span>",
-                               "x",
-                               // Basic with class CSS + span sibling
-                               "<span class='nowrap'>",
-                               "a ",
-                               "</span>",
-                               "<span>x</span>",
-                               // Comments shouldn't trip it up
-                               "<span style='white-space:nowrap'>",
-                               "a<!--boo--> <!--boo-->",
-                               "</span>",
-                               "<!--boo-->",
-                               "<span>x</span>",
-                       ].join('');
-                       return parseWT(wt).then(function(result) {
+               var wt1 = [
+                       // Basic with inline CSS + text sibling
+                       "<span style='white-space:nowrap'>a </span>",
+                       "x",
+                       // Basic with inline CSS + span sibling
+                       "<span style='white-space:nowrap'>a </span>",
+                       "<span>x</span>",
+                       // Basic with class CSS + text sibling
+                       "<span class='nowrap'>a </span>",
+                       "x",
+                       // Basic with class CSS + span sibling
+                       "<span class='nowrap'>a </span>",
+                       "<span>x</span>",
+                       // Comments shouldn't trip it up
+                       "<span style='white-space:nowrap'>a<!--boo--> 
<!--boo--></span>",
+                       "<!--boo-->",
+                       "<span>x</span>",
+               ].join('');
+
+               it('should detect problematic whitespace hoisting', function() {
+                       var tweakEnv = function(env) {
+                               
env.conf.parsoid.linter.tidyWhitespaceBugMaxLength = 0;
+                       };
+                       return parseWT(wt1, { tweakEnv: tweakEnv 
}).then(function(result) {
                                result.should.have.length(5);
                                result.forEach(function(r) {
                                        r.should.have.a.property('type', 
'tidy-whitespace-bug');
@@ -488,11 +482,80 @@
                                
result[1].params.should.have.a.property("sibling", "SPAN");
                                
result[2].params.should.have.a.property("sibling", "#text");
                                
result[3].params.should.have.a.property("sibling", "SPAN");
-                               
result[4].params.should.have.a.property("sibling", "SPAN");
+                               
result[4].params.should.have.a.property("sibling", "#comment");
                                // skipping dsr tests
                        });
                });
-               it('should not flag tidy whitespace bug (1)' , function() {
+
+               it('should not detect problematic whitespace hoisting for short 
text runs', function() {
+                       // Nothing to trigger here
+                       var tweakEnv = function(env) {
+                               
env.conf.parsoid.linter.tidyWhitespaceBugMaxLength = 100;
+                       };
+                       return expectEmptyResults(wt1, { tweakEnv: tweakEnv });
+               });
+
+               var wt2 = [
+                       "some unaffected text here ",
+                       "<span style='white-space:nowrap'>a </span>",
+                       "<span style='white-space:nowrap'>bb</span>",
+                       "<span class='nowrap'>cc</span>",
+                       "<span class='nowrap'>d </span>",
+                       "<span style='white-space:nowrap'>e </span>",
+                       "<span class='nowrap'>x</span>",
+               ].join('');
+
+               it('should flag tidy whitespace bug on a run of affected 
content', function() {
+                       // The run length is 11 chars in the example above
+                       var tweakEnv = function(env) {
+                               
env.conf.parsoid.linter.tidyWhitespaceBugMaxLength = 5;
+                       };
+                       return parseWT(wt2, { tweakEnv: tweakEnv 
}).then(function(result) {
+                               result.should.have.length(3);
+                               result.forEach(function(r) {
+                                       r.should.have.a.property('type', 
'tidy-whitespace-bug');
+                                       r.params.should.have.a.property('node', 
'SPAN');
+                               });
+                               
result[0].params.should.have.a.property("sibling", "SPAN"); // 1st span
+                               result[0].dsr.should.deep.equal([ 26, 68, 33, 7 
]);
+                               
result[1].params.should.have.a.property("sibling", "SPAN"); // 4th span
+                               result[1].dsr.should.deep.equal([ 140, 170, 21, 
7 ]);
+                               
result[2].params.should.have.a.property("sibling", "SPAN"); // 5th span
+                               result[2].dsr.should.deep.equal([ 170, 212, 33, 
7 ]);
+                       });
+               });
+
+               it('should not flag tidy whitespace bug on a run of short 
affected content', function() {
+                       // The run length is 11 chars in the example above
+                       var tweakEnv = function(env) {
+                               
env.conf.parsoid.linter.tidyWhitespaceBugMaxLength = 12;
+                       };
+                       return expectEmptyResults(wt2, { tweakEnv: tweakEnv });
+               });
+
+               it('should account for preceding text content', function() {
+                       // The run length is 11 chars in the example above
+                       var tweakEnv = function(env) {
+                               
env.conf.parsoid.linter.tidyWhitespaceBugMaxLength = 12;
+                       };
+                       // Run length changes to 16 chars because of preceding 
text
+                       wt2 = wt2.replace(/some unaffected text here /, 'some 
unaffected text HERE-');
+                       return parseWT(wt2, { tweakEnv: tweakEnv 
}).then(function(result) {
+                               result.should.have.length(3);
+                               result.forEach(function(r) {
+                                       r.should.have.a.property('type', 
'tidy-whitespace-bug');
+                                       r.params.should.have.a.property('node', 
'SPAN');
+                               });
+                               
result[0].params.should.have.a.property("sibling", "SPAN"); // 1st span
+                               result[0].dsr.should.deep.equal([ 26, 68, 33, 7 
]);
+                               
result[1].params.should.have.a.property("sibling", "SPAN"); // 4th span
+                               result[1].dsr.should.deep.equal([ 140, 170, 21, 
7 ]);
+                               
result[2].params.should.have.a.property("sibling", "SPAN"); // 5th span
+                               result[2].dsr.should.deep.equal([ 170, 212, 33, 
7 ]);
+                       });
+               });
+
+               it('should not flag tidy whitespace bug where it does not 
matter', function() {
                        var wt = [
                                // No CSS
                                "<span>",
@@ -531,7 +594,10 @@
                                "a ",
                                "</span>",
                        ].join('');
-                       return expectEmptyResults(wt);
+                       var tweakEnv = function(env) {
+                               
env.conf.parsoid.linter.tidyWhitespaceBugMaxLength = 0;
+                       };
+                       return expectEmptyResults(wt, { tweakEnv: tweakEnv });
                });
        });
 });

-- 
To view, visit https://gerrit.wikimedia.org/r/372896
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: I15d401e9a9b649723ae3bfc3598d02e8a619a9ec
Gerrit-PatchSet: 14
Gerrit-Project: mediawiki/services/parsoid
Gerrit-Branch: master
Gerrit-Owner: Subramanya Sastry <[email protected]>
Gerrit-Reviewer: Arlolra <[email protected]>
Gerrit-Reviewer: Legoktm <[email protected]>
Gerrit-Reviewer: Subramanya Sastry <[email protected]>
Gerrit-Reviewer: jenkins-bot <>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to