jenkins-bot has submitted this change and it was merged.

Change subject: Let more attribs through in parsoid-only tests
......................................................................


Let more attribs through in parsoid-only tests

We need attributes such as property and class to survive normalization so
that the new image tests actually test what we need them to.

Only two parserTests changes:

1) "Parsoid-only: Table with broken attribute value quoting on consecutive
lines" now passes wt2html because title/style attributes aren't stripped.

2) "Italics and bold: other quote tests: (3,2,3,3)" now fails html2html. It
already failed wt2wt and html2wt (i.e., we don't round-trip this cleanly)
and passed html2html only by a fluke: we were stripping out the
typeof="mw:Nowiki" <span> wrapper that the round trip added.

The underlying issue with the failing test case is that we round-trip
   '''this is about ''foo'''s family'''
as
   '''this is about ''<nowiki>foo'</nowiki>''s family'''
which is conservative but the <nowiki> is not strictly necessary.

At any rate, we now accurately fail wt2wt, html2wt, and html2html on this
test case.

Co-authored-by: C. Scott Ananian <[email protected]>
Co-authored-by: Mark Holmquist <[email protected]>
Change-Id: Idc0de45d43fdecfd37c730baee10c8ee086e6fd6
---
M js/lib/mediawiki.Util.js
M js/tests/parserTests-blacklist.js
M js/tests/parserTests.js
3 files changed, 42 insertions(+), 25 deletions(-)

Approvals:
  Cscott: Looks good to me, approved
  GWicke: Looks good to me, approved
  jenkins-bot: Verified



diff --git a/js/lib/mediawiki.Util.js b/js/lib/mediawiki.Util.js
index 8934503..4a887f2 100644
--- a/js/lib/mediawiki.Util.js
+++ b/js/lib/mediawiki.Util.js
@@ -989,18 +989,21 @@
 
                                // and eat all remaining newlines
                                .replace(/[\r\n]/g, '');
-},
+};
 
 /**
  * @method normalizeOut
  *
  * Specialized normalization of the wiki parser output, mostly to ignore a few
- * known-ok differences.
+ * known-ok differences.  If parsoidOnly is true-ish, then we allow more
+ * markup through (like property and typeof attributes), for better
+ * checking of parsoid-only test cases.
  *
  * @param {string} out
+ * @param {bool} parsoidOnly
  * @returns {string}
  */
-normalizeOut = function ( out ) {
+var normalizeOut = function ( out, parsoidOnly ) {
        // TODO: Do not strip newlines in pre and nowiki blocks!
        // NOTE that we use a slightly restricted regexp for "attribute"
        //  which works for the output of DOM serialization.  For example,
@@ -1012,23 +1015,34 @@
                throw new Error("normalizeOut input is not in standard 
serialized form");
        }
        out = normalizeNewlines( out );
-       return out
-               .replace(/<span(?: [^>]+)* 
typeof="mw:(?:(?:Placeholder|Nowiki|Object\/Template|Entity))"(?: 
[^\0-\cZ\s\"\'>\/=]+(?:="[^"]*")?)*>((?:[^<]+|(?!<\/span).)*)<\/span>/g, '$1')
-               // Ignore these attributes for now
-               .replace(/ 
(data-mw|data-parsoid|typeof|resource|rel|prefix|about|rev|datatype|inlist|property|vocab|content|title|class)="[^"]*"/g,
 '')
+       if ( !parsoidOnly ) {
+               // ignore troublesome attributes
+               out = out.
+                       // remove <span typeof="....">....</span>
+                       replace(/<span(?: [^>]+)* 
typeof="mw:(?:Placeholder|Nowiki|Object\/Template|Entity)"(?: 
[^\0-\cZ\s\"\'>\/=]+(?:="[^"]*")?)*>((?:[^<]+|(?!<\/span).)*)<\/span>/g, '$1').
+                       replace(/ 
(data-mw|data-parsoid|typeof|resource|rel|prefix|about|rev|datatype|inlist|property|vocab|content|title|class)="[^\"]*"/g,
 '');
+       } else {
+               out = out.
+                       // remove <span typeof="mw:Placeholder">....</span>
+                       replace(/<span(?: [^>]+)* typeof="mw:Placeholder"(?: 
[^\0-\cZ\s\"\'>\/=]+(?:="[^"]*")?)*>((?:[^<]+|(?!<\/span).)*)<\/span>/g, '$1').
+                       // unnecessary attributes, we don't need to check these
+                       // style is in there because we should only check 
classes.
+                       replace(/ 
(data-mw|data-parsoid|prefix|about|rev|datatype|inlist|vocab|content|style)="[^\"]*"/g,
 '');
+       }
+       return out.
                // replace mwt ids
-               .replace(/ id="mwt\d+"/, '')
+               replace(/ id="mwt\d+"/, '').
                //.replace(/<!--.*?-->\n?/gm, '')
-               .replace(/<\/?(?:meta|link)(?: 
[^\0-\cZ\s"'>\/=]+(?:="[^"]*")?)*\/?>/g, '')
-               .replace(/<span[^>]+about="[^"]*"[^>]*>/g, '')
-               .replace(/<span><\/span>/g, '')
-               .replace(/(href=")(?:\.?\.\/)+/g, '$1')
+               replace(/<\/?(?:meta|link)(?: 
[^\0-\cZ\s"'>\/=]+(?:="[^"]*")?)*\/?>/g, '').
+               replace(/<span[^>]+about="[^"]*"[^>]*>/g, '').
+               replace(/<span><\/span>/g, '').
+               replace(/(href=")(?:\.?\.\/)+/g, '$1').
                // replace unnecessary URL escaping
-               .replace(/ href="[^"]*"/g, decodeURIComponent)
+               replace(/ href="[^"]*"/g, decodeURIComponent).
                // strip thumbnail size prefixes
-               
.replace(/(src="[^"]*?)\/thumb(\/[0-9a-f]\/[0-9a-f]{2}\/[^\/]+)\/[0-9]+px-[^"\/]+(?=")/g,
 '$1$2')
-               .replace(/(<(table|tbody|tr|th|td|\/th|\/td)[^<>]*>)\s+/g, 
'$1');
-},
+               
replace(/(src="[^"]*?)\/thumb(\/[0-9a-f]\/[0-9a-f]{2}\/[^\/]+)\/[0-9]+px-[^"\/]+(?=")/g,
 '$1$2').
+               replace(/(<(table|tbody|tr|th|td|\/th|\/td)[^<>]*>)\s+/g, '$1');
+};
 
 /**
  * @method normalizeHTML
@@ -1040,7 +1054,7 @@
  * @param source {string}
  * @return {string}
  */
-normalizeHTML = function ( source ) {
+var normalizeHTML = function ( source ) {
        // TODO: Do not strip newlines in pre and nowiki blocks!
        source = normalizeNewlines( source );
        try {
@@ -1069,8 +1083,8 @@
                        .replace(/<span><\/span>/g, '')
                        
.replace(/(<(table|tbody|tr|th|td|\/th|\/td)[^<>]*>)\s+/g, '$1');
        } catch(e) {
-        console.log("normalizeHTML failed on" +
-                               source + " with the following error: " + e);
+               console.log("normalizeHTML failed on" +
+                           source + " with the following error: " + e);
                console.trace();
                return source;
        }
diff --git a/js/tests/parserTests-blacklist.js 
b/js/tests/parserTests-blacklist.js
index b1ecf89..9f7564a 100644
--- a/js/tests/parserTests-blacklist.js
+++ b/js/tests/parserTests-blacklist.js
@@ -473,7 +473,6 @@
 add("wt2html", "1. Leading space in SOL context should be escaped");
 add("wt2html", "1. a tags");
 add("wt2html", "Table with broken attribute value quoting on consecutive 
lines");
-add("wt2html", "Parsoid-only: Table with broken attribute value quoting on 
consecutive lines");
 add("wt2html", "Parsoid-only: Don't wrap broken template tags in <nowiki> on 
wt2wt (Bug 42353)");
 add("wt2html", "Parsoid-only: Don't wrap broken template tags in <nowiki> on 
wt2wt (Bug 42353)");
 add("wt2html", "Empty table rows go away");
@@ -629,6 +628,7 @@
 add("html2html", "Italics and bold: 4-quote opening sequence: (4,5) 
(parsoid)");
 add("html2html", "Italics and bold: multiple quote sequences: (3,4,2) 
(parsoid)");
 add("html2html", "Italics and bold: multiple quote sequences: (3,4,3) 
(parsoid)");
+add("html2html", "Italics and bold: other quote tests: (3,2,3,3) (parsoid)");
 add("html2html", "Italicized possessive");
 add("html2html", "Preformatted text");
 add("html2html", "<pre> with attributes (bug 3202)");
diff --git a/js/tests/parserTests.js b/js/tests/parserTests.js
index b24c775..b186165 100755
--- a/js/tests/parserTests.js
+++ b/js/tests/parserTests.js
@@ -1078,6 +1078,7 @@
  */
 ParserTests.prototype.printResult = function ( title, time, comments, iopts, 
expected, actual, options, mode, item ) {
        var quick = booleanOption( options.quick );
+       var parsoidOnly = (iopts.parsoid !== undefined);
 
        if ( mode === 'selser' ) {
                title += ' ' + JSON.stringify( item.changes );
@@ -1090,7 +1091,7 @@
        if ( fail &&
             booleanOption( options.whitelist ) &&
             title in testWhiteList &&
-            Util.normalizeOut( testWhiteList[title] ) ===  actual.normal ) {
+            Util.normalizeOut( testWhiteList[title], parsoidOnly ) ===  
actual.normal ) {
                whitelist = true;
                fail = false;
        }
@@ -1117,13 +1118,14 @@
  */
 ParserTests.prototype.checkHTML = function ( item, out, options, mode ) {
        var normalizedOut, normalizedExpected;
+       var parsoidOnly = (item.options.parsoid !== undefined);
 
-       normalizedOut = Util.normalizeOut( out );
+       normalizedOut = Util.normalizeOut( out, parsoidOnly );
 
        if ( item.cachedNormalizedHTML === null ) {
-               if ('parsoid' in item.options) {
+               if ( parsoidOnly ) {
                        var normalDOM = Util.parseHTML( item.result 
).body.innerHTML;
-                       normalizedExpected = Util.normalizeOut( normalDOM );
+                       normalizedExpected = Util.normalizeOut( normalDOM, 
parsoidOnly );
                } else {
                        normalizedExpected = Util.normalizeHTML( item.result );
                }
@@ -1759,6 +1761,7 @@
        reportResultXML = function ( title, time, comments, iopts, expected, 
actual, options, mode, item ) {
                var timeTotal, testcaseEle;
                var quick = booleanOption( options.quick );
+               var parsoidOnly = (iopts.parsoid !== undefined);
 
                if ( mode === 'selser' ) {
                        title += ' ' + JSON.stringify( item.changes );
@@ -1771,7 +1774,7 @@
                if ( fail &&
                     booleanOption( options.whitelist ) &&
                     title in testWhiteList &&
-                    Util.normalizeOut( testWhiteList[title] ) ===  
actual.normal ) {
+                    Util.normalizeOut( testWhiteList[title], parsoidOnly ) === 
 actual.normal ) {
                        whitelist = true;
                        fail = false;
                }

-- 
To view, visit https://gerrit.wikimedia.org/r/61780
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: Idc0de45d43fdecfd37c730baee10c8ee086e6fd6
Gerrit-PatchSet: 8
Gerrit-Project: mediawiki/extensions/Parsoid
Gerrit-Branch: master
Gerrit-Owner: Cscott <[email protected]>
Gerrit-Reviewer: Cscott <[email protected]>
Gerrit-Reviewer: GWicke <[email protected]>
Gerrit-Reviewer: MarkTraceur <[email protected]>
Gerrit-Reviewer: jenkins-bot

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to