Cscott has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/61780


Change subject: Let more attribs through in parsoid-only tests
......................................................................

Let more attribs through in parsoid-only tests

We need attributes such as property and class to survive normalization so
that the new image tests actually check what we need them to.

Change-Id: Idc0de45d43fdecfd37c730baee10c8ee086e6fd6
Co-authored-by: C. Scott Ananian <[email protected]>
Co-authored-by: Mark Holmquist <[email protected]>
---
M js/lib/mediawiki.Util.js
M js/tests/parserTests.js
2 files changed, 59 insertions(+), 40 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/Parsoid 
refs/changes/80/61780/1

diff --git a/js/lib/mediawiki.Util.js b/js/lib/mediawiki.Util.js
index 82ab83b..fa365fb 100644
--- a/js/lib/mediawiki.Util.js
+++ b/js/lib/mediawiki.Util.js
@@ -989,18 +989,21 @@
 
                                // and eat all remaining newlines
                                .replace(/[\r\n]/g, '');
-},
+};
 
 /**
  * @method normalizeOut
  *
  * Specialized normalization of the wiki parser output, mostly to ignore a few
- * known-ok differences.
+ * known-ok differences.  If parsoidOnly is true-ish, then we allow more
+ * markup through (like property and typeof attributes), for better
+ * checking of parsoid-only test cases.
  *
  * @param {string} out
+ * @param {bool} parsoidOnly
  * @returns {string}
  */
-normalizeOut = function ( out ) {
+var normalizeOut = function ( out, parsoidOnly ) {
        // TODO: Do not strip newlines in pre and nowiki blocks!
        // NOTE that we use a slightly restricted regexp for "attribute"
        //  which works for the output of DOM serialization.  For example,
@@ -1012,23 +1015,29 @@
                throw new Error("normalizeOut input is not in standard 
serialized form");
        }
        out = normalizeNewlines( out );
-       return out
-               .replace(/<span 
typeof="mw:(?:(?:Placeholder|Nowiki|Object\/Template|Entity))"(?: 
[^\0-\cZ\s\"\'>\/=]+(?:="[^"]*")?)*>((?:[^<]+|(?!<\/span).)*)<\/span>/g, '$1')
-               // Ignore these attributes for now
-               .replace(/ 
(data-mw|data-parsoid|typeof|resource|rel|prefix|about|rev|datatype|inlist|property|vocab|content|title|class)="[^"]*"/g,
 '')
+       if ( !parsoidOnly ) {
+               // remove <span typeof="....">....</span>
+               out = out.
+                       replace(/<span 
typeof="mw:(?:(?:Placeholder|Nowiki|Object\/Template|Entity))"(?: 
[^\0-\cZ\s\"\'>\/=]+(?:="[^"]*")?)*>((?:[^<]+|(?!<\/span).)*)<\/span>/g, '$1');
+               // ignore troublesome attributes
+               out = out.replace(/ 
(data-mw|data-parsoid|typeof|resource|rel|prefix|about|rev|datatype|inlist|property|vocab|content|title|class)="[^\"]*"/g,
 '');
+       } else {
+               out = out.replace(/ 
(data-mw|data-parsoid|prefix|about|rev|datatype|inlist|vocab|content)="[^\"]*"/g,
 '');
+       }
+       return out.
                // replace mwt ids
-               .replace(/ id="mwt\d+"/, '')
+               replace(/ id="mwt\d+"/, '').
                //.replace(/<!--.*?-->\n?/gm, '')
-               .replace(/<\/?(?:meta|link)(?: 
[^\0-\cZ\s"'>\/=]+(?:="[^"]*")?)*\/?>/g, '')
-               .replace(/<span[^>]+about="[^"]*"[^>]*>/g, '')
-               .replace(/<span><\/span>/g, '')
-               .replace(/(href=")(?:\.?\.\/)+/g, '$1')
+               replace(/<\/?(?:meta|link)(?: 
[^\0-\cZ\s"'>\/=]+(?:="[^"]*")?)*\/?>/g, '').
+               replace(/<span[^>]+about="[^"]*"[^>]*>/g, '').
+               replace(/<span><\/span>/g, '').
+               replace(/(href=")(?:\.?\.\/)+/g, '$1').
                // replace unnecessary URL escaping
-               .replace(/ href="[^"]*"/g, decodeURIComponent)
+               replace(/ href="[^"]*"/g, decodeURIComponent).
                // strip thumbnail size prefixes
-               
.replace(/(src="[^"]*?)\/thumb(\/[0-9a-f]\/[0-9a-f]{2}\/[^\/]+)\/[0-9]+px-[^"\/]+(?=")/g,
 '$1$2')
-               .replace(/(<(table|tbody|tr|th|td|\/th|\/td)[^<>]*>)\s+/g, 
'$1');
-},
+               
replace(/(src="[^"]*?)\/thumb(\/[0-9a-f]\/[0-9a-f]{2}\/[^\/]+)\/[0-9]+px-[^"\/]+(?=")/g,
 '$1$2').
+               replace(/(<(table|tbody|tr|th|td|\/th|\/td)[^<>]*>)\s+/g, '$1');
+};
 
 /**
  * @method normalizeHTML
@@ -1037,44 +1046,52 @@
  * re-serializing it to HTML. Ideally, the parser would normalize inter-tag
  * whitespace for us. For now, we fake that by simply stripping all newlines.
  *
- * @param source {string}
+ * If parsoidOnly is true-ish, then we allow more markup through to allow
+ * more accurate parsoid-only tests.
+ *
+ * @param {string} source
+ * @param {bool} parsoidOnly
  * @return {string}
  */
-normalizeHTML = function ( source ) {
+var normalizeHTML = function ( source, parsoidOnly ) {
        // TODO: Do not strip newlines in pre and nowiki blocks!
        source = normalizeNewlines( source );
        try {
                var doc = this.parseHTML( source );
-               return doc.body
-                       .innerHTML
+               var out = doc.body.innerHTML.
                        // a few things we ignore for now..
-                       //.replace(/\/wiki\/Main_Page/g, 'Main Page')
+                       //replace(/\/wiki\/Main_Page/g, 'Main Page').
                        // do not expect a toc for now
-                       .replace(/<table[^>]+?id="toc"[^>]*>.+?<\/table>/mg, '')
+                       replace(/<table[^>]+?id="toc"[^>]*>.+?<\/table>/mg, '').
                        // do not expect section editing for now
-                       
.replace(/<span[^>]+class="mw-headline"[^>]*>(.*?)<\/span> *(<span 
class="mw-editsection">\[.*?<\/span>)?/g, '$1')
+                       
replace(/<span[^>]+class="mw-headline"[^>]*>(.*?)<\/span> *(<span 
class="mw-editsection">\[.*?<\/span>)?/g, '$1').
                        // remove empty span tags
-                       .replace(/<span><\/span>/g, '')
-                       // general class and titles, typically on links
-                       .replace(/ (title|class|rel|about|typeof)="[^"]*"/g, '')
-                       // strip red link markup, we do not check if a page 
exists yet
-                       
.replace(/\/index.php\?title=([^']+?)&amp;action=edit&amp;redlink=1/g, 
'/wiki/$1')
+                       replace(/<span><\/span>/g, '');
+               if ( !parsoidOnly ) {
+                       out = out.
+                               // general class and titles, typically on links
+                               replace(/ 
(title|class|rel|about|typeof)="[^"]*"/g, '').
+                               // strip red link markup, we do not check if a 
page exists yet
+                               
replace(/\/index.php\?title=([^']+?)&amp;action=edit&amp;redlink=1/g, 
'/wiki/$1');
+               }
+               out = out.
                        // the expected html has some extra space in tags, 
strip it
-                       .replace(/<a +href/g, '<a href')
-                       .replace(/href="\/wiki\//g, 'href="')
-                       .replace(/" +>/g, '">')
+                       replace(/<a +href/g, '<a href').
+                       replace(/href="\/wiki\//g, 'href="').
+                       replace(/" +>/g, '">').
                        // replace unnecessary URL escaping
-                       .replace(/ href="[^"]*"/g, decodeURIComponent)
+                       replace(/ href="[^"]*"/g, decodeURIComponent).
                        // strip empty spans
-                       .replace(/<span><\/span>/g, '')
-                       
.replace(/(<(table|tbody|tr|th|td|\/th|\/td)[^<>]*>)\s+/g, '$1');
+                       replace(/<span><\/span>/g, '').
+                       
replace(/(<(table|tbody|tr|th|td|\/th|\/td)[^<>]*>)\s+/g, '$1');
+               return out;
        } catch(e) {
-        console.log("normalizeHTML failed on" +
+               console.log("normalizeHTML failed on" +
                                source + " with the following error: " + e);
                console.trace();
                return source;
        }
-},
+};
 
 /**
  * @method formatHTML
@@ -1084,7 +1101,7 @@
  * @param {string} source
  * @returns {string}
  */
-formatHTML = function ( source ) {
+var formatHTML = function ( source ) {
        return source.replace(
                
/(?!^)<((div|dd|dt|li|p|table|tr|td|tbody|dl|ol|ul|h1|h2|h3|h4|h5|h6)[^>]*)>/g, 
'\n<$1>');
 },
diff --git a/js/tests/parserTests.js b/js/tests/parserTests.js
index ef2381d..506ff53 100755
--- a/js/tests/parserTests.js
+++ b/js/tests/parserTests.js
@@ -1117,11 +1117,12 @@
  */
 ParserTests.prototype.checkHTML = function ( item, out, options, mode ) {
        var normalizedOut, normalizedExpected;
+       var parsoidOnly = (item.options.parsoid !== undefined);
 
-       normalizedOut = Util.normalizeOut( out );
+       normalizedOut = Util.normalizeOut( out, parsoidOnly );
 
        if ( item.cachedNormalizedHTML === null ) {
-               normalizedExpected = Util.normalizeHTML( item.result );
+               normalizedExpected = Util.normalizeHTML( item.result, 
parsoidOnly );
                item.cachedNormalizedHTML = normalizedExpected;
        } else {
                normalizedExpected = item.cachedNormalizedHTML;
@@ -1730,6 +1731,7 @@
        reportResultXML = function ( title, time, comments, iopts, expected, 
actual, options, mode, item ) {
                var timeTotal, testcaseEle;
                var quick = booleanOption( options.quick );
+               var parsoidOnly = (iopts.parsoid !== undefined);
 
                if ( mode === 'selser' ) {
                        title += ' ' + JSON.stringify( item.changes );
@@ -1742,7 +1744,7 @@
                if ( fail &&
                     booleanOption( options.whitelist ) &&
                     title in testWhiteList &&
-                    Util.normalizeOut( testWhiteList[title] ) ===  
actual.normal ) {
+                    Util.normalizeOut( testWhiteList[title], parsoidOnly ) === 
 actual.normal ) {
                        whitelist = true;
                        fail = false;
                }

-- 
To view, visit https://gerrit.wikimedia.org/r/61780
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: Idc0de45d43fdecfd37c730baee10c8ee086e6fd6
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/extensions/Parsoid
Gerrit-Branch: master
Gerrit-Owner: Cscott <[email protected]>
Gerrit-Reviewer: MarkTraceur <[email protected]>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to