[MediaWiki-commits] [Gerrit] Let more attribs through in parsoid-only tests - change (mediawiki...Parsoid)

MarkTraceur (Code Review) Mon, 15 Apr 2013 11:09:13 -0700

MarkTraceur has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/59169



Change subject: Let more attribs through in parsoid-only tests
......................................................................

Let more attribs through in parsoid-only tests

We need attributes such as property and class to be kept in the normalized
output so that the new image tests actually test what we need them to.

Change-Id: I8d16b7a8522290cf0f9c6a920cb9e18e95642222
---
M js/lib/mediawiki.Util.js
M js/tests/parserTests.js
2 files changed, 92 insertions(+), 5 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/Parsoid refs/changes/69/59169/1

diff --git a/js/lib/mediawiki.Util.js b/js/lib/mediawiki.Util.js
index de1e78b..ff87a50 100644
--- a/js/lib/mediawiki.Util.js
+++ b/js/lib/mediawiki.Util.js
@@ -929,6 +929,43 @@
 },
 
 /**
+ * @method normalizeParsoidOut
+ *
+ * Specialized normalization of the wiki parser output, mostly to ignore a few
+ * known-ok differences. Specifically for parsoid-only tests, where we want to
+ * be sure of a few more attributes like property and typeof.
+ *
+ * @param {string} out
+ * @returns {string}
+ */
+normalizeParsoidOut = function ( out ) {
+       // TODO: Do not strip newlines in pre and nowiki blocks!
+       // NOTE that we use a slightly restricted regexp for "attribute"
+       //  which works for the output of DOM serialization.  For example,
+       //  we know that attribute values will be surrounded with double quotes,
+       //  not unquoted or quoted with single quotes.  The serialization
+       //  algorithm is given by:
+       //  
http://www.whatwg.org/specs/web-apps/current-work/multipage/the-end.html#serializing-html-fragments
+       if 
(!/[^<]*(<\w+(\s+[^\0-\cZ\s"'>\/=]+(="[^"]*")?)*\/?>[^<]*)*/.test(out)) {
+               throw new Error("normalizeParsoidOut input is not in standard 
serialized form");
+       }
+       out = normalizeNewlines( out );
+       return out
+               // Ignore these attributes for now
+               .replace(/ 
(data-mw|data-parsoid|prefix|about|rev|datatype|inlist|vocab|content)="[^"]*"/g,
 '')
+               // replace mwt ids
+               .replace(/ id="mwt\d+"/, '')
+               //.replace(/<!--.*?-->\n?/gm, '')
+               .replace(/<\/?(?:meta|link)(?: 
[^\0-\cZ\s"'>\/=]+(?:="[^"]*")?)*\/?>/g, '')
+               .replace(/<span[^>]+about="[^"]*"[^>]*>/g, '')
+               .replace(/<span><\/span>/g, '')
+               .replace(/(href=")(?:\.?\.\/)+/g, '$1')
+               // strip thumbnail size prefixes
+               
.replace(/(src="[^"]*?)\/thumb(\/[0-9a-f]\/[0-9a-f]{2}\/[^\/]+)\/[0-9]+px-[^"\/]+(?=")/g,
 '$1$2')
+               .replace(/(<(table|tbody|tr|th|td|\/th|\/td)[^<>]*>)\s+/g, 
'$1');
+},
+
+/**
  * @method normalizeOut
  *
  * Specialized normalization of the wiki parser output, mostly to ignore a few
@@ -963,6 +1000,53 @@
                // strip thumbnail size prefixes
                
.replace(/(src="[^"]*?)\/thumb(\/[0-9a-f]\/[0-9a-f]{2}\/[^\/]+)\/[0-9]+px-[^"\/]+(?=")/g,
 '$1$2')
                .replace(/(<(table|tbody|tr|th|td|\/th|\/td)[^<>]*>)\s+/g, 
'$1');
+},
+
+/**
+ * @method normalizeParsoidHTML
+ *
+ * Normalize the expected parser output by parsing it using a HTML5 parser and
+ * re-serializing it to HTML. Ideally, the parser would normalize inter-tag
+ * whitespace for us. For now, we fake that by simply stripping all newlines.
+ *
+ * This function is for Parsoid-only tests. We leave in some extra attributes
+ * and elements in that case, for completeness.
+ *
+ * @param source {string}
+ * @return {string}
+ */
+normalizeParsoidHTML = function ( source ) {
+       // TODO: Do not strip newlines in pre and nowiki blocks!
+       source = normalizeNewlines( source );
+       try {
+               var doc = this.parseHTML( source );
+               return doc.body
+                       .innerHTML
+                       // a few things we ignore for now..
+                       //.replace(/\/wiki\/Main_Page/g, 'Main Page')
+                       // do not expect a toc for now
+                       .replace(/<table[^>]+?id="toc"[^>]*>.+?<\/table>/mg, '')
+                       // do not expect section editing for now
+                       .replace(/(<span class="editsection">\[.*?<\/span> 
*)?<span[^>]+class="mw-headline"[^>]*>(.*?)<\/span>/g, '$2')
+                       // remove empty span tags
+                       .replace(/<span><\/span>/g, '')
+                       // Don't need about attributes
+                       .replace(/ about="[^"]*"/g, '')
+                       // strip red link markup, we do not check if a page 
exists yet
+                       
.replace(/\/index.php\?title=([^']+?)&amp;action=edit&amp;redlink=1/g, 
'/wiki/$1')
+                       // the expected html has some extra space in tags, 
strip it
+                       .replace(/<a +href/g, '<a href')
+                       .replace(/href="\/wiki\//g, 'href="')
+                       .replace(/" +>/g, '">')
+                       // strip empty spans
+                       .replace(/<span><\/span>/g, '')
+                       
.replace(/(<(table|tbody|tr|th|td|\/th|\/td)[^<>]*>)\s+/g, '$1');
+       } catch(e) {
+        console.log("normalizeParsoidHTML failed on" +
+                               source + " with the following error: " + e);
+               console.trace();
+               return source;
+       }
 },
 
 /**
@@ -1120,6 +1204,8 @@
 Util.serializeNode = serializeNode;
 Util.normalizeHTML = normalizeHTML;
 Util.normalizeOut = normalizeOut;
+Util.normalizeParsoidHTML = normalizeParsoidHTML;
+Util.normalizeParsoidOut = normalizeParsoidOut;
 Util.formatHTML = formatHTML;
 
 }( Util ) );
diff --git a/js/tests/parserTests.js b/js/tests/parserTests.js
index 5b51546..aa87b55 100755
--- a/js/tests/parserTests.js
+++ b/js/tests/parserTests.js
@@ -1069,12 +1069,13 @@
  * @param {Object} options
  */
 ParserTests.prototype.checkHTML = function ( item, out, options, mode ) {
-       var normalizedOut, normalizedExpected;
-
-       normalizedOut = Util.normalizeOut( out );
+       var normalizedExpected,
+               normOutFunc = ( ( item.options.parsoid !== undefined ) ? 
Util.normalizeParsoidOut : Util.normalizeOut ).bind( Util ),
+               normHTMLFunc = ( ( item.options.parsoid !== undefined ) ? 
Util.normalizeParsoidHTML : Util.normalizeHTML ).bind( Util ),
+               normalizedOut = normOutFunc( out );
 
        if ( item.cachedNormalizedHTML === null ) {
-               normalizedExpected = Util.normalizeHTML( item.result );
+               normalizedExpected = normHTMLFunc( item.result );
                item.cachedNormalizedHTML = normalizedExpected;
        } else {
                normalizedExpected = item.cachedNormalizedHTML;
@@ -1637,7 +1638,7 @@
 
                if ( expected.normal !== actual.normal ) {
                        if ( options.whitelist && title in testWhiteList &&
-                                Util.normalizeOut( testWhiteList[title] ) ===  
actual.normal ) {
+                               Util.normalizeOut( testWhiteList[title] ) ===  
actual.normal ) {
                                reportSuccessXML( title, mode, true, quiet );
                        } else {
                                reportFailureXML( title, comments, iopts, 
options, actual, expected, quick, mode );

-- 
To view, visit https://gerrit.wikimedia.org/r/59169
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I8d16b7a8522290cf0f9c6a920cb9e18e95642222
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/extensions/Parsoid
Gerrit-Branch: master
Gerrit-Owner: MarkTraceur <[email protected]>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

[MediaWiki-commits] [Gerrit] Let more attribs through in parsoid-only tests - change (mediawiki...Parsoid)

Reply via email to