MarkTraceur has uploaded a new change for review.
https://gerrit.wikimedia.org/r/59169
Change subject: Let more attribs through in parsoid-only tests
......................................................................
Let more attribs through in parsoid-only tests
We need e.g. the property and class attributes so we can make sure the new
image tests actually test what we need them to.
Change-Id: I8d16b7a8522290cf0f9c6a920cb9e18e95642222
---
M js/lib/mediawiki.Util.js
M js/tests/parserTests.js
2 files changed, 92 insertions(+), 5 deletions(-)
git pull ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/Parsoid
refs/changes/69/59169/1
diff --git a/js/lib/mediawiki.Util.js b/js/lib/mediawiki.Util.js
index de1e78b..ff87a50 100644
--- a/js/lib/mediawiki.Util.js
+++ b/js/lib/mediawiki.Util.js
@@ -929,6 +929,43 @@
},
/**
+ * @method normalizeParsoidOut
+ *
+ * Specialized normalization of the wiki parser output, mostly to ignore a few
+ * known-ok differences. Specifically for parsoid-only tests, where we want to
+ * be sure of a few more attributes like property and typeof.
+ *
+ * @param {string} out
+ * @returns {string}
+ */
+normalizeParsoidOut = function ( out ) {
+ // TODO: Do not strip newlines in pre and nowiki blocks!
+ // NOTE that we use a slightly restricted regexp for "attribute"
+ // which works for the output of DOM serialization. For example,
+ // we know that attribute values will be surrounded with double quotes,
+ // not unquoted or quoted with single quotes. The serialization
+ // algorithm is given by:
+ // http://www.whatwg.org/specs/web-apps/current-work/multipage/the-end.html#serializing-html-fragments
+ if
(!/[^<]*(<\w+(\s+[^\0-\cZ\s"'>\/=]+(="[^"]*")?)*\/?>[^<]*)*/.test(out)) {
+ throw new Error("normalizeParsoidOut input is not in standard
serialized form");
+ }
+ out = normalizeNewlines( out );
+ return out
+ // Ignore these attributes for now
+ .replace(/
(data-mw|data-parsoid|prefix|about|rev|datatype|inlist|vocab|content)="[^"]*"/g,
'')
+ // replace mwt ids
+ .replace(/ id="mwt\d+"/, '')
+ //.replace(/<!--.*?-->\n?/gm, '')
+ .replace(/<\/?(?:meta|link)(?:
[^\0-\cZ\s"'>\/=]+(?:="[^"]*")?)*\/?>/g, '')
+ .replace(/<span[^>]+about="[^"]*"[^>]*>/g, '')
+ .replace(/<span><\/span>/g, '')
+ .replace(/(href=")(?:\.?\.\/)+/g, '$1')
+ // strip thumbnail size prefixes
+
.replace(/(src="[^"]*?)\/thumb(\/[0-9a-f]\/[0-9a-f]{2}\/[^\/]+)\/[0-9]+px-[^"\/]+(?=")/g,
'$1$2')
+ .replace(/(<(table|tbody|tr|th|td|\/th|\/td)[^<>]*>)\s+/g,
'$1');
+},
+
+/**
* @method normalizeOut
*
* Specialized normalization of the wiki parser output, mostly to ignore a few
@@ -963,6 +1000,53 @@
// strip thumbnail size prefixes
.replace(/(src="[^"]*?)\/thumb(\/[0-9a-f]\/[0-9a-f]{2}\/[^\/]+)\/[0-9]+px-[^"\/]+(?=")/g,
'$1$2')
.replace(/(<(table|tbody|tr|th|td|\/th|\/td)[^<>]*>)\s+/g,
'$1');
+},
+
+/**
+ * @method normalizeParsoidHTML
+ *
+ * Normalize the expected parser output by parsing it using a HTML5 parser and
+ * re-serializing it to HTML. Ideally, the parser would normalize inter-tag
+ * whitespace for us. For now, we fake that by simply stripping all newlines.
+ *
+ * This function is for Parsoid-only tests. We leave in some extra attributes
+ * and elements in that case, for completeness.
+ *
+ * @param source {string}
+ * @return {string}
+ */
+normalizeParsoidHTML = function ( source ) {
+ // TODO: Do not strip newlines in pre and nowiki blocks!
+ source = normalizeNewlines( source );
+ try {
+ var doc = this.parseHTML( source );
+ return doc.body
+ .innerHTML
+ // a few things we ignore for now..
+ //.replace(/\/wiki\/Main_Page/g, 'Main Page')
+ // do not expect a toc for now
+ .replace(/<table[^>]+?id="toc"[^>]*>.+?<\/table>/mg, '')
+ // do not expect section editing for now
+ .replace(/(<span class="editsection">\[.*?<\/span>
*)?<span[^>]+class="mw-headline"[^>]*>(.*?)<\/span>/g, '$2')
+ // remove empty span tags
+ .replace(/<span><\/span>/g, '')
+ // Don't need about attributes
+ .replace(/ about="[^"]*"/g, '')
+ // strip red link markup, we do not check if a page exists yet
+
.replace(/\/index.php\?title=([^']+?)&action=edit&redlink=1/g,
'/wiki/$1')
+ // the expected html has some extra space in tags, strip it
+ .replace(/<a +href/g, '<a href')
+ .replace(/href="\/wiki\//g, 'href="')
+ .replace(/" +>/g, '">')
+ // strip empty spans
+ .replace(/<span><\/span>/g, '')
+
.replace(/(<(table|tbody|tr|th|td|\/th|\/td)[^<>]*>)\s+/g, '$1');
+ } catch(e) {
+ console.log("normalizeParsoidHTML failed on" +
+ source + " with the following error: " + e);
+ console.trace();
+ return source;
+ }
},
/**
@@ -1120,6 +1204,8 @@
Util.serializeNode = serializeNode;
Util.normalizeHTML = normalizeHTML;
Util.normalizeOut = normalizeOut;
+Util.normalizeParsoidHTML = normalizeParsoidHTML;
+Util.normalizeParsoidOut = normalizeParsoidOut;
Util.formatHTML = formatHTML;
}( Util ) );
diff --git a/js/tests/parserTests.js b/js/tests/parserTests.js
index 5b51546..aa87b55 100755
--- a/js/tests/parserTests.js
+++ b/js/tests/parserTests.js
@@ -1069,12 +1069,13 @@
* @param {Object} options
*/
ParserTests.prototype.checkHTML = function ( item, out, options, mode ) {
- var normalizedOut, normalizedExpected;
-
- normalizedOut = Util.normalizeOut( out );
+ var normalizedExpected,
+ normOutFunc = ( ( item.options.parsoid !== undefined ) ?
Util.normalizeParsoidOut : Util.normalizeOut ).bind( Util ),
+ normHTMLFunc = ( ( item.options.parsoid !== undefined ) ?
Util.normalizeParsoidHTML : Util.normalizeHTML ).bind( Util ),
+ normalizedOut = normOutFunc( out );
if ( item.cachedNormalizedHTML === null ) {
- normalizedExpected = Util.normalizeHTML( item.result );
+ normalizedExpected = normHTMLFunc( item.result );
item.cachedNormalizedHTML = normalizedExpected;
} else {
normalizedExpected = item.cachedNormalizedHTML;
@@ -1637,7 +1638,7 @@
if ( expected.normal !== actual.normal ) {
if ( options.whitelist && title in testWhiteList &&
- Util.normalizeOut( testWhiteList[title] ) ===
actual.normal ) {
+ Util.normalizeOut( testWhiteList[title] ) ===
actual.normal ) {
reportSuccessXML( title, mode, true, quiet );
} else {
reportFailureXML( title, comments, iopts,
options, actual, expected, quick, mode );
--
To view, visit https://gerrit.wikimedia.org/r/59169
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: I8d16b7a8522290cf0f9c6a920cb9e18e95642222
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/extensions/Parsoid
Gerrit-Branch: master
Gerrit-Owner: MarkTraceur <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits