jenkins-bot has submitted this change and it was merged. Change subject: Let more attribs through in parsoid-only tests ......................................................................
Let more attribs through in parsoid-only tests We need e.g. property and class so we can make sure the new image tests actually test what we need 'em to. Only two parserTests changes: 1) "Table with broken attribute value quoting" now passes wt2html because title/style attributes aren't stripped. 2) "Italics and bold: other quote tests: (3,2,3,3)" fails html2html -- it previously failed wt2wt and html2wt (ie, we don't round-trip this cleanly) and managed to pass html2html only by a fluke (we were stripping out the nowiki <span> attribute it was adding). The underlying issue with the failing test case is that we round-trip '''this is about ''foo'''s family''' as '''this is about ''<nowiki>foo'</nowiki>''s family''' which is conservative but the <nowiki> is not strictly necessary. At any rate, we now accurately fail wt2wt, html2wt, and html2html on this test case. Co-authored-by: C. Scott Ananian <[email protected]> Co-authored-by: Mark Holmquist <[email protected]> Change-Id: Idc0de45d43fdecfd37c730baee10c8ee086e6fd6 --- M js/lib/mediawiki.Util.js M js/tests/parserTests-blacklist.js M js/tests/parserTests.js 3 files changed, 42 insertions(+), 25 deletions(-) Approvals: Cscott: Looks good to me, approved GWicke: Looks good to me, approved jenkins-bot: Verified diff --git a/js/lib/mediawiki.Util.js b/js/lib/mediawiki.Util.js index 8934503..4a887f2 100644 --- a/js/lib/mediawiki.Util.js +++ b/js/lib/mediawiki.Util.js @@ -989,18 +989,21 @@ // and eat all remaining newlines .replace(/[\r\n]/g, ''); -}, +}; /** * @method normalizeOut * * Specialized normalization of the wiki parser output, mostly to ignore a few - * known-ok differences. + * known-ok differences. If parsoidOnly is true-ish, then we allow more + * markup through (like property and typeof attributes), for better + * checking of parsoid-only test cases. * * @param {string} out + * @param {bool} parsoidOnly * @returns {string} */ -normalizeOut = function ( out ) { +var normalizeOut = function ( out, parsoidOnly ) { // TODO: Do not strip newlines in pre and nowiki blocks! // NOTE that we use a slightly restricted regexp for "attribute" // which works for the output of DOM serialization. For example, @@ -1012,23 +1015,34 @@ throw new Error("normalizeOut input is not in standard serialized form"); } out = normalizeNewlines( out ); - return out - .replace(/<span(?: [^>]+)* typeof="mw:(?:(?:Placeholder|Nowiki|Object\/Template|Entity))"(?: [^\0-\cZ\s\"\'>\/=]+(?:="[^"]*")?)*>((?:[^<]+|(?!<\/span).)*)<\/span>/g, '$1') - // Ignore these attributes for now - .replace(/ (data-mw|data-parsoid|typeof|resource|rel|prefix|about|rev|datatype|inlist|property|vocab|content|title|class)="[^"]*"/g, '') + if ( !parsoidOnly ) { + // ignore troublesome attributes + out = out. + // remove <span typeof="....">....</span> + replace(/<span(?: [^>]+)* typeof="mw:(?:Placeholder|Nowiki|Object\/Template|Entity)"(?: [^\0-\cZ\s\"\'>\/=]+(?:="[^"]*")?)*>((?:[^<]+|(?!<\/span).)*)<\/span>/g, '$1'). + replace(/ (data-mw|data-parsoid|typeof|resource|rel|prefix|about|rev|datatype|inlist|property|vocab|content|title|class)="[^\"]*"/g, ''); + } else { + out = out. + // remove <span typeof="mw:Placeholder">....</span> + replace(/<span(?: [^>]+)* typeof="mw:Placeholder"(?: [^\0-\cZ\s\"\'>\/=]+(?:="[^"]*")?)*>((?:[^<]+|(?!<\/span).)*)<\/span>/g, '$1'). + // unnecessary attributes, we don't need to check these + // style is in there because we should only check classes. + replace(/ (data-mw|data-parsoid|prefix|about|rev|datatype|inlist|vocab|content|style)="[^\"]*"/g, ''); + } + return out. // replace mwt ids - .replace(/ id="mwt\d+"/, '') + replace(/ id="mwt\d+"/, ''). //.replace(/<!--.*?-->\n?/gm, '') - .replace(/<\/?(?:meta|link)(?: [^\0-\cZ\s"'>\/=]+(?:="[^"]*")?)*\/?>/g, '') - .replace(/<span[^>]+about="[^"]*"[^>]*>/g, '') - .replace(/<span><\/span>/g, '') - .replace(/(href=")(?:\.?\.\/)+/g, '$1') + replace(/<\/?(?:meta|link)(?: [^\0-\cZ\s"'>\/=]+(?:="[^"]*")?)*\/?>/g, ''). + replace(/<span[^>]+about="[^"]*"[^>]*>/g, ''). + replace(/<span><\/span>/g, ''). + replace(/(href=")(?:\.?\.\/)+/g, '$1'). // replace unnecessary URL escaping - .replace(/ href="[^"]*"/g, decodeURIComponent) + replace(/ href="[^"]*"/g, decodeURIComponent). // strip thumbnail size prefixes - .replace(/(src="[^"]*?)\/thumb(\/[0-9a-f]\/[0-9a-f]{2}\/[^\/]+)\/[0-9]+px-[^"\/]+(?=")/g, '$1$2') - .replace(/(<(table|tbody|tr|th|td|\/th|\/td)[^<>]*>)\s+/g, '$1'); -}, + replace(/(src="[^"]*?)\/thumb(\/[0-9a-f]\/[0-9a-f]{2}\/[^\/]+)\/[0-9]+px-[^"\/]+(?=")/g, '$1$2'). + replace(/(<(table|tbody|tr|th|td|\/th|\/td)[^<>]*>)\s+/g, '$1'); +}; /** * @method normalizeHTML @@ -1040,7 +1054,7 @@ * @param source {string} * @return {string} */ -normalizeHTML = function ( source ) { +var normalizeHTML = function ( source ) { // TODO: Do not strip newlines in pre and nowiki blocks! source = normalizeNewlines( source ); try { @@ -1069,8 +1083,8 @@ .replace(/<span><\/span>/g, '') .replace(/(<(table|tbody|tr|th|td|\/th|\/td)[^<>]*>)\s+/g, '$1'); } catch(e) { - console.log("normalizeHTML failed on" + - source + " with the following error: " + e); + console.log("normalizeHTML failed on" + + source + " with the following error: " + e); console.trace(); return source; } diff --git a/js/tests/parserTests-blacklist.js b/js/tests/parserTests-blacklist.js index b1ecf89..9f7564a 100644 --- a/js/tests/parserTests-blacklist.js +++ b/js/tests/parserTests-blacklist.js @@ -473,7 +473,6 @@ add("wt2html", "1. Leading space in SOL context should be escaped"); add("wt2html", "1. a tags"); add("wt2html", "Table with broken attribute value quoting on consecutive lines"); -add("wt2html", "Parsoid-only: Table with broken attribute value quoting on consecutive lines"); add("wt2html", "Parsoid-only: Don't wrap broken template tags in <nowiki> on wt2wt (Bug 42353)"); add("wt2html", "Parsoid-only: Don't wrap broken template tags in <nowiki> on wt2wt (Bug 42353)"); add("wt2html", "Empty table rows go away"); @@ -629,6 +628,7 @@ add("html2html", "Italics and bold: 4-quote opening sequence: (4,5) (parsoid)"); add("html2html", "Italics and bold: multiple quote sequences: (3,4,2) (parsoid)"); add("html2html", "Italics and bold: multiple quote sequences: (3,4,3) (parsoid)"); +add("html2html", "Italics and bold: other quote tests: (3,2,3,3) (parsoid)"); add("html2html", "Italicized possessive"); add("html2html", "Preformatted text"); add("html2html", "<pre> with attributes (bug 3202)"); diff --git a/js/tests/parserTests.js b/js/tests/parserTests.js index b24c775..b186165 100755 --- a/js/tests/parserTests.js +++ b/js/tests/parserTests.js @@ -1078,6 +1078,7 @@ */ ParserTests.prototype.printResult = function ( title, time, comments, iopts, expected, actual, options, mode, item ) { var quick = booleanOption( options.quick ); + var parsoidOnly = (iopts.parsoid !== undefined); if ( mode === 'selser' ) { title += ' ' + JSON.stringify( item.changes ); @@ -1090,7 +1091,7 @@ if ( fail && booleanOption( options.whitelist ) && title in testWhiteList && - Util.normalizeOut( testWhiteList[title] ) === actual.normal ) { + Util.normalizeOut( testWhiteList[title], parsoidOnly ) === actual.normal ) { whitelist = true; fail = false; } @@ -1117,13 +1118,14 @@ */ ParserTests.prototype.checkHTML = function ( item, out, options, mode ) { var normalizedOut, normalizedExpected; + var parsoidOnly = (item.options.parsoid !== undefined); - normalizedOut = Util.normalizeOut( out ); + normalizedOut = Util.normalizeOut( out, parsoidOnly ); if ( item.cachedNormalizedHTML === null ) { - if ('parsoid' in item.options) { + if ( parsoidOnly ) { var normalDOM = Util.parseHTML( item.result ).body.innerHTML; - normalizedExpected = Util.normalizeOut( normalDOM ); + normalizedExpected = Util.normalizeOut( normalDOM, parsoidOnly ); } else { normalizedExpected = Util.normalizeHTML( item.result ); } @@ -1759,6 +1761,7 @@ reportResultXML = function ( title, time, comments, iopts, expected, actual, options, mode, item ) { var timeTotal, testcaseEle; var quick = booleanOption( options.quick ); + var parsoidOnly = (iopts.parsoid !== undefined); if ( mode === 'selser' ) { title += ' ' + JSON.stringify( item.changes ); @@ -1771,7 +1774,7 @@ if ( fail && booleanOption( options.whitelist ) && title in testWhiteList && - Util.normalizeOut( testWhiteList[title] ) === actual.normal ) { + Util.normalizeOut( testWhiteList[title], parsoidOnly ) === actual.normal ) { whitelist = true; fail = false; } -- To view, visit https://gerrit.wikimedia.org/r/61780 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: merged Gerrit-Change-Id: Idc0de45d43fdecfd37c730baee10c8ee086e6fd6 Gerrit-PatchSet: 8 Gerrit-Project: mediawiki/extensions/Parsoid Gerrit-Branch: master Gerrit-Owner: Cscott <[email protected]> Gerrit-Reviewer: Cscott <[email protected]> Gerrit-Reviewer: GWicke <[email protected]> Gerrit-Reviewer: MarkTraceur <[email protected]> Gerrit-Reviewer: jenkins-bot _______________________________________________ MediaWiki-commits mailing list [email protected] https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits
