[MediaWiki-commits] [Gerrit] Better regexp for typeof attribute; use normalizeOut for p... - change (mediawiki...Parsoid)
jenkins-bot has submitted this change and it was merged. Change subject: Better regexp for typeof attribute; use normalizeOut for parsoid-only tests. .. Better regexp for typeof attribute; use normalizeOut for parsoid-only tests. 1. Use consistent normalization for parsoid-only tests (normalizeOut for both the parsoid output and the output specified in the parserTests file, although the DOM read in from parserTests is itself normalized first, before normalizeOut is applied). 2. Match typeof attributes during normalization, even if they are not the first attribute in their tag. Five different parserTests newly pass. There are two new failures, both because we now accurately strip span typeof=Object/Template. In the "Ugly nesting" test case, the typeof used to be hidden by another attribute in front of it; i.e., the expected output was always wrong. In the "confirming safety of fix" test case, an initial span in the template content is used instead of a newly-added span wrapper, and so normalize strips an attribute when it shouldn't. Both of these will be fixed in upstream parserTests. 
Change-Id: I823e5515698764b5b7fbed4a1bd902f8233e71c7 --- M js/lib/mediawiki.Util.js M js/tests/parserTests-blacklist.js M js/tests/parserTests.js 3 files changed, 9 insertions(+), 7 deletions(-) Approvals: Subramanya Sastry: Looks good to me, but someone else must approve GWicke: Looks good to me, approved jenkins-bot: Verified diff --git a/js/lib/mediawiki.Util.js b/js/lib/mediawiki.Util.js index 82ab83b..8934503 100644 --- a/js/lib/mediawiki.Util.js +++ b/js/lib/mediawiki.Util.js @@ -1013,7 +1013,7 @@ } out = normalizeNewlines( out ); return out - .replace(/span typeof=mw:(?:(?:Placeholder|Nowiki|Object\/Template|Entity))(?: [^\0-\cZ\s\\'\/=]+(?:=[^]*)?)*((?:[^]+|(?!\/span).)*)\/span/g, '$1') + .replace(/span(?: [^]+)* typeof=mw:(?:(?:Placeholder|Nowiki|Object\/Template|Entity))(?: [^\0-\cZ\s\\'\/=]+(?:=[^]*)?)*((?:[^]+|(?!\/span).)*)\/span/g, '$1') // Ignore these attributes for now .replace(/ (data-mw|data-parsoid|typeof|resource|rel|prefix|about|rev|datatype|inlist|property|vocab|content|title|class)=[^]*/g, '') // replace mwt ids diff --git a/js/tests/parserTests-blacklist.js b/js/tests/parserTests-blacklist.js index 389b833..9f8a41a 100644 --- a/js/tests/parserTests-blacklist.js +++ b/js/tests/parserTests-blacklist.js @@ -113,13 +113,8 @@ add(wt2html, Bug 6563: Section extraction for section shown by includeonly); add(wt2html, Bug 6563: Section extraction for section suppressed by includeonly); add(wt2html, 2. includeonly in html attr value); -add(wt2html, Templates: 2. Inside a block tag); add(wt2html, Templates: P-wrapping: 1a. Templates on consecutive lines); add(wt2html, Templates: P-wrapping: 1b. Templates on consecutive lines); -add(wt2html, Templates: P-wrapping: 1c. Templates on consecutive lines); -add(wt2html, Templates: Inline Text: 1. Multiple tmeplate uses); -add(wt2html, Templates: Inline Text: 2. Back-to-back template uses); -add(wt2html, Templates: Links: 5. Generation of link text); add(wt2html, Templates: HTML Tag: 2. 
Generation of HTML attr. value); add(wt2html, Templates: HTML Tag: 3. Generation of HTML attr key and value); add(wt2html, Templates: Wiki Tables: 1a. Fostering of entire template content); @@ -129,6 +124,7 @@ add(wt2html, Templates: Wiki Tables: 4. Templated tags, no content); add(wt2html, Templates: Lists: Multi-line list-items via templates); add(wt2html, Templates: Ugly nesting: 1. Quotes opened/closed across templates (echo)); +add(wt2html, Templates: Ugly nesting: 2. Quotes opened/closed across templates (echo_with_span)\n(PHP parser generates misnested html)); add(wt2html, Templates: Ugly nesting: 4. Divs opened/closed across templates); add(wt2html, Templates: Ugly templates: 2. Navbox template parses badly leading to table misnesting\n(Parsoid-centric)); add(wt2html, Templates: Ugly templates: 4. newline-only template parameter inconsistency); @@ -142,6 +138,7 @@ add(wt2html, pre-save transform: unclosed tag); add(wt2html, pre-save transform: mixed tag case); add(wt2html, pre-save transform: unclosed comment in nowiki); +add(wt2html, (confirming safety of fix for subst bug 1936)); add(wt2html, pre-save transform: noinclude in subst (bug 3298)); add(wt2html, pre-save transform: onlyinclude in subst (bug 3298)); add(wt2html, bug 22297: safesubst: works during PST); diff --git a/js/tests/parserTests.js b/js/tests/parserTests.js index f879a99..4c671e1 100755 --- a/js/tests/parserTests.js +++ b/js/tests/parserTests.js @@ -1121,7 +1121,12 @@ normalizedOut = Util.normalizeOut( out ); if ( item.cachedNormalizedHTML === null ) { - normalizedExpected = Util.normalizeHTML( item.result ); + if ('parsoid' in item.options) { + var
[MediaWiki-commits] [Gerrit] Better regexp for typeof attribute; use normalizeOut for p... - change (mediawiki...Parsoid)
Cscott has uploaded a new change for review. https://gerrit.wikimedia.org/r/62773 Change subject: Better regexp for typeof attribute; use normalizeOut for parsoid-only tests. .. Better regexp for typeof attribute; use normalizeOut for parsoid-only tests. 1. Use consistent normalization for parsoid-only tests (normalizeOut for both the parsoid output and the output specified in the parserTests file, although the DOM read in from parserTests is itself normalized first, before normalizeOut is applied). 2. Match typeof attributes during normalization, even if they are not the first attribute in their tag. Five different parserTests newly pass. There are two new failures, both because we now accurately strip span typeof=Object/Template. In the "Ugly nesting" test case, the typeof used to be hidden by another attribute in front of it; i.e., the expected output was always wrong. In the "confirming safety of fix" test case, an initial span in the template content is used instead of a newly-added span wrapper, and so normalize strips an attribute when it shouldn't. Both of these will be fixed in upstream parserTests. 
Change-Id: I823e5515698764b5b7fbed4a1bd902f8233e71c7 --- M js/lib/mediawiki.Util.js M js/tests/parserTests-blacklist.js M js/tests/parserTests.js 3 files changed, 9 insertions(+), 7 deletions(-) git pull ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/Parsoid refs/changes/73/62773/1 diff --git a/js/lib/mediawiki.Util.js b/js/lib/mediawiki.Util.js index 82ab83b..8934503 100644 --- a/js/lib/mediawiki.Util.js +++ b/js/lib/mediawiki.Util.js @@ -1013,7 +1013,7 @@ } out = normalizeNewlines( out ); return out - .replace(/span typeof=mw:(?:(?:Placeholder|Nowiki|Object\/Template|Entity))(?: [^\0-\cZ\s\\'\/=]+(?:=[^]*)?)*((?:[^]+|(?!\/span).)*)\/span/g, '$1') + .replace(/span(?: [^]+)* typeof=mw:(?:(?:Placeholder|Nowiki|Object\/Template|Entity))(?: [^\0-\cZ\s\\'\/=]+(?:=[^]*)?)*((?:[^]+|(?!\/span).)*)\/span/g, '$1') // Ignore these attributes for now .replace(/ (data-mw|data-parsoid|typeof|resource|rel|prefix|about|rev|datatype|inlist|property|vocab|content|title|class)=[^]*/g, '') // replace mwt ids diff --git a/js/tests/parserTests-blacklist.js b/js/tests/parserTests-blacklist.js index 389b833..9f8a41a 100644 --- a/js/tests/parserTests-blacklist.js +++ b/js/tests/parserTests-blacklist.js @@ -113,13 +113,8 @@ add(wt2html, Bug 6563: Section extraction for section shown by includeonly); add(wt2html, Bug 6563: Section extraction for section suppressed by includeonly); add(wt2html, 2. includeonly in html attr value); -add(wt2html, Templates: 2. Inside a block tag); add(wt2html, Templates: P-wrapping: 1a. Templates on consecutive lines); add(wt2html, Templates: P-wrapping: 1b. Templates on consecutive lines); -add(wt2html, Templates: P-wrapping: 1c. Templates on consecutive lines); -add(wt2html, Templates: Inline Text: 1. Multiple tmeplate uses); -add(wt2html, Templates: Inline Text: 2. Back-to-back template uses); -add(wt2html, Templates: Links: 5. Generation of link text); add(wt2html, Templates: HTML Tag: 2. Generation of HTML attr. 
value); add(wt2html, Templates: HTML Tag: 3. Generation of HTML attr key and value); add(wt2html, Templates: Wiki Tables: 1a. Fostering of entire template content); @@ -129,6 +124,7 @@ add(wt2html, Templates: Wiki Tables: 4. Templated tags, no content); add(wt2html, Templates: Lists: Multi-line list-items via templates); add(wt2html, Templates: Ugly nesting: 1. Quotes opened/closed across templates (echo)); +add(wt2html, Templates: Ugly nesting: 2. Quotes opened/closed across templates (echo_with_span)\n(PHP parser generates misnested html)); add(wt2html, Templates: Ugly nesting: 4. Divs opened/closed across templates); add(wt2html, Templates: Ugly templates: 2. Navbox template parses badly leading to table misnesting\n(Parsoid-centric)); add(wt2html, Templates: Ugly templates: 4. newline-only template parameter inconsistency); @@ -142,6 +138,7 @@ add(wt2html, pre-save transform: unclosed tag); add(wt2html, pre-save transform: mixed tag case); add(wt2html, pre-save transform: unclosed comment in nowiki); +add(wt2html, (confirming safety of fix for subst bug 1936)); add(wt2html, pre-save transform: noinclude in subst (bug 3298)); add(wt2html, pre-save transform: onlyinclude in subst (bug 3298)); add(wt2html, bug 22297: safesubst: works during PST); diff --git a/js/tests/parserTests.js b/js/tests/parserTests.js index f879a99..4c671e1 100755 --- a/js/tests/parserTests.js +++ b/js/tests/parserTests.js @@ -1121,7 +1121,12 @@ normalizedOut = Util.normalizeOut( out ); if ( item.cachedNormalizedHTML === null ) { - normalizedExpected = Util.normalizeHTML( item.result ); + if ('parsoid' in item.options) { + var normalDOM =