Subramanya Sastry has uploaded a new change for review.
https://gerrit.wikimedia.org/r/78092
Change subject: (WIP) Code cleanup: Moved constant maps to wikitext.constants.js
......................................................................
(WIP) Code cleanup: Moved constant maps to wikitext.constants.js
* Replaced many inline "nodeName in {...}" checks: the {...} maps now
live in wikitext.constants.js as named constants, and the use sites
were updated to reference them.
* More to be done.
* Mysteriously, 2 additional wt2wt and 8 additional selser tests now
pass (and were removed from the blacklist). To be investigated.
Change-Id: I574ae735c7c68fcca53f5eed981cace4f7c5e9b1
---
M js/lib/mediawiki.DOMPostProcessor.js
M js/lib/mediawiki.DOMUtils.js
M js/lib/mediawiki.WikitextSerializer.js
M js/lib/mediawiki.wikitext.constants.js
M js/tests/parserTests-blacklist.js
5 files changed, 50 insertions(+), 50 deletions(-)
git pull ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/Parsoid
refs/changes/92/78092/1
diff --git a/js/lib/mediawiki.DOMPostProcessor.js
b/js/lib/mediawiki.DOMPostProcessor.js
index c9320c7..3d786a8 100644
--- a/js/lib/mediawiki.DOMPostProcessor.js
+++ b/js/lib/mediawiki.DOMPostProcessor.js
@@ -6,7 +6,9 @@
util = require('util'),
Util = require('./mediawiki.Util.js').Util,
DU = require('./mediawiki.DOMUtils.js').DOMUtils,
- Node = require('./mediawiki.wikitext.constants.js').Node,
+ wtc = require('./mediawiki.wikitext.constants.js'),
+ Consts = wtc.WikitextConstants,
+ Node = wtc.Node,
domino = require('./domino');
// map from mediawiki metadata names to RDFa property names
@@ -2150,7 +2152,9 @@
if (DU.isElt(next) && next.data.parsoid.src &&
/\bmw:Placeholder\/StrippedTag\b/.test(next.getAttribute("typeof")))
{
- if (next.data.parsoid.name in {B:1,
I:1} && child.nodeName in {B:1, I:1}) {
+ if (next.data.parsoid.name in
Consts.QuoteTags &&
+ child.nodeName in
Consts.QuoteTags)
+ {
correction =
next.data.parsoid.src.length;
ce += correction;
dsrCorrection = correction;
@@ -2190,7 +2194,7 @@
//
// Currently, this fix is only for
// B and I tags where the fix is clear-cut and obvious.
- if (editMode && ce !== null && dp.autoInsertedEnd &&
child.nodeName in {B:1, I:1}) {
+ if (editMode && ce !== null && dp.autoInsertedEnd &&
DU.isQuoteElt(child)) {
correction = (3 + child.nodeName.length);
if (correction === dsrCorrection) {
ce -= correction;
diff --git a/js/lib/mediawiki.DOMUtils.js b/js/lib/mediawiki.DOMUtils.js
index eedcf33..601bc86 100644
--- a/js/lib/mediawiki.DOMUtils.js
+++ b/js/lib/mediawiki.DOMUtils.js
@@ -6,7 +6,9 @@
require('./core-upgrade.js');
var Util = require('./mediawiki.Util.js').Util,
- Node = require('./mediawiki.wikitext.constants.js').Node,
+ wtc = require('./mediawiki.wikitext.constants.js'),
+ Consts = wtc.WikitextConstants,
+ Node = wtc.Node,
pd = require('./mediawiki.parser.defines.js');
// define some constructor shortcuts
@@ -25,14 +27,12 @@
return node && Util.isBlockTag(node.nodeName.toLowerCase());
},
- // See
http://www.w3.org/html/wg/drafts/html/master/syntax.html#formatting
- formattingTagMap: Util.arrayToHash([
- 'A', 'B', 'BIG', 'CODE', 'EM', 'FONT', 'I', 'NOBR',
- 'S', 'SMALL', 'STRIKE', 'STRONG', 'TT', 'U'
- ]),
-
isFormattingElt: function(node) {
- return this.isElt(node) && this.formattingTagMap[node.nodeName];
+ return node && node.nodeName in Consts.HTML.FormattingTags;
+ },
+
+ isQuoteElt: function(node) {
+ return node && node.nodeName in Consts.QuoteTags;
},
/**
@@ -341,15 +341,15 @@
},
isList: function(n) {
- return n && n.nodeName in {OL:1, UL:1, DL:1};
+ return n && n.nodeName in Consts.HTML.ListTags;
},
- isListElt: function(n) {
- return n && n.nodeName in {LI:1, DD:1, DT:1};
+ isListItem: function(n) {
+ return n && n.nodeName in Consts.HTML.ListItemTags;
},
- isListOrListElt: function(n) {
- return n && n.nodeName in {OL:1, UL:1, DL:1, LI:1, DT:1, DD:1};
+ isListOrListItem: function(n) {
+ return this.isList(n) || this.isListItem(n);
},
getPrecedingElementSibling: function(node) {
diff --git a/js/lib/mediawiki.WikitextSerializer.js
b/js/lib/mediawiki.WikitextSerializer.js
index 54e6306..77c9354 100644
--- a/js/lib/mediawiki.WikitextSerializer.js
+++ b/js/lib/mediawiki.WikitextSerializer.js
@@ -27,7 +27,7 @@
require('./core-upgrade.js');
var PegTokenizer = require('./mediawiki.tokenizer.peg.js').PegTokenizer,
wtConsts = require('./mediawiki.wikitext.constants.js'),
- WikitextConstants = wtConsts.WikitextConstants,
+ Consts = wtConsts.WikitextConstants,
Util = require('./mediawiki.Util.js').Util,
DU = require('./mediawiki.DOMUtils.js').DOMUtils,
pd = require('./mediawiki.parser.defines.js'),
@@ -84,7 +84,7 @@
// SSS FIXME: Can be set up as part of an init routine
function getTagWhiteList() {
if (!tagWhiteListHash) {
- tagWhiteListHash =
Util.arrayToHash(WikitextConstants.Sanitizer.TagWhiteList);
+ tagWhiteListHash =
Util.arrayToHash(Consts.Sanitizer.TagWhiteList);
}
return tagWhiteListHash;
}
@@ -100,10 +100,6 @@
function isListItem(token) {
return token && token.constructor === pd.TagTk &&
['li', 'dt', 'dd'].indexOf(token.name) !== -1;
-}
-
-function isListElementName(name) {
- return name in {li:1, dt:1, dd:1};
}
function precedingSeparatorTxt(n) {
@@ -2025,9 +2021,7 @@
// Now move down the lastChilds to see if there are any italics / bolds
while (node && DU.isElt(node)) {
- if (node.nodeName in {I:1, B:1} &&
- node.lastChild && node.lastChild.nodeName in {I:1, B:1})
- {
+ if (DU.isQuoteElt(node) && DU.isQuoteElt(node.lastChild)) {
return state.sep.lastSourceNode === node ?
node.lastChild : null;
} else if (state.sep.lastSourceNode === node) {
// If a separator was already emitted, or an
outstanding separator
@@ -2068,17 +2062,17 @@
otherNode.data.parsoid.stx === 'html' ||
otherNode.data.parsoid.src)
{
return {min:0, max:2};
- } else if (nextSibling === otherNode && DU.isListOrListElt(otherNode)) {
+ } else if (nextSibling === otherNode && DU.isListOrListItem(otherNode))
{
if (DU.isList(node) && otherNode.nodeName === node.nodeName) {
// Adjacent lists of same type need extra newline
return {min: 2, max:2};
- } else if (DU.isListElt(node) || node.parentNode.nodeName in
{LI:1, DD:1}) {
+ } else if (DU.isListItem(node) || node.parentNode.nodeName in
{LI:1, DD:1}) {
// Top-level list
return {min:1, max:1};
} else {
return {min:1, max:2};
}
- } else if (otherNode.nodeName in {'UL':1, 'OL':1, 'DL':1}) {
+ } else if (DU.isList(otherNode)) {
// last child in ul/ol (the list element is our parent), defer
// separator constraints to the list.
return {};
@@ -2122,7 +2116,7 @@
// about the node and its context so that
leading pre-inducing WS
// can be stripped
- if (DU.isText(otherNode) &&
DU.isListElt(node.parentNode)) {
+ if (DU.isText(otherNode) &&
DU.isListItem(node.parentNode)) {
// A list nested inside a list item
// <li> foo <dl> .. </dl></li>
return {min:1, max:1};
@@ -2372,18 +2366,18 @@
},
sepnls: {
before: function(node, otherNode) {
- var otherNodeName =
otherNode.nodeName.toLowerCase();
+ var otherNodeName = otherNode.nodeName;
if( node.parentNode === otherNode &&
-
isListElementName(otherNodeName) || otherNodeName in {td:1, body:1} )
+ DU.isListItem(otherNodeName) ||
otherNodeName in {TD:1, BODY:1} )
{
- if (otherNodeName in {td:1, body:1}) {
+ if (otherNodeName in {TD:1, BODY:1}) {
return {min: 0, max: 1};
} else {
return {min: 0, max: 0};
}
} else if (otherNode === node.previousSibling &&
// p-p transition
- otherNodeName === 'p' ||
+ otherNodeName === 'P' ||
// Treat text/p similar to p/p
transition
// XXX: also check if parent
node and first sibling
// serializes(|d) to single
line.
@@ -2391,7 +2385,7 @@
// needed in that case. Example:
// <div>foo</div> a
// b
- ((otherNodeName === '#text' &&
+ ((otherNodeName === '#TEXT' &&
otherNode ===
DU.previousNonSepSibling(node) &&
// FIXME HACK: Avoid forcing
two newlines if the
// first line is a text node
that ends up on the
@@ -2919,7 +2913,7 @@
// 2. A node whose previous sibling is a list element.
if (liHackSrc !== undefined &&
((prev === null && DU.isList(node.parentNode)) || // Case 1
- (prev !== null && DU.isListElt(prev)))) { // Case 2
+ (prev !== null && DU.isListItem(prev)))) { // Case 2
cb(liHackSrc, node);
}
};
@@ -3214,7 +3208,7 @@
if (dp.stx === 'html' ||
(DU.isNewElt(node) && node.parentNode &&
node.parentNode.data && node.parentNode.data.parsoid.stx ===
'html' &&
- ((DU.isList(node.parentNode) && DU.isListElt(node)) ||
+ ((DU.isList(node.parentNode) && DU.isListItem(node)) ||
(node.parentNode.nodeName in {TABLE:1, TBODY:1, TH:1, TR:1} &&
node.nodeName in {TBODY:1, CAPTION:1, TH:1, TR:1, TD:1}))
))
@@ -3552,11 +3546,9 @@
// we should find the "next" (at this and and ancestor levels), the
non-sep
// sibling and check if that node is one of these types.
//
- // FIXME: Should this be moved to wikitext.constants.js or some such?
- var preSafeTags = {'BR':1, 'TABLE':1, 'TBODY':1, 'CAPTION':1, 'TR':1,
'TD':1, 'TH':1},
// SSS FIXME: how is it that parentNode can be null?? is body
getting here?
- parentName = node.parentNode && node.parentNode.nodeName;
- if (nlConstraints.min > 0 && !(node.nodeName in preSafeTags)) {
+ var parentName = node.parentNode && node.parentNode.nodeName;
+ if (nlConstraints.min > 0 && !(node.nodeName in Consts.PreSafeTags)) {
sep =
sep.replace(/[^\n>]+(<!--(?:[^\-]|-(?!->))*-->[^\n]*)?$/g, '$1');
}
this.trace('makeSeparator', sep, origSep, minNls, sepNlCount,
nlConstraints);
diff --git a/js/lib/mediawiki.wikitext.constants.js
b/js/lib/mediawiki.wikitext.constants.js
index bb5e814..6bc8df8 100644
--- a/js/lib/mediawiki.wikitext.constants.js
+++ b/js/lib/mediawiki.wikitext.constants.js
@@ -86,6 +86,20 @@
'table', 'td', 'th', 'tr', 'tt',
'u', 'ul', 'wbr'
]
+ },
+
+ // Whitespace in these elements does not lead to indent-pre
+ PreSafeTags: Util.arrayToHash(['BR', 'TABLE', 'TBODY', 'CAPTION', 'TR',
'TD', 'TH']),
+ QuoteTags: Util.arrayToHash(['I', 'B']),
+
+ HTML: {
+ ListTags: Util.arrayToHash(['UL', 'OL', 'DL']),
+ ListItemTags: Util.arrayToHash(['LI', 'DD', 'DT']),
+ // See
http://www.w3.org/html/wg/drafts/html/master/syntax.html#formatting
+ FormattingTags: Util.arrayToHash([
+ 'A', 'B', 'BIG', 'CODE', 'EM', 'FONT', 'I', 'NOBR',
+ 'S', 'SMALL', 'STRIKE', 'STRONG', 'TT', 'U'
+ ])
}
};
diff --git a/js/tests/parserTests-blacklist.js
b/js/tests/parserTests-blacklist.js
index 966557a..a1c7280 100644
--- a/js/tests/parserTests-blacklist.js
+++ b/js/tests/parserTests-blacklist.js
@@ -536,7 +536,6 @@
add("wt2wt", "Italicized possessive");
add("wt2wt", "Parsoid only: Quote balancing context should be restricted to
td/th cells on the same wikitext line\n(Requires tidy for PHP parser output to
be fixed up)");
add("wt2wt", "Comment semantics: unclosed comment at end");
-add("wt2wt", "Multiple lines without block tags");
add("wt2wt", "<nowiki> inside <pre> (bug 13238)");
add("wt2wt", "<nowiki> and <pre> preference (first one wins)");
add("wt2wt", "Templates: Single-line variant of parameter whitespace stripping
test");
@@ -676,7 +675,6 @@
add("wt2wt", "Table with broken attribute value quoting on consecutive lines");
add("wt2wt", "Parsoid-only: Table with broken attribute value quoting on
consecutive lines");
add("wt2wt", "RT-ed inter-element separators should be valid separators");
-add("wt2wt", "Trailing newlines in a deep dom-subtree that ends a wikitext
line should be migrated out\n(Parsoid-only since PHP parser relies on Tidy for
correct output)");
add("wt2wt", "Empty TD followed by TD with tpl-generated attribute");
add("wt2wt", "Empty TR followed by mixed-ws-comment line should RT correctly");
add("wt2wt", "Improperly nested inline or quotes tags with whitespace in
between");
@@ -2363,8 +2361,6 @@
add("selser", "Comment semantics: unclosed comment at end [3]");
add("selser", "Comment semantics: unclosed comment at end [4]");
add("selser", "Comment semantics: unclosed comment at end [2]");
-add("selser", "Multiple lines without block tags [0,4,1,0,3,0,4]");
-add("selser", "Multiple lines without block tags [[3],3,1,4,0,0,2]");
add("selser", "<nowiki> inside <pre> (bug 13238) [4,2,2,0,0]");
add("selser", "<nowiki> inside <pre> (bug 13238) [0,0,4,0,[[4],0,2,4]]");
add("selser", "<nowiki> inside <pre> (bug 13238) [3,0,3,3,2]");
@@ -3842,14 +3838,8 @@
add("selser", "RT-ed inter-element separators should be valid separators
[2,[0,[[2],0]]]");
add("selser", "RT-ed inter-element separators should be valid separators
[0,2]");
add("selser", "RT-ed inter-element separators should be valid separators
[4,1]");
-add("selser", "Trailing newlines in a deep dom-subtree that ends a wikitext
line should be migrated out\n(Parsoid-only since PHP parser relies on Tidy for
correct output) [1,0,[0,2]]");
-add("selser", "Trailing newlines in a deep dom-subtree that ends a wikitext
line should be migrated out\n(Parsoid-only since PHP parser relies on Tidy for
correct output) [[0,1],0,2]");
-add("selser", "Trailing newlines in a deep dom-subtree that ends a wikitext
line should be migrated out\n(Parsoid-only since PHP parser relies on Tidy for
correct output) [[2,[[[[2,[2]]]],0]],0,[0,[[3],3]]]");
add("selser", "Trailing newlines in a deep dom-subtree that ends a wikitext
line should be migrated out\n(Parsoid-only since PHP parser relies on Tidy for
correct output) [[3,[2,3]],0,4]");
add("selser", "Trailing newlines in a deep dom-subtree that ends a wikitext
line should be migrated out\n(Parsoid-only since PHP parser relies on Tidy for
correct output) [[2,4],4,[4,[2,0]]]");
-add("selser", "Trailing newlines in a deep dom-subtree that ends a wikitext
line should be migrated out\n(Parsoid-only since PHP parser relies on Tidy for
correct output) [2,0,[0,2]]");
-add("selser", "Trailing newlines in a deep dom-subtree that ends a wikitext
line should be migrated out\n(Parsoid-only since PHP parser relies on Tidy for
correct output) [2,0,[4,4]]");
-add("selser", "Trailing newlines in a deep dom-subtree that ends a wikitext
line should be migrated out\n(Parsoid-only since PHP parser relies on Tidy for
correct output) [2,0,[2,4]]");
add("selser", "Trailing newlines in a deep dom-subtree that ends a wikitext
line should be migrated out\n(Parsoid-only since PHP parser relies on Tidy for
correct output) [[0,[2,0]],4,1]");
add("selser", "Empty TD followed by TD with tpl-generated attribute [1]");
add("selser", "Empty TD followed by TD with tpl-generated attribute [2]");
--
To view, visit https://gerrit.wikimedia.org/r/78092
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: I574ae735c7c68fcca53f5eed981cace4f7c5e9b1
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/extensions/Parsoid
Gerrit-Branch: master
Gerrit-Owner: Subramanya Sastry <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits