GWicke has uploaded a new change for review.
https://gerrit.wikimedia.org/r/51333
Change subject: WIP: DOM-based WTS refactor
......................................................................
WIP: DOM-based WTS refactor
Change-Id: I67463a12fd200f3a9f15c55658d5efda5e334d83
---
M js/lib/mediawiki.WikitextSerializer.js
1 file changed, 203 insertions(+), 408 deletions(-)
git pull ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/Parsoid
refs/changes/33/51333/1
diff --git a/js/lib/mediawiki.WikitextSerializer.js
b/js/lib/mediawiki.WikitextSerializer.js
index 5b19afb..6149d8c 100644
--- a/js/lib/mediawiki.WikitextSerializer.js
+++ b/js/lib/mediawiki.WikitextSerializer.js
@@ -310,6 +310,7 @@
lastRes: '',
onNewline: true,
onStartOfLine : true,
+ atStartOfOutput: true,
singleLineMode: 0,
wteHandlerStack: [],
tplAttrs: {},
@@ -349,7 +350,7 @@
this.chunkCB = chunkCB;
}
- this.serializer._serializeDOM(node, this);
+ this.serializer._serializeNode(node, this);
if ( wtEscaper ) {
this.wteHandlerStack.pop();
@@ -376,8 +377,21 @@
if ( wtEscaper ) {
this.wteHandlerStack.push(wtEscaper);
}
- for (var i = 0, l = nodes.length; i < l; i++) {
- this.serializer._serializeDOM(nodes[i], this);
+ var node = nodes[0],
+ parentNode = node && node.parentNode,
+ nextNode;
+ while(node) {
+ nextNode = this.serializer._serializeNode(node, this);
+ if (nextNode === parentNode) {
+ // serialized all children
+ break;
+ } else if (nextNode === node) {
+ // advance the child
+ node = node.nextSibling;
+ } else {
+ console.log('nextNode', nextNode &&
nextNode.outerHTML);
+ node = nextNode;
+ }
}
this.chunkCB = oldCB;
if ( wtEscaper ) {
@@ -713,7 +727,7 @@
/**
* DOM-based figure handler
*/
-WSP.figureHandler = function(state, node, cb) {
+WSP.figureHandler = function(node, state, cb) {
var img, caption,
dp = node.data.parsoid,
@@ -962,7 +976,7 @@
// openTagSrc = ...; endTagSrc = ...; and at the end of the function,
// check for autoInsertedStart and autoInsertedEnd attributes and
// suppress openTagSrc or endTagSrc appropriately.
-WSP.linkHandler = function( state, node, cb ) {
+WSP.linkHandler = function(node, state, cb) {
//return '[[';
// TODO: handle internal/external links etc using RDFa and dataAttribs
// Also convert unannotated html links without advanced attributes to
@@ -1154,7 +1168,7 @@
} else {
// Unknown rel was set
//cb( state.serializeDOMToString( node ) );
- WSP.htmlElementHandler(state, node, cb);
+ WSP.htmlElementHandler(node, state, cb);
return;
}
} else {
@@ -1173,7 +1187,7 @@
if ( true || isComplexLink ( node.attributes ) ) {
// Complex attributes we can't support in wiki syntax
- WSP.htmlElementHandler(state, node, cb);
+ WSP.htmlElementHandler(node, state, cb);
} else {
// TODO: serialize as external wikilink
cb( '' );
@@ -1276,13 +1290,55 @@
return prefix;
}
}
+// XXX refactor: move to DOM handlers!
+// Newly created elements/tags in this list inherit their default
+// syntax from their parent scope
+var inheritSTXTags = { tbody:1, tr: 1, td: 1, li: 1, dd: 1, dt: 1 },
+ // These reset the inherited syntax no matter what
+ setSTXTags = { table: 1, ul: 1, ol: 1, dl: 1 },
+ // These (and inline elements) reset the default syntax to
+ // undefined
+ noHTMLSTXTags = {p: 1};
+
+// XXX refactor: move to dedicated template handler that consumes siblings
+// if (state.activeTemplateId &&
+// state.activeTemplateId === node.getAttribute("about"))
+// {
+// // skip -- template content
+// return;
+// } else {
+// state.activeTemplateId = null;
+// }
+//
+// if (!state.activeTemplateId) {
+// // Check if this node marks the start of template output
+// // NOTE: Since we are deleting all mw:Object/**/End
markers,
+// // we need not verify if it is an End marker
+// var typeofVal = node.getAttribute("typeof");
+// if (typeofVal &&
typeofVal.match(/\bmw:Object(\/[^\s]+|\b)/)) {
+// state.activeTemplateId =
node.getAttribute("about") || "";
+// var attrs = [ new pd.KV("typeof",
"mw:TemplateSource") ];
+// var dps =
node.getAttribute("data-parsoid-serialize");
+// if (dps) {
+// attrs.push(new
pd.KV("data-parsoid-serialize", dps));
+// }
+// var dummyToken = new pd.SelfclosingTagTk("meta",
+// attrs,
+// { src: this._getDOMRTInfo(node).src }
+// );
+//
+// if ( dps ) {
+// state.selser.serializeInfo = dps;
+// }
+// this._serializeToken(state, dummyToken);
+// return;
+// }
+// }
+// } else if (node.nodeType !== node.COMMENT_NODE) {
+// state.activeTemplateId = null;
+// }
WSP.tagHandlers = {
- body: {
- end: {
- handle: id('')
- }
- },
ul: buildListHandler('*'),
ol: buildListHandler('#'),
dl: buildListHandler(''),
@@ -1434,7 +1490,15 @@
return '';
}
},
+ handle: function(node, state, cb) {
+ console.log('p handler');
+ state.serializeChildren(node.childNodes, cb);
+ },
sepNls: {
+ firstChild: function(otherNode) {
+ return otherNode.nodeName === 'BODY' ?
+ {min: 0, max:0} : {min:1, max:1};
+ },
before: function(otherNode) {
return otherNode.nodeName === 'P' ?
{min: 2, max: 2} : {min: 1, max: 2};
@@ -1447,6 +1511,37 @@
},
// XXX: support indent variant instead by registering a newline handler?
pre: {
+ /**
+ *
+ * XXX X: create DOM handler
+ // Handle html-pres specially
+ // 1. If the node has a leading newline, add one like
it (logic copied from VE)
+ // 2. If not, and it has a data-parsoid strippedNL
flag, add it back.
+ // This patched DOM will serialize html-pres correctly.
+ //
+ // FIXME: This code should be extracted into a
DOMUtils.js file to be used
+ // by the testing setup.
+ if (nodeName === 'pre' && tkRTInfo.stx === 'html') {
+ var modified = false;
+ var fc = node.firstChild;
+ if (fc && fc.nodeType === node.TEXT_NODE) {
+ var matches =
fc.data.match(/^(\r\n|\r|\n)/);
+ if (matches) {
+ fc.insertData(0, matches[1]);
+ modified = true;
+ }
+ }
+
+ var strippedNL = tkRTInfo.strippedNL;
+ if (!modified && strippedNL) {
+ if (fc && fc.nodeType ===
node.TEXT_NODE) {
+ fc.insertData(0, strippedNL);
+ } else {
+
node.insertBefore(node.ownerDocument.createTextNode(strippedNL), fc);
+ }
+ }
+ }
+ */
start: {
startsLine: true,
handle: function( state, token ) {
@@ -1570,9 +1665,7 @@
}
},
figure: {
- node: {
- handle: WSP.figureHandler.bind(WSP)
- }
+ handle: WSP.figureHandler.bind(WSP)
},
img: {
start: {
@@ -1586,18 +1679,15 @@
}
},
hr: {
- start: {
- startsLine: true,
- handle: function(state, token) {
- return charSequence("----", "-",
token.dataAttribs.extra_dashes);
- }
+ handle: function (node, state, cb) {
+ cb(charSequence("----", "-",
node.data.parsoid.extra_dashes));
},
- end: {
- handle: function(state, token) {
- // Default to ending the line, but omit it if
the source did
- // not have one.
- this.endsLine = ! token.dataAttribs.lineContent;
- return '';
+ sepNls: {
+ before: function(otherNode) {
+ return {min: 1, max: 2};
+ },
+ after: function(otherNode) {
+ return {min: 1, max: 2};
}
}
},
@@ -1633,13 +1723,16 @@
wtEscapeHandler: WSP.wteHandlers.quoteHandler
},
a: {
- node: {
- handle: WSP.linkHandler.bind(WSP)
- }
+ handle: WSP.linkHandler.bind(WSP)
+ // TODO: Implement link tail escaping with nowiki in DOM
handler!
},
link: {
- node: {
- handle: WSP.linkHandler.bind(WSP)
+ handle: WSP.linkHandler.bind(WSP)
+ },
+ body: {
+ handle: function(node, state, cb) {
+ // Just serialize the children
+ state.serializeChildren(node.childNodes, cb);
}
}
};
@@ -1803,7 +1896,7 @@
end : { handle: WSP._serializeHTMLEndTag }
};
-WSP.htmlElementHandler = function (state, node, cb) {
+WSP.htmlElementHandler = function (node, state, cb) {
var attribKVs = DU.getAttributeKVArray(node);
cb( WSP._serializeHTMLTag(
@@ -1822,7 +1915,7 @@
/**
* Get a DOM-based handler for an element node
*/
-WSP.getDOMHandler = function(state, node, cb) {
+WSP.getDOMHandler = function(node, state, cb) {
var dp = node.data.parsoid,
nodeName = node.nodeName.toLowerCase(),
handler,
@@ -1879,7 +1972,7 @@
return null; // this.htmlElementHandler;
} else if (this.tagHandlers[nodeName]) {
handler = this.tagHandlers[nodeName];
- return handler && handler.node || null;
+ return handler && handler.handle || null;
}
};
@@ -1926,6 +2019,26 @@
return handler.end || {};
}
};
+
+/**
+ * Serialize the content of a text node or comment
+ */
+WSP._serializeText = function(text, state, cb) {
+ var res;
+ // Always escape entities
+ res = Util.escapeEntities(text);
+ // If not in nowiki and pre context, also escape wikitext
+ // XXX refactor: Handle this with escape handlers instead!
+ res = ( state.inNoWiki || state.inHTMLPre ) ? res
+ : this.escapeWikiText( state, res );
+
+ // XXX: replace with similar constraints as for separators?
+ if (state.textHandler) {
+ res = state.textHandler( state, res );
+ }
+ cb(res);
+};
+
/**
* Serialize a token.
@@ -2008,8 +2121,11 @@
// Always escape entities
res = Util.escapeEntities(token);
// If not in nowiki and pre context, also escape
wikitext
+ // XXX refactor: Handle this with escape handlers
instead!
res = ( state.inNoWiki || state.inHTMLPre ) ? res
: this.escapeWikiText( state, res );
+
+ // XXX: replace with similar constraints as for
separators?
if (textHandler) {
res = textHandler( state, res );
}
@@ -2106,7 +2222,7 @@
if (res.match(/[\r\n]$/)) {
state.onNewline = true;
state.onStartOfLine = true;
- } else if ( res !== '' ) {
+ } else if (res !== '') {
state.onNewline = false;
if (!handler.solTransparent) {
state.onStartOfLine = false;
@@ -2233,24 +2349,6 @@
// 2. Strip non-semantic leading and trailing newlines.
WSP.preprocessDOM = function(node, state, inPre, haveOrigSrc) {
- function setupSeparator(nodeA, nodeB, sepNodes, sepText) {
- // Create meta with the separator src in data-sep attribute
- var sepMeta = (nodeA ||
nodeB).ownerDocument.createElement('meta');
- sepMeta.setAttribute("typeof", "mw:Separator");
- sepMeta.setAttribute("data-sep", sepText.join(''));
-
- if (nodeA) {
- nodeA.parentNode.insertBefore(sepMeta,
nodeA.nextSibling);
- } else {
- nodeB.parentNode.insertBefore(sepMeta,
nodeB.previousSibling);
- }
-
- // delete separator nodes
- for (var i = 0, n = sepNodes.length; i < n; i++) {
- sepNodes[i].parentNode.removeChild(sepNodes[i]);
- }
- }
-
if (node.nodeName.toLowerCase() === "meta") {
var prop = node.getAttribute("property");
if (prop && prop.match(/mw:objectAttr/)) {
@@ -2291,122 +2389,7 @@
// Descend and recurse
this.preprocessDOM(child, state, inPre || childIsPre,
haveOrigSrc);
- // Collapse a sequence of text nodes and delete empty
text nodes
- // NOTE: We could have used body.normalize() and got
this for free,
- // but JSDOM is buggy and strips empty comments.
- // Domino actually exhibits the same behavior. Probably
a
- // misfeature in the spec.
- if (child.nodeType === node.TEXT_NODE) {
- var buf = [child.data];
- while (next && next.nodeType ===
node.TEXT_NODE) {
- var nextnext = next.nextSibling;
- buf.push(next.data);
- node.removeChild(next);
- next = nextnext;
- }
-
- if (buf.length > 1) {
- child.data = buf.join('');
- }
-
- // Delete empty text nodes
- if (child.data === '') {
- node.removeChild(child);
- }
- }
-
child = next;
- }
-
- // Post-text-normalization, strip runs of whitespace and
comments and
- // record them in a meta-tag.
- //
- //
(http://dev.w3.org/html5/spec-LC/content-models.html#content-models)
- //
- // Dont normalize if we are in a PRE-node or if the node is a
mw:Entity SPAN
- // Or if the node has no element-node child
- if (!inPre &&
- !DU.isNodeOfType(node, 'span', 'mw:Entity') &&
- (!haveOrigSrc || DU.hasElementChild(node)))
- {
- var prevSentinel = null,
- waitForSentinel = false,
- sepNodes = [],
- sepText = [];
-
- child = node.firstChild;
- while (child) {
- var nodeType = child.nodeType;
-
- next = child.nextSibling;
-
- // Delete empty text nodes
- if (nodeType === node.TEXT_NODE && child.data
=== '') {
- node.removeChild(child);
- child = next;
- continue;
- }
-
- if (!haveOrigSrc) {
- if (nodeType === node.TEXT_NODE) {
- str = child.data;
- // Strip leading/trailing
newlines if preceded by or
- // followed by block nodes --
these newlines are syntactic
- // and can be normalized away
since the serializer in sourceless
- // mode is tuned to normalize
newlines.
- if (str.match(/\n$/) && (!next
|| DU.isBlockNode(next))) {
- child.data =
str.replace(/\n+$/, '');
- }
- if (str.match(/^\n/) && (!prev ||
DU.isBlockNode(prev))) {
- child.data =
str.replace(/^\n+/, '');
- }
- }
- } else {
- switch (nodeType) {
- case node.TEXT_NODE:
- str = child.data;
- if (!waitForSentinel &&
str.match(/^\s+$/)) {
-
sepText.push(str);
-
sepNodes.push(child);
- } else {
- prevSentinel =
null;
- waitForSentinel
= true;
- }
- break;
-
- case node.COMMENT_NODE:
- if (!waitForSentinel) {
-
sepText.push("<!--");
-
sepText.push(child.data);
-
sepText.push("-->");
-
sepNodes.push(child);
- }
- break;
-
- case node.ELEMENT_NODE:
- if (!waitForSentinel &&
DU.isMarkerMeta(child, "mw:DiffMarker")) {
- // Float
"mw:DiffMarker" to the left till we bump into a sentinel
-
node.insertBefore(child, prevSentinel ? prevSentinel.nextSibling :
node.firstChild);
- prevSentinel =
child;
- } else {
- if
(!waitForSentinel && sepNodes.length > 0) {
-
setupSeparator(prevSentinel, child, sepNodes, sepText);
- }
- waitForSentinel
= false;
- prevSentinel =
child;
- sepNodes = [];
- sepText = [];
- }
- break;
- }
- }
-
- child = next;
- }
-
- if (prevSentinel && sepNodes.length > 0) {
- setupSeparator(prevSentinel, null, sepNodes,
sepText);
- }
}
}
};
@@ -2461,7 +2444,7 @@
};
}
- this._serializeDOM( node, state );
+ this._serializeNode( node, state );
this._serializeToken( state, new pd.EOFTk() );
if ( finalCB && typeof finalCB === 'function' ) {
@@ -2520,7 +2503,7 @@
}
/**
- * Helper for handleSeparator
+ * Helper for doHandleSeparator
*
* Collects, checks and integrates separator newline requirements to a simple
* min, max structure.
@@ -2570,8 +2553,8 @@
*
* node handlers:
*
- * node: {
- * handle: function(state, node) {},
+ * body: {
+ * handle: function(node, state, cb) {},
* // responsible for calling
* sepNls: {
* before: function(node) -> {min: 1, max: 2}
@@ -2582,7 +2565,7 @@
* }
*
*/
-WSP.handleSeparator = function( state, sep, cb, nodeA, handlerA, nodeB,
handlerB) {
+WSP.doHandleSeparator = function( state, sep, cb, nodeA, handlerA, nodeB,
handlerB) {
var constraints,
i;
var sepHandlerA = handlerA && handlerA.sepNls || {},
@@ -2614,6 +2597,12 @@
var sepMatch = sep.match(/\n/g),
sepNlCount = sepMatch && sepMatch.length || 0;
+
+ if (state.atStartOfOutput) {
+ constraints.min--;
+ state.atStartOfOutput = false;
+ }
+
if (constraints.min && sepNlCount < constraints.min) {
for (i = 0; i < (constraints.min - sepNlCount); i++) {
sep += '\n';
@@ -2639,6 +2628,7 @@
// This avoids triggering pres
sep =
sep.replace(/[^\n>]+(<!--(?:[^\-]+|-(?!->))*-->[^\n]*)?$/g, '$1');
}
+ console.log(JSON.stringify(sep));
cb(sep);
}
@@ -2649,25 +2639,21 @@
*
* Called on a text node.
*/
-WSP.testHandleSeparator = function(node, state) {
- // Gather IEW nodes into string, and call handleSeparator
- var maybeSeparator = this.gatherSeparatorText(node),
- prev = node.previousSibling || node.parentNode,
+WSP.handleSeparator = function(node, state, cb, maybeSeparator) {
+ // Gather IEW nodes into string, and call doHandleSeparator
+ var prev = node.previousSibling || node.parentNode,
prevHandler = WSP.tagHandlers[prev.nodeName.toLowerCase()],
- cb = function(sep) {
- console.log('NEW:', prev.outerHTML + sep +
next.outerHTML);
- },
next, nextHandler,
sepSrc = '';
if (maybeSeparator) {
//console.log('<origsep>', maybeSeparator.sepSrc, '</origsep>');
- // call handleSeparator with separator string
+ // call doHandleSeparator with separator string
next = maybeSeparator.nextElement;
nextHandler = WSP.tagHandlers[next.nodeName.toLowerCase()];
sepSrc = maybeSeparator.sepSrc;
} else {
- // call handleSeparator
- next = node.nextSibling || node.parentNode;
+ // call doHandleSeparator
+ next = node;
nextHandler = WSP.tagHandlers[next.nodeName.toLowerCase()];
}
@@ -2676,11 +2662,13 @@
prev.nodeType === node.ELEMENT_NODE &&
next.nodeType === node.ELEMENT_NODE )
{
+ console.log('calling doHandleSeparator', prev.outerHTML,
next.outerHTML);
// Looks like it.
- this.handleSeparator(state, sepSrc, cb,
+ this.doHandleSeparator(state, sepSrc, cb,
prev,
prevHandler,
next,
nextHandler);
}
+ return next;
};
@@ -2688,271 +2676,78 @@
* Internal worker. Recursively serialize a DOM subtree by creating tokens and
* calling _serializeToken on each of these.
*/
-WSP._serializeDOM = function( node, state ) {
- var newNLs;
+WSP._serializeNode = function( node, state, cb) {
+ cb = cb || state.chunkCB;
// serialize this node
- if (node.nodeType === node.ELEMENT_NODE) {
- if (state.activeTemplateId &&
- state.activeTemplateId === node.getAttribute("about"))
- {
- // skip -- template content
- return;
- } else {
- state.activeTemplateId = null;
- }
-
- if (!state.activeTemplateId) {
- // Check if this node marks the start of template output
- // NOTE: Since we are deleting all mw:Object/**/End
markers,
- // we need not verify if it is an End marker
- var typeofVal = node.getAttribute("typeof");
- if (typeofVal &&
typeofVal.match(/\bmw:Object(\/[^\s]+|\b)/)) {
- state.activeTemplateId =
node.getAttribute("about") || "";
- var attrs = [ new pd.KV("typeof",
"mw:TemplateSource") ];
- var dps =
node.getAttribute("data-parsoid-serialize");
- if (dps) {
- attrs.push(new
pd.KV("data-parsoid-serialize", dps));
- }
- var dummyToken = new pd.SelfclosingTagTk("meta",
- attrs,
- { src: this._getDOMRTInfo(node).src }
- );
-
- if ( dps ) {
- state.selser.serializeInfo = dps;
- }
- this._serializeToken(state, dummyToken);
- return;
- }
- }
- } else if (node.nodeType !== node.COMMENT_NODE) {
- state.activeTemplateId = null;
- }
-
- var i, n, child, children;
-
switch( node.nodeType ) {
case node.ELEMENT_NODE:
var nodeName = node.nodeName.toLowerCase(),
tkAttribs =
this._getDOMAttribs(node.attributes),
tkRTInfo = this._getDOMRTInfo(node),
- parentSTX = state.parentST1X;
+ parentSTX = state.parentSTX;
// populate node.data.parsoid and
node.data['parsoid-serialize']
DU.loadDataParsoid(node);
DU.loadDataAttrib(node, 'parsoid-serialize', null);
- children = node.childNodes;
-
- if (isHtmlBlockTag(nodeName)) {
- state.currLine = {
- text: null,
- numPieces: 0,
- processed: false,
- hasBracketPair: false,
- hasHeadingPair: false
- };
+ // Insert a possible separator
+ var prev = node.previousSibling || node.parentNode;
+ if (prev && prev.nodeType === node.ELEMENT_NODE) {
+ this.handleSeparator(node, state, cb);
}
- var tailSrc = '';
- // Hack for link tail escaping- access to the next node
is
- // difficult otherwise.
- // TODO: Implement this more cleanly!
- if ( nodeName === 'a' && node.getAttribute('rel') ===
'mw:WikiLink' ) {
- var dp =
JSON.parse(node.getAttribute('data-parsoid') || '{}');
- if ( dp.stx !== 'html' &&
- ! dp.tail &&
- node.nextSibling &&
node.nextSibling.nodeType === node.TEXT_NODE &&
- // TODO: use tokenizer
- node.nextSibling.nodeValue &&
-
node.nextSibling.nodeValue.match(/^[a-z]/) )
- {
- tailSrc = '<nowiki/>';
- }
- }
-
- // Handle html-pres specially
- // 1. If the node has a leading newline, add one like
it (logic copied from VE)
- // 2. If not, and it has a data-parsoid strippedNL
flag, add it back.
- // This patched DOM will serialize html-pres correctly.
- //
- // FIXME: This code should be extracted into a
DOMUtils.js file to be used
- // by the testing setup.
- if (nodeName === 'pre' && tkRTInfo.stx === 'html') {
- var modified = false;
- var fc = node.firstChild;
- if (fc && fc.nodeType === node.TEXT_NODE) {
- var matches =
fc.data.match(/^(\r\n|\r|\n)/);
- if (matches) {
- fc.insertData(0, matches[1]);
- modified = true;
- }
- }
-
- var strippedNL = tkRTInfo.strippedNL;
- if (!modified && strippedNL) {
- if (fc && fc.nodeType ===
node.TEXT_NODE) {
- fc.insertData(0, strippedNL);
- } else {
-
node.insertBefore(node.ownerDocument.createTextNode(strippedNL), fc);
- }
- }
- }
var serializeInfo = null;
if ( state.selser.serializeInfo === null ) {
serializeInfo = node.data['parsoid-serialize'];
state.selser.serializeInfo = serializeInfo;
- state.chunkCB('', serializeInfo);
+ cb('', serializeInfo);
}
// See if we have a DOM-based handler for this node
- var domHandler = this.getDOMHandler(state, node,
state.chunkCB);
- if ( domHandler && domHandler.handle ) {
-
- // Update some state based on the serializer
result
- var stateCB = state.chunkCB, // remember the
current cb
- cbWrapper = function (res,
serializeInfo) {
- if (res) {
- state.onStartOfLine =
res.match(/\n$/) ? true : false;
- }
- stateCB(res, serializeInfo);
- };
-
+ var domHandler = this.getDOMHandler(node, state,
state.chunkCB);
+ if ( domHandler ) {
// DOM-based serialization
- domHandler.handle(state, node, cbWrapper);
-
- // Fake curToken state for token-based
handlers. This is then
- // assigned to prevToken in _serializeToken.
- state.curToken = new pd.EndTagTk(nodeName,
tkAttribs, tkRTInfo);
- state.prevToken = state.curToken;
- state.currTagToken = state.curToken;
- state.prevTagToken = state.curToken;
+ domHandler(node, state, cb);
+ // The handler is responsible for serializing
its children
} else {
- // Token-based serialization
-
- // Serialize the start token
- var startToken = new pd.TagTk(nodeName,
tkAttribs, tkRTInfo);
- this._serializeToken(state, startToken);
-
- // Newly created elements/tags in this list
inherit their default
- // syntax from their parent scope
- var inheritSTXTags = { tbody:1, tr: 1, td: 1,
li: 1, dd: 1, dt: 1 },
- // These reset the inherited syntax no
matter what
- setSTXTags = { table: 1, ul: 1, ol: 1,
dl: 1 },
- // These (and inline elements) reset
the default syntax to
- // undefined
- noHTMLSTXTags = {p: 1};
-
- // Set self to parent token if data-parsoid is
set
- if ( Object.keys(tkRTInfo).length > 0 ||
- setSTXTags[nodeName] ||
- ! inheritSTXTags[nodeName] )
- {
- if ( noHTMLSTXTags[nodeName] || !
Util.isBlockTag(nodeName) ) {
- // Don't inherit stx in these
- state.parentSTX = undefined;
- } else {
- state.parentSTX = tkRTInfo.stx;
- }
- }
-
- // Clear out prevTagToken at each dom level
- var oldPrevToken = state.prevToken,
oldPrevTagToken = state.prevTagToken;
- state.prevToken = null;
- state.prevTagToken = null;
-
- var prevEltChild = null;
- for (i = 0, n = children.length; i < n; i++) {
- child = children[i];
-
- // Ignore -- handled separately
- if (DU.isMarkerMeta(child,
"mw:Separator")) {
- continue;
- }
-
- // Skip over comment, white-space text
nodes, and tpl-content nodes
- var nodeType = child.nodeType;
- if ( nodeType !== node.COMMENT_NODE &&
- !(nodeType === node.TEXT_NODE
&& child.data.match(/^\s*$/)) &&
- !(nodeType ===
node.ELEMENT_NODE &&
- state.activeTemplateId
&&
- state.activeTemplateId
=== child.getAttribute("about"))
- )
- {
- if (child.nodeType ===
node.ELEMENT_NODE) {
- if (prevEltChild ===
null) {
- if
(!DU.hasNodeName(node, "pre")) {
- //
extract separator text between node and child;
-
state.emitSeparator(node, child, START_SEP);
- }
- } else if
(prevEltChild.nodeType === node.ELEMENT_NODE) {
- if
(!DU.hasNodeName(node, "pre")) {
- //
extract separator text between prevEltChild and child;
-
state.emitSeparator(prevEltChild, child, IE_SEP);
- }
- }
- }
-
- prevEltChild = child;
- }
-
- this._serializeDOM( children[i], state
);
- }
-
- if (prevEltChild && prevEltChild.nodeType ===
node.ELEMENT_NODE) {
- // extract separator text between
prevEltChild and node
- if (!DU.hasNodeName(node, "pre")) {
-
state.emitSeparator(prevEltChild, node, END_SEP);
- }
- }
-
- // Reset parent state
- state.prevTagToken = oldPrevTagToken;
- state.prevToken = oldPrevToken;
- state.parentSTX = parentSTX;
-
- // then the end token
- this._serializeToken(state, new
pd.EndTagTk(nodeName, tkAttribs, tkRTInfo));
-
- if ( tailSrc ) {
- // emit the tail
- state.chunkCB( tailSrc,
state.selser.serializeInfo );
- }
+ // Used to be token-based serialization
+ console.error('no dom handler found for',
node.outerHTML);
}
- if ( serializeInfo !== null ) {
- state.selser.serializeInfo = null;
+ // Insert end separator
+ if (node && node.nodeType === node.ELEMENT_NODE &&
!node.nextSibling) {
+ var next = node.parentNode;
+ if (next) {
+ this.doHandleSeparator(state, '', cb,
+ node,
WSP.tagHandlers[node.nodeName.toLowerCase()],
+ next,
WSP.tagHandlers[next.nodeName.toLowerCase()]);
+ }
}
break;
case node.TEXT_NODE:
- if (state.currLine.text === null) {
- var buf = [],
- bn = firstBlockNodeAncestor(node);
-
- children = bn.childNodes;
- for (i = 0, n = children.length; i < n; i++) {
- gatherInlineText(buf, children[i]);
- }
- state.currLine.numPieces = n;
- state.currLine.text = buf.join('');
- }
-
// Test the separator handler, but don't use it yet.
//this.testHandleSeparator(node, state);
+ var maybeSeparator = this.gatherSeparatorText(node);
- this._serializeToken( state, node.data );
+ if (maybeSeparator) {
+ node = this.handleSeparator(node, state, cb,
maybeSeparator);
+ } else {
+ // regular serialization
+ this._serializeText(node.data, state,
state.chunkCB );
+ }
break;
case node.COMMENT_NODE:
// delay the newline creation until after the comment
- this._serializeToken( state, new pd.CommentTk(
node.data ) );
+ state.chunkCB( '<!--' + node.data.replace(/-->/,
'--&gt;') + '-->' );
break;
default:
console.warn( "Unhandled node type: " +
node.outerHTML );
break;
}
+ return node;
};
if (typeof module === "object") {
--
To view, visit https://gerrit.wikimedia.org/r/51333
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: I67463a12fd200f3a9f15c55658d5efda5e334d83
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/extensions/Parsoid
Gerrit-Branch: master
Gerrit-Owner: GWicke <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits