[MediaWiki-commits] [Gerrit] WIP: DOM-based WTS refactor - change (mediawiki...Parsoid)

GWicke (Code Review) Wed, 27 Feb 2013 18:03:38 -0800

GWicke has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/51333



Change subject: WIP: DOM-based WTS refactor
......................................................................

WIP: DOM-based WTS refactor

Change-Id: I67463a12fd200f3a9f15c55658d5efda5e334d83
---
M js/lib/mediawiki.WikitextSerializer.js
1 file changed, 203 insertions(+), 408 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/Parsoid refs/changes/33/51333/1

diff --git a/js/lib/mediawiki.WikitextSerializer.js b/js/lib/mediawiki.WikitextSerializer.js
index 5b19afb..6149d8c 100644
--- a/js/lib/mediawiki.WikitextSerializer.js
+++ b/js/lib/mediawiki.WikitextSerializer.js
@@ -310,6 +310,7 @@
        lastRes: '',
        onNewline: true,
        onStartOfLine : true,
+       atStartOfOutput: true,
        singleLineMode: 0,
        wteHandlerStack: [],
        tplAttrs: {},
@@ -349,7 +350,7 @@
                        this.chunkCB = chunkCB;
                }
 
-               this.serializer._serializeDOM(node, this);
+               this.serializer._serializeNode(node, this);
 
                if ( wtEscaper ) {
                        this.wteHandlerStack.pop();
@@ -376,8 +377,21 @@
                if ( wtEscaper ) {
                        this.wteHandlerStack.push(wtEscaper);
                }
-               for (var i = 0, l = nodes.length; i < l; i++) {
-                       this.serializer._serializeDOM(nodes[i], this);
+               var node = nodes[0],
+                       parentNode = node && node.parentNode,
+                       nextNode;
+               while(node) {
+                       nextNode = this.serializer._serializeNode(node, this);
+                       if (nextNode === parentNode) {
+                               // serialized all children
+                               break;
+                       } else if (nextNode === node) {
+                               // advance the child
+                               node = node.nextSibling;
+                       } else {
+                               console.log('nextNode', nextNode && nextNode.outerHTML);
+                               node = nextNode;
+                       }
                }
                this.chunkCB = oldCB;
                if ( wtEscaper ) {
@@ -713,7 +727,7 @@
 /**
  * DOM-based figure handler
  */
-WSP.figureHandler = function(state, node, cb) {
+WSP.figureHandler = function(node, state, cb) {
 
        var img, caption,
                dp = node.data.parsoid,
@@ -962,7 +976,7 @@
 // openTagSrc = ...; endTagSrc = ...; and at the end of the function,
 // check for autoInsertedStart and autoInsertedEnd attributes and
 // supress openTagSrc or endTagSrc appropriately.
-WSP.linkHandler =  function( state, node, cb ) {
+WSP.linkHandler =  function(node, state, cb) {
        //return '[[';
        // TODO: handle internal/external links etc using RDFa and dataAttribs
        // Also convert unannotated html links without advanced attributes to
@@ -1154,7 +1168,7 @@
                } else {
                        // Unknown rel was set
                        //cb( state.serializeDOMToString( node ) );
-                       WSP.htmlElementHandler(state, node, cb);
+                       WSP.htmlElementHandler(node, state, cb);
                        return;
                }
        } else {
@@ -1173,7 +1187,7 @@
 
                if ( true || isComplexLink ( node.attributes ) ) {
                        // Complex attributes we can't support in wiki syntax
-                       WSP.htmlElementHandler(state, node, cb);
+                       WSP.htmlElementHandler(node, state, cb);
                } else {
                        // TODO: serialize as external wikilink
                        cb( '' );
@@ -1276,13 +1290,55 @@
                return prefix;
        }
 }
+// XXX refactor: move to DOM handlers!
+// Newly created elements/tags in this list inherit their default
+// syntax from their parent scope
+var inheritSTXTags = { tbody:1, tr: 1, td: 1, li: 1, dd: 1, dt: 1 },
+       // These reset the inherited syntax no matter what
+       setSTXTags = { table: 1, ul: 1, ol: 1, dl: 1 },
+       // These (and inline elements) reset the default syntax to
+       // undefined
+       noHTMLSTXTags = {p: 1};
+
+// XXX refactor: move to dedicated template handler that consumes siblings
+//             if (state.activeTemplateId &&
+//                     state.activeTemplateId === node.getAttribute("about"))
+//             {
+//                     // skip -- template content
+//                     return;
+//             } else {
+//                     state.activeTemplateId = null;
+//             }
+//
+//             if (!state.activeTemplateId) {
+//                     // Check if this node marks the start of template output
+//                     // NOTE: Since we are deleting all mw:Object/**/End 
markers,
+//                     // we need not verify if it is an End marker
+//                     var typeofVal = node.getAttribute("typeof");
+//                     if (typeofVal && 
typeofVal.match(/\bmw:Object(\/[^\s]+|\b)/)) {
+//                             state.activeTemplateId = 
node.getAttribute("about") || "";
+//                             var attrs = [ new pd.KV("typeof", 
"mw:TemplateSource") ];
+//                             var dps = 
node.getAttribute("data-parsoid-serialize");
+//                             if (dps) {
+//                                     attrs.push(new 
pd.KV("data-parsoid-serialize", dps));
+//                             }
+//                             var dummyToken = new pd.SelfclosingTagTk("meta",
+//                                     attrs,
+//                                     { src: this._getDOMRTInfo(node).src }
+//                             );
+//
+//                             if ( dps ) {
+//                                     state.selser.serializeInfo = dps;
+//                             }
+//                             this._serializeToken(state, dummyToken);
+//                             return;
+//                     }
+//             }
+//     } else if (node.nodeType !== node.COMMENT_NODE) {
+//             state.activeTemplateId = null;
+//     }
 
 WSP.tagHandlers = {
-       body: {
-               end: {
-                       handle: id('')
-               }
-       },
        ul: buildListHandler('*'),
        ol: buildListHandler('#'),
        dl: buildListHandler(''),
@@ -1434,7 +1490,15 @@
                                return '';
                        }
                },
+               handle: function(node, state, cb) {
+                       console.log('p handler');
+                       state.serializeChildren(node.childNodes, cb);
+               },
                sepNls: {
+                       firstChild: function(otherNode) {
+                               return otherNode.nodeName === 'BODY' ?
+                                       {min: 0, max:0} : {min:1, max:1};
+                       },
                        before: function(otherNode) {
                                return otherNode.nodeName === 'P' ?
                                        {min: 2, max: 2} : {min: 1, max: 2};
@@ -1447,6 +1511,37 @@
        },
        // XXX: support indent variant instead by registering a newline handler?
        pre: {
+               /**
+                *
+                * XXX X: create DOM handler
+                       // Handle html-pres specially
+                       // 1. If the node has a leading newline, add one like 
it (logic copied from VE)
+                       // 2. If not, and it has a data-parsoid strippedNL 
flag, add it back.
+                       // This patched DOM will serialize html-pres correctly.
+                       //
+                       // FIXME: This code should be extracted into a 
DOMUtils.js file to be used
+                       // by the testing setup.
+                       if (nodeName === 'pre' && tkRTInfo.stx === 'html') {
+                               var modified = false;
+                               var fc = node.firstChild;
+                               if (fc && fc.nodeType === node.TEXT_NODE) {
+                                       var matches = 
fc.data.match(/^(\r\n|\r|\n)/);
+                                       if (matches) {
+                                               fc.insertData(0, matches[1]);
+                                               modified = true;
+                                       }
+                               }
+
+                               var strippedNL = tkRTInfo.strippedNL;
+                               if (!modified && strippedNL) {
+                                       if (fc && fc.nodeType === 
node.TEXT_NODE) {
+                                               fc.insertData(0, strippedNL);
+                                       } else {
+                                               node.insertBefore(node.ownerDocument.createTextNode(strippedNL), fc);
+                                       }
+                               }
+                       }
+                       */
                start: {
                        startsLine: true,
                        handle: function( state, token ) {
@@ -1570,9 +1665,7 @@
                }
        },
        figure: {
-               node: {
-                       handle: WSP.figureHandler.bind(WSP)
-               }
+               handle: WSP.figureHandler.bind(WSP)
        },
        img: {
                start: {
@@ -1586,18 +1679,15 @@
                }
        },
        hr: {
-               start: {
-                       startsLine: true,
-                       handle: function(state, token) {
-                               return charSequence("----", "-", 
token.dataAttribs.extra_dashes);
-                       }
+               handle: function (node, state, cb) {
+                       cb(charSequence("----", "-", node.data.parsoid.extra_dashes));
                },
-               end: {
-                       handle: function(state, token) {
-                               // Default to ending the line, but omit it if 
the source did
-                               // not have one.
-                               this.endsLine = ! token.dataAttribs.lineContent;
-                               return '';
+               sepNls: {
+                       before: function(otherNode) {
+                               return {min: 1, max: 2};
+                       },
+                       after: function(otherNode) {
+                               return {min: 1, max: 2};
                        }
                }
        },
@@ -1633,13 +1723,16 @@
                wtEscapeHandler: WSP.wteHandlers.quoteHandler
        },
        a:  {
-               node: {
-                       handle: WSP.linkHandler.bind(WSP)
-               }
+               handle: WSP.linkHandler.bind(WSP)
+               // TODO: Implement link tail escaping with nowiki in DOM handler!
        },
        link:  {
-               node: {
-                       handle: WSP.linkHandler.bind(WSP)
+               handle: WSP.linkHandler.bind(WSP)
+       },
+       body: {
+               handle: function(node, state, cb) {
+                       // Just serialize the children
+                       state.serializeChildren(node.childNodes, cb);
                }
        }
 };
@@ -1803,7 +1896,7 @@
        end  : { handle: WSP._serializeHTMLEndTag }
 };
 
-WSP.htmlElementHandler = function (state, node, cb) {
+WSP.htmlElementHandler = function (node, state, cb) {
        var attribKVs = DU.getAttributeKVArray(node);
 
        cb( WSP._serializeHTMLTag(
@@ -1822,7 +1915,7 @@
 /**
  * Get a DOM-based handler for an element node
  */
-WSP.getDOMHandler = function(state, node, cb) {
+WSP.getDOMHandler = function(node, state, cb) {
        var dp = node.data.parsoid,
                nodeName = node.nodeName.toLowerCase(),
                handler,
@@ -1879,7 +1972,7 @@
                return null; // this.htmlElementHandler;
        } else if (this.tagHandlers[nodeName]) {
                handler = this.tagHandlers[nodeName];
-               return handler && handler.node || null;
+               return handler && handler.handle || null;
        }
 };
 
@@ -1926,6 +2019,26 @@
                return handler.end || {};
        }
 };
+
+/**
+ * Serialize the content of a text node or comment
+ */
+WSP._serializeText = function(text, state, cb) {
+       var res;
+       // Always escape entities
+       res = Util.escapeEntities(text);
+       // If not in nowiki and pre context, also escape wikitext
+       // XXX refactor: Handle this with escape handlers instead!
+       res = ( state.inNoWiki || state.inHTMLPre ) ? res
+               : this.escapeWikiText( state, res );
+
+       // XXX: replace with similar constraints as for separators?
+       if (state.textHandler) {
+               res = state.textHandler( state, res );
+       }
+       cb(res);
+};
+
 
 /**
  * Serialize a token.
@@ -2008,8 +2121,11 @@
                        // Always escape entities
                        res = Util.escapeEntities(token);
                        // If not in nowiki and pre context, also escape 
wikitext
+                       // XXX refactor: Handle this with escape handlers 
instead!
                        res = ( state.inNoWiki || state.inHTMLPre ) ? res
                                : this.escapeWikiText( state, res );
+
+                       // XXX: replace with similar constraints as for 
separators?
                        if (textHandler) {
                                res = textHandler( state, res );
                        }
@@ -2106,7 +2222,7 @@
        if (res.match(/[\r\n]$/)) {
                state.onNewline = true;
                state.onStartOfLine = true;
-       } else if ( res !== '' ) {
+       } else if (res !== '') {
                state.onNewline = false;
                if (!handler.solTransparent) {
                        state.onStartOfLine = false;
@@ -2233,24 +2349,6 @@
 // 2. Strip non-semantic leading and trailing newlines.
 WSP.preprocessDOM = function(node, state, inPre, haveOrigSrc) {
 
-       function setupSeparator(nodeA, nodeB, sepNodes, sepText) {
-               // Create meta with the separator src in data-sep attribute
-               var sepMeta = (nodeA || 
nodeB).ownerDocument.createElement('meta');
-               sepMeta.setAttribute("typeof", "mw:Separator");
-               sepMeta.setAttribute("data-sep", sepText.join(''));
-
-               if (nodeA) {
-                       nodeA.parentNode.insertBefore(sepMeta, 
nodeA.nextSibling);
-               } else {
-                       nodeB.parentNode.insertBefore(sepMeta, 
nodeB.previousSibling);
-               }
-
-               // delete separator nodes
-               for (var i = 0, n = sepNodes.length; i < n; i++) {
-                       sepNodes[i].parentNode.removeChild(sepNodes[i]);
-               }
-       }
-
        if (node.nodeName.toLowerCase() === "meta") {
                var prop = node.getAttribute("property");
                if (prop && prop.match(/mw:objectAttr/)) {
@@ -2291,122 +2389,7 @@
                        // Descend and recurse
                        this.preprocessDOM(child, state, inPre || childIsPre, 
haveOrigSrc);
 
-                       // Collapse a sequence of text nodes and delete empty 
text nodes
-                       // NOTE: We could have used body.normalize() and got 
this for free,
-                       // but JSDOM is buggy and strips empty comments.
-                       // Domino actually exhibits the same behavior. Probably 
a
-                       // misfeature in the spec.
-                       if (child.nodeType === node.TEXT_NODE) {
-                               var buf = [child.data];
-                               while (next && next.nodeType === 
node.TEXT_NODE) {
-                                       var nextnext = next.nextSibling;
-                                       buf.push(next.data);
-                                       node.removeChild(next);
-                                       next = nextnext;
-                               }
-
-                               if (buf.length > 1) {
-                                       child.data = buf.join('');
-                               }
-
-                               // Delete empty text nodes
-                               if (child.data === '') {
-                                       node.removeChild(child);
-                               }
-                       }
-
                        child = next;
-               }
-
-               // Post-text-normalization, strip runs of whitespace and 
comments and
-               // record them in a meta-tag.
-               //
-               // 
(http://dev.w3.org/html5/spec-LC/content-models.html#content-models)
-               //
-               // Dont normalize if we are in a PRE-node or if the node is a 
mw:Entity SPAN
-               // Or if the node has no element-node child
-               if (!inPre &&
-                       !DU.isNodeOfType(node, 'span', 'mw:Entity') &&
-                       (!haveOrigSrc || DU.hasElementChild(node)))
-               {
-                       var prevSentinel = null,
-                               waitForSentinel = false,
-                               sepNodes = [],
-                               sepText = [];
-
-                       child = node.firstChild;
-                       while (child) {
-                               var nodeType = child.nodeType;
-
-                               next = child.nextSibling;
-
-                               // Delete empty text nodes
-                               if (nodeType === node.TEXT_NODE && child.data 
=== '') {
-                                       node.removeChild(child);
-                                       child = next;
-                                       continue;
-                               }
-
-                               if (!haveOrigSrc) {
-                                       if (nodeType === node.TEXT_NODE) {
-                                               str = child.data;
-                                               // Strip leading/trailing 
newlines if preceded by or
-                                               // followed by block nodes -- 
these newlines are syntactic
-                                               // and can be normalized away 
since the serializer in sourceless
-                                               // mode is tuned to normalize 
newlines.
-                                               if (str.match(/\n$/) && (!next 
|| DU.isBlockNode(next))) {
-                                                       child.data = 
str.replace(/\n+$/, '');
-                                               }
-                                           if (str.match(/^\n/) && (!prev || 
DU.isBlockNode(prev))) {
-                                                       child.data = 
str.replace(/^\n+/, '');
-                                               }
-                                       }
-                               } else {
-                                       switch (nodeType) {
-                                               case node.TEXT_NODE:
-                                                       str = child.data;
-                                                       if (!waitForSentinel && 
str.match(/^\s+$/)) {
-                                                               
sepText.push(str);
-                                                               
sepNodes.push(child);
-                                                       } else {
-                                                               prevSentinel = 
null;
-                                                               waitForSentinel 
= true;
-                                                       }
-                                                       break;
-
-                                               case node.COMMENT_NODE:
-                                                       if (!waitForSentinel) {
-                                                               
sepText.push("<!--");
-                                                               
sepText.push(child.data);
-                                                               
sepText.push("-->");
-                                                               
sepNodes.push(child);
-                                                       }
-                                                       break;
-
-                                               case node.ELEMENT_NODE:
-                                                       if (!waitForSentinel && 
DU.isMarkerMeta(child, "mw:DiffMarker")) {
-                                                               // Float 
"mw:DiffMarker" to the left till we bump into a sentinel
-                                                               
node.insertBefore(child, prevSentinel ? prevSentinel.nextSibling : 
node.firstChild);
-                                                               prevSentinel = 
child;
-                                                       } else {
-                                                               if 
(!waitForSentinel && sepNodes.length > 0) {
-                                                                       
setupSeparator(prevSentinel, child, sepNodes, sepText);
-                                                               }
-                                                               waitForSentinel 
= false;
-                                                               prevSentinel = 
child;
-                                                               sepNodes = [];
-                                                               sepText = [];
-                                                       }
-                                                       break;
-                                       }
-                               }
-
-                               child = next;
-                       }
-
-                       if (prevSentinel && sepNodes.length > 0) {
-                               setupSeparator(prevSentinel, null, sepNodes, 
sepText);
-                       }
                }
        }
 };
@@ -2461,7 +2444,7 @@
                        };
                }
 
-               this._serializeDOM( node, state );
+               this._serializeNode( node, state );
                this._serializeToken( state, new pd.EOFTk() );
 
                if ( finalCB && typeof finalCB === 'function' ) {
@@ -2520,7 +2503,7 @@
 }
 
 /**
- * Helper for handleSeparator
+ * Helper for doHandleSeparator
  *
  * Collects, checks and integrates separator newline requirements to a sinple
  * min, max structure.
@@ -2570,8 +2553,8 @@
  *
  * node handlers:
  *
- * node: {
- *     handle: function(state, node) {},
+ * body: {
+ *     handle: function(node, state, cb) {},
  *             // responsible for calling
  *     sepNls: {
  *             before: function(node) -> {min: 1, max: 2}
@@ -2582,7 +2565,7 @@
  * }
  *
  */
-WSP.handleSeparator = function( state, sep, cb, nodeA, handlerA, nodeB, handlerB) {
+WSP.doHandleSeparator = function( state, sep, cb, nodeA, handlerA, nodeB, handlerB) {
        var constraints,
                i;
        var sepHandlerA = handlerA && handlerA.sepNls || {},
@@ -2614,6 +2597,12 @@
 
                var sepMatch = sep.match(/\n/g),
                        sepNlCount = sepMatch && sepMatch.length || 0;
+
+               if (state.atStartOfOutput) {
+                       constraints.min--;
+                       state.atStartOfOutput = false;
+               }
+
                if (constraints.min && sepNlCount < constraints.min) {
                        for (i = 0; i < (constraints.min - sepNlCount); i++) {
                                sep += '\n';
@@ -2639,6 +2628,7 @@
                        // This avoids triggering pres
                        sep = sep.replace(/[^\n>]+(<!--(?:[^\-]+|-(?!->))*-->[^\n]*)?$/g, '$1');
                }
+               console.log(JSON.stringify(sep));
 
                cb(sep);
        }
@@ -2649,25 +2639,21 @@
  *
  * Called on a text node.
  */
-WSP.testHandleSeparator = function(node, state) {
-       // Gather IEW nodes into string, and call handleSeparator
-       var maybeSeparator = this.gatherSeparatorText(node),
-               prev = node.previousSibling || node.parentNode,
+WSP.handleSeparator = function(node, state, cb, maybeSeparator) {
+       // Gather IEW nodes into string, and call doHandleSeparator
+       var prev = node.previousSibling || node.parentNode,
                prevHandler = WSP.tagHandlers[prev.nodeName.toLowerCase()],
-               cb = function(sep) {
-                       console.log('NEW:', prev.outerHTML + sep + 
next.outerHTML);
-               },
                next, nextHandler,
                sepSrc = '';
        if (maybeSeparator) {
                //console.log('<origsep>', maybeSeparator.sepSrc, '</origsep>');
-               // call handleSeparator with separator string
+               // call doHandleSeparator with separator string
                next = maybeSeparator.nextElement;
                nextHandler = WSP.tagHandlers[next.nodeName.toLowerCase()];
                sepSrc = maybeSeparator.sepSrc;
        } else {
-               // call handleSeparator
-               next = node.nextSibling || node.parentNode;
+               // call doHandleSeparator
+               next = node;
                nextHandler = WSP.tagHandlers[next.nodeName.toLowerCase()];
        }
 
@@ -2676,11 +2662,13 @@
                        prev.nodeType === node.ELEMENT_NODE &&
                        next.nodeType === node.ELEMENT_NODE )
        {
+               console.log('calling doHandleSeparator', prev.outerHTML, next.outerHTML);
                // Looks like it.
-               this.handleSeparator(state, sepSrc, cb,
+               this.doHandleSeparator(state, sepSrc, cb,
                                                                prev, prevHandler,
                                                                next, nextHandler);
        }
+       return next;
 };
 
 
@@ -2688,271 +2676,78 @@
  * Internal worker. Recursively serialize a DOM subtree by creating tokens and
  * calling _serializeToken on each of these.
  */
-WSP._serializeDOM = function( node, state ) {
-       var newNLs;
+WSP._serializeNode = function( node, state, cb) {
+       cb = cb || state.chunkCB;
        // serialize this node
-       if (node.nodeType === node.ELEMENT_NODE) {
-               if (state.activeTemplateId &&
-                       state.activeTemplateId === node.getAttribute("about"))
-               {
-                       // skip -- template content
-                       return;
-               } else {
-                       state.activeTemplateId = null;
-               }
-
-               if (!state.activeTemplateId) {
-                       // Check if this node marks the start of template output
-                       // NOTE: Since we are deleting all mw:Object/**/End 
markers,
-                       // we need not verify if it is an End marker
-                       var typeofVal = node.getAttribute("typeof");
-                       if (typeofVal && 
typeofVal.match(/\bmw:Object(\/[^\s]+|\b)/)) {
-                               state.activeTemplateId = 
node.getAttribute("about") || "";
-                               var attrs = [ new pd.KV("typeof", 
"mw:TemplateSource") ];
-                               var dps = 
node.getAttribute("data-parsoid-serialize");
-                               if (dps) {
-                                       attrs.push(new 
pd.KV("data-parsoid-serialize", dps));
-                               }
-                               var dummyToken = new pd.SelfclosingTagTk("meta",
-                                       attrs,
-                                       { src: this._getDOMRTInfo(node).src }
-                               );
-
-                               if ( dps ) {
-                                       state.selser.serializeInfo = dps;
-                               }
-                               this._serializeToken(state, dummyToken);
-                               return;
-                       }
-               }
-       } else if (node.nodeType !== node.COMMENT_NODE) {
-               state.activeTemplateId = null;
-       }
-
-       var i, n, child, children;
-
        switch( node.nodeType ) {
                case node.ELEMENT_NODE:
                        var nodeName = node.nodeName.toLowerCase(),
                                tkAttribs = this._getDOMAttribs(node.attributes),
                                tkRTInfo = this._getDOMRTInfo(node),
-                               parentSTX = state.parentST1X;
+                               parentSTX = state.parentSTX;
 
                        // populate node.data.parsoid and node.data['parsoid-serialize']
                        DU.loadDataParsoid(node);
                        DU.loadDataAttrib(node, 'parsoid-serialize', null);
 
-                       children = node.childNodes;
-
-                       if (isHtmlBlockTag(nodeName)) {
-                               state.currLine = {
-                                       text: null,
-                                       numPieces: 0,
-                                       processed: false,
-                                       hasBracketPair: false,
-                                       hasHeadingPair: false
-                               };
+                       // Insert a possible separator
+                       var prev = node.previousSibling || node.parentNode;
+                       if (prev && prev.nodeType === node.ELEMENT_NODE) {
+                               this.handleSeparator(node, state, cb);
                        }
 
-                       var tailSrc = '';
-                       // Hack for link tail escaping- access to the next node 
is
-                       // difficult otherwise.
-                       // TODO: Implement this more cleanly!
-                       if ( nodeName === 'a' && node.getAttribute('rel') === 
'mw:WikiLink' ) {
-                               var dp = 
JSON.parse(node.getAttribute('data-parsoid') || '{}');
-                               if ( dp.stx !== 'html' &&
-                                       ! dp.tail &&
-                                       node.nextSibling && 
node.nextSibling.nodeType === node.TEXT_NODE &&
-                                       // TODO: use tokenizer
-                                       node.nextSibling.nodeValue &&
-                                       
node.nextSibling.nodeValue.match(/^[a-z]/) )
-                               {
-                                       tailSrc = '<nowiki/>';
-                               }
-                       }
-
-                       // Handle html-pres specially
-                       // 1. If the node has a leading newline, add one like 
it (logic copied from VE)
-                       // 2. If not, and it has a data-parsoid strippedNL 
flag, add it back.
-                       // This patched DOM will serialize html-pres correctly.
-                       //
-                       // FIXME: This code should be extracted into a 
DOMUtils.js file to be used
-                       // by the testing setup.
-                       if (nodeName === 'pre' && tkRTInfo.stx === 'html') {
-                               var modified = false;
-                               var fc = node.firstChild;
-                               if (fc && fc.nodeType === node.TEXT_NODE) {
-                                       var matches = 
fc.data.match(/^(\r\n|\r|\n)/);
-                                       if (matches) {
-                                               fc.insertData(0, matches[1]);
-                                               modified = true;
-                                       }
-                               }
-
-                               var strippedNL = tkRTInfo.strippedNL;
-                               if (!modified && strippedNL) {
-                                       if (fc && fc.nodeType === 
node.TEXT_NODE) {
-                                               fc.insertData(0, strippedNL);
-                                       } else {
-                                               
node.insertBefore(node.ownerDocument.createTextNode(strippedNL), fc);
-                                       }
-                               }
-                       }
 
                        var serializeInfo = null;
                        if ( state.selser.serializeInfo === null ) {
                                serializeInfo = node.data['parsoid-serialize'];
                                state.selser.serializeInfo = serializeInfo;
-                               state.chunkCB('', serializeInfo);
+                               cb('', serializeInfo);
                        }
 
                        // See if we have a DOM-based handler for this node
-                       var domHandler = this.getDOMHandler(state, node, 
state.chunkCB);
-                       if ( domHandler && domHandler.handle ) {
-
-                               // Update some state based on the serializer 
result
-                               var stateCB = state.chunkCB, // remember the 
current cb
-                                       cbWrapper = function (res, 
serializeInfo) {
-                                       if (res) {
-                                               state.onStartOfLine = 
res.match(/\n$/) ? true : false;
-                                       }
-                                       stateCB(res, serializeInfo);
-                               };
-
+                       var domHandler = this.getDOMHandler(node, state, state.chunkCB);
+                       if ( domHandler ) {
                                // DOM-based serialization
-                               domHandler.handle(state, node, cbWrapper);
-
-                               // Fake curToken state for token-based 
handlers. This is then
-                               // assigned to prevToken in _serializeToken.
-                               state.curToken = new pd.EndTagTk(nodeName, 
tkAttribs, tkRTInfo);
-                               state.prevToken = state.curToken;
-                               state.currTagToken = state.curToken;
-                               state.prevTagToken = state.curToken;
+                               domHandler(node, state, cb);
+                               // The handler is responsible for serializing its children
                        } else {
-                               // Token-based serialization
-
-                               // Serialize the start token
-                               var startToken = new pd.TagTk(nodeName, 
tkAttribs, tkRTInfo);
-                               this._serializeToken(state, startToken);
-
-                               // Newly created elements/tags in this list 
inherit their default
-                               // syntax from their parent scope
-                               var inheritSTXTags = { tbody:1, tr: 1, td: 1, 
li: 1, dd: 1, dt: 1 },
-                                       // These reset the inherited syntax no 
matter what
-                                       setSTXTags = { table: 1, ul: 1, ol: 1, 
dl: 1 },
-                                       // These (and inline elements) reset 
the default syntax to
-                                       // undefined
-                                       noHTMLSTXTags = {p: 1};
-
-                               // Set self to parent token if data-parsoid is 
set
-                               if ( Object.keys(tkRTInfo).length > 0 ||
-                                               setSTXTags[nodeName] ||
-                                               ! inheritSTXTags[nodeName] )
-                               {
-                                       if ( noHTMLSTXTags[nodeName] || ! 
Util.isBlockTag(nodeName) ) {
-                                               // Don't inherit stx in these
-                                               state.parentSTX = undefined;
-                                       } else {
-                                               state.parentSTX = tkRTInfo.stx;
-                                       }
-                               }
-
-                               // Clear out prevTagToken at each dom level
-                               var oldPrevToken = state.prevToken, 
oldPrevTagToken = state.prevTagToken;
-                               state.prevToken = null;
-                               state.prevTagToken = null;
-
-                               var prevEltChild = null;
-                               for (i = 0, n = children.length; i < n; i++) {
-                                       child = children[i];
-
-                                       // Ignore -- handled separately
-                                       if (DU.isMarkerMeta(child, 
"mw:Separator")) {
-                                               continue;
-                                       }
-
-                                       // Skip over comment, white-space text 
nodes, and tpl-content nodes
-                                       var nodeType = child.nodeType;
-                                       if (  nodeType !== node.COMMENT_NODE &&
-                                               !(nodeType === node.TEXT_NODE 
&& child.data.match(/^\s*$/)) &&
-                                               !(nodeType === 
node.ELEMENT_NODE &&
-                                                       state.activeTemplateId 
&&
-                                                       state.activeTemplateId 
=== child.getAttribute("about"))
-                                               )
-                                       {
-                                               if (child.nodeType === 
node.ELEMENT_NODE) {
-                                                       if (prevEltChild === 
null) {
-                                                               if 
(!DU.hasNodeName(node, "pre")) {
-                                                                       // 
extract separator text between node and child;
-                                                                       
state.emitSeparator(node, child, START_SEP);
-                                                               }
-                                                       } else if 
(prevEltChild.nodeType === node.ELEMENT_NODE) {
-                                                               if 
(!DU.hasNodeName(node, "pre")) {
-                                                                       // 
extract separator text between prevEltChild and child;
-                                                                       
state.emitSeparator(prevEltChild, child, IE_SEP);
-                                                               }
-                                                       }
-                                               }
-
-                                               prevEltChild = child;
-                                       }
-
-                                       this._serializeDOM( children[i], state 
);
-                               }
-
-                               if (prevEltChild && prevEltChild.nodeType === 
node.ELEMENT_NODE) {
-                                       // extract separator text between 
prevEltChild and node
-                                       if (!DU.hasNodeName(node, "pre")) {
-                                               
state.emitSeparator(prevEltChild, node, END_SEP);
-                                       }
-                               }
-
-                               // Reset parent state
-                               state.prevTagToken = oldPrevTagToken;
-                               state.prevToken = oldPrevToken;
-                               state.parentSTX = parentSTX;
-
-                               // then the end token
-                               this._serializeToken(state, new 
pd.EndTagTk(nodeName, tkAttribs, tkRTInfo));
-
-                               if ( tailSrc ) {
-                                       // emit the tail
-                                       state.chunkCB( tailSrc, 
state.selser.serializeInfo );
-                               }
+                               // Used to be token-based serialization
+                               console.error('no dom handler found for', node.outerHTML);
                        }
 
-                       if ( serializeInfo !== null ) {
-                               state.selser.serializeInfo = null;
+                       // Insert end separator
+                       if (node && node.nodeType === node.ELEMENT_NODE && !node.nextSibling) {
+                               var next = node.parentNode;
+                               if (next) {
+                                       this.doHandleSeparator(state, '', cb,
+                                                       node, WSP.tagHandlers[node.nodeName.toLowerCase()],
+                                                       next, WSP.tagHandlers[next.nodeName.toLowerCase()]);
+                               }
                        }
 
                        break;
                case node.TEXT_NODE:
-                       if (state.currLine.text === null) {
-                               var buf = [],
-                                       bn = firstBlockNodeAncestor(node);
-
-                               children = bn.childNodes;
-                               for (i = 0, n = children.length; i < n; i++) {
-                                       gatherInlineText(buf, children[i]);
-                               }
-                               state.currLine.numPieces = n;
-                               state.currLine.text = buf.join('');
-                       }
-
                        // Test the separator handler, but don't use it yet.
                        //this.testHandleSeparator(node, state);
+                       var maybeSeparator = this.gatherSeparatorText(node);
 
-                       this._serializeToken( state, node.data );
+                       if (maybeSeparator) {
+                               node = this.handleSeparator(node, state, cb, maybeSeparator);
+                       } else {
+                               // regular serialization
+                               this._serializeText(node.data, state, state.chunkCB );
+                       }
                        break;
                case node.COMMENT_NODE:
                        // delay the newline creation until after the comment
-                       this._serializeToken( state, new pd.CommentTk( 
node.data ) );
+                       state.chunkCB( '<!--' + node.data.replace(/-->/, '--&gt;') + '-->' );
                        break;
                default:
                        console.warn( "Unhandled node type: " +
                                        node.outerHTML );
                        break;
        }
+       return node;
 };
 
 if (typeof module === "object") {

-- 
To view, visit https://gerrit.wikimedia.org/r/51333
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I67463a12fd200f3a9f15c55658d5efda5e334d83
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/extensions/Parsoid
Gerrit-Branch: master
Gerrit-Owner: GWicke <[email protected]>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

[MediaWiki-commits] [Gerrit] WIP: DOM-based WTS refactor - change (mediawiki...Parsoid)

Reply via email to