Arlolra has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/257688

Change subject: Remove wts buffering and move bullet emitting where it belongs
......................................................................

Remove wts buffering and move bullet emitting where it belongs

 * Blacklist changes result from no longer emitting a space when no
   bullets are returned.

Change-Id: I9cf2a8d4127a831be6a0eb96135a472a19d0c7a7
---
M lib/html2wt/DOMHandlers.js
M lib/html2wt/SerializerState.js
M tests/parserTests-blacklist.js
3 files changed, 13 insertions(+), 33 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/mediawiki/services/parsoid 
refs/changes/88/257688/1

diff --git a/lib/html2wt/DOMHandlers.js b/lib/html2wt/DOMHandlers.js
index 17763cc..8ffbc6a 100644
--- a/lib/html2wt/DOMHandlers.js
+++ b/lib/html2wt/DOMHandlers.js
@@ -166,7 +166,8 @@
                node = node.parentNode;
        }
 
-       return res + space;
+       // Don't emit a space if we aren't returning any bullets.
+       return res.length ? res + space : '';
 }
 
 function wtListEOL(node, otherNode) {
@@ -273,8 +274,6 @@
 
                        if (!firstChildElt || !(firstChildElt.nodeName in 
firstChildNames)) {
                                state.emitChunk(getListBullets(state, node), 
node);
-                       } else if (isTplListWithoutSharedPrefix(firstChildElt)) 
{
-                               state.bufferChunk(getListBullets(state, node));
                        }
 
                        var liHandler = state.serializer.wteHandlers.liHandler
@@ -318,8 +317,6 @@
                        var chunk = (stx === 'row') ? ':' : 
getListBullets(state, node);
                        if (!DU.isList(firstChildElement)) {
                                state.emitChunk(chunk, node);
-                       } else if 
(isTplListWithoutSharedPrefix(firstChildElement)) {
-                               state.bufferChunk(chunk);
                        }
                        var liHandler = state.serializer.wteHandlers.liHandler
                                        .bind(state.serializer.wteHandlers, 
node);
@@ -549,8 +546,6 @@
                        var firstChildElement = DU.firstNonSepChildNode(node);
                        if (!DU.isList(firstChildElement)) {
                                state.emitChunk(getListBullets(state, node), 
node);
-                       } else if 
(isTplListWithoutSharedPrefix(firstChildElement)) {
-                               state.bufferChunk(getListBullets(state, node));
                        }
                        var liHandler = state.serializer.wteHandlers.liHandler
                                        .bind(state.serializer.wteHandlers, 
node);
@@ -585,8 +580,6 @@
                        var firstChildElement = DU.firstNonSepChildNode(node);
                        if (!DU.isList(firstChildElement)) {
                                state.emitChunk(getListBullets(state, node), 
node);
-                       } else if 
(isTplListWithoutSharedPrefix(firstChildElement)) {
-                               state.bufferChunk(getListBullets(state, node));
                        }
                        var liHandler = state.serializer.wteHandlers.liHandler
                                        .bind(state.serializer.wteHandlers, 
node);
@@ -1305,6 +1298,14 @@
                return {
                        handle: Promise.method(function(node, state, 
wrapperUnmodified) {
                                var src, p, dataMW;
+                               var bullets = '';
+                               if (DU.isListOrListItem(node) &&
+                                               // FIXME: skip builder-inserted 
wrappers
+                                               
(DU.firstNonSepChildNode(node.parentNode) === node) &&
+                                               
isTplListWithoutSharedPrefix(node)) {
+                                       // FIXME: row dd, but probably never 
the case
+                                       bullets = getListBullets(state, 
node.parentNode);
+                               }
                                if 
(/(?:^|\s)mw:Transclusion(?=$|\s)/.test(typeOf)) {
                                        dataMW = DU.getDataMw(node);
                                        if (dataMW.parts) {
@@ -1368,7 +1369,7 @@
 
                                return p.then(function(s) {
                                        state.singleLineContext.disable();
-                                       self.emitWikitext(s, node);
+                                       self.emitWikitext(bullets + s, node);
                                        state.singleLineContext.pop();
                                        return 
DU.skipOverEncapsulatedContent(node);
                                });
diff --git a/lib/html2wt/SerializerState.js b/lib/html2wt/SerializerState.js
index fd668eb..61b8c8c 100644
--- a/lib/html2wt/SerializerState.js
+++ b/lib/html2wt/SerializerState.js
@@ -77,18 +77,6 @@
  *
  * singleLineContext
  *    Stack used to enforce single-line context
- *
- * buffer
- *    Buffered wikitext for scenarios where a single separator applies
- *    to multiple nodes and we don't want to emit the separator prematurely.
- *
- *    Ex: In list handlers, for nested lists, we implicitly buffer bullets by
- *    not emitting anything for parent lists. But, that implicit buffering
- *    is insufficient by itself for scenarios where templates are involved.
- *
- *    TODO: For followup patches, to make all buffering explicit instead of
- *    the implicit buffering currently in place in list and list item
- *    handlers.
  * ********************************************************************* */
 
 var initialState = {
@@ -107,7 +95,6 @@
        wteHandlerStack: [],
        // XXX: replace with output buffering per line
        currLine: null,
-       buffer: '',
        out: '',
        logPrefix: 'OUT:',
 };
@@ -286,16 +273,8 @@
                function() { return JSON.stringify(sep); });
 };
 
-// Buffer this chunk so that it can be emitted with the next emitChunk
-// along with any necessary separators before it.
-SSP.bufferChunk = function(res) {
-       this.buffer += res;
-};
-
 SSP.emitChunk = function(res, node) {
        res = ConstrainedText.cast(res, node);
-       res.text = this.buffer + res.text;
-       this.buffer = '';
 
        // Replace newlines if we're in a single-line context
        if (this.singleLineContext.enforced()) {
diff --git a/tests/parserTests-blacklist.js b/tests/parserTests-blacklist.js
index 95c011c..7a59f66 100644
--- a/tests/parserTests-blacklist.js
+++ b/tests/parserTests-blacklist.js
@@ -466,7 +466,7 @@
 add("html2html", "Failing to transform badly formed HTML into correct XHTML", 
"\n\n<p data-parsoid='{\"autoInsertedEnd\":true,\"dsr\":[2,2,0,0]}'><br 
data-parsoid='{\"dsr\":[2,2,0,0]}'/></p>\n");
 add("html2html", "Handling html with a br self-closing tag", "\n\n<p 
data-parsoid='{\"dsr\":[2,2,0,0]}'><br 
data-parsoid='{\"dsr\":[2,2,0,0]}'/></p>\n\n\n<p 
data-parsoid='{\"autoInsertedEnd\":true,\"dsr\":[5,5,0,0]}'><br 
data-parsoid='{\"dsr\":[5,5,0,0]}'/></p>\n");
 add("html2html", "Unbalanced closing non-block tags don't break a list\n(php 
parser relies on Tidy to fix up)", "<p data-parsoid='{\"dsr\":[0,6,0,0]}'><span 
data-parsoid='{\"stx\":\"html\",\"autoInsertedEnd\":true,\"dsr\":[0,6,6,0]}'></span></p>\n\n<ul
 data-parsoid='{\"dsr\":[8,28,0,0]}'><li data-parsoid='{\"dsr\":[8,24,1,0]}'> 
a<span data-parsoid='{\"stx\":\"html\",\"dsr\":[11,24,6,7]}'></span></li>\n<li 
data-parsoid='{\"dsr\":[25,28,1,0]}'> b</li></ul>\n\n<p 
data-parsoid='{\"dsr\":[30,37,0,0]}'></p>");
-add("html2html", "2. List embedded in a formatting tag", "<p 
data-parsoid='{\"dsr\":[0,15,0,0]}'><small 
data-parsoid='{\"stx\":\"html\",\"dsr\":[0,15,7,8]}'></small></p>\n<pre 
data-parsoid='{\"dsr\":[16,25,1,0]}'><small 
data-parsoid='{\"stx\":\"html\",\"autoInsertedEnd\":true,\"dsr\":[17,25,7,0]}'></small></pre><small
 
data-parsoid='{\"stx\":\"html\",\"autoInsertedStart\":true,\"dsr\":[25,37,0,8]}'><ul
 data-parsoid='{\"dsr\":[25,28,0,0]}'><li data-parsoid='{\"dsr\":[25,28,1,0]}'> 
a</li></ul>\n</small>\n<ul data-parsoid='{\"dsr\":[38,56,0,0]}'><li 
data-parsoid='{\"dsr\":[38,56,1,0]}'> <small 
data-parsoid='{\"stx\":\"html\",\"dsr\":[40,56,7,8]}'>b</small></li></ul>\n");
+add("html2html", "2. List embedded in a formatting tag", "<p 
data-parsoid='{\"dsr\":[0,23,0,0]}'><small 
data-parsoid='{\"stx\":\"html\",\"dsr\":[0,15,7,8]}'></small>\n<small 
data-parsoid='{\"stx\":\"html\",\"autoInsertedEnd\":true,\"dsr\":[16,23,7,0]}'></small></p><small
 
data-parsoid='{\"stx\":\"html\",\"autoInsertedStart\":true,\"dsr\":[23,36,0,8]}'>\n<ul
 data-parsoid='{\"dsr\":[24,27,0,0]}'><li data-parsoid='{\"dsr\":[24,27,1,0]}'> 
a</li></ul>\n</small>\n<ul data-parsoid='{\"dsr\":[37,55,0,0]}'><li 
data-parsoid='{\"dsr\":[37,55,1,0]}'> <small 
data-parsoid='{\"stx\":\"html\",\"dsr\":[39,55,7,8]}'>b</small></li></ul>\n");
 add("html2html", "Case-sensitive magic words, when cased differently, should 
just be template transclusions", "<p 
data-parsoid='{\"dsr\":[0,331,0,0]}'>[/index.php?title=Template:CurrentMonth&amp;action=edit&amp;redlink=1
 
Template:CurrentMonth]\n[/index.php?title=Template:Currentday&amp;action=edit&amp;redlink=1
 
Template:Currentday]\n[/index.php?title=Template:CURreNTweEK&amp;action=edit&amp;redlink=1
 
Template:CURreNTweEK]\n[/index.php?title=Template:CurrentHour&amp;action=edit&amp;redlink=1
 Template:CurrentHour]</p>\n");
 add("html2html", "Nonexistent template", "<p 
data-parsoid='{\"dsr\":[0,108,0,0]}'>[/index.php?title=Template:Thistemplatedoesnotexist&amp;action=edit&amp;redlink=1
 Template:Thistemplatedoesnotexist]</p>\n");
 add("html2html", "Template with invalid target containing wikilink", "<p 
data-parsoid='{\"dsr\":[0,17,0,0]}'>{{<a rel=\"mw:WikiLink\" 
href=\"./Main_Page\" title=\"Main Page\" 
data-parsoid='{\"stx\":\"simple\",\"a\":{\"href\":\"./Main_Page\"},\"sa\":{\"href\":\"Main
 Page\"},\"dsr\":[2,15,2,2]}'>Main Page</a>}}</p>\n");
@@ -850,7 +850,7 @@
 add("html2wt", "Unbalanced closing block tags break a list\n(php parser relies 
on Tidy to fix up)", "<div>\n\n* a\n\n</div>\n<div>\n\n* b\n\n</div>");
 add("html2wt", "Unbalanced closing non-block tags don't break a list\n(php 
parser relies on Tidy to fix up)", "<span>\n\n* a<span></span>\n* 
b\n\n</span>");
 add("html2wt", "1. List embedded in a formatting tag", "<small>\n\n* 
foo\n\n</small>");
-add("html2wt", "2. List embedded in a formatting tag", "<small></small>\n 
<small>\n* a\n</small>\n* <small>b</small>\n");
+add("html2wt", "2. List embedded in a formatting tag", 
"<small></small>\n<small>\n* a\n</small>\n* <small>b</small>\n");
 add("html2wt", "Magic Word: {{CURRENTDAY}}", "1\n");
 add("html2wt", "Magic Word: {{CURRENTDAY2}}", "01\n");
 add("html2wt", "Magic Word: {{CURRENTDAYNAME}}", "Thursday\n");

-- 
To view, visit https://gerrit.wikimedia.org/r/257688
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I9cf2a8d4127a831be6a0eb96135a472a19d0c7a7
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/services/parsoid
Gerrit-Branch: master
Gerrit-Owner: Arlolra <[email protected]>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to