Arlolra has uploaded a new change for review.
https://gerrit.wikimedia.org/r/257688
Change subject: Remove wts buffering and move bullet emitting where it belongs
......................................................................
Remove wts buffering and move bullet emitting where it belongs
* The blacklist changes result from no longer emitting a space when no
bullets are returned.
Change-Id: I9cf2a8d4127a831be6a0eb96135a472a19d0c7a7
---
M lib/html2wt/DOMHandlers.js
M lib/html2wt/SerializerState.js
M tests/parserTests-blacklist.js
3 files changed, 13 insertions(+), 33 deletions(-)
git pull ssh://gerrit.wikimedia.org:29418/mediawiki/services/parsoid
refs/changes/88/257688/1
diff --git a/lib/html2wt/DOMHandlers.js b/lib/html2wt/DOMHandlers.js
index 17763cc..8ffbc6a 100644
--- a/lib/html2wt/DOMHandlers.js
+++ b/lib/html2wt/DOMHandlers.js
@@ -166,7 +166,8 @@
node = node.parentNode;
}
- return res + space;
+ // Don't emit a space if we aren't returning any bullets.
+ return res.length ? res + space : '';
}
function wtListEOL(node, otherNode) {
@@ -273,8 +274,6 @@
if (!firstChildElt || !(firstChildElt.nodeName in
firstChildNames)) {
state.emitChunk(getListBullets(state, node),
node);
- } else if (isTplListWithoutSharedPrefix(firstChildElt))
{
- state.bufferChunk(getListBullets(state, node));
}
var liHandler = state.serializer.wteHandlers.liHandler
@@ -318,8 +317,6 @@
var chunk = (stx === 'row') ? ':' :
getListBullets(state, node);
if (!DU.isList(firstChildElement)) {
state.emitChunk(chunk, node);
- } else if
(isTplListWithoutSharedPrefix(firstChildElement)) {
- state.bufferChunk(chunk);
}
var liHandler = state.serializer.wteHandlers.liHandler
.bind(state.serializer.wteHandlers,
node);
@@ -549,8 +546,6 @@
var firstChildElement = DU.firstNonSepChildNode(node);
if (!DU.isList(firstChildElement)) {
state.emitChunk(getListBullets(state, node),
node);
- } else if
(isTplListWithoutSharedPrefix(firstChildElement)) {
- state.bufferChunk(getListBullets(state, node));
}
var liHandler = state.serializer.wteHandlers.liHandler
.bind(state.serializer.wteHandlers,
node);
@@ -585,8 +580,6 @@
var firstChildElement = DU.firstNonSepChildNode(node);
if (!DU.isList(firstChildElement)) {
state.emitChunk(getListBullets(state, node),
node);
- } else if
(isTplListWithoutSharedPrefix(firstChildElement)) {
- state.bufferChunk(getListBullets(state, node));
}
var liHandler = state.serializer.wteHandlers.liHandler
.bind(state.serializer.wteHandlers,
node);
@@ -1305,6 +1298,14 @@
return {
handle: Promise.method(function(node, state,
wrapperUnmodified) {
var src, p, dataMW;
+ var bullets = '';
+ if (DU.isListOrListItem(node) &&
+ // FIXME: skip builder-inserted
wrappers
+
(DU.firstNonSepChildNode(node.parentNode) === node) &&
+
isTplListWithoutSharedPrefix(node)) {
+ // FIXME: row dd, but probably never
the case
+ bullets = getListBullets(state,
node.parentNode);
+ }
if
(/(?:^|\s)mw:Transclusion(?=$|\s)/.test(typeOf)) {
dataMW = DU.getDataMw(node);
if (dataMW.parts) {
@@ -1368,7 +1369,7 @@
return p.then(function(s) {
state.singleLineContext.disable();
- self.emitWikitext(s, node);
+ self.emitWikitext(bullets + s, node);
state.singleLineContext.pop();
return
DU.skipOverEncapsulatedContent(node);
});
diff --git a/lib/html2wt/SerializerState.js b/lib/html2wt/SerializerState.js
index fd668eb..61b8c8c 100644
--- a/lib/html2wt/SerializerState.js
+++ b/lib/html2wt/SerializerState.js
@@ -77,18 +77,6 @@
*
* singleLineContext
* Stack used to enforce single-line context
- *
- * buffer
- * Buffered wikitext for scenarios where a single separator applies
- * to multiple nodes and we don't want to emit the separator prematurely.
- *
- * Ex: In list handlers, for nested lists, we implicitly buffer bullets by
- * not emitting anything for parent lists. But, that implicit buffering
- * is insufficient by itself for scenarios where templates are involved.
- *
- * TODO: For followup patches, to make all buffering explicit instead of
- * the implicit buffering currently in place in list and list item
- * handlers.
* ********************************************************************* */
var initialState = {
@@ -107,7 +95,6 @@
wteHandlerStack: [],
// XXX: replace with output buffering per line
currLine: null,
- buffer: '',
out: '',
logPrefix: 'OUT:',
};
@@ -286,16 +273,8 @@
function() { return JSON.stringify(sep); });
};
-// Buffer this chunk so that it can be emitted with the next emitChunk
-// along with any necessary separators before it.
-SSP.bufferChunk = function(res) {
- this.buffer += res;
-};
-
SSP.emitChunk = function(res, node) {
res = ConstrainedText.cast(res, node);
- res.text = this.buffer + res.text;
- this.buffer = '';
// Replace newlines if we're in a single-line context
if (this.singleLineContext.enforced()) {
diff --git a/tests/parserTests-blacklist.js b/tests/parserTests-blacklist.js
index 95c011c..7a59f66 100644
--- a/tests/parserTests-blacklist.js
+++ b/tests/parserTests-blacklist.js
@@ -466,7 +466,7 @@
add("html2html", "Failing to transform badly formed HTML into correct XHTML",
"\n\n<p data-parsoid='{\"autoInsertedEnd\":true,\"dsr\":[2,2,0,0]}'><br
data-parsoid='{\"dsr\":[2,2,0,0]}'/></p>\n");
add("html2html", "Handling html with a br self-closing tag", "\n\n<p
data-parsoid='{\"dsr\":[2,2,0,0]}'><br
data-parsoid='{\"dsr\":[2,2,0,0]}'/></p>\n\n\n<p
data-parsoid='{\"autoInsertedEnd\":true,\"dsr\":[5,5,0,0]}'><br
data-parsoid='{\"dsr\":[5,5,0,0]}'/></p>\n");
add("html2html", "Unbalanced closing non-block tags don't break a list\n(php
parser relies on Tidy to fix up)", "<p data-parsoid='{\"dsr\":[0,6,0,0]}'><span
data-parsoid='{\"stx\":\"html\",\"autoInsertedEnd\":true,\"dsr\":[0,6,6,0]}'></span></p>\n\n<ul
data-parsoid='{\"dsr\":[8,28,0,0]}'><li data-parsoid='{\"dsr\":[8,24,1,0]}'>
a<span data-parsoid='{\"stx\":\"html\",\"dsr\":[11,24,6,7]}'></span></li>\n<li
data-parsoid='{\"dsr\":[25,28,1,0]}'> b</li></ul>\n\n<p
data-parsoid='{\"dsr\":[30,37,0,0]}'></p>");
-add("html2html", "2. List embedded in a formatting tag", "<p
data-parsoid='{\"dsr\":[0,15,0,0]}'><small
data-parsoid='{\"stx\":\"html\",\"dsr\":[0,15,7,8]}'></small></p>\n<pre
data-parsoid='{\"dsr\":[16,25,1,0]}'><small
data-parsoid='{\"stx\":\"html\",\"autoInsertedEnd\":true,\"dsr\":[17,25,7,0]}'></small></pre><small
data-parsoid='{\"stx\":\"html\",\"autoInsertedStart\":true,\"dsr\":[25,37,0,8]}'><ul
data-parsoid='{\"dsr\":[25,28,0,0]}'><li data-parsoid='{\"dsr\":[25,28,1,0]}'>
a</li></ul>\n</small>\n<ul data-parsoid='{\"dsr\":[38,56,0,0]}'><li
data-parsoid='{\"dsr\":[38,56,1,0]}'> <small
data-parsoid='{\"stx\":\"html\",\"dsr\":[40,56,7,8]}'>b</small></li></ul>\n");
+add("html2html", "2. List embedded in a formatting tag", "<p
data-parsoid='{\"dsr\":[0,23,0,0]}'><small
data-parsoid='{\"stx\":\"html\",\"dsr\":[0,15,7,8]}'></small>\n<small
data-parsoid='{\"stx\":\"html\",\"autoInsertedEnd\":true,\"dsr\":[16,23,7,0]}'></small></p><small
data-parsoid='{\"stx\":\"html\",\"autoInsertedStart\":true,\"dsr\":[23,36,0,8]}'>\n<ul
data-parsoid='{\"dsr\":[24,27,0,0]}'><li data-parsoid='{\"dsr\":[24,27,1,0]}'>
a</li></ul>\n</small>\n<ul data-parsoid='{\"dsr\":[37,55,0,0]}'><li
data-parsoid='{\"dsr\":[37,55,1,0]}'> <small
data-parsoid='{\"stx\":\"html\",\"dsr\":[39,55,7,8]}'>b</small></li></ul>\n");
add("html2html", "Case-sensitive magic words, when cased differently, should
just be template transclusions", "<p
data-parsoid='{\"dsr\":[0,331,0,0]}'>[/index.php?title=Template:CurrentMonth&action=edit&redlink=1
Template:CurrentMonth]\n[/index.php?title=Template:Currentday&action=edit&redlink=1
Template:Currentday]\n[/index.php?title=Template:CURreNTweEK&action=edit&redlink=1
Template:CURreNTweEK]\n[/index.php?title=Template:CurrentHour&action=edit&redlink=1
Template:CurrentHour]</p>\n");
add("html2html", "Nonexistent template", "<p
data-parsoid='{\"dsr\":[0,108,0,0]}'>[/index.php?title=Template:Thistemplatedoesnotexist&action=edit&redlink=1
Template:Thistemplatedoesnotexist]</p>\n");
add("html2html", "Template with invalid target containing wikilink", "<p
data-parsoid='{\"dsr\":[0,17,0,0]}'>{{<a rel=\"mw:WikiLink\"
href=\"./Main_Page\" title=\"Main Page\"
data-parsoid='{\"stx\":\"simple\",\"a\":{\"href\":\"./Main_Page\"},\"sa\":{\"href\":\"Main
Page\"},\"dsr\":[2,15,2,2]}'>Main Page</a>}}</p>\n");
@@ -850,7 +850,7 @@
add("html2wt", "Unbalanced closing block tags break a list\n(php parser relies
on Tidy to fix up)", "<div>\n\n* a\n\n</div>\n<div>\n\n* b\n\n</div>");
add("html2wt", "Unbalanced closing non-block tags don't break a list\n(php
parser relies on Tidy to fix up)", "<span>\n\n* a<span></span>\n*
b\n\n</span>");
add("html2wt", "1. List embedded in a formatting tag", "<small>\n\n*
foo\n\n</small>");
-add("html2wt", "2. List embedded in a formatting tag", "<small></small>\n
<small>\n* a\n</small>\n* <small>b</small>\n");
+add("html2wt", "2. List embedded in a formatting tag",
"<small></small>\n<small>\n* a\n</small>\n* <small>b</small>\n");
add("html2wt", "Magic Word: {{CURRENTDAY}}", "1\n");
add("html2wt", "Magic Word: {{CURRENTDAY2}}", "01\n");
add("html2wt", "Magic Word: {{CURRENTDAYNAME}}", "Thursday\n");
--
To view, visit https://gerrit.wikimedia.org/r/257688
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: I9cf2a8d4127a831be6a0eb96135a472a19d0c7a7
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/services/parsoid
Gerrit-Branch: master
Gerrit-Owner: Arlolra <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits