jenkins-bot has submitted this change and it was merged.
Change subject: Refactor wts to emphasize cb is an accumulator
......................................................................
Refactor wts to emphasize cb is an accumulator
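* The newly passing test (removed from tests/parserTests-blacklist.js) comes
from preserving `atStartOfOutput` across the serializeToString use: the flag
is now saved and restored in `_serializeChildrenToString`.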
Change-Id: I8e3b6c60c97d58f63b0d52e030b4c51055060bc6
---
M lib/html2wt/ConstrainedText.js
M lib/html2wt/SerializerState.js
M lib/html2wt/WikitextSerializer.js
M tests/parserTests-blacklist.js
4 files changed, 33 insertions(+), 34 deletions(-)
Approvals:
Subramanya Sastry: Looks good to me, approved
jenkins-bot: Verified
diff --git a/lib/html2wt/ConstrainedText.js b/lib/html2wt/ConstrainedText.js
index 85cc33e..1068755 100644
--- a/lib/html2wt/ConstrainedText.js
+++ b/lib/html2wt/ConstrainedText.js
@@ -27,7 +27,7 @@
* processing).
* @private
*/
-var escapeLine = function(line, cb) {
+var escapeLine = function(line) {
// The left context will be precise (that is, it is the result
// of `ConstrainedText#escape` and will include any escapes
// triggered by chunks on the left), but the right context
@@ -51,12 +51,8 @@
// Process the escapes for this chunk, given escaped previous chunk
state.rightContext = state.rightContext.slice(chunk.text.length);
var thisEscape = chunk.escape(state);
- var origPos = state.leftContext.length;
state.leftContext +=
(thisEscape.prefix || '') + thisEscape.text +
(thisEscape.suffix || '');
- if (cb) {
- cb(state.leftContext.slice(origPos), chunk.node);
- }
if (thisEscape.greedy) {
// protect the left context: this will be matched greedily
// by this chunk, so there's no chance that a subsequent
diff --git a/lib/html2wt/SerializerState.js b/lib/html2wt/SerializerState.js
index ee69f37..c9d2f54 100644
--- a/lib/html2wt/SerializerState.js
+++ b/lib/html2wt/SerializerState.js
@@ -132,6 +132,7 @@
Util.extendProps(this, Util.clone(options), Util.clone(initialState));
this.resetCurrLine(null);
this.singleLineContext = new SingleLineContext();
+ this.out = '';
}
var SSP = SerializerState.prototype;
@@ -151,8 +152,8 @@
/**
*/
-SSP.flushLine = function(cb) {
- escapeLine(this.currLine.chunks, cb);
+SSP.flushLine = function() {
+ this.out += escapeLine(this.currLine.chunks);
this.currLine.chunks.length = 0;
};
@@ -244,7 +245,7 @@
/**
*/
-SSP.emitSepAndOutput = function(res, node, cb, logPrefix) {
+SSP.emitSepAndOutput = function(res, node, logPrefix) {
res = ConstrainedText.cast(res, node);
// Replace newlines if we're in a single-line context
@@ -327,7 +328,7 @@
if (this.onSOL) {
// process escapes in our full line
- this.flushLine(cb);
+ this.flushLine();
this.resetCurrLine(node);
}
@@ -432,6 +433,9 @@
if (!solRE.test(res)) {
this.onSOL = false;
}
+
+ // We've emitted something, so we're no longer at SOO (start of output).
+ this.atStartOfOutput = false;
};
/**
@@ -441,29 +445,36 @@
SSP._serializeChildrenToString = function(node, wtEscaper, inState) {
// FIXME: Make sure that the separators emitted here conform to the
// syntactic constraints of syntactic context.
- var bits = '';
var oldSep = this.sep;
var oldSOL = this.onSOL;
+ var oldOut = this.out;
+ var oldStart = this.atStartOfOutput;
var oldChunks = this.currLine.chunks;
- // appendToBits just ignores anything returned but
- // the source, but that is fine. Selser etc is handled in
- // the top level callback at a slightly coarser level.
- var appendToBits = function(out) { bits += out; };
var self = this;
var cb = function(res, node) {
- self.emitSepAndOutput(res, node, appendToBits, "OUT(C):");
+ self.emitSepAndOutput(res, node, "OUT(C):");
};
+
+ this.out = '';
this.sep = {};
this.onSOL = false;
+ this.atStartOfOutput = false;
this.currLine.chunks = [];
this[inState] = true;
+
this.serializeChildren(node, cb, wtEscaper);
- this.flushLine(appendToBits);
- self.serializer.buildAndEmitSep(this, appendToBits, node);
+ // Emit child-parent seps.
+ cb('', node);
+ // We've reached EOF, flush the remaining buffered text.
+ this.flushLine();
+
// restore the state
+ var bits = this.out;
+ this.out = oldOut;
this[inState] = false;
this.sep = oldSep;
this.onSOL = oldSOL;
+ this.atStartOfOutput = oldStart;
this.currLine.chunks = oldChunks;
return bits;
};
diff --git a/lib/html2wt/WikitextSerializer.js b/lib/html2wt/WikitextSerializer.js
index 5905c6e..cbb345e 100644
--- a/lib/html2wt/WikitextSerializer.js
+++ b/lib/html2wt/WikitextSerializer.js
@@ -1366,34 +1366,27 @@
return "--- DOM --- \n" + body.outerHTML + "\n-----------";
});
- var out = '';
-
state.sep.lastSourceNode = body;
state.currLine.firstNode = body.firstChild;
// Wrapper CB for every chunk that emits any required separators
// before emitting the chunk itself.
- var chunkCBWrapper = function(chunk, node, atEOF) {
- var accum = function(o) { out += o; };
- state.emitSepAndOutput(chunk, node, accum, "OUT:");
- state.atStartOfOutput = false;
- if (atEOF === 'EOF') {
- state.flushLine(accum);
- }
+ var chunkCBWrapper = function(chunk, node) {
+ state.emitSepAndOutput(chunk, node, "OUT:");
};
- // Kick it off
state.serializeChildren(body, chunkCBWrapper);
-
- // Handle EOF
- chunkCBWrapper('', body, 'EOF');
+ // Emit child-parent seps.
+ chunkCBWrapper('', body);
+ // We've reached EOF, flush the remaining buffered text.
+ state.flushLine();
if (state.hasIndentPreNowikis) {
// FIXME: Perhaps this can be done on a per-line basis
// rather than do one post-pass on the entire document.
//
// Strip excess/useless nowikis
- out = stripUnnecessaryIndentPreNowikis(this.env, out);
+ state.out = stripUnnecessaryIndentPreNowikis(this.env, state.out);
}
if (state.hasQuoteNowikis) {
@@ -1401,10 +1394,10 @@
// rather than do one post-pass on the entire document.
//
// Strip excess/useless nowikis
- out = stripUnnecessaryQuoteNowikis(out);
+ state.out = stripUnnecessaryQuoteNowikis(state.out);
}
- return out;
+ return state.out;
};
diff --git a/tests/parserTests-blacklist.js b/tests/parserTests-blacklist.js
index 2fee96d..30c9a80 100644
--- a/tests/parserTests-blacklist.js
+++ b/tests/parserTests-blacklist.js
@@ -526,7 +526,6 @@
add("html2html", "Link to image page- image page normally doesn't exists,
hence edit link\nAdd test with existing image page\n#<p><a
href=\"/wiki/File:Test\" title=\"Image:Test\">Image:test</a>", "<p
data-parsoid='{\"dsr\":[0,61,0,0]}'>[/index.php?title=File:Test&action=edit&redlink=1
Image:test]</p>\n");
add("html2html", "bug 18784 Link to non-existent image page with caption
should use caption as link text", "<p
data-parsoid='{\"dsr\":[0,58,0,0]}'>[/index.php?title=File:Test&action=edit&redlink=1
caption]</p>\n");
add("html2html", "SVG thumbnails with invalid language code", "<figure
class=\"mw-default-size\" typeof=\"mw:Image/Thumb\"
data-parsoid='{\"optList\":[{\"ck\":\"thumbnail\",\"ak\":\"thumb\"},{\"ck\":\"lang\",\"ak\":\"lang=invalid.language.code\"}],\"dsr\":[0,52,2,2]}'><a
href=\"./File:Foobar.svg\"
data-parsoid='{\"a\":{\"href\":\"./File:Foobar.svg\"},\"sa\":{},\"dsr\":[2,50,null,null]}'><img
resource=\"./File:Foobar.svg\"
src=\"//example.com/images/thumb/f/ff/Foobar.svg/220px-Foobar.svg\"
lang=\"invalid.language.code\" data-file-width=\"240\" data-file-height=\"180\"
data-file-type=\"drawing\" height=\"165\" width=\"220\"
data-parsoid='{\"a\":{\"resource\":\"./File:Foobar.svg\",\"lang\":\"invalid.language.code\",\"height\":\"165\",\"width\":\"220\"},\"sa\":{\"resource\":\"File:Foobar.svg\",\"lang\":\"lang=invalid.language.code\"}}'/></a></figure>\n");
-add("html2html", "Image: caption containing a table", "<figure
typeof=\"mw:Image/Thumb\"
data-parsoid='{\"optList\":[{\"ck\":\"thumbnail\",\"ak\":\"thumb\"},{\"ck\":\"width\",\"ak\":\"200x200px\"},{\"ck\":\"bogus\",\"ak\":\"This
is an example image thumbnail caption with a
table\\n{\"},{\"ck\":\"bogus\",\"ak\":\"\\n\\n!Foo
!Bar\\n\"},{\"ck\":\"bogus\",\"ak\":\"-\\n\"},{\"ck\":\"bogus\",\"ak\":\"Foo1
\\n\"},{\"ck\":\"bogus\",\"ak\":\"Bar1\"},{\"ck\":\"caption\",\"ak\":\"}\\nand
some more text.\"}],\"dsr\":[0,143,2,2]}'><a href=\"./File:Foobar.jpg\"
data-parsoid='{\"a\":{\"href\":\"./File:Foobar.jpg\"},\"sa\":{},\"dsr\":[2,null,null,null]}'><img
resource=\"./File:Foobar.jpg\"
src=\"//example.com/images/thumb/3/3a/Foobar.jpg/200px-Foobar.jpg\"
data-file-width=\"1941\" data-file-height=\"220\" data-file-type=\"bitmap\"
height=\"23\" width=\"200\"
data-parsoid='{\"a\":{\"resource\":\"./File:Foobar.jpg\",\"height\":\"23\",\"width\":\"200\"},\"sa\":{\"resource\":\"File:Foobar.jpg\"}}'/></a><figcaption
data-parsoid='{\"dsr\":[null,141,null,null]}'>}\nand some more
text.</figcaption></figure>\n");
add("html2html", "T93580: 3. Templated <ref> inside inline images", "<p
data-parsoid='{\"dsr\":[0,98,0,0]}'><span class=\"mw-default-size\"
typeof=\"mw:Image\"
data-parsoid='{\"optList\":[{\"ck\":\"caption\",\"ak\":\"Undisplayed caption in
inline image with ref:
{{echo|<ref>{{echo|foo}}</ref>}}\"}],\"dsr\":[0,98,null,null]}'
data-mw='{\"caption\":\"Undisplayed caption in inline image with ref: <span
about=\\\"#mwt3\\\" class=\\\"mw-ref\\\" id=\\\"cite_ref-1\\\"
rel=\\\"dc:references\\\" typeof=\\\"mw:Transclusion mw:Extension/ref\\\"
data-parsoid=\\\"{&quot;dsr&quot;:[64,96,null,null],&quot;pi&quot;:[[{&quot;k&quot;:&quot;1&quot;,&quot;spc&quot;:[&quot;&quot;,&quot;&quot;,&quot;&quot;,&quot;&quot;]}]]}\\\"
data-mw=\\\"{&quot;parts&quot;:[{&quot;template&quot;:{&quot;target&quot;:{&quot;wt&quot;:&quot;echo&quot;,&quot;href&quot;:&quot;./Template:Echo&quot;},&quot;params&quot;:{&quot;1&quot;:{&quot;wt&quot;:&quot;<ref>{{echo|foo}}</ref>&quot;}},&quot;i&quot;:0}}]}\\\"><a
href=\\\"#cite_note-1\\\" style=\\\"counter-reset: mw-Ref 1;\\\"><span
class=\\\"mw-reflink-text\\\">[1]</span></a></span><meta
typeof=\\\"mw:Transclusion mw:Extension/ref/Marker\\\" about=\\\"#mwt3\\\"
data-parsoid=\\\"{&quot;group&quot;:&quot;&quot;,&quot;name&quot;:&quot;&quot;,&quot;content&quot;:&quot;foo&quot;,&quot;hasRefInRef&quot;:false,&quot;dsr&quot;:[64,96,null,null],&quot;pi&quot;:[[{&quot;k&quot;:&quot;1&quot;,&quot;spc&quot;:[&quot;&quot;,&quot;&quot;,&quot;&quot;,&quot;&quot;]}]],&quot;tmp&quot;:{}}\\\"
data-mw=\\\"{&quot;parts&quot;:[{&quot;template&quot;:{&quot;target&quot;:{&quot;wt&quot;:&quot;echo&quot;,&quot;href&quot;:&quot;./Template:Echo&quot;},&quot;params&quot;:{&quot;1&quot;:{&quot;wt&quot;:&quot;<ref>{{echo|foo}}</ref>&quot;}},&quot;i&quot;:0}}]}\\\">\"}'><a
href=\"./File:Foobar.jpg\"
data-parsoid='{\"a\":{\"href\":\"./File:Foobar.jpg\"},\"sa\":{}}'><img
resource=\"./File:Foobar.jpg\" src=\"//example.com/images/3/3a/Foobar.jpg\"
data-file-width=\"1941\" data-file-height=\"220\" data-file-type=\"bitmap\"
height=\"220\" width=\"1941\"
data-parsoid='{\"a\":{\"resource\":\"./File:Foobar.jpg\",\"height\":\"220\",\"width\":\"1941\"},\"sa\":{\"resource\":\"File:Foobar.jpg\"}}'/></a></span></p>\n\n<ol
class=\"mw-references\" typeof=\"mw:Extension/references\" about=\"#mwt7\"
data-parsoid='{\"dsr\":[100,114,2,2]}'
data-mw='{\"name\":\"references\",\"attrs\":{}}'><li about=\"#cite_note-1\"
id=\"cite_note-1\"><a href=\"#cite_ref-1\" rel=\"mw:referencedBy\"><span
class=\"mw-linkback-text\">↑ </span></a> <span
id=\"mw-reference-text-cite_note-1\" class=\"mw-reference-text\"
data-parsoid=\"{}\">foo</span></li></ol>");
add("html2html", "Subpage link", "<p data-parsoid='{\"dsr\":[0,38,0,0]}'><a
rel=\"mw:WikiLink\" href=\"./Wiki/Subpage_test/subpage\" title=\"Wiki/Subpage
test/subpage\"
data-parsoid='{\"stx\":\"piped\",\"a\":{\"href\":\"./Wiki/Subpage_test/subpage\"},\"sa\":{\"href\":\"wiki/Subpage
test/subpage\"},\"dsr\":[0,38,28,2]}'>/subpage</a></p>\n");
add("html2html", "Subpage noslash link", "<p
data-parsoid='{\"dsr\":[0,37,0,0]}'><a rel=\"mw:WikiLink\"
href=\"./Wiki/Subpage_test/subpage\" title=\"Wiki/Subpage test/subpage\"
data-parsoid='{\"stx\":\"piped\",\"a\":{\"href\":\"./Wiki/Subpage_test/subpage\"},\"sa\":{\"href\":\"wiki/Subpage
test/subpage\"},\"dsr\":[0,37,28,2]}'>subpage</a></p>\n");
--
To view, visit https://gerrit.wikimedia.org/r/250603
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: I8e3b6c60c97d58f63b0d52e030b4c51055060bc6
Gerrit-PatchSet: 5
Gerrit-Project: mediawiki/services/parsoid
Gerrit-Branch: master
Gerrit-Owner: Arlolra <[email protected]>
Gerrit-Reviewer: Arlolra <[email protected]>
Gerrit-Reviewer: Cscott <[email protected]>
Gerrit-Reviewer: Subramanya Sastry <[email protected]>
Gerrit-Reviewer: Tim Starling <[email protected]>
Gerrit-Reviewer: jenkins-bot <>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits