Arlolra has uploaded a new change for review. ( https://gerrit.wikimedia.org/r/382186 )
Change subject: Consolidate separator handling when emitting text
......................................................................
Consolidate separator handling when emitting text
Change-Id: If83d9b88c5e3e6947ba82266e479850e94b88822
---
M lib/html2wt/WikitextSerializer.js
1 file changed, 40 insertions(+), 52 deletions(-)
git pull ssh://gerrit.wikimedia.org:29418/mediawiki/services/parsoid refs/changes/86/382186/1
diff --git a/lib/html2wt/WikitextSerializer.js b/lib/html2wt/WikitextSerializer.js
index 4c4d9f9..763df6e 100644
--- a/lib/html2wt/WikitextSerializer.js
+++ b/lib/html2wt/WikitextSerializer.js
@@ -799,10 +799,9 @@
};
/**
- * Serialize the content of a text node
+ * Consolidate separator handling when emitting text
*/
-WSP._serializeTextNode = Promise.method(function(node) {
- var res = node.nodeValue;
+WSP._serializeText = function(res, node, omitEscaping) {
var state = this.state;
// Deal with trailing separator-like text (at least 1 newline and other
whitespace)
@@ -816,32 +815,38 @@
state.setSep((state.sep.src || '') + match[0]);
res = res.substring(match[0].length);
}
-
- var doubleNewlineMatch =
res.match(this.separatorREs.doubleNewlineRE_G);
- var doubleNewlineCount = doubleNewlineMatch &&
doubleNewlineMatch.length || 0;
-
- // Don't strip two newlines for wikitext like this:
- // <div>foo
- //
- // bar</div>
- // The PHP parser won't create paragraphs on lines that also
contain
- // block-level tags.
- if (!state.inHTMLPre &&
- // These conditions are at least safe, given the above
constraint
- (!DU.allChildrenAreText(node.parentNode) ||
doubleNewlineCount > 1)) {
- // Strip more than one consecutive newline
- res = res.replace(this.separatorREs.doubleNewlineRE_G,
'\n');
- }
}
- // Always escape entities
- res = Util.escapeEntities(res);
+ if (omitEscaping) {
+ state.emitChunk(res, node);
+ } else {
+ if (!state.inIndentPre) {
+ var doubleNewlineMatch =
res.match(this.separatorREs.doubleNewlineRE_G);
+ var doubleNewlineCount = doubleNewlineMatch &&
doubleNewlineMatch.length || 0;
- // If not in pre context, escape wikitext
- // XXX refactor: Handle this with escape handlers instead!
- state.escapeText = (state.onSOL || !state.currNodeUnmodified) &&
!state.inHTMLPre;
- state.emitChunk(res, node);
- state.escapeText = false;
+ // Don't strip two newlines for wikitext like this:
+ // <div>foo
+ //
+ // bar</div>
+ // The PHP parser won't create paragraphs on lines that
also contain
+ // block-level tags.
+ if (!state.inHTMLPre &&
+ // These conditions are at least safe, given
the above constraint
+ (!DU.allChildrenAreText(node.parentNode) ||
doubleNewlineCount > 1)) {
+ // Strip more than one consecutive newline
+ res =
res.replace(this.separatorREs.doubleNewlineRE_G, '\n');
+ }
+ }
+
+ // Always escape entities
+ res = Util.escapeEntities(res);
+
+ // If not in pre context, escape wikitext
+ // XXX refactor: Handle this with escape handlers instead!
+ state.escapeText = (state.onSOL || !state.currNodeUnmodified)
&& !state.inHTMLPre;
+ state.emitChunk(res, node);
+ state.escapeText = false;
+ }
// Move trailing newlines into the next separator
if (newSepMatch) {
@@ -852,38 +857,21 @@
/* SSS FIXME: what are we doing with the stripped NLs??
*/
}
}
+};
+
+/**
+ * Serialize the content of a text node
+ */
+WSP._serializeTextNode = Promise.method(function(node) {
+ var res = node.nodeValue;
+ this._serializeText(res, node, false);
});
/**
* Emit non-separator wikitext that does not need to be escaped
*/
WSP.emitWikitext = function(res, node) {
- var state = this.state;
-
- // Deal with trailing separator-like text (at least 1 newline and other
whitespace)
- var newSepMatch = res.match(this.separatorREs.sepSuffixWithNlsRE);
- res = res.replace(this.separatorREs.sepSuffixWithNlsRE, '');
-
- if (!state.inIndentPre) {
- // Strip leading newlines and other whitespace
- var match = res.match(this.separatorREs.sepPrefixWithNlsRE);
- if (match) {
- state.setSep((state.sep.src || '') + match[0]);
- res = res.substring(match[0].length);
- }
- }
-
- state.emitChunk(res, node);
-
- // Move trailing newlines into the next separator
- if (newSepMatch) {
- if (!state.sep.src) {
- state.setSep(newSepMatch[0]);
- state.updateSep(node);
- } else {
- /* SSS FIXME: what are we doing with the stripped NLs??
*/
- }
- }
+ this._serializeText(res, node, true);
};
WSP._getDOMAttribs = function(attribs) {
--
To view, visit https://gerrit.wikimedia.org/r/382186
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: If83d9b88c5e3e6947ba82266e479850e94b88822
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/services/parsoid
Gerrit-Branch: master
Gerrit-Owner: Arlolra <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits