[MediaWiki-commits] [Gerrit] mediawiki...parsoid[master]: Consolidate separator handling when emitting text

Arlolra (Code Review) Wed, 04 Oct 2017 09:53:40 -0700

Arlolra has uploaded a new change for review. ( https://gerrit.wikimedia.org/r/382186 )


Change subject: Consolidate separator handling when emitting text
......................................................................

Consolidate separator handling when emitting text

Change-Id: If83d9b88c5e3e6947ba82266e479850e94b88822
---
M lib/html2wt/WikitextSerializer.js
1 file changed, 40 insertions(+), 52 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/mediawiki/services/parsoid refs/changes/86/382186/1

diff --git a/lib/html2wt/WikitextSerializer.js b/lib/html2wt/WikitextSerializer.js
index 4c4d9f9..763df6e 100644
--- a/lib/html2wt/WikitextSerializer.js
+++ b/lib/html2wt/WikitextSerializer.js
@@ -799,10 +799,9 @@
 };
 
 /**
- * Serialize the content of a text node
+ * Consolidate separator handling when emitting text
  */
-WSP._serializeTextNode = Promise.method(function(node) {
-       var res = node.nodeValue;
+WSP._serializeText = function(res, node, omitEscaping) {
        var state = this.state;
 
        // Deal with trailing separator-like text (at least 1 newline and other 
whitespace)
@@ -816,32 +815,38 @@
                        state.setSep((state.sep.src || '') + match[0]);
                        res = res.substring(match[0].length);
                }
-
-               var doubleNewlineMatch = 
res.match(this.separatorREs.doubleNewlineRE_G);
-               var doubleNewlineCount = doubleNewlineMatch && 
doubleNewlineMatch.length || 0;
-
-               // Don't strip two newlines for wikitext like this:
-               // <div>foo
-               //
-               // bar</div>
-               // The PHP parser won't create paragraphs on lines that also 
contain
-               // block-level tags.
-               if (!state.inHTMLPre &&
-                       // These conditions are at least safe, given the above 
constraint
-                       (!DU.allChildrenAreText(node.parentNode) || 
doubleNewlineCount > 1)) {
-                       // Strip more than one consecutive newline
-                       res = res.replace(this.separatorREs.doubleNewlineRE_G, 
'\n');
-               }
        }
 
-       // Always escape entities
-       res = Util.escapeEntities(res);
+       if (omitEscaping) {
+               state.emitChunk(res, node);
+       } else {
+               if (!state.inIndentPre) {
+                       var doubleNewlineMatch = 
res.match(this.separatorREs.doubleNewlineRE_G);
+                       var doubleNewlineCount = doubleNewlineMatch && 
doubleNewlineMatch.length || 0;
 
-       // If not in pre context, escape wikitext
-       // XXX refactor: Handle this with escape handlers instead!
-       state.escapeText = (state.onSOL || !state.currNodeUnmodified) && 
!state.inHTMLPre;
-       state.emitChunk(res, node);
-       state.escapeText = false;
+                       // Don't strip two newlines for wikitext like this:
+                       // <div>foo
+                       //
+                       // bar</div>
+                       // The PHP parser won't create paragraphs on lines that 
also contain
+                       // block-level tags.
+                       if (!state.inHTMLPre &&
+                               // These conditions are at least safe, given 
the above constraint
+                               (!DU.allChildrenAreText(node.parentNode) || 
doubleNewlineCount > 1)) {
+                               // Strip more than one consecutive newline
+                               res = 
res.replace(this.separatorREs.doubleNewlineRE_G, '\n');
+                       }
+               }
+
+               // Always escape entities
+               res = Util.escapeEntities(res);
+
+               // If not in pre context, escape wikitext
+               // XXX refactor: Handle this with escape handlers instead!
+               state.escapeText = (state.onSOL || !state.currNodeUnmodified) 
&& !state.inHTMLPre;
+               state.emitChunk(res, node);
+               state.escapeText = false;
+       }
 
        // Move trailing newlines into the next separator
        if (newSepMatch) {
@@ -852,38 +857,21 @@
                        /* SSS FIXME: what are we doing with the stripped NLs?? 
*/
                }
        }
+};
+
+/**
+ * Serialize the content of a text node
+ */
+WSP._serializeTextNode = Promise.method(function(node) {
+       var res = node.nodeValue;
+       this._serializeText(res, node, false);
 });
 
 /**
  * Emit non-separator wikitext that does not need to be escaped
  */
 WSP.emitWikitext = function(res, node) {
-       var state = this.state;
-
-       // Deal with trailing separator-like text (at least 1 newline and other 
whitespace)
-       var newSepMatch = res.match(this.separatorREs.sepSuffixWithNlsRE);
-       res = res.replace(this.separatorREs.sepSuffixWithNlsRE, '');
-
-       if (!state.inIndentPre) {
-               // Strip leading newlines and other whitespace
-               var match = res.match(this.separatorREs.sepPrefixWithNlsRE);
-               if (match) {
-                       state.setSep((state.sep.src || '') + match[0]);
-                       res = res.substring(match[0].length);
-               }
-       }
-
-       state.emitChunk(res, node);
-
-       // Move trailing newlines into the next separator
-       if (newSepMatch) {
-               if (!state.sep.src) {
-                       state.setSep(newSepMatch[0]);
-                       state.updateSep(node);
-               } else {
-                       /* SSS FIXME: what are we doing with the stripped NLs?? 
*/
-               }
-       }
+       this._serializeText(res, node, true);
 };
 
 WSP._getDOMAttribs = function(attribs) {

-- 
To view, visit https://gerrit.wikimedia.org/r/382186
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: If83d9b88c5e3e6947ba82266e479850e94b88822
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/services/parsoid
Gerrit-Branch: master
Gerrit-Owner: Arlolra <[email protected]>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

[MediaWiki-commits] [Gerrit] mediawiki...parsoid[master]: Consolidate separator handling when emitting text

Reply via email to