Arlolra has uploaded a new change for review.
https://gerrit.wikimedia.org/r/204303
Change subject: Convert serializeDOM interface to promise returning func
......................................................................
Convert serializeDOM interface to promise returning func
* Removes the promisify everywhere and prepares for an internal
refactor of selective serializer.
Change-Id: I0eaa5cf0d2524634a993e1f2d6f5579433fb28a6
---
M api/routes.js
M lib/mediawiki.SelectiveSerializer.js
M lib/mediawiki.WikitextSerializer.js
M tests/mocha/parse.js
M tests/parse.js
5 files changed, 109 insertions(+), 110 deletions(-)
git pull ssh://gerrit.wikimedia.org:29418/mediawiki/services/parsoid
refs/changes/03/204303/1
diff --git a/api/routes.js b/api/routes.js
index 3f3db08..a66bd82 100644
--- a/api/routes.js
+++ b/api/routes.js
@@ -114,12 +114,9 @@
// Re-parse the HTML to uncover foster-parenting issues
doc = domino.createDocument( doc.outerHTML );
- var Serializer = selser ? SelectiveSerializer : WikitextSerializer,
- serializer = new Serializer({ env: env });
-
- return Promise.promisify( serializer.serializeDOM, false, serializer )(
- doc.body, false
- ).then(function( out ) {
+ var Serializer = selser ? SelectiveSerializer : WikitextSerializer;
+ var serializer = new Serializer({ env: env });
+ return serializer.serializeDOM(doc.body, false).then(function(out) {
// Strip selser trigger comment
out = out.replace(/<!--rtSelserEditTestComment-->\n*$/, '');
@@ -278,8 +275,6 @@
startTimers.set( 'html2wt.init.domparse', Date.now() );
}
- var doc = DU.parseHTML( html );
-
// send domparse time, input size and init time to
statsd/Graphite
// init time is the time elapsed before serialization
// init.domParse, a component of init time, is the time elapsed
from html string to DOM tree
@@ -291,21 +286,22 @@
Date.now() - startTimers.get( 'html2wt.init' ));
}
- var Serializer = parsoidConfig.useSelser ? SelectiveSerializer
: WikitextSerializer,
- serializer = new Serializer({ env: env, oldid: env.page.id });
+ var doc = DU.parseHTML(html);
if ( v2 && v2.original && v2.original["data-parsoid"] ) {
DU.applyDataParsoid( doc,
v2.original["data-parsoid"].body );
}
+
if ( v2 && v2.original && v2.original.html ) {
env.page.dom = DU.parseHTML( v2.original.html.body
).body;
if ( v2.original["data-parsoid"] ) {
DU.applyDataParsoid(
env.page.dom.ownerDocument, v2.original["data-parsoid"].body );
}
}
- return Promise.promisify( serializer.serializeDOM, false,
serializer )(
- doc.body, false
- );
+
+ var Serializer = parsoidConfig.useSelser ? SelectiveSerializer
: WikitextSerializer;
+ var serializer = new Serializer({ env: env, oldid: env.page.id
});
+ return serializer.serializeDOM(doc.body, false);
}).timeout( REQ_TIMEOUT ).then(function( output ) {
var contentType =
'text/plain;profile=mediawiki.org/specs/wikitext/1.0.0;charset=utf-8';
if ( v2 ) {
diff --git a/lib/mediawiki.SelectiveSerializer.js
b/lib/mediawiki.SelectiveSerializer.js
index c41a81c..e52f672 100644
--- a/lib/mediawiki.SelectiveSerializer.js
+++ b/lib/mediawiki.SelectiveSerializer.js
@@ -7,14 +7,15 @@
'use strict';
-var WikitextSerializer = require( './mediawiki.WikitextSerializer.js'
).WikitextSerializer,
- Util = require( './mediawiki.Util.js' ).Util,
- DU = require('./mediawiki.DOMUtils.js').DOMUtils,
- ParserPipelineFactory =
require('./mediawiki.parser.js').ParserPipelineFactory,
- DOMDiff = require('./mediawiki.DOMDiff.js').DOMDiff,
- ParsoidCacheRequest =
require('./mediawiki.ApiRequest.js').ParsoidCacheRequest,
- normalizeDOM = require('./wts.normalizeDOM.js').normalizeDOM,
- async = require('async');
+var WikitextSerializer = require( './mediawiki.WikitextSerializer.js'
).WikitextSerializer;
+var Util = require( './mediawiki.Util.js' ).Util;
+var DU = require('./mediawiki.DOMUtils.js').DOMUtils;
+var JSUtils = require('./jsutils.js').JSUtils;
+var ParserPipelineFactory =
require('./mediawiki.parser.js').ParserPipelineFactory;
+var DOMDiff = require('./mediawiki.DOMDiff.js').DOMDiff;
+var ParsoidCacheRequest =
require('./mediawiki.ApiRequest.js').ParsoidCacheRequest;
+var normalizeDOM = require('./wts.normalizeDOM.js').normalizeDOM;
+var async = require('async');
/**
* @class
@@ -51,10 +52,9 @@
*
* @param {Error} err
* @param {Node} body
- * @param {Function} finalcb The callback for when we've finished serializing
the DOM.
+ * @param {Function} cb The callback for when we've finished serializing the
DOM.
*/
-SSP.doSerializeDOM = function( err, body, finalcb ) {
- var self = this;
+SSP.doSerializeDOM = function( err, body, cb ) {
var startTimers = new Map();
if ( err || (!this.env.page.dom && !this.env.page.domdiff) ||
!this.env.page.src ) {
@@ -63,7 +63,7 @@
}
// If there's no old source, fall back to non-selective
serialization.
- this.wts.serializeDOM( body, false, finalcb );
+ this.wts.serializeDOM( body, false, cb );
if ( this.timer ) {
this.timer.timing( 'html2wt.full.serialize', '', (
Date.now() - startTimers.get( 'html2wt.full.serialize' )) );
@@ -96,7 +96,7 @@
}
// Call the WikitextSerializer to do our bidding
- this.wts.serializeDOM( body, true, finalcb );
+ this.wts.serializeDOM( body, true, cb );
if ( this.timer ) {
this.timer.timing( 'html2wt.selser.serialize',
'', ( Date.now() - startTimers.get( 'html2wt.selser.serialize' )) );
@@ -104,7 +104,7 @@
} else {
// Nothing was modified, just re-use the original source
- finalcb( null, this.env.page.src );
+ cb( null, this.env.page.src );
}
}
};
@@ -116,12 +116,12 @@
* Parse the wikitext source of the page for DOM-diffing purposes.
*
* @param {Node} body The node for which we're getting the source.
- * @param {Function} finalcb The callback for after we've serialized the
entire document.
+ * @param {Function} cb The callback for after we've serialized the entire
document.
* @param {Error} err
* @param {string} src The wikitext source of the document (optionally
* including page metadata)
*/
-SSP.parseOriginalSource = function( body, finalcb, err, src ) {
+SSP.parseOriginalSource = function( body, cb, err, src ) {
var self = this,
parserPipelineFactory = new ParserPipelineFactory( this.env ),
parserPipeline = parserPipelineFactory.getPipeline(
'text/x-mediawiki/full' );
@@ -133,7 +133,7 @@
// doSerializeDOM
parserPipeline.once( 'document', function( origDoc ) {
self.env.page.dom = DU.parseHTML( DU.serializeNode( origDoc )
).body;
- self.doSerializeDOM( null, body, finalcb );
+ self.doSerializeDOM( null, body, cb );
} );
parserPipeline.processToplevelDoc( this.env.page.src );
};
@@ -147,15 +147,17 @@
*
* @param {Node} body The document to serialize.
* @param {null} dummy Preserves the wt serializer interface.
- * @param {Function} finalcb The callback fired on completion of the
serialization.
+ * @param {Function} cb The callback fired on completion of the serialization.
*/
-SSP.serializeDOM = function( body, dummy, finalcb ) {
+SSP.serializeDOM = function(body, dummy, cb) {
+ cb = JSUtils.mkPromised(cb);
+
var self = this;
if ( this.env.page.dom || this.env.page.domdiff ) {
- this.doSerializeDOM( null, body, finalcb );
+ this.doSerializeDOM( null, body, cb );
} else if ( this.env.page.src ) {
// Have the src, only parse the src to the dom
- this.parseOriginalSource( body, finalcb, null,
this.env.page.src );
+ this.parseOriginalSource( body, cb, null, this.env.page.src );
} else if ( this.env.page.id && this.env.page.id !== '0' ) {
// Start by getting the old text of this page
if ( this.env.conf.parsoid.parsoidCacheURI ) {
@@ -181,18 +183,19 @@
// Selective serialization if there was no
error, full
// serialization if there was one.
- self.doSerializeDOM( err, body, finalcb );
+ self.doSerializeDOM( err, body, cb );
});
} else {
Util.getPageSrc(
this.env, this.env.page.name, this.env.page.id
|| null,
- this.parseOriginalSource.bind( this, body,
finalcb )
+ this.parseOriginalSource.bind( this, body, cb )
);
}
} else {
- this.doSerializeDOM( null, body, finalcb );
+ this.doSerializeDOM( null, body, cb );
}
+ return cb.promise;
};
if ( typeof module === 'object' ) {
diff --git a/lib/mediawiki.WikitextSerializer.js
b/lib/mediawiki.WikitextSerializer.js
index d542cab..d878b6a 100644
--- a/lib/mediawiki.WikitextSerializer.js
+++ b/lib/mediawiki.WikitextSerializer.js
@@ -78,7 +78,13 @@
WSP.serializeHTML = function(opts, html) {
opts.logType = this.logType;
- return (new
WikitextSerializer(opts)).serializeDOM(DU.parseHTML(html).body);
+ // FIXME: can we remove this block?
+ try {
+ var body = DU.parseHTML(html).body;
+ return (new WikitextSerializer(opts))._serializeDOM(body);
+ } catch (err) {
+ opts.env.log("fatal", err);
+ }
};
WSP.getAttributeKey = function(node, key) {
@@ -93,7 +99,6 @@
}
}
}
-
return key;
};
@@ -113,7 +118,6 @@
}
}
}
-
return value;
};
@@ -1361,79 +1365,78 @@
}
/**
- * Serialize an HTML DOM document.
+ * Serialize an HTML DOM document synchronously.
+ * WARNING: You probably want to use the wrapper below.
*/
-WSP.serializeDOM = function( body, selserMode, finalCB ) {
+WSP._serializeDOM = function(body, selserMode) {
this.logType = selserMode ? "trace/selser" : "trace/wts";
this.trace = this.env.log.bind(this.env, this.logType);
if (!this.env.page.editedDoc) {
this.env.page.editedDoc = body.ownerDocument;
}
+
var state = new SerializerState(this, this.options);
- try {
- if (!selserMode) {
- // Normalize the DOM
- normalizeDOM(body, state.env);
- }
- // Don't serialize the DOM if debugging is disabled
- this.env.log(this.logType, function() {
- return "--- DOM --- \n" + body.outerHTML +
"\n-----------";
- });
-
- var out = '';
-
- // Init state
- state.selserMode = selserMode || false;
- state.sep.lastSourceNode = body;
- state.currLine.firstNode = body.firstChild;
-
- // Wrapper CB for every chunk that emits any required separators
- // before emitting the chunk itself.
- var chunkCBWrapper = function(chunk, node, atEOF) {
- var accum = function(o) { out += o; };
- state.emitSepAndOutput(chunk, node, accum, "OUT:");
- state.atStartOfOutput = false;
- if (atEOF === 'EOF') {
- state.flushLine(accum);
- }
- };
-
- // Kick it off
- state.serializeChildren( body, chunkCBWrapper );
-
- // Handle EOF
- chunkCBWrapper( '', body, 'EOF' );
-
- if (state.hasIndentPreNowikis) {
- // FIXME: Perhaps this can be done on a per-line basis
- // rather than do one post-pass on the entire document.
- //
- // Strip excess/useless nowikis
- out = stripUnnecessaryIndentPreNowikis( this.env, out );
- }
-
- if (state.hasQuoteNowikis) {
- // FIXME: Perhaps this can be done on a per-line basis
- // rather than do one post-pass on the entire document.
- //
- // Strip excess/useless nowikis
- out = stripUnnecessaryQuoteNowikis(out);
- }
-
- if ( finalCB && typeof finalCB === 'function' ) {
- finalCB( null, out );
- }
-
- return out;
- } catch (e) {
- if ( finalCB && typeof finalCB === 'function' ) {
- finalCB( e );
- } else {
- state.env.log("fatal", e);
- }
+ if (!selserMode) {
+ // Normalize the DOM
+ normalizeDOM(body, state.env);
}
+
+ // Don't serialize the DOM if debugging is disabled
+ this.trace(function() {
+ return "--- DOM --- \n" + body.outerHTML + "\n-----------";
+ });
+
+ var out = '';
+
+ // Init state
+ state.selserMode = selserMode || false;
+ state.sep.lastSourceNode = body;
+ state.currLine.firstNode = body.firstChild;
+
+ // Wrapper CB for every chunk that emits any required separators
+ // before emitting the chunk itself.
+ var chunkCBWrapper = function(chunk, node, atEOF) {
+ var accum = function(o) { out += o; };
+ state.emitSepAndOutput(chunk, node, accum, "OUT:");
+ state.atStartOfOutput = false;
+ if (atEOF === 'EOF') {
+ state.flushLine(accum);
+ }
+ };
+
+ // Kick it off
+ state.serializeChildren(body, chunkCBWrapper);
+
+ // Handle EOF
+ chunkCBWrapper('', body, 'EOF');
+
+ if (state.hasIndentPreNowikis) {
+ // FIXME: Perhaps this can be done on a per-line basis
+ // rather than do one post-pass on the entire document.
+ //
+ // Strip excess/useless nowikis
+ out = stripUnnecessaryIndentPreNowikis(this.env, out);
+ }
+
+ if (state.hasQuoteNowikis) {
+ // FIXME: Perhaps this can be done on a per-line basis
+ // rather than do one post-pass on the entire document.
+ //
+ // Strip excess/useless nowikis
+ out = stripUnnecessaryQuoteNowikis(out);
+ }
+
+ return out;
+};
+
+// Promise returning wrapper of _serializeDOM
+WSP.serializeDOM = function(body, selserMode, cb) {
+ var self = this;
+ return Promise.resolve().then(function() {
+ return self._serializeDOM(body, selserMode);
+ }).nodify(cb);
};
if (typeof module === "object") {
diff --git a/tests/mocha/parse.js b/tests/mocha/parse.js
index e373ff9..1a3d54b 100644
--- a/tests/mocha/parse.js
+++ b/tests/mocha/parse.js
@@ -55,9 +55,7 @@
DU.applyDataParsoid( doc, dp );
}
var serializer = new WikitextSerializer({ env:
env });
- return Promise.promisify(
- serializer.serializeDOM, false,
serializer
- )( doc.body, false );
+ return serializer.serializeDOM(doc.body, false);
});
};
diff --git a/tests/parse.js b/tests/parse.js
index 50646d2..2d49eef 100755
--- a/tests/parse.js
+++ b/tests/parse.js
@@ -131,9 +131,10 @@
var startsAtWikitext;
var startsAtHTML = function( argv, env, input, dp ) {
- var serializer, doc = DU.parseHTML( input );
+ var doc = DU.parseHTML(input);
dp = dp || dpFromHead( doc );
+ var serializer;
if ( argv.selser ) {
dp = dp || dpFromHead( env.page.dom.ownerDocument );
if ( dp ) {
@@ -148,9 +149,7 @@
DU.applyDataParsoid( doc, dp );
}
- return Promise.promisify( serializer.serializeDOM, false, serializer )(
- doc.body, false
- ).then(function(out) {
+ return serializer.serializeDOM(doc.body, false).then(function(out) {
if ( argv.html2wt || argv.wt2wt ) {
return { trailingNL: true, out: out };
} else {
--
To view, visit https://gerrit.wikimedia.org/r/204303
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: I0eaa5cf0d2524634a993e1f2d6f5579433fb28a6
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/services/parsoid
Gerrit-Branch: master
Gerrit-Owner: Arlolra <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits