jenkins-bot has submitted this change and it was merged.
Change subject: Refactor roundtrip-test.js using promises
......................................................................
Refactor roundtrip-test.js using promises
* Reviewers should pay particular attention to the FIXMEs where some
things that need clarifying are documented.
Change-Id: I646f8a916add26ba60171ed31d1593c6bb6b63c5
---
M api/routes.js
M lib/mediawiki.ApiRequest.js
M tests/client/client.js
M tests/roundtrip-test.js
4 files changed, 419 insertions(+), 492 deletions(-)
Approvals:
Subramanya Sastry: Looks good to me, approved
jenkins-bot: Verified
diff --git a/api/routes.js b/api/routes.js
index d411066..a086d32 100644
--- a/api/routes.js
+++ b/api/routes.js
@@ -99,20 +99,6 @@
// Helpers
-var promiseTemplateReq = function( env, target, oldid ) {
- return new Promise(function( resolve, reject ) {
- var tpr = new TemplateRequest( env, target, oldid );
- tpr.once('src', function( err, src_and_metadata ) {
- if ( err ) {
- reject( err );
- } else {
- env.setPageSrcInfo( src_and_metadata );
- resolve();
- }
- });
- });
-};
-
var logTime = function( env, res, str ) {
env.log( "info", util.format(
"completed %s in %s ms", str, Date.now() - res.local("start")
@@ -508,7 +494,7 @@
env.setPageSrcInfo( wt );
p = Promise.resolve();
} else {
- p = promiseTemplateReq( env, target, oldid );
+ p = TemplateRequest.setPageSrcInfo(env, target, oldid);
}
if ( typeof wt === 'string' ) {
@@ -662,7 +648,7 @@
oldid = req.query.oldid;
}
- var p = promiseTemplateReq( env, target, oldid ).then(
+ var p = TemplateRequest.setPageSrcInfo(env, target, oldid).then(
parse.bind( null, env, req, res )
).then(
roundTripDiff.bind( null, env, req, res, false )
@@ -685,7 +671,7 @@
oldid = req.query.oldid;
}
- var p = promiseTemplateReq( env, target, oldid ).then(
+ var p = TemplateRequest.setPageSrcInfo(env, target, oldid).then(
parse.bind( null, env, req, res )
).then(function( doc ) {
// strip newlines from the html
@@ -709,7 +695,7 @@
oldid = req.query.oldid;
}
- var p = promiseTemplateReq( env, target, oldid ).then(
+ var p = TemplateRequest.setPageSrcInfo(env, target, oldid).then(
parse.bind( null, env, req, res )
).then(function( doc ) {
doc = DU.parseHTML( DU.serializeNode(doc) );
diff --git a/lib/mediawiki.ApiRequest.js b/lib/mediawiki.ApiRequest.js
index 82b1c61..58f2a6d 100644
--- a/lib/mediawiki.ApiRequest.js
+++ b/lib/mediawiki.ApiRequest.js
@@ -355,6 +355,21 @@
this._processListeners( null, metadata );
};
+// Promise to set page src info
+TemplateRequest.setPageSrcInfo = function(env, target, oldid) {
+ return new Promise(function(resolve, reject) {
+ var tpr = new TemplateRequest(env, target, oldid);
+ tpr.once('src', function(err, srcAndMetadata) {
+ if (err) {
+ reject(err);
+ } else {
+ env.setPageSrcInfo(srcAndMetadata);
+ resolve();
+ }
+ });
+ });
+};
+
/**
* @class
* @extends ApiRequest
diff --git a/tests/client/client.js b/tests/client/client.js
index 6d4e9a5..94f7dc9 100755
--- a/tests/client/client.js
+++ b/tests/client/client.js
@@ -69,10 +69,18 @@
Util.retryingHTTPRequest(10, requestOptions, callback );
};
-var runTest = function( cb, test) {
- var results, callback = rtTest.cbCombinator.bind( null,
rtTest.xmlFormat, function( err, results ) {
- if ( err ) {
- console.log( 'ERROR in ' + test.prefix + ':' +
test.title + ':\n' + err + '\n' + err.stack);
+var runTest = function(cb, test) {
+ rtTest.fetch(test.title, {
+ setup: config.setup,
+ prefix: test.prefix,
+ rtTestMode: true,
+ parsoidURL: parsoidURL
+ }, rtTest.xmlFormat).nodify(function(err, results) {
+ var callback = null;
+ if (err) {
+ // Log it to console (for gabriel to watch scroll by)
+ console.error('Error in %s:%s: %s\n%s', test.prefix,
test.title,
+ err, err.stack || '');
/*
* If you're looking at the line below and thinking
"Why in the
* hell would they have done that, it causes
unnecessary problems
@@ -83,29 +91,10 @@
* In sum, easier to die than to worry about having to
reset any
* broken application state.
*/
- cb( 'postResult', err, results, test, function() {
process.exit( 1 ); } );
- } else {
- cb( 'postResult', err, results, test, null );
+ callback = function() { process.exit(1); };
}
- } );
-
- try {
- rtTest.fetch( test.title, {
- setup: config.setup,
- prefix: test.prefix,
- rtTestMode: true,
- parsoidURL: parsoidURL
- }, callback );
- } catch ( err ) {
- // Log it to console (for gabriel to watch scroll by)
- console.error( "ERROR in " + test.prefix + ':' + test.title +
': ' + err + '\n' + err.stack);
-
- results = rtTest.xmlFormat( {
- page: { name: test.title },
- wiki: { iwp: test.prefix }
- }, err );
- cb( 'postResult', err, results, test, function() {
process.exit( 1 ); } );
- }
+ cb('postResult', err, results, test, callback);
+ });
};
/**
diff --git a/tests/roundtrip-test.js b/tests/roundtrip-test.js
index d5d1a53..0507d90 100755
--- a/tests/roundtrip-test.js
+++ b/tests/roundtrip-test.js
@@ -1,99 +1,101 @@
#!/usr/bin/env node
-"use strict";
-require( '../lib/core-upgrade.js' );
+'use strict';
+require('../lib/core-upgrade.js');
-var request = require( 'request' ),
- yargs = require( 'yargs' ),
- domino = require( 'domino' ),
- url = require( 'url' ),
- zlib = require( 'zlib' ),
- JSUtils = require( '../lib/jsutils.js' ).JSUtils,
- Util = require( '../lib/mediawiki.Util.js' ).Util,
- DU = require( '../lib/mediawiki.DOMUtils.js' ).DOMUtils,
- TemplateRequest = require( '../lib/mediawiki.ApiRequest.js'
).TemplateRequest,
- ParsoidConfig = require( '../lib/mediawiki.ParsoidConfig'
).ParsoidConfig,
- MWParserEnvironment = require( '../lib/mediawiki.parser.environment.js'
).MWParserEnvironment,
- Diff = require('../lib/mediawiki.Diff.js').Diff;
+var request = require('request');
+var yargs = require('yargs');
+var domino = require('domino');
+var url = require('url');
+var zlib = require('zlib');
+var JSUtils = require('../lib/jsutils.js').JSUtils;
+var Util = require('../lib/mediawiki.Util.js').Util;
+var DU = require('../lib/mediawiki.DOMUtils.js').DOMUtils;
+var TemplateRequest =
require('../lib/mediawiki.ApiRequest.js').TemplateRequest;
+var ParsoidConfig = require('../lib/mediawiki.ParsoidConfig').ParsoidConfig;
+var MWParserEnvironment =
require('../lib/mediawiki.parser.environment.js').MWParserEnvironment;
+var Diff = require('../lib/mediawiki.Diff.js').Diff;
-var plainCallback = function( env, err, results ) {
- var i, result, output = '',
- semanticDiffs = 0, syntacticDiffs = 0,
- testDivider = ( new Array( 70 ) ).join( '=' ) + '\n',
- diffDivider = ( new Array( 70 ) ).join( '-' ) + '\n';
- if ( err ) {
+var plainFormat = function(err, prefix, title, results, profile) {
+ var output = '';
+ var semanticDiffs = 0;
+ var syntacticDiffs = 0;
+ var testDivider = '='.repeat(70) + '\n';
+ var diffDivider = '-'.repeat(70) + '\n';
+
+ if (err) {
output += 'Parser failure!\n\n';
output += diffDivider;
output += err;
+ if (err.stack) {
+ output += '\nStack trace: ' + err.stack;
+ }
} else {
- for ( i = 0; i < results.length; i++ ) {
- result = results[i];
-
+ for (var i = 0; i < results.length; i++) {
+ var result = results[i];
output += testDivider;
- if ( result.type === 'fail' ) {
- output += 'Semantic difference' +
(result.selser ? ' (selser)' : '') + ':\n\n';
+ if (result.type === 'fail') {
+ output += 'Semantic difference' +
+ (result.selser ? ' (selser)' : '') +
':\n\n';
output += result.wtDiff + '\n';
- output += diffDivider + 'HTML diff:\n\n' +
result.htmlDiff + '\n';
+ output += diffDivider + 'HTML diff:\n\n' +
+ result.htmlDiff + '\n';
semanticDiffs++;
} else {
- output += 'Syntactic difference' +
(result.selser ? ' (selser)' : '') + ':\n\n';
+ output += 'Syntactic difference' +
+ (result.selser ? ' (selser)' : '') +
':\n\n';
output += result.wtDiff + '\n';
syntacticDiffs++;
}
}
-
-
output += testDivider;
output += testDivider;
- output += "SUMMARY:\n";
- output += "Semantic differences : " + semanticDiffs + "\n";
- output += "Syntactic differences: " + syntacticDiffs + "\n";
+ output += 'SUMMARY:\n';
+ output += 'Semantic differences : ' + semanticDiffs + '\n';
+ output += 'Syntactic differences: ' + syntacticDiffs + '\n';
output += diffDivider;
- output += "ALL differences : " + (semanticDiffs +
syntacticDiffs) + "\n";
+ output += 'ALL differences : ' +
+ (semanticDiffs + syntacticDiffs) + '\n';
output += testDivider;
output += testDivider;
}
-
return output;
};
-var encodeXmlEntities = function( str ) {
-	return str.replace( /&/g, '&amp;' )
-		.replace( /</g, '&lt;' )
-		.replace( />/g, '&gt;' );
-};
-
-function encodeAttribute(str) {
-	return encodeXmlEntities(str)
-		.replace(/"/g, '&quot;');
+function encodeXmlEntities(str) {
+	return str.replace(/&/g, '&amp;')
+		.replace(/</g, '&lt;')
+		.replace(/>/g, '&gt;');
 }
+function encodeAttribute(str) {
+	return encodeXmlEntities(str).replace(/"/g, '&quot;');
+}
-var xmlCallback = function( env, err, results ) {
+var xmlFormat = function(err, prefix, title, results, profile) {
var i, result;
- var prefix = ( env && env.conf && env.conf.wiki && env.conf.wiki.iwp )
|| '';
- var title = ( env && env.page && env.page.name ) || '';
-
+ var article = encodeAttribute(prefix + ':' + title);
var output = '<testsuites>\n';
var outputTestSuite = function(selser) {
- output += '<testsuite name="Roundtrip article ' +
encodeAttribute( prefix + ':' + title );
+ output += '<testsuite name="Roundtrip article ' +
article;
if (selser) {
output += ' (selser)';
}
output += '">\n';
};
- if ( err ) {
+ if (err) {
outputTestSuite(false);
- output += '<testcase name="entire article"><error
type="parserFailedToFinish">';
- output += encodeXmlEntities( err.stack || err.toString() );
+ output += '<testcase name="entire article">';
+ output += '<error type="parserFailedToFinish">';
+ output += encodeXmlEntities(err.stack || err.toString());
output += '</error></testcase>';
} else if (!results.length) {
outputTestSuite(false);
} else {
var currentSelser = results[0].selser;
outputTestSuite(currentSelser);
- for ( i = 0; i < results.length; i++ ) {
+ for (i = 0; i < results.length; i++) {
result = results[i];
// When going from normal to selser results, switch to
a new
@@ -104,24 +106,24 @@
outputTestSuite(currentSelser);
}
- output += '<testcase name="' + encodeAttribute( prefix
+ ':' + title );
+ output += '<testcase name="' + article;
output += ' character ' + result.offset[0].start +
'">\n';
- if ( result.type === 'fail' ) {
+ if (result.type === 'fail') {
output += '<failure
type="significantHtmlDiff">\n';
output += '<diff class="wt">\n';
- output += encodeXmlEntities( result.wtDiff );
+ output += encodeXmlEntities(result.wtDiff);
output += '\n</diff>\n';
output += '<diff class="html">\n';
- output += encodeXmlEntities( result.htmlDiff );
+ output += encodeXmlEntities(result.htmlDiff);
output += '\n</diff>\n';
output += '</failure>\n';
} else {
output += '<skipped
type="insignificantWikitextDiff">\n';
- output += encodeXmlEntities( result.wtDiff );
+ output += encodeXmlEntities(result.wtDiff);
output += '\n</skipped>\n';
}
@@ -131,23 +133,21 @@
output += '</testsuite>\n';
// Output the profiling data
- if ( env.profile ) {
-
- // Delete the total timer to avoid serializing it
- if (env.profile.time && env.profile.time.total_timer) {
- delete( env.profile.time.total_timer );
+ if (profile) {
+ // Delete the start time to avoid serializing it
+ if (profile.time && profile.time.start) {
+ delete(profile.time.start);
}
-
output += '<perfstats>\n';
- for ( var type in env.profile ) {
- for ( var prop in env.profile[ type ] ) {
- output += '<perfstat type="' + DU.encodeXml(
type ) + ':';
- output += DU.encodeXml( prop );
+ Object.keys(profile).forEach(function(type) {
+ Object.keys(profile[type]).forEach(function(prop) {
+ output += '<perfstat type="' +
DU.encodeXml(type) + ':';
+ output += DU.encodeXml(prop);
output += '">';
- output += DU.encodeXml( env.profile[ type ][
prop ].toString() );
+ output +=
DU.encodeXml(profile[type][prop].toString());
output += '</perfstat>\n';
- }
- }
+ });
+ });
output += '</perfstats>\n';
}
output += '</testsuites>';
@@ -156,31 +156,34 @@
};
var findMatchingNodes = function(root, targetRange, sourceLen) {
- var currentOffset = null, wasWaiting = false, waitingForEndMatch =
false;
+ var currentOffset = null;
+ var wasWaiting = false;
+ var waitingForEndMatch = false;
function walkDOM(element) {
- var elements = [],
- precedingNodes = [],
- attribs = DU.getJSONAttribute(element, 'data-parsoid');
+ var elements = [];
+ var precedingNodes = [];
+ var attribs = DU.getJSONAttribute(element, 'data-parsoid');
- if ( attribs.dsr && attribs.dsr.length ) {
- var start = attribs.dsr[0] || 0,
- end = attribs.dsr[1] || sourceLen - 1;
+ if (attribs.dsr && attribs.dsr.length) {
+ var start = attribs.dsr[0] || 0;
+ var end = attribs.dsr[1] || sourceLen - 1;
- if ( (targetRange.end - 1) < start ||
targetRange.start > (end - 1) ) {
+ if ((targetRange.end - 1) < start || targetRange.start
> (end - 1)) {
return null;
}
- if ( waitingForEndMatch ) {
- if ( end >= targetRange.end ) {
+ if (waitingForEndMatch) {
+ if (end >= targetRange.end) {
waitingForEndMatch = false;
}
return { done: true, nodes: [element] };
}
- if ( attribs.dsr[0] !== null && targetRange.start ===
start && end === targetRange.end ) {
+ if (attribs.dsr[0] !== null && targetRange.start ===
start &&
+ end === targetRange.end) {
return { done: true, nodes: [element] };
- } else if ( targetRange.start === start ) {
+ } else if (targetRange.start === start) {
waitingForEndMatch = true;
if (end < targetRange.end) {
// No need to walk children
@@ -196,38 +199,40 @@
while (c) {
wasWaiting = waitingForEndMatch;
- if ( DU.isElt(c) ) {
+ if (DU.isElt(c)) {
var res = walkDOM(c);
var matchedChildren = res ? res.nodes : null;
- if ( matchedChildren ) {
- if ( !currentOffset && attribs.dsr &&
(attribs.dsr[0] !== null) ) {
+ if (matchedChildren) {
+ if (!currentOffset && attribs.dsr &&
(attribs.dsr[0] !== null)) {
var elesOnOffset = [];
currentOffset = attribs.dsr[0];
- // Walk the preceding nodes
without dsr values and prefix matchedChildren
- // till we get the desired
matching start value.
+ // Walk the preceding nodes
without dsr values and
+ // prefix matchedChildren till
we get the desired
+ // matching start value.
var diff = currentOffset -
targetRange.start;
- while ( precedingNodes.length >
0 && diff > 0 ) {
+ while (precedingNodes.length >
0 && diff > 0) {
var n =
precedingNodes.pop();
var len =
DU.isComment(n) ?
DU.decodedCommentLength(n) :
n.nodeValue.length;
- if ( len > diff ) {
+ if (len > diff) {
break;
}
diff -= len;
- elesOnOffset.push( n );
+ elesOnOffset.push(n);
}
elesOnOffset.reverse();
- matchedChildren =
elesOnOffset.concat( matchedChildren );
+ matchedChildren =
elesOnOffset.concat(matchedChildren);
}
- // Check if there's only one child, and
make sure it's a node with getAttribute
- if ( matchedChildren.length === 1 &&
DU.isElt(matchedChildren[0]) ) {
- var childAttribs =
matchedChildren[0].getAttribute( 'data-parsoid' );
- if ( childAttribs ) {
- childAttribs =
JSON.parse( childAttribs );
- if ( childAttribs.dsr
&& childAttribs.dsr[1]) {
- if (
childAttribs.dsr[1] >= targetRange.end ) {
+ // Check if there's only one child,
+ // and make sure it's a node with
getAttribute.
+ if (matchedChildren.length === 1 &&
DU.isElt(matchedChildren[0])) {
+ var childAttribs =
matchedChildren[0].getAttribute('data-parsoid');
+ if (childAttribs) {
+ childAttribs =
JSON.parse(childAttribs);
+ if (childAttribs.dsr &&
childAttribs.dsr[1]) {
+ if
(childAttribs.dsr[1] >= targetRange.end) {
res.done = true;
} else {
currentOffset = childAttribs.dsr[1];
@@ -248,14 +253,14 @@
// Clear out when an element node is
encountered.
precedingNodes = [];
- } else if ( c.nodeType === c.TEXT_NODE || c.nodeType
=== c.COMMENT_NODE ) {
- if ( currentOffset && ( currentOffset <
targetRange.end ) ) {
+ } else if (c.nodeType === c.TEXT_NODE || c.nodeType ===
c.COMMENT_NODE) {
+ if (currentOffset && (currentOffset <
targetRange.end)) {
if (DU.isComment(c)) {
currentOffset +=
DU.decodedCommentLength(c);
} else {
currentOffset +=
c.nodeValue.length;
}
- if ( currentOffset >= targetRange.end )
{
+ if (currentOffset >= targetRange.end) {
waitingForEndMatch = false;
}
}
@@ -263,18 +268,18 @@
if (wasWaiting || waitingForEndMatch) {
// Part of target range
elements.push(c);
- } else if ( !currentOffset ) {
+ } else if (!currentOffset) {
// Accumulate nodes without dsr
- precedingNodes.push( c );
+ precedingNodes.push(c);
}
}
- if ( wasWaiting && !waitingForEndMatch ) {
+ if (wasWaiting && !waitingForEndMatch) {
break;
}
// Skip over encapsulated content
- var typeOf = DU.isElt(c) ? c.getAttribute( 'typeof' )
|| '' : '';
+ var typeOf = DU.isElt(c) ? c.getAttribute('typeof') ||
'' : '';
if
(/\bmw:(?:Transclusion\b|Param\b|Extension\/[^\s]+)/.test(typeOf)) {
c = DU.skipOverEncapsulatedContent(c);
} else {
@@ -286,7 +291,7 @@
var numChildren = element.childNodes.length;
if (numElements === 0) {
return null;
- } else if ( numElements < numChildren ) {
+ } else if (numElements < numChildren) {
return { done: !waitingForEndMatch, nodes: elements } ;
} else { /* numElements === numChildren */
return { done: !waitingForEndMatch, nodes: [element] } ;
@@ -296,61 +301,58 @@
return walkDOM(root);
};
-var checkIfSignificant = function(env, offsets, oldWt, oldBody, oldDp, newWt,
cb, err, html, dp) {
- if (err) {
- cb(err, null, []);
- return;
- }
+var normalizeWikitext = function(str) {
+ // Ignore leading tabs vs. leading spaces
+ str = str.replace(/^\t/, ' ');
+ str = str.replace(/\n\t/g, '\n ');
+ // Normalize multiple spaces to single space
+ str = str.replace(/ +/g, ' ');
+ // Eliminate spaces around wikitext chars
+ // gwicke: disabled for now- too aggressive IMO
+ // str = str.replace(/([<"'!#\*:;+-=|{}\[\]\/]) /g, "$1");
+ // Ignore capitalization of tags and void tag indications
+ str = str.replace(/<(\/?)([^ >\/]+)((?:[^>\/]|\/(?!>))*)\/?>/g,
+ function(match, close, name, remaining) {
+ return '<' + close + name.toLowerCase() +
+ remaining.replace(/ $/, '') + '>';
+ });
+ // Ignore whitespace in table cell attributes
+ str = str.replace(/(^|\n|\|(?=\|)|!(?=!))(\{\||\|[\-+]*|!) *([^|\n]*?)
*(?=[|\n]|$)/g, '$1$2$3');
+ // Ignore trailing semicolons and spaces in style attributes
+ str = str.replace(/style\s*=\s*"[^"]+"/g, function(match) {
+ return match.replace(/\s|;(?=")/g, '');
+ });
+ // Strip double-quotes
+ str = str.replace(/"([^"]*?)"/g, '$1');
+ // Ignore implicit </small> and </center> in table cells or the end
+ // of the string for now
+ str = str.replace(/(^|\n)<\/(?:small|center)>(?=\n[|!]|\n?$)/g, '');
+ str = str.replace(/([|!].*?)<\/(?:small|center)>(?=\n[|!]|\n?$)/gi,
'$1');
+ return str;
+};
- var normalizeWikitext = function(str) {
- // Ignore leading tabs vs. leading spaces
- str = str.replace(/^\t/, ' ');
- str = str.replace(/\n\t/g, '\n ');
- // Normalize multiple spaces to single space
- str = str.replace(/ +/g, " ");
- // Eliminate spaces around wikitext chars
- // gwicke: disabled for now- too aggressive IMO
- // str = str.replace(/([<"'!#\*:;+-=|{}\[\]\/]) /g, "$1");
- // Ignore capitalization of tags and void tag indications
- str = str.replace(/<(\/?)([^ >\/]+)((?:[^>\/]|\/(?!>))*)\/?>/g,
function(match, close, name, remaining) {
- return '<' + close + name.toLowerCase() +
remaining.replace(/ $/, '') + '>';
- } );
- // Ignore whitespace in table cell attributes
- str = str.replace(/(^|\n|\|(?=\|)|!(?=!))(\{\||\|[\-+]*|!)
*([^|\n]*?) *(?=[|\n]|$)/g, '$1$2$3');
- // Ignore trailing semicolons and spaces in style attributes
- str = str.replace(/style\s*=\s*"[^"]+"/g, function(match) {
- return match.replace(/\s|;(?=")/g, '');
- });
- // Strip double-quotes
- str = str.replace(/"([^"]*?)"/g, "$1");
+// Get diff substrings from offsets
+var formatDiff = function(oldWt, newWt, offset, context) {
+ return [
+ '----',
+ oldWt.substring(offset[0].start - context, offset[0].end +
context),
+ '++++',
+ newWt.substring(offset[1].start - context, offset[1].end +
context),
+ ].join('\n');
+};
- // Ignore implicit </small> and </center> in table cells or the
end
- // of the string for now
- str = str.replace(/(^|\n)<\/(?:small|center)>(?=\n[|!]|\n?$)/g,
'');
- str =
str.replace(/([|!].*?)<\/(?:small|center)>(?=\n[|!]|\n?$)/gi, '$1');
+var checkIfSignificant = function(offsets, data) {
+ var oldWt = data.oldWt;
+ var newWt = data.newWt;
- return str;
- };
-
- // Get diff substrings from offsets
- var formatDiff = function(offset, context) {
- return [
- '----',
- oldWt.substring(offset[0].start - context,
offset[0].end + context),
- '++++',
- newWt.substring(offset[1].start - context,
offset[1].end + context)
- ].join('\n');
- };
-
- var newDOC = domino.createDocument(html);
+ var oldBody = domino.createDocument(data.oldHTML.body).body;
+ var newBody = domino.createDocument(data.newHTML.body).body;
// Merge data-parsoid so that HTML nodes can be compared and diff'ed.
- DU.applyDataParsoid(oldBody.ownerDocument, oldDp.body);
- DU.applyDataParsoid(newDOC, dp.body);
- // console.warn("\nnewDOC:", newDOC)
+ DU.applyDataParsoid(oldBody.ownerDocument, data.oldDp.body);
+ DU.applyDataParsoid(newBody.ownerDocument, data.newDp.body);
var i, k, diff, offset;
- var thisResult;
var results = [];
// Use the full tests for fostered content.
@@ -360,62 +362,53 @@
// If parsoid-normalized HTML for old and new wikitext is
identical,
// the wt-diffs are purely syntactic.
var normalizedOld = DU.normalizeOut(oldBody, true);
- var normalizedNew = DU.normalizeOut(newDOC.body, true);
+ var normalizedNew = DU.normalizeOut(newBody, true);
if (normalizedOld === normalizedNew) {
for (i = 0; i < offsets.length; i++) {
offset = offsets[i];
results.push({
type: 'skip',
offset: offset,
- wtDiff: formatDiff(offset, 0),
+ wtDiff: formatDiff(oldWt, newWt,
offset, 0),
});
}
- cb( null, env, results );
- return;
+ return results;
}
}
- var origOut, newOut, origHTML, newHTML, origOrigHTML, origNewHTML;
+ var origOut, newOut, origHTML, newHTML;
// Now, proceed with full blown diffs
for (i = 0; i < offsets.length; i++) {
- thisResult = {};
- origOrigHTML = '';
- origNewHTML = '';
-
offset = offsets[i];
+ var origOrigHTML = '';
+ var origNewHTML = '';
+ var thisResult = { offset: offset };
- thisResult.offset = offset;
- // console.warn("--processing: " + JSON.stringify(offset));
-
- if (offset[0].start === offset[0].end &&
+ var implicitlyClosed = (offset[0].start === offset[0].end &&
newWt.substr(offset[1].start, offset[1].end -
offset[1].start)
- .match(/^\n?<\/[^>]+>\n?$/)) {
+ .match(/^\n?<\/[^>]+>\n?$/));
+ if (implicitlyClosed) {
// An element was implicitly closed. Fudge the orig
offset
// slightly so it finds the corresponding elements
which have the
// original (unclosed) DSR.
offset[0].start--;
}
- // console.warn("--orig--");
+
var res = findMatchingNodes(oldBody, offset[0] || {},
oldWt.length);
origOut = res ? res.nodes : [];
for (k = 0; k < origOut.length; k++) {
// node need not be an element always!
- origOrigHTML += DU.serializeNode(origOut[k],
{smartQuote: false});
+ origOrigHTML += DU.serializeNode(origOut[k], {
smartQuote: false });
}
origHTML = DU.formatHTML(DU.normalizeOut(origOrigHTML));
- // console.warn("# nodes: " + origOut.length);
- // console.warn("html: " + origHTML);
- // console.warn("--new--");
- res = findMatchingNodes(newDOC.body, offset[1] || {},
newWt.length);
+ res = findMatchingNodes(newBody, offset[1] || {}, newWt.length);
newOut = res ? res.nodes : [];
for (k = 0; k < newOut.length; k++) {
// node need not be an element always!
- origNewHTML += DU.serializeNode(newOut[k], {smartQuote:
false});
+ origNewHTML += DU.serializeNode(newOut[k], {
smartQuote: false });
}
newHTML = DU.formatHTML(DU.normalizeOut(origNewHTML));
- // console.warn("# nodes: " + newOut.length);
- // console.warn("html: " + newHTML);
// compute wt diffs
var wt1 = oldWt.substring(offset[0].start, offset[0].end);
@@ -425,347 +418,291 @@
diff = Diff.htmlDiff(origHTML, newHTML, false, true, true);
// No context by default
- thisResult.wtDiff = formatDiff(offset, 0);
+ thisResult.wtDiff = formatDiff(oldWt, newWt, offset, 0);
// Normalize wts to check if we really have a semantic diff
thisResult.type = 'skip';
if (diff.length > 0) {
- var normWT1 = normalizeWikitext(wt1),
- normWT2 = normalizeWikitext(wt2);
-
+ var normWT1 = normalizeWikitext(wt1);
+ var normWT2 = normalizeWikitext(wt2);
if (normWT1 !== normWT2) {
- // console.log( 'normDiff: =======\n' + normWT1
+ '\n--------\n' + normWT2);
thisResult.htmlDiff = diff;
thisResult.type = 'fail';
// Provide context for semantic diffs
- thisResult.wtDiff = formatDiff(offset, 25);
+ thisResult.wtDiff = formatDiff(oldWt, newWt,
offset, 25);
}
}
results.push(thisResult);
}
- cb(null, env, results);
+ return results;
};
-var parsoidPost = function(env, uri, domain, title, text, dp, oldid,
- recordSizes, profilePrefix, cb) {
- var data = {};
+function parsoidPost(env, options, cb) {
+ var title = encodeURIComponent(options.title);
+
+ var uri = options.uri;
// make sure the Parsoid URI ends on /
- if ( !/\/$/.test(uri) ) {
+ if (!/\/$/.test(uri)) {
uri += '/';
}
- uri += 'v2/' + domain + '/';
- title = encodeURIComponent(title);
+ uri += 'v2/' + options.domain + '/';
- if ( oldid ) {
- // We want html2wt
- uri += 'wt/' + title + '/' + oldid;
- data.html = {
- body: text
- };
- data.original = {
- 'data-parsoid': dp
- };
- } else {
- // We want wt2html
+ if (options.html2wt) {
+ uri += 'wt/' + title + '/' + options.oldid;
+ } else { // wt2html
uri += 'pagebundle/' + title;
- data.wikitext = text;
}
- var options = {
+ var httpOptions = {
uri: uri,
method: 'POST',
json: true,
- body: data
+ body: options.data,
};
- Util.retryingHTTPRequest( 10, options, function( err, res, body ) {
- if (err) {
- cb( err, null );
- } else if (res.statusCode !== 200) {
- err = new Error('Got status code: ' + res.statusCode);
- cb(err, null);
- } else {
- var resBody, resDP;
- if (oldid) {
- // Extract the wikitext from the response
- resBody = body.wikitext.body;
- } else {
- resBody = body.html.body;
- resDP = body['data-parsoid'];
+ return new Promise(function(resolve, reject) {
+ // TODO: convert Util.retryingHTTPRequest to a promise
returning func
+ Util.retryingHTTPRequest(10, httpOptions, function(err, res,
body) {
+ if (!err && res.statusCode !== 200) {
+ err = new Error('Got status code: ' +
res.statusCode);
}
- if ( env.profile ) {
- if (!profilePrefix) {
- profilePrefix = '';
+ if (err) { return reject(err); }
+
+ // FIXME: Parse time was removed from profiling when we
stopped
+ // sending the x-parsoid-performance header.
+ if (options.recordSizes) {
+ var prefix = '';
+ if (options.profilePrefix) {
+ prefix += options.profilePrefix + ':';
}
- // FIXME: Parse time was removed from profiling
when we stopped
- // sending the x-parsoid-performance header.
- if (recordSizes) {
- // Record the sizes
- var sizePrefix = profilePrefix + (oldid
? 'wt' : 'html');
- env.profile.size[ sizePrefix + 'raw' ] =
- resBody.length;
- // Compress to record the gzipped size
- zlib.gzip( resBody, function( err,
gzippedbuf ) {
- if ( !err ) {
- env.profile.size[
sizePrefix + 'gzip' ] =
-
gzippedbuf.length;
- }
- cb( null, resBody, resDP );
- } );
+ var str;
+ if (options.html2wt) {
+ prefix += 'html:';
+ str = body.wikitext.body;
} else {
- cb(null, resBody, resDP);
+ prefix += 'wt:';
+ str = body.html.body;
}
+ env.profile.size[prefix + 'raw'] = str.length;
+ // Compress to record the gzipped size
+ zlib.gzip(str, function(err, gzippedbuf) {
+ if (err) { return reject(err); }
+ env.profile.size[prefix + 'gzip'] =
gzippedbuf.length;
+ resolve(body);
+ });
} else {
- cb( null, resBody, resDP );
+ resolve(body);
}
- }
- } );
-};
+ });
+ }).nodify(cb);
+}
-var doubleRoundtripDiff = function(env, uri, domain, title, offsets, src,
body, dp, out, cb) {
- if ( offsets.length > 0 ) {
- env.setPageSrcInfo( out );
- env.errCB = function( error ) {
- cb( error, env, [] );
- process.exit( 1 );
- };
+function roundTripDiff(env, parsoidOptions, data) {
+ var diff = Diff.diffLines(data.newWt, data.oldWt);
+ var offsets = Diff.convertDiffToOffsetPairs(diff);
+ if (!diff.length || !offsets.length) { return []; }
- parsoidPost(env, uri, domain, title, out, null, null, false,
null,
- checkIfSignificant.bind(null, env, offsets, src, body,
dp, out, cb));
-
- } else {
- cb( null, env, [] );
- }
-};
-
-var roundTripDiff = function( env, uri, domain, title, src, html, dp, out, cb
) {
- var diff, offsetPairs;
-
- try {
- diff = Diff.diffLines(out, src);
- offsetPairs = Diff.convertDiffToOffsetPairs(diff);
-
- if ( diff.length > 0 ) {
- var body = domino.createDocument( html ).body;
- doubleRoundtripDiff( env, uri, domain, title,
offsetPairs, src, body, dp, out, cb );
- } else {
- cb( null, env, [] );
- }
- } catch ( e ) {
- cb( e, env, [] );
- }
-};
-
-var selserRoundTripDiff = function(env, uri, domain, title, html, dp, out,
diffs, cb) {
- var selserDiff, offsetPairs,
- src = env.page.src.replace(/\n(?=\n)/g, '\n ');
- // Remove the selser trigger comment
- out = out.replace(/<!--rtSelserEditTestComment-->\n*$/, '');
- out = out.replace(/\n(?=\n)/g, '\n ');
-
- roundTripDiff(env, uri, domain, title, src, html, dp, out,
function(err, env, selserDiffs) {
- if (err) {
- cb(err, env, diffs);
- } else {
- for (var sD in selserDiffs) {
- selserDiffs[sD].selser = true;
- }
- if (selserDiffs.length) {
- diffs = diffs.concat(selserDiffs);
- }
- cb(null, env, diffs);
- }
+ var options = Object.assign({
+ wt2html: true,
+ data: { wikitext: data.newWt },
+ }, parsoidOptions);
+ return parsoidPost(env, options).then(function(body) {
+ data.newHTML = body.html;
+ data.newDp = body['data-parsoid'];
+ return checkIfSignificant(offsets, data);
});
-};
+}
-// Returns a Promise for an { env, rtDiffs } object. `cb` is optional.
-var fetch = function( page, options, cb ) {
- cb = JSUtils.mkPromised( cb, [ 'env', 'rtDiffs' ] );
- var domain, prefix, apiURL,
- // options are ParsoidConfig options if module.parent,
otherwise they
- // are CLI options (so use the Util.set* helpers to process
them)
- parsoidConfig = new ParsoidConfig( module.parent ? options :
null );
+// Returns a Promise for a formatted string. `cb` is optional.
+function fetch(title, options, formatter, cb) {
+ // options are ParsoidConfig options if module.parent, otherwise they
+ // are CLI options (so use the Util.set* helpers to process them)
+ var parsoidConfig = new ParsoidConfig(module.parent ? options : null);
if (!module.parent) {
// only process CLI flags if we're running as a CLI program.
- Util.setTemplatingAndProcessingFlags( parsoidConfig, options );
- Util.setDebuggingFlags( parsoidConfig, options );
+ Util.setTemplatingAndProcessingFlags(parsoidConfig, options);
+ Util.setDebuggingFlags(parsoidConfig, options);
}
-
- if ( options.apiURL ) {
+ if (options.apiURL) {
parsoidConfig.setInterwiki(options.prefix || 'localhost',
options.apiURL);
}
+ var err, domain, prefix;
if (options.prefix) {
// If prefix is present, use that.
prefix = options.prefix;
// Get the domain from the interwiki map.
- apiURL = parsoidConfig.interwikiMap.get(prefix);
+ var apiURL = parsoidConfig.interwikiMap.get(prefix);
if (!apiURL) {
- cb("Couldn't find the domain for prefix " + prefix,
null, []);
+ err = new Error('Couldn\'t find the domain for prefix '
+ prefix);
}
domain = url.parse(apiURL).hostname;
} else if (options.domain) {
domain = options.domain;
prefix = parsoidConfig.reverseIWMap.get(domain);
- }
-
- var envCb = function( err, env ) {
- env.errCB = function( error ) {
- cb( error, env, [] );
- };
- if ( err !== null ) {
- env.errCB( err );
- return;
- }
- env.profile = { time: { total: 0, total_timer: new Date() },
size: {} };
-
- var target = env.resolveTitle( env.normalizeTitle(
env.page.name ), '' );
- var tpr = new TemplateRequest( env, target, null );
-
- tpr.once( 'src', function( err, src_and_metadata ) {
- if ( err ) {
- cb( err, env, [] );
- } else {
- // Shortcut for calling parsoidPost with common
options
- var parsoidPostShort = function(postBody,
postDp, postOldId,
- postRecordSizes,
postProfilePrefix, postCb) {
- parsoidPost(env, options.parsoidURL,
domain, page,
- postBody, postDp, postOldId,
postRecordSizes, postProfilePrefix,
- function(err, postResult,
postResultDp) {
- if (err) {
- cb(err, env,
[]);
- } else {
-
postCb(postResult, postResultDp);
- }
- });
- };
-
- // Once we have the diffs between the
round-tripped wt,
- // to test rt selser we need to modify the HTML
and request
- // the wt again to compare with selser, and
then concat the
- // resulting diffs to the ones we got from
basic rt
- var rtSelserTest = function(origHTMLBody,
origDp, err, env, rtDiffs) {
- if (err) {
- cb(err, env, rtDiffs);
- } else {
- var newDocument =
DU.parseHTML(origHTMLBody),
- newNode =
newDocument.createComment('rtSelserEditTestComment');
-
newDocument.body.appendChild(newNode);
-
parsoidPostShort(newDocument.outerHTML, origDp,
-
src_and_metadata.revision.revid, false, 'selser',
- function(wtSelserBody) {
- // Finish the
total time now
- if (
env.profile && env.profile.time ) {
-
env.profile.time.total += new Date() - env.profile.time.total_timer;
- }
-
-
selserRoundTripDiff(env, options.parsoidURL,
- domain,
page, origHTMLBody, origDp, wtSelserBody,
-
rtDiffs, cb);
- });
- }
- };
-
- env.setPageSrcInfo(src_and_metadata);
- // First, fetch the HTML for the requested
page's wikitext
- parsoidPostShort(env.page.src, null, null,
true, null, function(htmlBody, htmlDp) {
- // Now, request the wikitext for the
obtained HTML
- // (without sending data-parsoid, as we
don't want selser yet).
- parsoidPostShort(htmlBody, htmlDp,
-
src_and_metadata.revision.revid, true, null,
- function(wtBody) {
- roundTripDiff(env,
options.parsoidURL, domain, page,
- env.page.src,
htmlBody, htmlDp, wtBody,
-
rtSelserTest.bind(null, htmlBody, htmlDp));
- });
- });
- }
- } );
- };
-
- MWParserEnvironment.getParserEnv( parsoidConfig, null, { prefix:
prefix, pageName: page }, envCb );
- return cb.promise;
-};
-
-var cbCombinator = function( formatter, cb, err, env, text ) {
- cb( err, formatter( env, err, text ) );
-};
-
-var consoleOut = function( err, output ) {
- if ( err ) {
- console.log( 'ERROR: ' + err);
- if (err.stack) {
- console.log( 'Stack trace: ' + err.stack);
- }
- process.exit( 1 );
} else {
- console.log( output );
- process.exit( 0 );
+ err = new Error('No domain or prefix provided.');
}
-};
+ var env;
+ var closeFormatter = function(err, results) {
+ return formatter(err, prefix, title, results, env &&
env.profile);
+ };
+ var parsoidOptions = {
+ uri: options.parsoidURL,
+ domain: domain,
+ title: title,
+ };
+ var data = {};
+ return Promise[err ? 'reject' : 'resolve'](err).then(function() {
+ return MWParserEnvironment.getParserEnv(
+ parsoidConfig, null, { prefix: prefix, pageName: title }
+ );
+ }).then(function(_env) {
+ env = _env;
+		env.profile = { time: { total: 0, start: Date.now() }, size: {} };
+		var target = env.resolveTitle(env.normalizeTitle(env.page.name), '');
+ return TemplateRequest.setPageSrcInfo(env, target, null);
+ }).then(function() {
+ data.oldWt = env.page.src;
+ // First, fetch the HTML for the requested page's wikitext
+ var options = Object.assign({
+ wt2html: true,
+ recordSizes: true,
+ data: { wikitext: data.oldWt },
+ }, parsoidOptions);
+ return parsoidPost(env, options);
+ }).then(function(body) {
+ data.oldHTML = body.html;
+ data.oldDp = body['data-parsoid'];
+ // Now, request the wikitext for the obtained HTML
+ var options = Object.assign({
+ html2wt: true,
+ recordSizes: true,
+ oldid: env.page.meta.revision.revid,
+ data: {
+ html: data.oldHTML,
+ original: { 'data-parsoid': data.oldDp },
+ },
+ }, parsoidOptions);
+ return parsoidPost(env, options);
+ }).then(function(body) {
+ data.newWt = body.wikitext.body;
+ return roundTripDiff(env, parsoidOptions, data);
+ }).then(function(results) {
+ data.diffs = results;
+ // Once we have the diffs between the round-tripped wt,
+ // to test rt selser we need to modify the HTML and request
+ // the wt again to compare with selser, and then concat the
+ // resulting diffs to the ones we got from basic rt
+ var newDocument = DU.parseHTML(data.oldHTML.body);
+		var newNode = newDocument.createComment('rtSelserEditTestComment');
+ newDocument.body.appendChild(newNode);
+ var options = Object.assign({
+ html2wt: true,
+ oldid: env.page.meta.revision.revid,
+ data: {
+ html: newDocument.outerHTML,
+ original: { 'data-parsoid': data.oldDp },
+ },
+ profilePrefix: 'selser',
+ }, parsoidOptions);
+ return parsoidPost(env, options);
+ }).then(function(body) {
+ var out = body.wikitext.body;
-if ( typeof module === 'object' ) {
- module.exports.fetch = fetch;
- module.exports.plainFormat = plainCallback;
- module.exports.xmlFormat = xmlCallback;
- module.exports.cbCombinator = cbCombinator;
+ // Finish the total time now
+ // FIXME: Is the right place to end it?
+ if (env.profile && env.profile.time) {
+ env.profile.time.total = Date.now() -
env.profile.time.start;
+ }
+
+ // FIXME: I guess so? This needs a comment. First we're diff'ing
+ // the old and new wt's. Now we're diff'ing the new and
supposedly
+ // selser'd wt's. Meanwhile, the serializer never seems to be
invoked
+ // in selserMode.
+ data.oldWt = data.newWt;
+
+ // Remove the selser trigger comment
+ out = out.replace(/<!--rtSelserEditTestComment-->\n*$/, '');
+ data.newWt = out;
+
+ // FIXME: not sure about this stuff?
+ data.oldWt = data.oldWt.replace(/\n(?=\n)/g, '\n ');
+ data.newWt = data.newWt.replace(/\n(?=\n)/g, '\n ');
+ return roundTripDiff(env, parsoidOptions, data);
+ }).then(function(selserDiffs) {
+ selserDiffs.forEach(function(diff) {
+ diff.selser = true;
+ });
+ if (selserDiffs.length) {
+ data.diffs = data.diffs.concat(selserDiffs);
+ }
+ return data.diffs;
+ }).then(
+ closeFormatter.bind(null, null),
+ closeFormatter
+ ).nodify(cb);
}
-if ( !module.parent ) {
- var standardOpts = Util.addStandardOptions({
- 'xml': {
+
+if (require.main === module) {
+ var options = Util.addStandardOptions({
+ xml: {
description: 'Use xml callback',
- 'boolean': true,
- 'default': false
+ boolean: true,
+ default: false,
},
- 'prefix': {
- description: 'Which wiki prefix to use; e.g. "enwiki"
for English wikipedia, "eswiki" for Spanish, "mediawikiwiki" for mediawiki.org',
- 'default': ''
+ prefix: {
+ description: 'Which wiki prefix to use; e.g. "enwiki"
for ' +
+ 'English wikipedia, "eswiki" for Spanish,
"mediawikiwiki" ' +
+ 'for mediawiki.org',
+ default: '',
},
- 'domain': {
- description: 'Which wiki to use; e.g.
"en.wikipedia.org" for English wikipedia',
- 'default': 'en.wikipedia.org'
+ domain: {
+ description: 'Which wiki to use; e.g.
"en.wikipedia.org" for' +
+ ' English wikipedia',
+ default: 'en.wikipedia.org',
},
- 'parsoidURL': {
+ parsoidURL: {
description: 'The URL for the Parsoid API',
- }
+ },
}, {
// defaults for standard options
- rtTestMode: true // suppress noise by default
+ rtTestMode: true, // suppress noise by default
});
var opts = yargs.usage(
- 'Usage: $0 [options] <page-title> \n\n',
- standardOpts
- ).check(Util.checkUnknownArgs.bind(null, standardOpts));
+ 'Usage: $0 [options] <page-title> \n\n', options
+ ).check(Util.checkUnknownArgs.bind(null, options));
- var callback;
var argv = opts.argv;
var title = argv._[0];
- if ( title ) {
- callback = cbCombinator.bind( null,
- Util.booleanOption( argv.xml ) ?
- xmlCallback : plainCallback, consoleOut
- );
- if ( !argv.parsoidURL ) {
- // Start our own Parsoid server
- // TODO: This will not be necessary once we have a
top-level testing
- // script that takes care of setting everything up.
- var apiServer = require( './apiServer.js' ),
- parsoidOptions = {quiet: true};
- if (opts.apiURL) {
- parsoidOptions.mockUrl = opts.apiURL;
- }
-
apiServer.startParsoidServer(parsoidOptions).then(function( ret ) {
- argv.parsoidURL = ret.url;
- fetch( title, argv, callback );
- } ).done();
- apiServer.exitOnProcessTerm();
- } else {
- fetch( title, argv, callback );
- }
- } else {
- opts.showHelp();
+ if (!title) {
+ return opts.showHelp();
}
+ Promise.resolve().then(function() {
+ if (argv.parsoidURL) { return; }
+ // Start our own Parsoid server
+ // TODO: This will not be necessary once we have a top-level
testing
+ // script that takes care of setting everything up.
+ var apiServer = require('./apiServer.js');
+ var parsoidOptions = { quiet: true };
+ if (opts.apiURL) {
+ parsoidOptions.mockUrl = opts.apiURL;
+ }
+ apiServer.exitOnProcessTerm();
+		return apiServer.startParsoidServer(parsoidOptions).then(function(ret) {
+ argv.parsoidURL = ret.url;
+ });
+ }).then(function() {
+		var formatter = Util.booleanOption(argv.xml) ? xmlFormat : plainFormat;
+ return fetch(title, argv, formatter);
+ }).then(function(output) {
+ console.log(output);
+ process.exit(0);
+ }).done();
+} else if (typeof module === 'object') {
+ module.exports.fetch = fetch;
+ module.exports.xmlFormat = xmlFormat;
}
--
To view, visit https://gerrit.wikimedia.org/r/202674
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: I646f8a916add26ba60171ed31d1593c6bb6b63c5
Gerrit-PatchSet: 6
Gerrit-Project: mediawiki/services/parsoid
Gerrit-Branch: master
Gerrit-Owner: Arlolra <[email protected]>
Gerrit-Reviewer: Cscott <[email protected]>
Gerrit-Reviewer: Marcoil <[email protected]>
Gerrit-Reviewer: Subramanya Sastry <[email protected]>
Gerrit-Reviewer: jenkins-bot <>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits