Arlolra has uploaded a new change for review. https://gerrit.wikimedia.org/r/91321
Change subject: Public API for page-less html2wt / wt2html
......................................................................

Public API for page-less html2wt / wt2html

Cleans up the existing POST api and adds two new paths,
_wt2html and _html2wt.

Bug: 55758
Change-Id: I7ce70b98be802ba0894826629b1a21a083a4fde3
---
M js/api/ParserService.js
1 file changed, 55 insertions(+), 37 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/Parsoid refs/changes/21/91321/1

diff --git a/js/api/ParserService.js b/js/api/ParserService.js
index 6a1cfbe..281c974 100644
--- a/js/api/ParserService.js
+++ b/js/api/ParserService.js
@@ -440,28 +440,41 @@
 	getParserServiceEnv( res, parsoidConfig.defaultWiki, req.params[0], cb, req );
 } );
 
-app.post(/\/_html\/(.*)/, function ( req, res ) {
+app.post( /\/_html\/(.*)/, html2wt );
+app.post( /\/_html2wt\/(.*)/, html2wt );
+
+function html2wt( req, res ) {
 	var cb = function ( env ) {
-		res.setHeader('Content-Type', 'text/html; charset=UTF-8');
-		var doc = DU.parseHTML(req.body.content.replace(/\r/g, ''));
-		res.write('<pre style="background-color: #efefef">');
+		var doc;
+		try {
+			doc = DU.parseHTML( req.body.content.replace( /\r/g, '' ) );
+		} catch ( e ) {
+			console.log( 'There was an error in the HTML5 parser.' );
+			env.errCB( e );
+			return;
+		}
+
 		// Always use the non-selective serializer for this mode
-		new WikitextSerializer({env: env}).serializeDOM(
-			doc.body,
-			function( c ) {
-				res.write( htmlSpecialChars( c ) );
-			},
-			function() {
-				res.write('</pre>');
-				res.write( "<hr>Your HTML DOM:" );
-				textarea( res, req.body.content.replace(/\r/g, '') );
-				res.end('');
-			}
+		var out = [];
+		try {
+			new WikitextSerializer({ env: env }).serializeDOM(
+				doc.body,
+				function( chunk ) {
+					out.push( chunk );
+				},
+				function() {
+					res.setHeader( 'Content-Type', 'text/x-mediawiki; charset=UTF-8' );
+					res.setHeader( 'X-Parsoid-Performance', env.getPerformanceHeader() );
+					res.end( out.join('') );
+				}
 			);
+		} catch ( e ) {
+			env.errCB( e );
+		}
 	};
 
 	getParserServiceEnv( res, parsoidConfig.defaultWiki, req.params[0], cb, req );
-} );
+}
 
 // Form-based wikitext -> HTML DOM interface for manual testing
 app.get(/\/_wikitext\/(.*)/, function ( req, res ) {
@@ -475,33 +488,38 @@
 	getParserServiceEnv( res, parsoidConfig.defaultWiki, req.params[0], cb, req );
 } );
 
-app.post(/\/_wikitext\/(.*)/, function ( req, res ) {
+app.post( /\/_wikitext\/(.*)/, wt2html );
+app.post( /\/_wt2html\/(.*)/, wt2html );
+
+function wt2html( req, res ) {
 	var cb = function ( env ) {
-		res.setHeader('Content-Type', 'text/html; charset=UTF-8');
-		var parser = Util.getParserPipeline(env, 'text/x-mediawiki/full'),
-			src = req.body.content.replace(/\r/g, '');
-		parser.on('document', function ( document ) {
-			if (req.body.format==='html') {
-				res.write(DU.serializeNode(document));
-			} else {
-				res.write('<pre style="white-space: pre-wrap; white-space: -moz-pre-wrap; white-space: -pre-wrap; white-space: -o-pre-wrap; word-wrap: break-word;">');
-				res.write(htmlSpecialChars(DU.serializeNode(document.body)));
-				res.write('</pre>');
-				res.write('<hr/>');
-				res.write(document.body.innerHTML);
-				res.write('<hr style="clear:both;"/>Your wikitext:');
-				textarea( res, src );
-			}
-			res.end('');
-		});
-		if (env.conf.parsoid.allowCORS) {
+		var src = req.body.content.replace( /\r/g, '' );
+		console.log( 'starting parsing of ' + src );
+
+		if ( env.conf.parsoid.allowCORS ) {
 			// allow cross-domain requests (CORS) so that parsoid service
 			// can be used by third-party sites
 			res.setHeader('Access-Control-Allow-Origin',
 				env.conf.parsoid.allowCORS);
 		}
+
+		var parser = Util.getParserPipeline( env, 'text/x-mediawiki/full' );
+		parser.on('document', function ( document ) {
+			var out;
+			if ( req.body.format === 'html' ) {
+				out = DU.serializeNode( document );
+			} else {
+				out = DU.serializeNode( document.body );
+			}
+
+			res.setHeader( 'Content-Type', 'text/html; charset=UTF-8' );
+			res.setHeader( 'X-Parsoid-Performance', env.getPerformanceHeader() );
+			res.end( out );
+
+			console.warn( "completed parsing of " + src + " in " + env.performance.duration + " ms" );
+		});
+
 		try {
-			console.log('starting parsing of ' + req.params[0]);
 			// FIXME: This does not handle includes or templates correctly
 			env.setPageSrcInfo( src );
 			parser.processToplevelDoc( src );
@@ -513,7 +531,7 @@
 	};
 
 	getParserServiceEnv( res, parsoidConfig.defaultWiki, req.params[0], cb, req );
-} );
+}
 
 // Round-trip article testing
 app.get( new RegExp('/_rt/(' + getInterwikiRE() + ')/(.*)'), function(req, res) {

-- 
To view, visit https://gerrit.wikimedia.org/r/91321
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I7ce70b98be802ba0894826629b1a21a083a4fde3
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/extensions/Parsoid
Gerrit-Branch: master
Gerrit-Owner: Arlolra <abrea...@wikimedia.org>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits