Arlolra has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/91321


Change subject: Public API for page-less html2wt / wt2html
......................................................................

Public API for page-less html2wt / wt2html

Cleans up the existing POST API and adds two new paths, _wt2html and
_html2wt.

Bug: 55758
Change-Id: I7ce70b98be802ba0894826629b1a21a083a4fde3
---
M js/api/ParserService.js
1 file changed, 55 insertions(+), 37 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/Parsoid refs/changes/21/91321/1

diff --git a/js/api/ParserService.js b/js/api/ParserService.js
index 6a1cfbe..281c974 100644
--- a/js/api/ParserService.js
+++ b/js/api/ParserService.js
@@ -440,28 +440,41 @@
        getParserServiceEnv( res, parsoidConfig.defaultWiki, req.params[0], cb, 
req );
 } );
 
-app.post(/\/_html\/(.*)/, function ( req, res ) {
+app.post( /\/_html\/(.*)/, html2wt );
+app.post( /\/_html2wt\/(.*)/, html2wt );
+
+function html2wt( req, res ) {
        var cb = function ( env ) {
-               res.setHeader('Content-Type', 'text/html; charset=UTF-8');
-               var doc = DU.parseHTML(req.body.content.replace(/\r/g, ''));
-               res.write('<pre style="background-color: #efefef">');
+               var doc;
+               try {
+                       doc = DU.parseHTML( req.body.content.replace( /\r/g, '' 
) );
+               } catch ( e ) {
+                       console.log( 'There was an error in the HTML5 parser.' 
);
+                       env.errCB( e );
+                       return;
+               }
+
                // Always use the non-selective serializer for this mode
-               new WikitextSerializer({env: env}).serializeDOM(
-                       doc.body,
-                       function( c ) {
-                               res.write( htmlSpecialChars( c ) );
-                       },
-                       function() {
-                               res.write('</pre>');
-                               res.write( "<hr>Your HTML DOM:" );
-                               textarea( res, req.body.content.replace(/\r/g, 
'') );
-                               res.end('');
-                       }
+               var out = [];
+               try {
+                       new WikitextSerializer({ env: env }).serializeDOM(
+                               doc.body,
+                               function( chunk ) {
+                                       out.push( chunk );
+                               },
+                               function() {
+                                       res.setHeader( 'Content-Type', 
'text/x-mediawiki; charset=UTF-8' );
+                                       res.setHeader( 'X-Parsoid-Performance', 
env.getPerformanceHeader() );
+                                       res.end( out.join('') );
+                               }
                        );
+               } catch ( e ) {
+                       env.errCB( e );
+               }
        };
 
        getParserServiceEnv( res, parsoidConfig.defaultWiki, req.params[0], cb, 
req );
-} );
+}
 
 // Form-based wikitext -> HTML DOM interface for manual testing
 app.get(/\/_wikitext\/(.*)/, function ( req, res ) {
@@ -475,33 +488,38 @@
        getParserServiceEnv( res, parsoidConfig.defaultWiki, req.params[0], cb, 
req );
 } );
 
-app.post(/\/_wikitext\/(.*)/, function ( req, res ) {
+app.post( /\/_wikitext\/(.*)/, wt2html );
+app.post( /\/_wt2html\/(.*)/, wt2html );
+
+function wt2html( req, res ) {
        var cb = function ( env ) {
-               res.setHeader('Content-Type', 'text/html; charset=UTF-8');
-               var parser = Util.getParserPipeline(env, 
'text/x-mediawiki/full'),
-                       src = req.body.content.replace(/\r/g, '');
-               parser.on('document', function ( document ) {
-                       if (req.body.format==='html') {
-                               res.write(DU.serializeNode(document));
-                       } else {
-                               res.write('<pre style="white-space: pre-wrap; 
white-space: -moz-pre-wrap; white-space: -pre-wrap; white-space: -o-pre-wrap; 
word-wrap: break-word;">');
-                               
res.write(htmlSpecialChars(DU.serializeNode(document.body)));
-                               res.write('</pre>');
-                               res.write('<hr/>');
-                               res.write(document.body.innerHTML);
-                               res.write('<hr style="clear:both;"/>Your 
wikitext:');
-                               textarea( res, src );
-                       }
-                       res.end('');
-               });
-               if (env.conf.parsoid.allowCORS) {
+               var src = req.body.content.replace( /\r/g, '' );
+               console.log( 'starting parsing of ' + src );
+
+               if ( env.conf.parsoid.allowCORS ) {
                        // allow cross-domain requests (CORS) so that parsoid 
service
                        // can be used by third-party sites
                        res.setHeader('Access-Control-Allow-Origin',
                                                  env.conf.parsoid.allowCORS);
                }
+
+               var parser = Util.getParserPipeline( env, 
'text/x-mediawiki/full' );
+               parser.on('document', function ( document ) {
+                       var out;
+                       if ( req.body.format === 'html' ) {
+                               out = DU.serializeNode( document );
+                       } else {
+                               out = DU.serializeNode( document.body );
+                       }
+
+                       res.setHeader( 'Content-Type', 'text/html; 
charset=UTF-8' );
+                       res.setHeader( 'X-Parsoid-Performance', 
env.getPerformanceHeader() );
+                       res.end( out );
+
+                       console.warn( "completed parsing of " + src + " in " + 
env.performance.duration + " ms" );
+               });
+
                try {
-                       console.log('starting parsing of ' + req.params[0]);
                        // FIXME: This does not handle includes or templates 
correctly
                        env.setPageSrcInfo( src );
                        parser.processToplevelDoc( src );
@@ -513,7 +531,7 @@
        };
 
        getParserServiceEnv( res, parsoidConfig.defaultWiki, req.params[0], cb, 
req );
-} );
+}
 
 // Round-trip article testing
 app.get( new RegExp('/_rt/(' + getInterwikiRE() + ')/(.*)'), function(req, 
res) {

-- 
To view, visit https://gerrit.wikimedia.org/r/91321
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I7ce70b98be802ba0894826629b1a21a083a4fde3
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/extensions/Parsoid
Gerrit-Branch: master
Gerrit-Owner: Arlolra <abrea...@wikimedia.org>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to