parser

gwicke Thu, 08 Mar 2012 01:00:51 -0800

https://www.mediawiki.org/wiki/Special:Code/MediaWiki/113351


Revision: 113351
Author:   gwicke
Date:     2012-03-08 09:00:45 +0000 (Thu, 08 Mar 2012)
Log Message:
-----------
A bit more documentation and naming cleanup in the tokenizer wrapper.

Modified Paths:
--------------
    trunk/extensions/VisualEditor/modules/parser/ext.core.LinkHandler.js
    trunk/extensions/VisualEditor/modules/parser/mediawiki.tokenizer.peg.js

Modified: trunk/extensions/VisualEditor/modules/parser/ext.core.LinkHandler.js
===================================================================
--- trunk/extensions/VisualEditor/modules/parser/ext.core.LinkHandler.js        2012-03-08 08:49:42 UTC (rev 113350)
+++ trunk/extensions/VisualEditor/modules/parser/ext.core.LinkHandler.js        2012-03-08 09:00:45 UTC (rev 113351)
@@ -249,10 +249,10 @@
        //console.warn('extlink href: ' + href );
        //console.warn( 'content: ' + JSON.stringify( content, null, 2 ) );
        // validate the href
-       if ( this.imageParser.parseURL( href ) ) {
+       if ( this.imageParser.tokenizeURL( href ) ) {
                if ( content.length === 1 && 
                                content[0].constructor === String &&
-                               this.imageParser.parseURL( content[0] ) &&
+                               this.imageParser.tokenizeURL( content[0] ) &&
                                this._isImageLink( content[0] ) )
                {
                        var src = content[0];

Modified: trunk/extensions/VisualEditor/modules/parser/mediawiki.tokenizer.peg.js
===================================================================
--- trunk/extensions/VisualEditor/modules/parser/mediawiki.tokenizer.peg.js     2012-03-08 08:49:42 UTC (rev 113350)
+++ trunk/extensions/VisualEditor/modules/parser/mediawiki.tokenizer.peg.js     2012-03-08 09:00:45 UTC (rev 113351)
@@ -32,16 +32,31 @@
  */
 PegTokenizer.prototype.process = function( text ) {
        var out, err;
-       if ( !this.parser ) {
+       if ( !this.tokenizer ) {
+               // Construct a singleton static tokenizer.
                var pegSrcPath = path.join( __dirname, 'pegTokenizer.pegjs.txt' );
                this.src = fs.readFileSync( pegSrcPath, 'utf8' );
-               // Only create a single parser, as parse() is a static method.
-               var parserSource = PEG.buildParser(this.src).toSource();
-               //console.warn( parserSource );
-               parserSource = parserSource.replace( 'parse: function(input, startRule) {',
+               var tokenizerSource = PEG.buildParser(this.src).toSource();
+
+               /* We patch the generated source to assign the arguments array for the
+               * parse function to a function-scoped variable. We use this to pass
+               * in callbacks and other information, which can be used from actions
+               * run when matching a production. In particular, we pass in a
+               * callback called for a chunk of tokens in toplevelblock. Setting this
+               * callback per call to parse() keeps the tokenizer reentrant, so that it
+               * can be reused to expand templates while a main parse is ongoing.
+               * PEG tokenizer construction is very expensive, so having a single
+               * reentrant tokenizer is a big win.
+               *
+               * We could also make modules available to the tokenizer by prepending
+               * requires to the source.
+               */
+               tokenizerSource = tokenizerSource.replace( 'parse: function(input, startRule) {',
                                        'parse: function(input, startRule) { var __parseArgs = arguments;' );
-               //console.warn( parserSource );
-               PegTokenizer.prototype.parser = eval( parserSource );
+               //console.warn( tokenizerSource );
+               PegTokenizer.prototype.tokenizer = eval( tokenizerSource );
+               // alias the parse method
+               this.tokenizer.tokenize = this.tokenizer.parse;
        }
 
        // Some input normalization: force a trailing newline
@@ -52,7 +67,7 @@
        // XXX: Commented out exception handling during development to get
        // reasonable traces.
        //try {
-               this.parser.parse(text, 'start', 
+               this.tokenizer.tokenize(text, 'start', 
                                // callback
                                this.emit.bind( this, 'chunk' ),
                                // inline break test
@@ -68,12 +83,15 @@
 };
 
 PegTokenizer.prototype.processImageOptions = function( text ) {
-               return this.parser.parse(text, 'img_options', null, this );
+               return this.tokenizer.tokenize(text, 'img_options', null, this );
 };
 
-PegTokenizer.prototype.parseURL = function( text ) {
+/**
+ * Tokenize a URL
+ */
+PegTokenizer.prototype.tokenizeURL = function( text ) {
        try {
-               return this.parser.parse(text, 'url', null, this );
+               return this.tokenizer.tokenize(text, 'url', null, this );
        } catch ( e ) {
                return false;
        }


_______________________________________________
MediaWiki-CVS mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs

[MediaWiki-CVS] SVN: [113351] trunk/extensions/VisualEditor/modules/parser

Reply via email to