https://www.mediawiki.org/wiki/Special:Code/MediaWiki/113639

Revision: 113639
Author:   gwicke
Date:     2012-03-12 17:31:45 +0000 (Mon, 12 Mar 2012)
Log Message:
-----------
Improved template tokenization. The parser can now template-expand
[[:en:Barack Obama]] without exceeding 1.7GB of memory (which is the Node.js
memory limit).

Modified Paths:
--------------
    trunk/extensions/VisualEditor/modules/parser/ext.core.ParserFunctions.js
    trunk/extensions/VisualEditor/modules/parser/ext.core.TemplateHandler.js
    trunk/extensions/VisualEditor/modules/parser/mediawiki.parser.environment.js
    trunk/extensions/VisualEditor/modules/parser/mediawiki.tokenizer.peg.js
    trunk/extensions/VisualEditor/modules/parser/pegTokenizer.pegjs.txt

Modified: 
trunk/extensions/VisualEditor/modules/parser/ext.core.ParserFunctions.js
===================================================================
--- trunk/extensions/VisualEditor/modules/parser/ext.core.ParserFunctions.js    
2012-03-12 17:27:47 UTC (rev 113638)
+++ trunk/extensions/VisualEditor/modules/parser/ext.core.ParserFunctions.js    
2012-03-12 17:31:45 UTC (rev 113639)
@@ -429,7 +429,7 @@
        return [ "MediaWiki" ];
 };
 ParserFunctions.prototype['pf_anchorencode'] = function ( target, argList, 
argDict ) {
-       return [target];
+       return [ target.trim() ];
 };
 ParserFunctions.prototype['pf_protectionlevel'] = function ( target, argList, 
argDict ) {
        return [''];

Modified: 
trunk/extensions/VisualEditor/modules/parser/ext.core.TemplateHandler.js
===================================================================
--- trunk/extensions/VisualEditor/modules/parser/ext.core.TemplateHandler.js    
2012-03-12 17:27:47 UTC (rev 113638)
+++ trunk/extensions/VisualEditor/modules/parser/ext.core.TemplateHandler.js    
2012-03-12 17:31:45 UTC (rev 113639)
@@ -168,9 +168,9 @@
        var prefix = target.split(':', 1)[0].toLowerCase().trim();
        if ( prefix && 'pf_' + prefix in this.parserFunctions ) {
                var funcArg = target.substr( prefix.length + 1 );
-               this.manager.env.tp( 'func prefix: ', prefix,
-                               ' args=', tplExpandData.expandedArgs,
-                               ' funcArg=', funcArg);
+               this.manager.env.tp( 'func prefix/args: ', prefix,
+                               tplExpandData.expandedArgs,
+                               'funcArg:', funcArg);
                //this.manager.env.dp( 'entering prefix', funcArg, args  );
                res = this.parserFunctions[ 'pf_' + prefix ]( funcArg, 
                                tplExpandData.expandedArgs, args, 
tplExpandData.origToken.attribs );

Modified: 
trunk/extensions/VisualEditor/modules/parser/mediawiki.parser.environment.js
===================================================================
--- 
trunk/extensions/VisualEditor/modules/parser/mediawiki.parser.environment.js    
    2012-03-12 17:27:47 UTC (rev 113638)
+++ 
trunk/extensions/VisualEditor/modules/parser/mediawiki.parser.environment.js    
    2012-03-12 17:31:45 UTC (rev 113639)
@@ -177,8 +177,12 @@
                forceNS = '';
        }
 
+
        name = name.trim().replace(/[\s_]+/g, '_');
 
+       // XXX: strip subst for now..
+       name = name.replace( /^subst:/, '' );
+
        // Implement int: as alias for MediaWiki:
        if ( name.substr( 0, 4 ) === 'int:' ) {
                name = 'MediaWiki:' + name.substr( 4 );

Modified: 
trunk/extensions/VisualEditor/modules/parser/mediawiki.tokenizer.peg.js
===================================================================
--- trunk/extensions/VisualEditor/modules/parser/mediawiki.tokenizer.peg.js     
2012-03-12 17:27:47 UTC (rev 113638)
+++ trunk/extensions/VisualEditor/modules/parser/mediawiki.tokenizer.peg.js     
2012-03-12 17:31:45 UTC (rev 113639)
@@ -119,6 +119,12 @@
                                        counters.tableCellArg
                                  ) 
                                ) || null;
+               case '{':
+                       return (
+                                       counters.pipe ||
+                                       counters.template                       
           
+                               ) && input.substr( pos, 5 ) === '{{!}}' 
+                               || null;
                case "!":
                        return counters.table && input[pos + 1] === "!" ||
                                null;

Modified: trunk/extensions/VisualEditor/modules/parser/pegTokenizer.pegjs.txt
===================================================================
--- trunk/extensions/VisualEditor/modules/parser/pegTokenizer.pegjs.txt 
2012-03-12 17:27:47 UTC (rev 113638)
+++ trunk/extensions/VisualEditor/modules/parser/pegTokenizer.pegjs.txt 
2012-03-12 17:31:45 UTC (rev 113639)
@@ -505,7 +505,7 @@
  * in nested inline productions.
  */
 inline_breaks
-  = & [=|!}:\r\n\]<]
+  = & [=|!}{:\r\n\]<]
     & { // Important hack: disable caching for this production, as the default
         // cache key does not take into account flag states!
         cacheKey = ''; 
@@ -515,7 +515,7 @@
 
 inline
   = c:(urltext / (! inline_breaks (inline_element / . )))+ {
-      //console.warn('inline out:' + pp(out));
+      //console.warn('inline out:' + pp(c));
       return flatten_stringlist( c );
 }
 
@@ -708,12 +708,19 @@
  * 6: {{{{{{·}}}}}} → {{{·{{{·}}}·}}}
  * 7: {{{{{{{·}}}}}}} → {·{{{·{{{·}}}·}}}·}
  */
-tplarg_or_template = & '{{{{{' template / tplarg / template
+tplarg_or_template 
+    = 
+    ! '{{{{{{{' (
+        &'{{{{{{' tplarg
+        / & '{{{{{' template 
+        / tplarg 
+        / template
+    )
 
 template
   = "{{" (newline / space)* target:template_param_text
-    params:(( newline / space )* "|"
-                r:( &"|" { return new KV( '', '') } // empty argument
+    params:(( newline / space )* pipe
+                r:( &pipe { return new KV( '', '') } // empty argument
                     / ( newline / space )* p:template_param { return p } 
                   ) { return r } 
             )* 
@@ -734,8 +741,8 @@
 
 tplarg 
   = "{{{" 
-    name:template_param_text 
-    params:( ( space / newline )* "|" ( space / newline )* p:template_param { 
return p })* 
+    name:template_param_text?
+    params:( ( space / newline )* pipe ( space / newline )* p:template_param { 
return p })* 
     ( space / newline )* 
     "}}}" {
       name = flatten( name );
@@ -815,7 +822,7 @@
     target:wikilink_preprocessor_text
     lcontent:( 
                & { return posStack.push('lcontent' , pos); }
-               lcs:( "|" lt:link_text { return new KV( '', lt ); } )+ { 
+               lcs:( pipe lt:link_text { return new KV( '', lt ); } )+ { 
                    return { pos: posStack.pop('lcontent' , pos), content: lcs 
};
                }
             
@@ -920,7 +927,7 @@
 / & { return stops.dec( 'pipe' ); }
 
 img_option 
-  = "|" space*
+  = pipe space*
   o:(  
       img_attribute
     / img_format
@@ -1441,7 +1448,7 @@
 
 table_cell_args
   = & { return stops.inc('tableCellArg'); }
-    as:generic_attribute* space* "|" !"|" { 
+    as:generic_attribute* space* pipe !pipe { 
         stops.dec('tableCellArg');
         return as;
     }
@@ -1715,35 +1722,39 @@
 // Returns either a list of tokens, or a plain string (if nothing is to be
 // processed).
 preprocessor_text 
-  = r:( t:[^<~[{\n\r\t|!\]} &=]+ { return t.join(''); }
-  / directive
-  / !inline_breaks text_char )+ {
+  = r:( t:[^<~[{\n\r\t|!\]}{ &=]+ { return t.join(''); }
+  / !inline_breaks (
+      directive
+    / text_char )
+  )+ {
       return flatten ( r );
   }
 
 spaceless_preprocessor_text
-  = r:( t:[^'<~[{\n\r|!\]}\t &=]+ { return t.join(''); }
-  / directive
-  / !inline_breaks !' ' text_char )+ {
+  = r:( t:[^'<~[{\n\r|!\]}{\t &=]+ { return t.join(''); }
+  / !inline_breaks (
+      directive
+    / !' ' text_char )
+  )+ {
       return flatten_string ( r );
   }
 
 
 wikilink_preprocessor_text 
-  = r:( t:[^<~[{\n\r\t|!\]} &=]+ { return t.join(''); }
-  /// urlencoded_char
-  / directive
-  / !inline_breaks !"|" !"]]" text_char )+ {
+  = r:( t:[^<~[{\n\r\t|!\]}{ &=]+ { return t.join(''); }
+        /// urlencoded_char
+        / !inline_breaks ( directive / !"]]" text_char ) 
+    )+ {
       return flatten_stringlist ( r );
   }
 
 extlink_preprocessor_text
   // added special separator character class inline: separates url from
   // description / text
-  = r:( t:[^'<~[{\n\r|!\]}\t&="' 
\u00A0\u1680\u180E\u2000-\u200A\u202F\u205F\u3000]+ { return t.join(''); }
-  / directive
+  = r:( t:[^'<~[{\n\r|!\]}{\t&="' 
\u00A0\u1680\u180E\u2000-\u200A\u202F\u205F\u3000]+ { return t.join(''); }
+  / !inline_breaks ( directive / no_punctuation_char )
   /// urlencoded_char
-  / !inline_breaks no_punctuation_char
+  // !inline_breaks no_punctuation_char
   / s:[.:,] !(space / eolf) { return s } 
   / [&%] )+ {
       return flatten_string ( r );
@@ -1752,21 +1763,32 @@
 // Attribute values with preprocessor support
 attribute_preprocessor_text
   = r:( ts:(!inline_breaks t:[^=<>{\n\r&'"\t ] {return t})+ { return 
ts.join(''); }
-  / directive
-  / !inline_breaks [&%] )+ {
+  / !inline_breaks ( 
+          directive
+        / !inline_breaks [&%] 
+    )
+  )+ 
+  {
       //console.warn('prep');
       return flatten_string ( r );
   }
+
 attribute_preprocessor_text_single
   = r:( t:[^{&']+ { return t.join(''); }
-  / directive
-  / !inline_breaks [{&] )* {
+  / !inline_breaks ( 
+      directive
+    / [{&] )
+  )* 
+  {
       return flatten_string ( r );
   }
 attribute_preprocessor_text_double
   = r:( t:[^{&"]+ { return t.join(''); }
-  / directive
-  / !inline_breaks [{&] )* {
+  / !inline_breaks ( 
+      directive
+    / [{&] )
+  )* 
+  {
       //console.warn( 'double:' + pp(r) );
       return flatten_string ( r );
   }
@@ -1774,21 +1796,28 @@
 // Variants with the entire attribute on a single line
 attribute_preprocessor_text_line
   = r:( ts:(!inline_breaks t:[^=<>{\n\r&'"\t ] {return t})+ { return 
ts.join(''); }
-  / directive
-  / !inline_breaks !'\n' [&%] )+ {
+  / !inline_breaks (
+      directive
+    / !'\n' [&%] )
+  )+ {
       //console.warn('prep');
       return flatten_string ( r );
   }
+
 attribute_preprocessor_text_single_line
   = r:( t:[^{&']+ { return t.join(''); }
-  / directive
-  / !inline_breaks !'\n' [{&] )* {
+  / !inline_breaks ( 
+      directive
+    / !'\n' [{&] )
+  )* {
       return flatten_string ( r );
   }
 attribute_preprocessor_text_double_line
   = r:( t:[^{&"]+ { return t.join(''); }
-  / directive
-  / !inline_breaks !'\n' [{&] )* {
+  / !inline_breaks (
+      directive
+    / !'\n' [{&] )
+  )* {
       //console.warn( 'double:' + pp(r) );
       return flatten_string ( r );
   }


_______________________________________________
MediaWiki-CVS mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs

Reply via email to