https://www.mediawiki.org/wiki/Special:Code/MediaWiki/113639
Revision: 113639
Author: gwicke
Date: 2012-03-12 17:31:45 +0000 (Mon, 12 Mar 2012)
Log Message:
-----------
Improved template tokenization. The parser can now template-expand
[[:en:Barack Obama]] without exceeding 1.7GB of memory (which is the
Node.js memory limit).
Modified Paths:
--------------
trunk/extensions/VisualEditor/modules/parser/ext.core.ParserFunctions.js
trunk/extensions/VisualEditor/modules/parser/ext.core.TemplateHandler.js
trunk/extensions/VisualEditor/modules/parser/mediawiki.parser.environment.js
trunk/extensions/VisualEditor/modules/parser/mediawiki.tokenizer.peg.js
trunk/extensions/VisualEditor/modules/parser/pegTokenizer.pegjs.txt
Modified:
trunk/extensions/VisualEditor/modules/parser/ext.core.ParserFunctions.js
===================================================================
--- trunk/extensions/VisualEditor/modules/parser/ext.core.ParserFunctions.js
2012-03-12 17:27:47 UTC (rev 113638)
+++ trunk/extensions/VisualEditor/modules/parser/ext.core.ParserFunctions.js
2012-03-12 17:31:45 UTC (rev 113639)
@@ -429,7 +429,7 @@
return [ "MediaWiki" ];
};
ParserFunctions.prototype['pf_anchorencode'] = function ( target, argList,
argDict ) {
- return [target];
+ return [ target.trim() ];
};
ParserFunctions.prototype['pf_protectionlevel'] = function ( target, argList,
argDict ) {
return [''];
Modified:
trunk/extensions/VisualEditor/modules/parser/ext.core.TemplateHandler.js
===================================================================
--- trunk/extensions/VisualEditor/modules/parser/ext.core.TemplateHandler.js
2012-03-12 17:27:47 UTC (rev 113638)
+++ trunk/extensions/VisualEditor/modules/parser/ext.core.TemplateHandler.js
2012-03-12 17:31:45 UTC (rev 113639)
@@ -168,9 +168,9 @@
var prefix = target.split(':', 1)[0].toLowerCase().trim();
if ( prefix && 'pf_' + prefix in this.parserFunctions ) {
var funcArg = target.substr( prefix.length + 1 );
- this.manager.env.tp( 'func prefix: ', prefix,
- ' args=', tplExpandData.expandedArgs,
- ' funcArg=', funcArg);
+ this.manager.env.tp( 'func prefix/args: ', prefix,
+ tplExpandData.expandedArgs,
+ 'funcArg:', funcArg);
//this.manager.env.dp( 'entering prefix', funcArg, args );
res = this.parserFunctions[ 'pf_' + prefix ]( funcArg,
tplExpandData.expandedArgs, args,
tplExpandData.origToken.attribs );
Modified:
trunk/extensions/VisualEditor/modules/parser/mediawiki.parser.environment.js
===================================================================
---
trunk/extensions/VisualEditor/modules/parser/mediawiki.parser.environment.js
2012-03-12 17:27:47 UTC (rev 113638)
+++
trunk/extensions/VisualEditor/modules/parser/mediawiki.parser.environment.js
2012-03-12 17:31:45 UTC (rev 113639)
@@ -177,8 +177,12 @@
forceNS = '';
}
+
name = name.trim().replace(/[\s_]+/g, '_');
+ // XXX: strip subst for now..
+ name = name.replace( /^subst:/, '' );
+
// Implement int: as alias for MediaWiki:
if ( name.substr( 0, 4 ) === 'int:' ) {
name = 'MediaWiki:' + name.substr( 4 );
Modified:
trunk/extensions/VisualEditor/modules/parser/mediawiki.tokenizer.peg.js
===================================================================
--- trunk/extensions/VisualEditor/modules/parser/mediawiki.tokenizer.peg.js
2012-03-12 17:27:47 UTC (rev 113638)
+++ trunk/extensions/VisualEditor/modules/parser/mediawiki.tokenizer.peg.js
2012-03-12 17:31:45 UTC (rev 113639)
@@ -119,6 +119,12 @@
counters.tableCellArg
)
) || null;
+ case '{':
+ return (
+ counters.pipe ||
+ counters.template
+ ) && input.substr( pos, 5 ) === '{{!}}'
+ || null;
case "!":
return counters.table && input[pos + 1] === "!" ||
null;
Modified: trunk/extensions/VisualEditor/modules/parser/pegTokenizer.pegjs.txt
===================================================================
--- trunk/extensions/VisualEditor/modules/parser/pegTokenizer.pegjs.txt
2012-03-12 17:27:47 UTC (rev 113638)
+++ trunk/extensions/VisualEditor/modules/parser/pegTokenizer.pegjs.txt
2012-03-12 17:31:45 UTC (rev 113639)
@@ -505,7 +505,7 @@
* in nested inline productions.
*/
inline_breaks
- = & [=|!}:\r\n\]<]
+ = & [=|!}{:\r\n\]<]
& { // Important hack: disable caching for this production, as the default
// cache key does not take into account flag states!
cacheKey = '';
@@ -515,7 +515,7 @@
inline
= c:(urltext / (! inline_breaks (inline_element / . )))+ {
- //console.warn('inline out:' + pp(out));
+ //console.warn('inline out:' + pp(c));
return flatten_stringlist( c );
}
@@ -708,12 +708,19 @@
* 6: {{{{{{·}}}}}} → {{{·{{{·}}}·}}}
* 7: {{{{{{{·}}}}}}} → {·{{{·{{{·}}}·}}}·}
*/
-tplarg_or_template = & '{{{{{' template / tplarg / template
+tplarg_or_template
+ =
+ ! '{{{{{{{' (
+ &'{{{{{{' tplarg
+ / & '{{{{{' template
+ / tplarg
+ / template
+ )
template
= "{{" (newline / space)* target:template_param_text
- params:(( newline / space )* "|"
- r:( &"|" { return new KV( '', '') } // empty argument
+ params:(( newline / space )* pipe
+ r:( &pipe { return new KV( '', '') } // empty argument
/ ( newline / space )* p:template_param { return p }
) { return r }
)*
@@ -734,8 +741,8 @@
tplarg
= "{{{"
- name:template_param_text
- params:( ( space / newline )* "|" ( space / newline )* p:template_param {
return p })*
+ name:template_param_text?
+ params:( ( space / newline )* pipe ( space / newline )* p:template_param {
return p })*
( space / newline )*
"}}}" {
name = flatten( name );
@@ -815,7 +822,7 @@
target:wikilink_preprocessor_text
lcontent:(
& { return posStack.push('lcontent' , pos); }
- lcs:( "|" lt:link_text { return new KV( '', lt ); } )+ {
+ lcs:( pipe lt:link_text { return new KV( '', lt ); } )+ {
return { pos: posStack.pop('lcontent' , pos), content: lcs
};
}
@@ -920,7 +927,7 @@
/ & { return stops.dec( 'pipe' ); }
img_option
- = "|" space*
+ = pipe space*
o:(
img_attribute
/ img_format
@@ -1441,7 +1448,7 @@
table_cell_args
= & { return stops.inc('tableCellArg'); }
- as:generic_attribute* space* "|" !"|" {
+ as:generic_attribute* space* pipe !pipe {
stops.dec('tableCellArg');
return as;
}
@@ -1715,35 +1722,39 @@
// Returns either a list of tokens, or a plain string (if nothing is to be
// processed).
preprocessor_text
- = r:( t:[^<~[{\n\r\t|!\]} &=]+ { return t.join(''); }
- / directive
- / !inline_breaks text_char )+ {
+ = r:( t:[^<~[{\n\r\t|!\]}{ &=]+ { return t.join(''); }
+ / !inline_breaks (
+ directive
+ / text_char )
+ )+ {
return flatten ( r );
}
spaceless_preprocessor_text
- = r:( t:[^'<~[{\n\r|!\]}\t &=]+ { return t.join(''); }
- / directive
- / !inline_breaks !' ' text_char )+ {
+ = r:( t:[^'<~[{\n\r|!\]}{\t &=]+ { return t.join(''); }
+ / !inline_breaks (
+ directive
+ / !' ' text_char )
+ )+ {
return flatten_string ( r );
}
wikilink_preprocessor_text
- = r:( t:[^<~[{\n\r\t|!\]} &=]+ { return t.join(''); }
- /// urlencoded_char
- / directive
- / !inline_breaks !"|" !"]]" text_char )+ {
+ = r:( t:[^<~[{\n\r\t|!\]}{ &=]+ { return t.join(''); }
+ /// urlencoded_char
+ / !inline_breaks ( directive / !"]]" text_char )
+ )+ {
return flatten_stringlist ( r );
}
extlink_preprocessor_text
// added special separator character class inline: separates url from
// description / text
- = r:( t:[^'<~[{\n\r|!\]}\t&="'
\u00A0\u1680\u180E\u2000-\u200A\u202F\u205F\u3000]+ { return t.join(''); }
- / directive
+ = r:( t:[^'<~[{\n\r|!\]}{\t&="'
\u00A0\u1680\u180E\u2000-\u200A\u202F\u205F\u3000]+ { return t.join(''); }
+ / !inline_breaks ( directive / no_punctuation_char )
/// urlencoded_char
- / !inline_breaks no_punctuation_char
+ // !inline_breaks no_punctuation_char
/ s:[.:,] !(space / eolf) { return s }
/ [&%] )+ {
return flatten_string ( r );
@@ -1752,21 +1763,32 @@
// Attribute values with preprocessor support
attribute_preprocessor_text
= r:( ts:(!inline_breaks t:[^=<>{\n\r&'"\t ] {return t})+ { return
ts.join(''); }
- / directive
- / !inline_breaks [&%] )+ {
+ / !inline_breaks (
+ directive
+ / !inline_breaks [&%]
+ )
+ )+
+ {
//console.warn('prep');
return flatten_string ( r );
}
+
attribute_preprocessor_text_single
= r:( t:[^{&']+ { return t.join(''); }
- / directive
- / !inline_breaks [{&] )* {
+ / !inline_breaks (
+ directive
+ / [{&] )
+ )*
+ {
return flatten_string ( r );
}
attribute_preprocessor_text_double
= r:( t:[^{&"]+ { return t.join(''); }
- / directive
- / !inline_breaks [{&] )* {
+ / !inline_breaks (
+ directive
+ / [{&] )
+ )*
+ {
//console.warn( 'double:' + pp(r) );
return flatten_string ( r );
}
@@ -1774,21 +1796,28 @@
// Variants with the entire attribute on a single line
attribute_preprocessor_text_line
= r:( ts:(!inline_breaks t:[^=<>{\n\r&'"\t ] {return t})+ { return
ts.join(''); }
- / directive
- / !inline_breaks !'\n' [&%] )+ {
+ / !inline_breaks (
+ directive
+ / !'\n' [&%] )
+ )+ {
//console.warn('prep');
return flatten_string ( r );
}
+
attribute_preprocessor_text_single_line
= r:( t:[^{&']+ { return t.join(''); }
- / directive
- / !inline_breaks !'\n' [{&] )* {
+ / !inline_breaks (
+ directive
+ / !'\n' [{&] )
+ )* {
return flatten_string ( r );
}
attribute_preprocessor_text_double_line
= r:( t:[^{&"]+ { return t.join(''); }
- / directive
- / !inline_breaks !'\n' [{&] )* {
+ / !inline_breaks (
+ directive
+ / !'\n' [{&] )
+ )* {
//console.warn( 'double:' + pp(r) );
return flatten_string ( r );
}
_______________________________________________
MediaWiki-CVS mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs