Cscott has uploaded a new change for review. https://gerrit.wikimedia.org/r/61799
Change subject: WIP redirects (bug 45808). ...................................................................... WIP redirects (bug 45808). Change-Id: I1bd36f32a5e46b90261895e5499a0308875e5e05 --- M js/lib/mediawiki.WikiConfig.js M js/lib/pegTokenizer.pegjs.txt 2 files changed, 66 insertions(+), 3 deletions(-) git pull ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/Parsoid refs/changes/99/61799/1 diff --git a/js/lib/mediawiki.WikiConfig.js b/js/lib/mediawiki.WikiConfig.js index b0fdba8..65dd518 100644 --- a/js/lib/mediawiki.WikiConfig.js +++ b/js/lib/mediawiki.WikiConfig.js @@ -6,6 +6,12 @@ Util = require( './mediawiki.Util.js' ).Util, request = require( 'request' ); +// escape 'special' characters in a regexp, returning a regexp which matches +// the string exactly +var re_escape = function(s) { + return s.replace(/[\^\\$*+?.()|{}\[\]]/g, '\\$&'); +}; + /** * @class * @@ -105,6 +111,11 @@ conf.magicWords[alias] = mw.name; conf.mwAliases[mw.name].push( alias ); } + conf.mwRegexps[mw.name] = + new RegExp( '^(' + + conf.mwAliases[mw.name].map(re_escape).join('|') + + ')$', + mw['case-sensitive'] === '' ? '' : 'i' ); } if ( mws.length > 0 ) { @@ -274,6 +285,11 @@ mwAliases: null, /** + * @property {Object/null} mwRegexp RegExp matching aliases, indexed by canonical magic word name. + */ + mwRegexps: null, + + /** * @property {Object/null} specialPages Special page names on this wiki, indexed by aliases. */ specialPages: null, @@ -318,6 +334,7 @@ this.namespaceIds = {}; this.magicWords = {}; this.mwAliases = {}; + this.mwRegexps = {}; this.specialPages = {}; this.extensionTags = {}; this.interpolatedList = []; @@ -336,8 +353,21 @@ * @param {string} alias * @returns {string} */ - getMagicWord: function ( alias ) { + getMagicWordIdFromAlias: function ( alias ) { return this.magicWords[alias] || null; + }, + + /** + * @method + * + * Get a regexp matching a localized magic word, given its id. + * + * @param {string} id + * @return {RegExp} + */ + getMagicWordMatcher: function ( id ) { + // if 'id' is not found, return a regexp which will never match. + return this.mwRegexps[id] || /[]/; }, /** @@ -369,7 +399,7 @@ if ( alias === null ) { return null; } - canonical = this.getMagicWord( alias ); + canonical = this.getMagicWordIdFromAlias( alias ); if ( canonical !== null ) { return { k: canonical, v: value, a: alias }; } diff --git a/js/lib/pegTokenizer.pegjs.txt b/js/lib/pegTokenizer.pegjs.txt index d6f8ddc..475b932 100644 --- a/js/lib/pegTokenizer.pegjs.txt +++ b/js/lib/pegTokenizer.pegjs.txt @@ -315,6 +315,11 @@ // cache the input length var inputLength = input.length; + // pseudo-production that matches at start of input + var isSOF = function (pos) { + return pos === 0; + }; + // pseudo-production that matches at end of input var isEOF = function (pos) { return pos === inputLength; @@ -451,6 +456,25 @@ } /* + * Redirects can only occur as the first thing in a document. See + * WikitextContent::getRedirectTarget() + * The matcher uses a regexp with \s, which is the php_space production. + */ +redirect + = sof redirect_word php_space* ( ":" php_space* )? wl:wikilink { + // XXX do something here + return null; +} + +/* The 'redirect' magic word. + * The leading whitespace allowed is due to the PHP trim() function. + */ +redirect_word = [ \t\n\r\0\x0b]* rw:(php_nonspace+) & { + rw = rw.join(''); + return pegArgs.env.conf.wiki.getMagicWordMatcher( 'redirect' ).test( rw ); +} + +/* * This production exists to support tokenizing the document in chunks. * It stops tokenization after each block and yields to the node.js * event-loop to schedule other pending event handlers. @@ -538,7 +562,8 @@ * The actual contents of each block. */ block - = block_lines + = redirect // has to be first; otherwise gets parsed as a <ol> + / block_lines / & '<' r:( pre // tag variant can start anywhere / comment &eolf / nowiki @@ -2169,6 +2194,12 @@ } } +/* \s in PHP preg_* functions */ +php_space = [ \t\n\r\x0c] + +/* \S in PHP preg_* functions */ +php_nonspace = [^ \t\n\r\x0c] + // Extra newlines followed by at least another newline. Usually used to // compress surplus newlines into a meta tag, so that they don't trigger // paragraphs. @@ -2329,6 +2360,8 @@ return [inclTag].concat(inclContentToks); } +sof = & { return isSOF(pos); } { return true; } + eof = & { return isEOF(pos); } { return true; } newline = '\n' / '\r\n' -- To view, visit https://gerrit.wikimedia.org/r/61799 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: newchange Gerrit-Change-Id: I1bd36f32a5e46b90261895e5499a0308875e5e05 Gerrit-PatchSet: 1 Gerrit-Project: mediawiki/extensions/Parsoid Gerrit-Branch: master Gerrit-Owner: Cscott <wikime...@cscott.net> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits