Cscott has uploaded a new change for review.
https://gerrit.wikimedia.org/r/61799
Change subject: WIP redirects (bug 45808).
......................................................................
WIP redirects (bug 45808).
Change-Id: I1bd36f32a5e46b90261895e5499a0308875e5e05
---
M js/lib/mediawiki.WikiConfig.js
M js/lib/pegTokenizer.pegjs.txt
2 files changed, 66 insertions(+), 3 deletions(-)
git pull ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/Parsoid
refs/changes/99/61799/1
diff --git a/js/lib/mediawiki.WikiConfig.js b/js/lib/mediawiki.WikiConfig.js
index b0fdba8..65dd518 100644
--- a/js/lib/mediawiki.WikiConfig.js
+++ b/js/lib/mediawiki.WikiConfig.js
@@ -6,6 +6,12 @@
Util = require( './mediawiki.Util.js' ).Util,
request = require( 'request' );
+// escape 'special' characters in a string, returning a regexp source string
+// which matches the original string exactly
+var re_escape = function(s) {
+ return s.replace(/[\^\\$*+?.()|{}\[\]]/g, '\\$&');
+};
+
/**
* @class
*
@@ -105,6 +111,11 @@
conf.magicWords[alias] = mw.name;
conf.mwAliases[mw.name].push( alias );
}
+ conf.mwRegexps[mw.name] =
+ new RegExp( '^(' +
+
conf.mwAliases[mw.name].map(re_escape).join('|') +
+ ')$',
+ mw['case-sensitive'] === '' ?
'' : 'i' );
}
if ( mws.length > 0 ) {
@@ -274,6 +285,11 @@
mwAliases: null,
/**
+ * @property {Object/null} mwRegexps RegExp matching aliases, indexed by
canonical magic word name.
+ */
+ mwRegexps: null,
+
+ /**
* @property {Object/null} specialPages Special page names on this
wiki, indexed by aliases.
*/
specialPages: null,
@@ -318,6 +334,7 @@
this.namespaceIds = {};
this.magicWords = {};
this.mwAliases = {};
+ this.mwRegexps = {};
this.specialPages = {};
this.extensionTags = {};
this.interpolatedList = [];
@@ -336,8 +353,21 @@
* @param {string} alias
* @returns {string}
*/
- getMagicWord: function ( alias ) {
+ getMagicWordIdFromAlias: function ( alias ) {
return this.magicWords[alias] || null;
+ },
+
+ /**
+ * @method
+ *
+ * Get a regexp matching a localized magic word, given its id.
+ *
+ * @param {string} id
+ * @return {RegExp}
+ */
+ getMagicWordMatcher: function ( id ) {
+ // if 'id' is not found, return a regexp which will never match.
+ return this.mwRegexps[id] || /[]/;
},
/**
@@ -369,7 +399,7 @@
if ( alias === null ) {
return null;
}
- canonical = this.getMagicWord( alias );
+ canonical = this.getMagicWordIdFromAlias( alias
);
if ( canonical !== null ) {
return { k: canonical, v: value, a:
alias };
}
diff --git a/js/lib/pegTokenizer.pegjs.txt b/js/lib/pegTokenizer.pegjs.txt
index d6f8ddc..475b932 100644
--- a/js/lib/pegTokenizer.pegjs.txt
+++ b/js/lib/pegTokenizer.pegjs.txt
@@ -315,6 +315,11 @@
// cache the input length
var inputLength = input.length;
+ // pseudo-production that matches at start of input
+ var isSOF = function (pos) {
+ return pos === 0;
+ };
+
// pseudo-production that matches at end of input
var isEOF = function (pos) {
return pos === inputLength;
@@ -451,6 +456,25 @@
}
/*
+ * Redirects can only occur as the first thing in a document. See
+ * WikitextContent::getRedirectTarget()
+ * The matcher uses a regexp with \s, which is the php_space production.
+ */
+redirect
+ = sof redirect_word php_space* ( ":" php_space* )? wl:wikilink {
+ // XXX do something here
+ return null;
+}
+
+/* The 'redirect' magic word.
+ * The leading whitespace allowed is due to the PHP trim() function.
+ */
+redirect_word = [ \t\n\r\0\x0b]* rw:(php_nonspace+) & {
+ rw = rw.join('');
+ return pegArgs.env.conf.wiki.getMagicWordMatcher( 'redirect' ).test( rw );
+}
+
+/*
* This production exists to support tokenizing the document in chunks.
* It stops tokenization after each block and yields to the node.js
* event-loop to schedule other pending event handlers.
@@ -538,7 +562,8 @@
* The actual contents of each block.
*/
block
- = block_lines
+ = redirect // has to be first; otherwise gets parsed as a <ol>
+ / block_lines
/ & '<' r:( pre // tag variant can start anywhere
/ comment &eolf
/ nowiki
@@ -2169,6 +2194,12 @@
}
}
+/* \s in PHP preg_* functions */
+php_space = [ \t\n\r\x0c]
+
+/* \S in PHP preg_* functions */
+php_nonspace = [^ \t\n\r\x0c]
+
// Extra newlines followed by at least another newline. Usually used to
// compress surplus newlines into a meta tag, so that they don't trigger
// paragraphs.
@@ -2329,6 +2360,8 @@
return [inclTag].concat(inclContentToks);
}
+sof = & { return isSOF(pos); } { return true; }
+
eof = & { return isEOF(pos); } { return true; }
newline = '\n' / '\r\n'
--
To view, visit https://gerrit.wikimedia.org/r/61799
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: I1bd36f32a5e46b90261895e5499a0308875e5e05
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/extensions/Parsoid
Gerrit-Branch: master
Gerrit-Owner: Cscott <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits