Cscott has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/61799


Change subject: WIP redirects (bug 45808).
......................................................................

WIP redirects (bug 45808).

Change-Id: I1bd36f32a5e46b90261895e5499a0308875e5e05
---
M js/lib/mediawiki.WikiConfig.js
M js/lib/pegTokenizer.pegjs.txt
2 files changed, 66 insertions(+), 3 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/Parsoid 
refs/changes/99/61799/1

diff --git a/js/lib/mediawiki.WikiConfig.js b/js/lib/mediawiki.WikiConfig.js
index b0fdba8..65dd518 100644
--- a/js/lib/mediawiki.WikiConfig.js
+++ b/js/lib/mediawiki.WikiConfig.js
@@ -6,6 +6,12 @@
        Util = require( './mediawiki.Util.js' ).Util,
        request = require( 'request' );
 
+// escape 'special' characters in a string, returning regexp source text
+// which matches the original string exactly
+var re_escape = function(s) {
+       return s.replace(/[\^\\$*+?.()|{}\[\]]/g, '\\$&');
+};
+
 /**
  * @class
  *
@@ -105,6 +111,11 @@
                        conf.magicWords[alias] = mw.name;
                        conf.mwAliases[mw.name].push( alias );
                }
+               conf.mwRegexps[mw.name] =
+                       new RegExp( '^(' +
+                                               
conf.mwAliases[mw.name].map(re_escape).join('|') +
+                                               ')$',
+                                               mw['case-sensitive'] === '' ? 
'' : 'i' );
        }
 
        if ( mws.length > 0 ) {
@@ -274,6 +285,11 @@
        mwAliases: null,
 
        /**
+        * @property {Object/null} mwRegexps RegExps matching aliases, indexed by 
canonical magic word name.
+        */
+       mwRegexps: null,
+
+       /**
         * @property {Object/null} specialPages Special page names on this 
wiki, indexed by aliases.
         */
        specialPages: null,
@@ -318,6 +334,7 @@
                this.namespaceIds = {};
                this.magicWords = {};
                this.mwAliases = {};
+               this.mwRegexps = {};
                this.specialPages = {};
                this.extensionTags = {};
                this.interpolatedList = [];
@@ -336,8 +353,21 @@
         * @param {string} alias
         * @returns {string}
         */
-       getMagicWord: function ( alias ) {
+       getMagicWordIdFromAlias: function ( alias ) {
                return this.magicWords[alias] || null;
+       },
+
+       /**
+        * @method
+        *
+        * Get a regexp matching a localized magic word, given its id.
+        *
+        * @param {string} id
+        * @return {RegExp}
+        */
+       getMagicWordMatcher: function ( id ) {
+               // if 'id' is not found, return a regexp which will never match.
+               return this.mwRegexps[id] || /[]/;
        },
 
        /**
@@ -369,7 +399,7 @@
                                if ( alias === null ) {
                                        return null;
                                }
-                               canonical = this.getMagicWord( alias );
+                               canonical = this.getMagicWordIdFromAlias( alias 
);
                                if ( canonical !== null ) {
                                        return { k: canonical, v: value, a: 
alias };
                                }
diff --git a/js/lib/pegTokenizer.pegjs.txt b/js/lib/pegTokenizer.pegjs.txt
index d6f8ddc..475b932 100644
--- a/js/lib/pegTokenizer.pegjs.txt
+++ b/js/lib/pegTokenizer.pegjs.txt
@@ -315,6 +315,11 @@
     // cache the input length
     var inputLength = input.length;
 
+    // pseudo-production that matches at start of input
+    var isSOF = function (pos) {
+        return pos === 0;
+    };
+
     // pseudo-production that matches at end of input
     var isEOF = function (pos) {
         return pos === inputLength;
@@ -451,6 +456,25 @@
   }
 
 /*
+ * Redirects can only occur as the first thing in a document.  See
+ * WikitextContent::getRedirectTarget().
+ * The matcher uses a regexp with \s, which is the php_space production.
+ */
+redirect
+  = sof redirect_word php_space* ( ":" php_space* )? wl:wikilink {
+    // XXX do something here
+    return null;
+}
+
+/* The 'redirect' magic word.
+ * The leading whitespace allowed is due to the PHP trim() function.
+ */
+redirect_word = [ \t\n\r\0\x0b]* rw:(php_nonspace+) & {
+    rw = rw.join('');
+    return pegArgs.env.conf.wiki.getMagicWordMatcher( 'redirect' ).test( rw );
+}
+
+/*
  * This production exists to support tokenizing the document in chunks.
  * It stops tokenization after each block and yields to the node.js
  * event-loop to schedule other pending event handlers.
@@ -538,7 +562,8 @@
  * The actual contents of each block.
  */
 block
-  = block_lines
+  = redirect // has to be first; otherwise gets parsed as a <ol>
+    / block_lines
     / & '<' r:( pre // tag variant can start anywhere
             / comment &eolf
             / nowiki
@@ -2169,6 +2194,12 @@
       }
   }
 
+/* \s in PHP preg_* functions */
+php_space = [ \t\n\r\x0c]
+
+/* \S in PHP preg_* functions */
+php_nonspace = [^ \t\n\r\x0c]
+
 // Extra newlines followed by at least another newline. Usually used to
 // compress surplus newlines into a meta tag, so that they don't trigger
 // paragraphs.
@@ -2329,6 +2360,8 @@
       return [inclTag].concat(inclContentToks);
   }
 
+sof = & { return isSOF(pos); } { return true; }
+
 eof = & { return isEOF(pos); } { return true; }
 
 newline = '\n' / '\r\n'

-- 
To view, visit https://gerrit.wikimedia.org/r/61799
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I1bd36f32a5e46b90261895e5499a0308875e5e05
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/extensions/Parsoid
Gerrit-Branch: master
Gerrit-Owner: Cscott <wikime...@cscott.net>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to