Anomie has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/97317


Change subject: Allow 'noprotocol' option in captcha-addurl-whitelist
......................................................................

Allow 'noprotocol' option in captcha-addurl-whitelist

ConfirmEdit triggers a captcha when users without the appropriate
user right add new external links. But sometimes there are links that
these users should be able to add without a captcha, e.g. other wikis in
the same farm, so MediaWiki:Captcha-addurl-whitelist allows wiki admins
to whitelist links matching regular expressions.

However, these whitelist entries are automatically prefixed with
"^https?:\/\/+[a-z0-9_\-.]*". While this is usually what is wanted, it
prevents wiki admins from whitelisting other types of links such as tel:
or urn:. Following the example of TitleBlacklist, this change allows each
whitelist line to end with options enclosed in <> and separated by "|",
and adds a 'noprotocol' option that causes that line to be prefixed with
only "^" instead.

Bug: 56485
Change-Id: Ia0d484f7a4670df2c7121d1284e4fcf969e818c3
---
M Captcha.php
1 file changed, 62 insertions(+), 25 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/ConfirmEdit refs/changes/17/97317/1

diff --git a/Captcha.php b/Captcha.php
index 3be6d91..b15de61 100644
--- a/Captcha.php
+++ b/Captcha.php
@@ -355,26 +355,38 @@
         */
        function filterLink( $url ) {
                global $wgCaptchaWhitelist;
-               $source = wfMessage( 'captcha-addurl-whitelist' 
)->inContentLanguage()->text();
+               static $regexes = null;
 
-               $whitelist = wfMessage( 'captcha-addurl-whitelist', $source 
)->isDisabled()
-                       ? false
-                       : $this->buildRegexes( explode( "\n", $source ) );
+               if ( $regexes === null ) {
+                       $source = wfMessage( 'captcha-addurl-whitelist' 
)->inContentLanguage();
 
-               $cwl = $wgCaptchaWhitelist !== false ? preg_match( 
$wgCaptchaWhitelist, $url ) : false;
-               $wl  = $whitelist          !== false ? preg_match( $whitelist, 
$url )          : false;
+                       $regexes = $source->isDisabled()
+                               ? array()
+                               : $this->buildRegexes( explode( "\n", 
$source->plain() ) );
 
-               return !( $cwl || $wl );
+                       if ( $wgCaptchaWhitelist !== false ) {
+                               array_unshift( $regexes, $wgCaptchaWhitelist );
+                       }
+               }
+
+               foreach ( $regexes as $regex ) {
+                       if ( preg_match( $regex, $url ) ) {
+                               return false;
+                       }
+               }
+
+               return true;
        }
 
        /**
         * Build regex from whitelist
         * @param $lines string from [[MediaWiki:Captcha-addurl-whitelist]]
-        * @return string Regex or bool false if whitelist is empty
+        * @return array Regexes
         * @access private
         */
        function buildRegexes( $lines ) {
                # Code duplicated from the SpamBlacklist extension (r19197)
+               # and later modified.
 
                # Strip comments and whitespace, then remove blanks
                $lines = array_filter( array_map( 'trim', preg_replace( 
'/#.*$/', '', $lines ) ) );
@@ -382,34 +394,59 @@
                # No lines, don't make a regex which will match everything
                if ( count( $lines ) == 0 ) {
                        wfDebug( "No lines\n" );
-                       return false;
+                       return array();
                } else {
                        # Make regex
                        # It's faster using the S modifier even though it will 
usually only be run once
                        // $regex = 'http://+[a-z0-9_\-.]*(' . implode( '|', 
$lines ) . ')';
                        // return '/' . str_replace( '/', '\/', 
preg_replace('|\\\*/|', '/', $regex) ) . '/Si';
-                       $regexes = '';
-                       $regexStart = '/^https?:\/\/+[a-z0-9_\-.]*(';
-                       $regexEnd = ')/Si';
+                       $regexes = array();
+                       $regexStart = array(
+                               'normal' => '/^https?:\/\/+[a-z0-9_\-.]*(?:',
+                               'noprotocol' => '/^(?:',
+                       );
+                       $regexEnd = array(
+                               'normal' => ')/Si',
+                               'noprotocol' => ')/Si',
+                       );
                        $regexMax = 4096;
-                       $build = false;
+                       $build = array();
                        foreach ( $lines as $line ) {
+                               # Extract flags from the line
+                               $options = array();
+                               if ( preg_match( '/^(.*?)\s*<([^<>]*)>$/', 
$line, $matches ) ) {
+                                       if ( $matches[1] === '' ) {
+                                               wfDebug( "Line with empty 
regex\n" );
+                                               continue;
+                                       }
+                                       $line = $matches[1];
+                                       $opts = preg_split( '/\s*\|\s*/', trim( 
$matches[2] ) );
+                                       foreach ( $opts as $opt ) {
+                                               $opt = strtolower( $opt );
+                                               if ( $opt == 'noprotocol' ) {
+                                                       $options['noprotocol'] 
= true;
+                                               }
+                                       }
+                               }
+
+                               $key = isset( $options['noprotocol'] ) ? 
'noprotocol' : 'normal';
+
                                // FIXME: not very robust size check, but 
should work. :)
-                               if ( $build === false ) {
-                                       $build = $line;
-                               } elseif ( strlen( $build ) + strlen( $line ) > 
$regexMax ) {
-                                       $regexes .= $regexStart .
-                                               str_replace( '/', '\/', 
preg_replace( '|\\\*/|', '/', $build ) ) .
-                                               $regexEnd;
-                                       $build = $line;
+                               if ( !isset( $build[$key] ) ) {
+                                       $build[$key] = $line;
+                               } elseif ( strlen( $build[$key] ) + strlen( 
$line ) > $regexMax ) {
+                                       $regexes[] = $regexStart[$key] .
+                                               str_replace( '/', '\/', 
preg_replace( '|\\\*/|', '/', $build[$key] ) ) .
+                                               $regexEnd[$key];
+                                       $build[$key] = $line;
                                } else {
-                                       $build .= '|' . $line;
+                                       $build[$key] .= '|' . $line;
                                }
                        }
-                       if ( $build !== false ) {
-                               $regexes .= $regexStart .
-                                       str_replace( '/', '\/', preg_replace( 
'|\\\*/|', '/', $build ) ) .
-                                       $regexEnd;
+                       foreach ( $build as $key => $value ) {
+                               $regexes[] = $regexStart[$key] .
+                                       str_replace( '/', '\/', preg_replace( 
'|\\\*/|', '/', $build[$key] ) ) .
+                                       $regexEnd[$key];
                        }
                        return $regexes;
                }

-- 
To view, visit https://gerrit.wikimedia.org/r/97317
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: Ia0d484f7a4670df2c7121d1284e4fcf969e818c3
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/extensions/ConfirmEdit
Gerrit-Branch: master
Gerrit-Owner: Anomie <[email protected]>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to