https://www.mediawiki.org/wiki/Special:Code/MediaWiki/114156

Revision: 114156
Author:   van-de-bugger
Date:     2012-03-19 17:15:33 +0000 (Mon, 19 Mar 2012)
Log Message:
-----------
Fix for bug 12500, almost reviewed there.

Modified Paths:
--------------
    trunk/extensions/AntiSpoof/AntiSpoof.i18n.php
    trunk/extensions/AntiSpoof/AntiSpoof_body.php

Modified: trunk/extensions/AntiSpoof/AntiSpoof.i18n.php
===================================================================
--- trunk/extensions/AntiSpoof/AntiSpoof.i18n.php       2012-03-19 17:10:15 UTC 
(rev 114155)
+++ trunk/extensions/AntiSpoof/AntiSpoof.i18n.php       2012-03-19 17:15:33 UTC 
(rev 114156)
@@ -15,11 +15,14 @@
        'antispoof-conflict-bottom' => 'Please choose another name.',
        'antispoof-name-illegal'  => 'The name "$1" is not allowed to prevent 
confusing or spoofed usernames: $2.
 Please choose another name.',
+       'antispoof-bad-char'      => '"$1" ($2)',
+       'antispoof-bad-char-non-printable' => '$1',
        'antispoof-badtype'       => 'Bad data type',
        'antispoof-empty'         => 'Empty string',
-       'antispoof-blacklisted'   => 'Contains blacklisted character',
-       'antispoof-combining'     => 'Begins with combining mark',
-       'antispoof-unassigned'    => 'Contains unassigned or deprecated 
character',
+       'antispoof-blacklisted'   => 'Contains blacklisted character $1',
+       'antispoof-combining'     => 'Begins with combining mark $1',
+       'antispoof-unassigned'    => 'Contains unassigned character $1',
+       'antispoof-deprecated'    => 'Contains deprecated character $1',
        'antispoof-noletters'     => 'Does not contain any letters',
        'antispoof-mixedscripts'  => 'Contains incompatible mixed scripts',
        'antispoof-tooshort'      => 'Canonicalized name too short',
@@ -43,11 +46,21 @@
        'antispoof-name-illegal' => 'Account creation error message because a 
user account creation rule was violated. Parameters:
 * $1 is the username that someone wanted to create
 * $2 is the error message. One of {{msg-mw|antispoof-badtype}}, 
{{msg-mw|antispoof-empty}}, {{msg-mw|antispoof-blacklisted}} and others.',
+       'antispoof-bad-char'      => 'It is not a complete message but a 
template for designator of a bad character, so localization can format it 
properly. Parameters:
+* $1 is the bad character itself.
+* $2 is the Unicode code point of bad character ("U+" followed by hex 
number).',
+       'antispoof-bad-char-non-printable' => 'The same as antispoof-bad-char, 
but for non-printable characters. Since non-printable characters do not have 
visual representation, template has only one parameter:
+* $1 is the Unicode code point of bad character ("U+" followed by hex 
number).',
        'antispoof-badtype' => 'Reason for failed account creation.',
        'antispoof-empty' => 'Reason for failed account creation.',
-       'antispoof-blacklisted' => 'Reason for failed account creation.',
-       'antispoof-combining' => 'Reason for failed account creation.',
-       'antispoof-unassigned' => 'Reason for failed account creation.',
+       'antispoof-blacklisted' => 'Reason for failed account creation. 
Parameters:
+* $1 — bad character designator (built with either antispoof-bad-char or 
…-non-printable).',
+       'antispoof-combining' => 'Reason for failed account creation. 
Parameters:
+* $1 — bad character designator (built with either antispoof-bad-char or 
…-non-printable).',
+       'antispoof-unassigned' => 'Reason for failed account creation. 
Parameters:
+* $1 — bad character designator (built with either antispoof-bad-char or 
…-non-printable).',
+       'antispoof-deprecated' => 'Reason for failed account creation. 
Parameters:
+* $1 — bad character designator (built with either antispoof-bad-char or 
…-non-printable).',
        'antispoof-noletters' => 'Reason for failed account creation.',
        'antispoof-mixedscripts' => 'Reason for failed account creation.',
        'antispoof-tooshort' => 'Reason for failed account creation.',

Modified: trunk/extensions/AntiSpoof/AntiSpoof_body.php
===================================================================
--- trunk/extensions/AntiSpoof/AntiSpoof_body.php       2012-03-19 17:10:15 UTC 
(rev 114155)
+++ trunk/extensions/AntiSpoof/AntiSpoof_body.php       2012-03-19 17:15:33 UTC 
(rev 114156)
@@ -306,6 +306,29 @@
                return $out;
        }
 
+       /**
+        * Helper function for checkUnicodeString: build the error result for
+        * a bad character.
+        *
+        * The character is first turned into a "designator" string using one
+        * of two messages:
+        *  - 'antispoof-bad-char-non-printable' ($1 = "U+XXXX" code) when the
+        *    character alone matches \p{C} (Unicode general category "Other");
+        *  - 'antispoof-bad-char' ($1 = the character, $2 = "U+XXXX" code)
+        *    otherwise.
+        * A character whose script code is "SCRIPT_COMBINING_MARKS" gets a
+        * space prepended before it is shown. The designator is then passed
+        * as $1 to the message named by $msgId.
+        *
+        * TODO: I would like to show Unicode character name, but it is not
+        * clear how to get it.
+        *
+        * @param $msgId -- string, message identifier.
+        * @param $point -- number, codepoint of the bad character.
+        * @return array Two elements: the string "ERROR" and the formatted
+        *         error message.
+        */
+       private static function badCharErr( $msgId, $point ) {
+               $symbol = codepointToUtf8( $point );
+               // Combining marks are combined with the previous character. If 
abusing character is a
+               // combining mark, prepend it with space to show them correctly.
+               if ( self::getScriptCode( $point ) == "SCRIPT_COMBINING_MARKS" 
) {
+                       $symbol = ' ' . $symbol;
+               }
+               $code = sprintf( 'U+%04X', $point );
+               if ( preg_match( '/\A\p{C}\z/u', $symbol ) ) {
+                       $char = wfMsg( 'antispoof-bad-char-non-printable', 
$code );
+               } else {
+                       $char = wfMsg( 'antispoof-bad-char', $symbol, $code );
+               }
+               return array( "ERROR", wfMsg( $msgId, $char ) );
+       }
+
        /**
         * TODO: does too much in one routine, refactor...
         * @param $testName
@@ -321,8 +344,10 @@
                        return array( "ERROR", wfMsg( 'antispoof-empty' ) );
                }
 
-               if ( array_intersect( self::stringToList( $testName ), 
self::$character_blacklist ) ) {
-                       return array( "ERROR", wfMsg( 'antispoof-blacklisted' ) 
);
+               foreach ( self::stringToList( $testName ) as $char ) {
+                       if ( in_array( $char, self::$character_blacklist ) ) {
+                               return self::badCharErr( 
'antispoof-blacklisted', $char );
+                       }
                }
 
                # Perform Unicode _compatibility_ decomposition
@@ -330,23 +355,31 @@
                $testChars = self::stringToList( $testName );
 
                # Be paranoid: check again, just in case Unicode normalization 
code changes...
-               if ( array_intersect( $testChars, self::$character_blacklist ) 
) {
-                       return array( "ERROR", wfMsg( 'antispoof-blacklisted' ) 
);
+               foreach ( $testChars as $char ) {
+                       if ( in_array( $char, self::$character_blacklist ) ) {
+                               return self::badCharErr( 
'antispoof-blacklisted', $char );
+                       }
                }
 
                # Check for this: should not happen in any valid Unicode string
                if ( self::getScriptCode( $testChars[0] ) == 
"SCRIPT_COMBINING_MARKS" ) {
-                       return array( "ERROR", wfMsg( 'antispoof-combining' ) );
+                       return self::badCharErr( 'antispoof-combining', 
$testChars[0] );
                }
 
                # Strip all combining characters in order to crudely strip 
accents
                # Note: NFKD normalization should have decomposed all accented 
chars earlier
                $testChars = self::stripScript( $testChars, 
"SCRIPT_COMBINING_MARKS" );
 
-               $testScripts = array_unique( array_map( array( 'AntiSpoof', 
'getScriptCode' ), $testChars ) );
-               if ( in_array( "SCRIPT_UNASSIGNED", $testScripts ) || in_array( 
"SCRIPT_DEPRECATED", $testScripts ) ) {
-                       return array( "ERROR", wfMsg( 'antispoof-unassigned' ) 
);
+               $testScripts = array_map( array( 'AntiSpoof', 'getScriptCode' 
), $testChars );
+               $unassigned = array_search( "SCRIPT_UNASSIGNED", $testScripts );
+               if ( $unassigned !== False ) {
+                       return self::badCharErr( 'antispoof-unassigned', 
$testChars[$unassigned] );
                }
+               $deprecated = array_search( "SCRIPT_DEPRECATED", $testScripts );
+               if ( $deprecated !== False ) {
+                       return self::badCharErr( 'antispoof-deprecated', 
$testChars[$deprecated] );
+               }
+               $testScripts = array_unique( $testScripts );
 
                # We don't mind ASCII punctuation or digits
                $testScripts = array_diff( $testScripts,


_______________________________________________
MediaWiki-CVS mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs

Reply via email to