http://www.mediawiki.org/wiki/Special:Code/MediaWiki/70126

Revision: 70126
Author:   mah
Date:     2010-07-29 02:44:23 +0000 (Thu, 29 Jul 2010)

Log Message:
-----------
Add detection for unicode normalization.  Next step: use what we find! :)
I think I want to point to an as-yet-to-be-created page on MediaWiki.org to 
help people understand what to do if they're stuck with pure PHP normalization, 
but any pointers here would help.

Modified Paths:
--------------
    trunk/phase3/includes/installer/Installer.i18n.php
    trunk/phase3/includes/installer/Installer.php

Modified: trunk/phase3/includes/installer/Installer.i18n.php
===================================================================
--- trunk/phase3/includes/installer/Installer.i18n.php  2010-07-29 00:08:05 UTC 
(rev 70125)
+++ trunk/phase3/includes/installer/Installer.i18n.php  2010-07-29 02:44:23 UTC 
(rev 70126)
@@ -79,6 +79,10 @@
        'config-env-latest-old'           => "'''Warning:''' You are installing 
an outdated version of Mediawiki.",
        'config-env-latest-help'          => 'You are installing version $1, 
but the latest version is $2.
 You are advised to use the latest release, which can be downloaded from 
[http://www.mediawiki.org/wiki/Download mediawiki.org]',
+       'config-unicode-php'              => "Using pure PHP to normalize 
Unicode characters.",
+       'config-unicode-pure-php-warning' => "'''Warning''': Either the PECL 
Intl extension is not available, or it uses an older version of 
[http://site.icu-project.org/ the ICU project's] library for handling Unicode 
normalization.  If you run a high-traffic site, you should read a little on 
[http://www.mediawiki.org/wiki/Unicode_normalization_considerations Unicode 
normalization].",
+       'config-unicode-utf8'             => "Using Brion Vibber's 
utf8_normalize.so for UTF-8 normalization.",
+       'config-unicode-intl'             => "Using the 
[http://pecl.php.net/intl intl PECL extension] for UTF-8 normalization.",
        'config-no-db'                    => 'Could not find a suitable 
database driver!',
        'config-no-db-help'               => 'You need to install a database 
driver for PHP.
 The following database types are supported: $1.

Modified: trunk/phase3/includes/installer/Installer.php
===================================================================
--- trunk/phase3/includes/installer/Installer.php       2010-07-29 00:08:05 UTC 
(rev 70125)
+++ trunk/phase3/includes/installer/Installer.php       2010-07-29 02:44:23 UTC 
(rev 70126)
@@ -88,6 +88,7 @@
                'envCheckExtension',
                'envCheckShellLocale',
                'envCheckUploadsDirectory',
+               'envCheckLibicu'
        );      
        
        /**
@@ -812,6 +813,69 @@
        }       
        
        /**
+        * Encode a Unicode code point, given as a hex string, as a UTF-8 string.
+        * @param string $c Code point in hexadecimal notation, e.g. "FA6C"
+        * @return string|false UTF-8 bytes, or false if the value is above 0x10FFFF
+        */
+       protected function unicodeChar( $c ) {
+               $c = hexdec($c);
+               if ($c <= 0x7F) { // 1 byte: 0xxxxxxx
+                       return chr($c);
+               } else if ($c <= 0x7FF) { // 2 bytes: 110xxxxx 10xxxxxx
+                       return chr(0xC0 | $c >> 6) . chr(0x80 | $c & 0x3F);
+               } else if ($c <= 0xFFFF) { // 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
+                       return chr(0xE0 | $c >> 12) . chr(0x80 | $c >> 6 & 0x3F)
+                               . chr(0x80 | $c & 0x3F);
+               } else if ($c <= 0x10FFFF) { // 4 bytes: 11110xxx + three 10xxxxxx bytes
+                       return chr(0xF0 | $c >> 18) . chr(0x80 | $c >> 12 & 
0x3F)
+                               . chr(0x80 | $c >> 6 & 0x3F)
+                               . chr(0x80 | $c & 0x3F);
+               } else { // beyond the last Unicode code point
+                       return false;
+               }
+       }
+
+
+       /**
+        * Detect which Unicode normalizer is available and show the matching message.
+        */
+       public function envCheckLibicu() {
+               $utf8 = function_exists( 'utf8_normalize' );
+               $intl = function_exists( 'normalizer_normalize' );
+
+               /**
+                * This needs to be updated to something that the latest libicu
+                * will properly normalize.  This normalization was found at
+                * 
http://www.unicode.org/versions/Unicode5.2.0/#Character_Additions
+                * Note that we use the hex representation to create the code
+                * points in order to avoid any Unicode-destroying during 
transit.
+                */
+               $not_normal_c = $this->unicodeChar("FA6C");
+               $normal_c = $this->unicodeChar("242EE"); // NOTE(review): not read again in this method
+
+               $useNormalizer = 'config-unicode-php'; // default when neither extension is found
+
+               /**
+                * Intent: prefer the pecl extension here unless utf8_normalize is
+                * more up to date.  NOTE(review): as written, intl always wins if present.
+                */
+               if( $utf8 ) {
+                       $utf8 = utf8_normalize( $not_normal_c, UNORM_NFC ); // result stored but not checked
+                       $useNormalizer = 'config-unicode-utf8';
+               }
+               if( $intl ) {
+                       $intl = normalizer_normalize( $not_normal_c, 
Normalizer::FORM_C );
+                       $useNormalizer = 'config-unicode-intl'; // overrides the utf8_normalize choice above
+               }
+
+               $this->showMessage( $useNormalizer );
+               if( $useNormalizer === 'config-unicode-php' ) { // no extension found: also warn
+                       $this->showMessage( 'config-unicode-pure-php-warning' );
+               }
+       }
+
+
+       /**
         * Search a path for any of the given executable names. Returns the
         * executable name if found. Also checks the version string returned
         * by each executable.



_______________________________________________
MediaWiki-CVS mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs

Reply via email to