http://www.mediawiki.org/wiki/Special:Code/MediaWiki/70126
Revision: 70126
Author: mah
Date: 2010-07-29 02:44:23 +0000 (Thu, 29 Jul 2010)
Log Message:
-----------
Add detection for unicode normalization. Next step: use what we find! :)
I think I want to point to an as-yet-to-be-created page on MediaWiki.org to
help people understand what to do if they're stuck with pure PHP normalization,
but any pointers here would help.
Modified Paths:
--------------
trunk/phase3/includes/installer/Installer.i18n.php
trunk/phase3/includes/installer/Installer.php
Modified: trunk/phase3/includes/installer/Installer.i18n.php
===================================================================
--- trunk/phase3/includes/installer/Installer.i18n.php 2010-07-29 00:08:05 UTC
(rev 70125)
+++ trunk/phase3/includes/installer/Installer.i18n.php 2010-07-29 02:44:23 UTC
(rev 70126)
@@ -79,6 +79,10 @@
'config-env-latest-old' => "'''Warning:''' You are installing
an outdated version of Mediawiki.",
'config-env-latest-help' => 'You are installing version $1,
but the latest version is $2.
You are advised to use the latest release, which can be downloaded from
[http://www.mediawiki.org/wiki/Download mediawiki.org]',
+ 'config-unicode-php' => "Using pure PHP to normalize
Unicode characters.",
+ 'config-unicode-pure-php-warning' => "'''Warning''': Either the PECL
Intl extension is not available, or it uses an older version of
[http://site.icu-project.org/ the ICU project's] library for handling Unicode
normalization. If you run a high-traffic site, you should read a little on
[http://www.mediawiki.org/wiki/Unicode_normalization_considerations Unicode
normalization].",
+ 'config-unicode-utf8' => "Using Brion Vibber's
utf8_normalize.so for UTF",
+ 'config-unicode-intl' => "Using the
[http://pecl.php.net/intl intl PECL extension] for UTF-8 normalization.",
'config-no-db' => 'Could not find a suitable
database driver!',
'config-no-db-help' => 'You need to install a database
driver for PHP.
The following database types are supported: $1.
Modified: trunk/phase3/includes/installer/Installer.php
===================================================================
--- trunk/phase3/includes/installer/Installer.php 2010-07-29 00:08:05 UTC
(rev 70125)
+++ trunk/phase3/includes/installer/Installer.php 2010-07-29 02:44:23 UTC
(rev 70126)
@@ -88,6 +88,7 @@
'envCheckExtension',
'envCheckShellLocale',
'envCheckUploadsDirectory',
+ 'envCheckLibicu'
);
/**
@@ -812,6 +813,69 @@
}
/**
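+ * Convert a hex string representing a Unicode code point to the UTF-8
encoding of that code point.
+ * @param string $c Hexadecimal code point, e.g. "FA6C"
+ * @return string|false The encoded character, or false if above 0x10FFFF
+ */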
+ protected function unicodeChar( $c ) {
+ $c = hexdec($c);
+ if ($c <= 0x7F) {
+ return chr($c);
+ } else if ($c <= 0x7FF) {
+ return chr(0xC0 | $c >> 6) . chr(0x80 | $c & 0x3F);
+ } else if ($c <= 0xFFFF) {
+ return chr(0xE0 | $c >> 12) . chr(0x80 | $c >> 6 & 0x3F)
+ . chr(0x80 | $c & 0x3F);
+ } else if ($c <= 0x10FFFF) {
+ return chr(0xF0 | $c >> 18) . chr(0x80 | $c >> 12 &
0x3F)
+ . chr(0x80 | $c >> 6 & 0x3F)
+ . chr(0x80 | $c & 0x3F);
+ } else {
+ return false;
+ }
+ }
+
+
+ /**
+ * Check the libicu version
+ */
+ public function envCheckLibicu() {
+ $utf8 = function_exists( 'utf8_normalize' );
+ $intl = function_exists( 'normalizer_normalize' );
+
+ /**
+ * This needs to be updated to something that the latest libicu
+ * will properly normalize. This normalization was found at
+ *
http://www.unicode.org/versions/Unicode5.2.0/#Character_Additions
+ * Note that we use the hex representation to create the code
+ * points in order to avoid any Unicode-destroying during
transit.
+ */
+ $not_normal_c = $this->unicodeChar("FA6C");
+ $normal_c = $this->unicodeChar("242EE");
+
+ $useNormalizer = 'config-unicode-php';
+
+ /**
+ * We're going to prefer the pecl extension here unless
+ * utf8_normalize is more up to date.
+ */
+ if( $utf8 ) {
+ $utf8 = utf8_normalize( $not_normal_c, UNORM_NFC );
+ $useNormalizer = 'config-unicode-utf8';
+ }
+ if( $intl ) {
+ $intl = normalizer_normalize( $not_normal_c,
Normalizer::FORM_C );
+ $useNormalizer = 'config-unicode-intl';
+ }
+
+ $this->showMessage( $useNormalizer );
+ if( $useNormalizer === 'config-unicode-php' ) {
+ $this->showMessage( 'config-unicode-pure-php-warning' );
+ }
+ }
+
+
+ /**
* Search a path for any of the given executable names. Returns the
* executable name if found. Also checks the version string returned
* by each executable.
_______________________________________________
MediaWiki-CVS mailing list
MediaWiki-CVS@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs