http://www.mediawiki.org/wiki/Special:Code/MediaWiki/72796

Revision: 72796
Author:   nikerabbit
Date:     2010-09-11 10:10:34 +0000 (Sat, 11 Sep 2010)

Log Message:
-----------
Not so beautiful script for comparing the plural rules in different 
implementations

Added Paths:
-----------
    trunk/extensions/Translate/scripts/plural-comparison.php

Added: trunk/extensions/Translate/scripts/plural-comparison.php
===================================================================
--- trunk/extensions/Translate/scripts/plural-comparison.php                    
        (rev 0)
+++ trunk/extensions/Translate/scripts/plural-comparison.php    2010-09-11 
10:10:34 UTC (rev 72796)
@@ -0,0 +1,204 @@
+<?php
+/**
+ * Script for comparing different plural implementations.
+ *
+ * @author Niklas Laxstrom
+ *
+ * @copyright Copyright © 2010, Niklas Laxström
+ * @license http://www.gnu.org/copyleft/gpl.html GNU General Public License 
2.0 or later
+ * @file
+ */
+
+// Standard boilerplate to define $IP
+if ( getenv( 'MW_INSTALL_PATH' ) !== false ) {
+       $IP = getenv( 'MW_INSTALL_PATH' );
+} else {
+       $dir = dirname( __FILE__ ); $IP = "$dir/../../..";
+}
+require_once( "$IP/maintenance/Maintenance.php" );
+
+/// Script for comparing different plural implementations.
+class PluralCompare extends Maintenance {
+       public function __construct() {
+               parent::__construct();
+               $this->mDescription = 'Script for comparing different plural 
implementations.';
+       }
+
+       public function execute() {
+               $mwLanguages = $this->loadMediaWiki();
+               $gtLanguages = $this->loadGettext();
+               $clLanguages = $this->loadCLDR();
+
+               $allkeys = array_keys( $mwLanguages + $gtLanguages + 
$clLanguages );
+               sort( $allkeys );
+
+               $this->output( sprintf( "%12s %3s %3s %4s\n", 'Code', 'MW', 
'Get', 'CLDR' ) );
+               foreach( $allkeys as $index => $code ) {
+                       $mw = isset( $mwLanguages[$code] ) ? ( 
$mwLanguages[$code] === false ? '.' : '+' ) : '';
+                       $gt = isset( $gtLanguages[$code] ) ? ( 
$gtLanguages[$code] === '(n != 1)' ? '.' : '+' ) : '';
+                       $cl = isset( $clLanguages[$code] ) ? ( 
$clLanguages[$code][0] === 'Default' ? '.' : '+' ) : '';
+                       $this->output( sprintf( "%12s %-3s %-3s %-4s\n", $code, 
$mw, $gt, $cl ) );
+
+                       if ( substr_count( sprintf( '%s%s%s', $mw, $gt, $cl ), 
'+' ) < 2 ) {
+                               unset( $allkeys[$index] );
+                       }
+               }
+
+               $this->output( "\n" );
+               $c = count( $allkeys );
+               $this->output( "Proceeding to test differences in $c 
languages\n" );
+
+               foreach ( $allkeys as $code ) {
+                       $output = sprintf( "%3s %3s %3s %4s for [$code]\n", 
'I', 'MW', 'Get', 'CLDR' );
+
+                       if ( isset( $mwLanguages[$code] ) && 
$mwLanguages[$code] !== false ) {
+                               $obj = Language::factory( $code );
+                       } else {
+                               $obj = false;
+                       }
+
+                       if ( isset( $gtLanguages[$code] ) ) {
+                               $gtExp = 'return (int) ' . str_replace( 'n', 
'$i', $gtLanguages[$code] ) . ';';
+                       } else {
+                               $gtExp = false;
+                       }
+
+                       if ( isset( $clLanguages[$code] ) ) {
+                               $cldrExp = $clLanguages[$code][1];
+                       } else {
+                               $cldrExp = false;
+                       }
+
+                       $cldrmap = array();
+                       $error = false;
+
+                       for( $i = 0; $i <= 200; $i++ ) {
+                               $mw = $obj ? $obj->convertPlural( $i, array( 0, 
1, 2, 3, 4, 5 ) ) : '?';
+                               $gt = $gtExp ? eval( $gtExp ) : '?';
+                               $cldr = $cldrExp !== false ? 
$this->evalCLDRRule( $i, $cldrExp ) : '?';
+
+                               if ( self::comp( $mw, $gt ) ) {
+                                       $value = $gt !== '?' ? $gt : $mw;
+                                       if ( !isset( $cldrmap[$cldr] ) ) {
+                                               $cldrmap[$cldr] = $value;
+                                               if ( $cldr !== '?' ) {
+                                                       $output .= sprintf( 
"%3s %-3s %-3s %-6s # Established that %-6s == $mw\n", $i, $mw, $gt, $cldr, 
$cldr );
+                                               }
+                                               continue;
+                                       } elseif ( self::comp( $cldrmap[$cldr], 
$value ) ) {
+                                               continue;
+                                       } elseif ( $i > 4 && $value === 1 && 
self::comp( $cldr, 'other' ) ) {
+                                               if ( $i === 5 ) {
+                                                       $output .= "Supressing 
further output for this language.\n";
+                                               }
+                                               continue;
+                                       }
+                               }
+                               $error = true;
+                               $output .= sprintf( "%3s %-3s %-3s %-6s\n", $i, 
$mw, $gt, $cldr );
+                       }
+
+                       if ( $error ) {
+                               $this->output( "$output\n" );
+                       }
+               }
+
+       }
+
+       public static function comp( $a, $b ) {
+               return $a === '?' || $b === '?' || $a === $b;
+       }
+
+       public function loadCLDR() {
+               $filename = dirname( __FILE__ ) . '/../data/plural-cldr.yaml';
+               $data = TranslateYaml::load( $filename );
+               $languages = array();
+               $ruleExps = array();
+               foreach ( $data['rulesets'] as $name => $rules ) {
+                       $ruleExps[$name] = array();
+                       foreach( $rules as $rulename => $rule ) {
+                               $ruleExps[$name][$rulename] = 
$this->parseCLDRRule( $rule );
+                       }
+               }
+
+               foreach ( $data['locales'] as $code => $rulename ) {
+                       $languages[$code] = array( $rulename, 
$ruleExps[$rulename] );
+               }
+
+               return $languages;
+       }
+
+       public function loadMediaWiki() {
+               $mwLanguages = Language::getLanguageNames( true );
+               foreach ( $mwLanguages as $code => $name ) {
+                       $obj = Language::factory( $code );
+                       $method = new ReflectionMethod( $obj, 'convertPlural' );
+                       if ( $method->getDeclaringClass()->name === 'Language' 
) {
+                               $mwLanguages[$code] = false;
+                       }
+               }
+               return $mwLanguages;
+       }
+
+       public function loadGettext() {
+               $gtData = file_get_contents( dirname( __FILE__ ) . 
'/../data/plural-gettext.txt' );
+               $gtLanguages = array();
+               foreach ( preg_split( '/\n|\r/', $gtData, -1, 
PREG_SPLIT_NO_EMPTY ) as $line ) {
+                       list( $code, $rule ) = explode( "\t", $line );
+                       $rule = preg_replace( '/^.*?plural=/', '', $rule );
+                       $gtLanguages[$code] = $rule;
+               }
+               return $gtLanguages;
+       }
+
+       public function evalCLDRRule( $i, $rules ) {
+               foreach ( $rules as $name => $rule ) {
+                       if ( eval( "return $rule;" ) ) {
+                               return $name;
+                       }
+               }
+
+               return "other";
+       }
+
+       public function parseCLDRRule( $rule ) {
+               $rule = preg_replace( '/\bn\b/', '$i', $rule );
+               $rule = preg_replace( '/([^ ]+) mod (\d+)/', 
'self::mod(\1,\2)', $rule );
+               $rule = preg_replace( '/([^ ]+) is not (\d+)/' , '\1!==\2', 
$rule );
+               $rule = preg_replace( '/([^ ]+) is (\d+)/', '\1===\2', $rule );
+               $rule = preg_replace( '/([^ ]+) not in (\d+)\.\.(\d+)/', 
'!self::in(\1,\2,\3)', $rule );
+               $rule = preg_replace( '/([^ ]+) not within (\d+)\.\.(\d+)/', 
'!self::within(\1,\2,\3)', $rule );
+               $rule = preg_replace( '/([^ ]+) in (\d+)\.\.(\d+)/', 
'self::in(\1,\2,\3)', $rule );
+               $rule = preg_replace( '/([^ ]+) within (\d+)\.\.(\d+)/', 
'self::within(\1,\2,\3)', $rule );
+               // AND takes precedence over OR
+               $andrule = '/([^ ]+) and ([^ ]+)/i';
+               while( preg_match( $andrule, $rule ) ) {
+                       $rule = preg_replace( $andrule, '(\1&&\2)', $rule );
+               }
+               $orrule = '/([^ ]+) or ([^ ]+)/i';
+               while( preg_match( $orrule, $rule ) ) {
+                       $rule = preg_replace( $orrule, '(\1||\2)', $rule );
+               }
+
+               return $rule;
+       }
+
+       public static function in( $num, $low, $high ) {
+               return is_int( $num ) && $num >= $low && $num <= $high;
+       }
+
+       public static function within( $num, $low, $high ) {
+               return $num >= $low && $num <= $high;
+       }
+
+       public static function mod( $num, $mod ) {
+               if ( is_int( $num ) ) {
+                       return (int) fmod( $num, $mod );
+               }
+               return fmod( $num, $mod );
+       }
+
+}
+
+$maintClass = 'PluralCompare';
+require_once( DO_MAINTENANCE );


Property changes on: trunk/extensions/Translate/scripts/plural-comparison.php
___________________________________________________________________
Added: svn:eol-style
   + native



_______________________________________________
MediaWiki-CVS mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs

Reply via email to