https://www.mediawiki.org/wiki/Special:Code/MediaWiki/103327

Revision: 103327
Author:   hashar
Date:     2011-11-16 15:12:00 +0000 (Wed, 16 Nov 2011)
Log Message:
-----------
bug 28643 improvement to serbian variants conversion

This patch is a PARTIAL merge of /branches/nikola/phase3 ::

r85224 avoid double conversion when text already uses the correct variant
r85239 minor fixes to previous
r85308 documentation (@since 1.18 update to 1.19)
r101359 guessVariant doc + boolean typecast
r101369 tests
r103131 additional test

Test plan:
==========
$ ./phpunit.php --filter LanguageSr
PHPUnit 3.6.3 by Sebastian Bergmann.

Configuration read from /srv/trunk/tests/phpunit/suite.xml

.....

Time: 1 second, Memory: 78.50Mb

OK (5 tests, 19 assertions)
$

Modified Paths:
--------------
    trunk/phase3/RELEASE-NOTES-1.19
    trunk/phase3/languages/LanguageConverter.php
    trunk/phase3/languages/classes/LanguageSr.php

Added Paths:
-----------
    trunk/phase3/tests/phpunit/languages/LanguageSrTest.php

Property Changed:
----------------
    trunk/phase3/languages/LanguageConverter.php

Modified: trunk/phase3/RELEASE-NOTES-1.19
===================================================================
--- trunk/phase3/RELEASE-NOTES-1.19     2011-11-16 14:33:57 UTC (rev 103326)
+++ trunk/phase3/RELEASE-NOTES-1.19     2011-11-16 15:12:00 UTC (rev 103327)
@@ -166,6 +166,7 @@
 * (bug 30217) Make pt-br a fallback of pt.
 * (bug 31193) Set fallback language of Assamese from Bengali to English.
 * Update date format for dsb and hsb: month names need the genitive.
+* (bug 28643) Serbian variant conversion improvements (Nikola Smolenski)
 
 === Other changes in 1.19 ===
 * jquery.mwPrototypes module was renamed to jquery.mwExtension.

Modified: trunk/phase3/languages/LanguageConverter.php
===================================================================
--- trunk/phase3/languages/LanguageConverter.php        2011-11-16 14:33:57 UTC 
(rev 103326)
+++ trunk/phase3/languages/LanguageConverter.php        2011-11-16 15:12:00 UTC 
(rev 103327)
@@ -322,6 +322,10 @@
                        }
                }
 
+               if( $this->guessVariant( $text, $toVariant ) ) {
+                       return $text;
+               }
+
                /* we convert everything except:
                   1. HTML markups (anything between < and >)
                   2. HTML entities
@@ -571,7 +575,7 @@
         */
        public function convertTo( $text, $variant ) {
                global $wgDisableLangConversion;
-               if ( $wgDisableLangConversion ) {
+               if ( $wgDisableLangConversion || $this->guessVariant( $text, 
$variant ) ) {
                        return $text;
                }
                return $this->recursiveConvertTopLevel( $text, $variant );
@@ -773,6 +777,20 @@
        }
 
        /**
+        * Guess if a text is written in a variant. This should be implemented 
in subclasses.
+        *
+        * @param string        $text the text to be checked
+        * @param string        $variant language code of the variant to be 
checked for
+        * @return bool true if $text appears to be written in $variant, false 
if not
+        *
+        * @author Nikola Smolenski <[email protected]>
+        * @since 1.19
+        */
+       public function guessVariant($text, $variant) {
+               // No guess is made at this level. The conversion code in this file only
+               // skips converting when this returns true, so false means "always convert".
+               return false;
+       }
+
+       /**
         * Load default conversion tables.
         * This method must be implemented in derived class.
         *


Property changes on: trunk/phase3/languages/LanguageConverter.php
___________________________________________________________________
Added: svn:mergeinfo
   + /branches/REL1_15/phase3/languages/LanguageConverter.php:51646
/branches/REL1_17/phase3/languages/LanguageConverter.php:81445,81448
/branches/new-installer/phase3/languages/LanguageConverter.php:43664-66004
/branches/nikola/phase3/languages/LanguageConverter.php:85106-103326
/branches/sqlite/languages/LanguageConverter.php:58211-58321

Modified: trunk/phase3/languages/classes/LanguageSr.php
===================================================================
--- trunk/phase3/languages/classes/LanguageSr.php       2011-11-16 14:33:57 UTC 
(rev 103326)
+++ trunk/phase3/languages/classes/LanguageSr.php       2011-11-16 15:12:00 UTC 
(rev 103327)
@@ -173,6 +173,32 @@
 
                return $ret;
        }
+
+       /**
+        * Guess if a text is written in Cyrillic or Latin.
+        * Overrides LanguageConverter::guessVariant()
+        *
+        * @param string  $text The text to be checked
+        * @param string  $variant Language code of the variant to be checked 
for
+        * @return bool  true if $text appears to be written in $variant
+        *
+        * @author Nikola Smolenski <[email protected]>
+        * @since 1.19
+        */
+       public function guessVariant( $text, $variant ) {
+               // Tally the letters used as markers for each script. The Latin class
+               // lists the ten counterparts of the Cyrillic one: š đ č ć ž Š Đ Č Ć Ž.
+               $numCyrillic = preg_match_all( "/[шђчћжШЂЧЋЖ]/u", $text, $dummy );
+               $numLatin = preg_match_all( "/[šđčćžŠĐČĆŽ]/u", $text, $dummy );
+
+               // A strict majority is required; on a tie no guess is made.
+               if ( $variant === 'sr-ec' ) {
+                       return $numCyrillic > $numLatin;
+               } elseif ( $variant === 'sr-el' ) {
+                       return $numLatin > $numCyrillic;
+               }
+
+               // Any other variant code cannot be guessed here.
+               return false;
+       }
+
 }
 
 /**

Copied: trunk/phase3/tests/phpunit/languages/LanguageSrTest.php (from rev 
103131, branches/nikola/phase3/tests/phpunit/languages/LanguageSrTest.php)
===================================================================
--- trunk/phase3/tests/phpunit/languages/LanguageSrTest.php                     
        (rev 0)
+++ trunk/phase3/tests/phpunit/languages/LanguageSrTest.php     2011-11-16 
15:12:00 UTC (rev 103327)
@@ -0,0 +1,165 @@
+<?php
+/**
+ * PHPUnit tests for the Serbian language.
+ * The language can be represented using two scripts:
+ *  - Latin (SR_el)
+ *  - Cyrillic (SR_ec)
+ * Both representations seem to be bijective, hence MediaWiki can convert
+ * from one script to the other.
+ *
+ * @author Antoine Musso <hashar at free dot fr>
+ * @copyright Copyright © 2011, Antoine Musso <hashar at free dot fr>
+ * @file
+ */
+
+require_once dirname(dirname(__FILE__)). '/bootstrap.php';
+
+/**
+ * Tests for MediaWiki languages/classes/LanguageSr.php
+ *
+ * Exercises the converter's convertTo() for the 'sr-ec' (Cyrillic) and
+ * 'sr-el' (Latin) variants, including the case where the text is left
+ * untouched because it already appears to be written in the target variant.
+ */
+class LanguageSrTest extends MediaWikiTestCase {
+       /* Language object. Initialized before each test */
+       private $lang;
+
+       function setUp() {
+               $this->lang = Language::factory( 'Sr' );
+       }
+       function tearDown() {
+               unset( $this->lang );
+       }
+
+       ##### TESTS #######################################################
+
+       /**
+        * Text made only of one script's guessing characters is left alone for
+        * that script's variant and converted for the other one.
+        */
+       function testEasyConversions( ) {
+               $this->assertCyrillic(
+                       'шђчћжШЂЧЋЖ',
+                       'Cyrillic guessing characters'
+               );
+               $this->assertLatin(
+                       'šđčćžŠĐČĆŽ',
+                       'Latin guessing characters'
+               );
+       }
+
+       /**
+        * With both scripts present, the one with more guessing characters wins.
+        */
+       function testMixedConversions() {
+               $this->assertCyrillic(
+                       'шђчћжШЂЧЋЖ - šđčćž',
+                       'Mostly cyrillic characters'
+               );
+               $this->assertLatin(
+                       'šđčćžŠĐČĆŽ - шђчћж',
+                       'Mostly latin characters'
+               );
+       }
+
+       /**
+        * On a tie the text must still be converted, whichever variant is requested.
+        */
+       function testSameAmountOfLatinAndCyrillicGetConverted() {
+               $this->assertConverted(
+                       '4 latin: šđčć | 4 cyrillic: шђчћ',
+                       'sr-ec'
+               );
+               $this->assertConverted(
+                       '4 latin: šđčć | 4 cyrillic: шђчћ',
+                       'sr-el'
+               );
+       }
+
+       /**
+        * @author Nikola Smolenski
+        */
+       function testConversionToCyrillic() {
+               $this->assertEquals( 'абвг',
+                       $this->convertToCyrillic( 'abvg' )
+               );
+               $this->assertEquals( 'абвг',
+                       $this->convertToCyrillic( 'абвг' )
+               );
+               $this->assertEquals( 'abvgшђжчћ',
+                       $this->convertToCyrillic( 'abvgшђжчћ' )
+               );
+               $this->assertEquals( 'абвгшђжчћ',
+                       $this->convertToCyrillic( 'абвгšđžčć' )
+               );
+               //Roman numerals are not converted
+               $this->assertEquals( 'а I б II в III г IV шђжчћ',
+                       $this->convertToCyrillic( 'a I b II v III g IV šđžčć' )
+               );
+       }
+
+       /**
+        * Conversion to the Latin variant, mirroring testConversionToCyrillic().
+        */
+       function testConversionToLatin() {
+               $this->assertEquals( 'abcd',
+                       $this->convertToLatin( 'abcd' )
+               );
+               $this->assertEquals( 'abcd',
+                       $this->convertToLatin( 'абцд' )
+               );
+               $this->assertEquals( 'abcdšđžčć',
+                       $this->convertToLatin( 'abcdшђжчћ' )
+               );
+               $this->assertEquals( 'абцдšđžčć',
+                       $this->convertToLatin( 'абцдšđžčć' )
+               );
+
+       }
+
+       ##### HELPERS #####################################################
+       /**
+        * Wrapper to verify a text stays the same after applying conversion.
+        * @param $text string Text to convert
+        * @param $variant string Language variant 'sr-ec' or 'sr-el'
+        * @param $msg string Optional message
+        */
+       function assertUnConverted( $text, $variant, $msg = '' ) {
+               $this->assertEquals(
+                       $text,
+                       $this->convertTo( $text, $variant ),
+                       $msg
+               );
+       }
+       /**
+        * Wrapper to verify a text is different once converted to a variant.
+        * @param $text string Text to convert
+        * @param $variant string Language variant 'sr-ec' or 'sr-el'
+        * @param $msg string Optional message
+        */
+       function assertConverted( $text, $variant, $msg = '' ) {
+               $this->assertNotEquals(
+                       $text,
+                       $this->convertTo( $text, $variant ),
+                       $msg
+               );
+       }
+
+       /**
+        * Verify the given Cyrillic text is not converted when using
+        * the Cyrillic variant and is converted to Latin when using
+        * the Latin variant.
+        * @param $text string Text expected to be recognised as Cyrillic
+        * @param $msg string Optional message
+        */
+       function assertCyrillic( $text, $msg = '' ) {
+               $this->assertUnConverted( $text, 'sr-ec', $msg );
+               $this->assertConverted( $text, 'sr-el', $msg );
+       }
+       /**
+        * Verify the given Latin text is not converted when using
+        * the Latin variant and is converted to Cyrillic when using
+        * the Cyrillic variant.
+        * @param $text string Text expected to be recognised as Latin
+        * @param $msg string Optional message
+        */
+       function assertLatin( $text, $msg = '' ) {
+               $this->assertUnConverted( $text, 'sr-el', $msg );
+               $this->assertConverted( $text, 'sr-ec', $msg );
+       }
+
+
+       /** Wrapper for converter::convertTo() method*/
+       function convertTo( $text, $variant ) {
+               return $this
+                       ->lang
+                       ->mConverter
+                       ->convertTo(
+                               $text, $variant
+                       );
+       }
+       /** Shortcut for convertTo() with the Cyrillic variant 'sr-ec' */
+       function convertToCyrillic( $text ) {
+               return $this->convertTo( $text, 'sr-ec' );
+       }
+       /** Shortcut for convertTo() with the Latin variant 'sr-el' */
+       function convertToLatin( $text ) {
+               return $this->convertTo( $text, 'sr-el' );
+       }
+}


_______________________________________________
MediaWiki-CVS mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs

Reply via email to