jenkins-bot has submitted this change and it was merged.

Change subject: (bug 61911) use localized number patterns.
......................................................................


(bug 61911) use localized number patterns.

When splitting quantity strings, use regex patterns derived from
MediaWiki language objects.

IMPORTANT: needs https://github.com/DataValues/Number/pull/2 (that is,
it needs the yet-to-be-released version 0.3 of data-values/number).

Change-Id: If2345c45b5da77ca83437aa925e4b36631fafd15
---
M composer.json
M lib/includes/formatters/MediaWikiNumberLocalizer.php
M lib/includes/formatters/WikibaseValueFormatterBuilders.php
M lib/includes/parsers/MediaWikiNumberUnlocalizer.php
M lib/tests/phpunit/parsers/MediaWikiNumberUnlocalizerTest.php
M repo/Wikibase.php
6 files changed, 215 insertions(+), 32 deletions(-)

Approvals:
  Tobias Gritschacher: Looks good to me, approved
  WikidataJenkins: Verified
  jenkins-bot: Verified



diff --git a/composer.json b/composer.json
index b0c52c7..652ffee 100644
--- a/composer.json
+++ b/composer.json
@@ -27,7 +27,7 @@
                "data-values/data-values": "~0.1.0",
                "data-values/common": "~0.2.0",
                "data-values/geo": "~0.1.0",
-               "data-values/number": "~0.2.0",
+               "data-values/number": "~0.3.0",
                "data-values/time": "~0.2.0",
                "data-values/validators": "~0.1.0",
                "data-values/data-types": "~0.1.0",
diff --git a/lib/includes/formatters/MediaWikiNumberLocalizer.php 
b/lib/includes/formatters/MediaWikiNumberLocalizer.php
index dbf162d..14a23b9 100644
--- a/lib/includes/formatters/MediaWikiNumberLocalizer.php
+++ b/lib/includes/formatters/MediaWikiNumberLocalizer.php
@@ -18,21 +18,29 @@
 class MediaWikiNumberLocalizer implements Localizer {
 
        /**
+        * @var Language
+        */
+       protected $language;
+
+       /**
+        * @param Language $language
+        */
+       public function __construct( Language $language ) {
+               $this->language = $language;
+       }
+
+       /**
         * @see Localizer::localize()
         *
         * @since 0.5
         *
         * @param string $number a numeric string
-        * @param string $language a language code
-        * @param FormatterOptions $options
         *
         * @return string
         * @throws InvalidArgumentException
         */
-       public function localize( $number, $language, FormatterOptions $options 
) {
-               $language = Language::factory( $language );
-
-               $localiezdNumber = $language->formatNum( $number );
+       public function localizeNumber( $number ) {
+               $localiezdNumber = $this->language->formatNum( $number );
                return $localiezdNumber;
        }
 }
diff --git a/lib/includes/formatters/WikibaseValueFormatterBuilders.php 
b/lib/includes/formatters/WikibaseValueFormatterBuilders.php
index 9308b15..e0dca5a 100644
--- a/lib/includes/formatters/WikibaseValueFormatterBuilders.php
+++ b/lib/includes/formatters/WikibaseValueFormatterBuilders.php
@@ -537,7 +537,8 @@
         */
        protected static function newQuantityFormatter( FormatterOptions 
$options, $builders ) {
                //TODO: use a builder for this DecimalFormatter
-               $localizer = new MediaWikiNumberLocalizer();
+               $language = Language::factory( $options->getOption( 
ValueFormatter::OPT_LANG ) );
+               $localizer = new MediaWikiNumberLocalizer( $language );
                $decimalFormatter = new DecimalFormatter( $options, $localizer 
);
                return new QuantityFormatter( $decimalFormatter, $options );
        }
diff --git a/lib/includes/parsers/MediaWikiNumberUnlocalizer.php 
b/lib/includes/parsers/MediaWikiNumberUnlocalizer.php
index 5efcf63..5af1e20 100644
--- a/lib/includes/parsers/MediaWikiNumberUnlocalizer.php
+++ b/lib/includes/parsers/MediaWikiNumberUnlocalizer.php
@@ -2,8 +2,7 @@
 
 namespace Wikibase\Lib;
 use Language;
-use ValueParsers\ParserOptions;
-use ValueParsers\Unlocalizer;
+use ValueParsers\BasicUnlocalizer;
 
 /**
  * MediaWikiNumberUnlocalizer
@@ -13,21 +12,81 @@
  * @license GPL 2+
  * @author Daniel Kinzler
  */
-class MediaWikiNumberUnlocalizer implements Unlocalizer {
+class MediaWikiNumberUnlocalizer extends BasicUnlocalizer {
+
+       protected static $unlocalizerMap = array(
+               "\xe2\x88\x92" => '-', // convert minus (U+2212) to hyphen
+               "\xe2\x93\x96" => '-', // convert "heavy minus" (U+2796) to 
hyphen
+               "\xe2\x93\x95" => '+', // convert "heavy plus" (U+2795) to plus
+       );
+
+       /**
+        * @var Language
+        */
+       protected $language;
+
+       /**
+        * @param Language $language
+        */
+       public function __construct( Language $language ) {
+               $this->language = $language;
+       }
 
        /**
         * @see Unlocalizer::unlocalize()
         *
         * @param string $number string to process
-        * @param string $langCode language code
-        * @param ParserOptions $options
         *
         * @return string unlocalized string
         */
-       public function unlocalize( $number, $langCode, ParserOptions $options 
) {
-               $lang = Language::factory( $langCode );
+       public function unlocalizeNumber( $number ) {
+               $canonicalizedNumber = $this->language->parseFormattedNumber( 
$number );
 
-               $canonicalizedNumber = $lang->parseFormattedNumber( $number );
+               // convert "pretty" characters not covered by 
parseFormattedNumber
+               $canonicalizedNumber = strtr( $canonicalizedNumber, 
self::$unlocalizerMap );
+
+               // strip any remaining whitespace
+               $canonicalizedNumber = preg_replace( '/\s/u', '', 
$canonicalizedNumber );
+
                return $canonicalizedNumber;
        }
+
+       /**
+        * @see Unlocalizer::getNumberRegex()
+        *
+        * Constructs a regular expression based on 
Language::digitTransformTable()
+        * and Language::separatorTransformTable().
+        *
+        * @param string $delim the regex delimiter, used for escaping.
+        *
+        * @return string regular expression
+        */
+       public function getNumberRegex( $delim = '/' ) {
+               $digitMap = $this->language->digitTransformTable();
+               $separatorMap = $this->language->separatorTransformTable();
+
+               if ( empty( $digitMap ) ) {
+                       $numerals = '0123456789';
+               } else {
+                       $numerals = implode( '', array_keys( $digitMap ) ) // 
accept canonical numerals
+                               . implode( '', array_values( $digitMap ) ); // 
...and localized numerals
+               }
+
+               if ( empty( $separatorMap ) ) {
+                       $separators = '.,';
+               } else {
+                       $separators = implode( '', array_keys( $separatorMap ) 
) // accept canonical separators
+                               . implode( '', array_values( $separatorMap ) ); 
// ...and localized separators
+               }
+
+               $characters = $numerals . $separators;
+
+               // if any whitespace characters are acceptable, also accept a 
regular blank.
+               if ( preg_match( '/\s/u', $characters ) ) {
+                       $characters = $characters . ' ';
+               }
+
+               return '[-+]?[' . preg_quote( $characters, $delim ) . ']+';
+       }
+
 }
diff --git a/lib/tests/phpunit/parsers/MediaWikiNumberUnlocalizerTest.php 
b/lib/tests/phpunit/parsers/MediaWikiNumberUnlocalizerTest.php
index a5c79d8..8cbce4e 100644
--- a/lib/tests/phpunit/parsers/MediaWikiNumberUnlocalizerTest.php
+++ b/lib/tests/phpunit/parsers/MediaWikiNumberUnlocalizerTest.php
@@ -2,10 +2,13 @@
 
 namespace Wikibase\Lib\Test;
 
-use ValueParsers\ParserOptions;
+use Language;
+use ValueParsers\ValueParser;
+use Wikibase\Lib\MediaWikiNumberLocalizer;
 use Wikibase\Lib\MediaWikiNumberUnlocalizer;
 
 /**
+ * @covers Wikibase\Lib\MediaWikiNumberLocalizer
  * @covers Wikibase\Lib\MediaWikiNumberUnlocalizer
  *
  * @group ValueParsers
@@ -19,24 +22,136 @@
 
        public function provideUnlocalize() {
                return array(
-                       array( '123,456.789', 'en', '123456.789' ),
-                       array( '123.456,789', 'de', '123456.789' ),
+                       array( '1', 'en', '1' ),
+                       array( '-1.1', 'en', '-1.1' ),
+
+                       array( '-1.234,56', 'de', '-1234.56' ),
+
+                       array( "\xe2\x88\x921.234,56", 'de', '-1234.56' ),
+                       array( "\xe2\x93\x961.234,56", 'de', '-1234.56' ),
+                       array( "\xe2\x93\x951.234,56", 'de', '+1234.56' ),
+
+                       array( "1\xc2\xa0234,56", 'sv', '1234.56' ),
+                       array( "1 234,56", 'sv', '1234.56' ),
                );
        }
 
        /**
         * @dataProvider provideUnlocalize
-        *
-        * @param $localized
-        * @param $lang
-        * @param $expected
         */
-       public function testUnlocalize( $localized, $lang, $expected ) {
-               $unlocalizer = new MediaWikiNumberUnlocalizer();
-               $options = new ParserOptions();
+       public function testUnlocalize( $localized, $languageCode, $canonical ) 
{
+               $language = Language::factory( $languageCode );
+               $unlocalizer = new MediaWikiNumberUnlocalizer( $language );
 
-               $actual = $unlocalizer->unlocalize( $localized, $lang, $options 
);
+               $unlocalized = $unlocalizer->unlocalizeNumber( $localized );
 
-               $this->assertEquals( $expected, $actual );
+               $this->assertEquals( $canonical, $unlocalized );
+       }
+
+       public function provideLocalizationRoundTrip() {
+               $numbers = array( 12, -4.111, 12345678 );
+               $languages = array(
+                       'en', 'es', 'pt', 'fr', 'de', 'sv', 'ru',  // western 
arabic numerals, but different separators
+                       'ar', 'fa', 'my', 'pi', 'ne', 'kn', // different 
numerals
+               );
+
+               $cases = array();
+               foreach ( $languages as $lang ) {
+                       foreach ( $numbers as $num ) {
+                               $cases[] = array( $num, $lang );
+                       }
+               };
+
+               return $cases;
+       }
+
+       /**
+        * @dataProvider provideLocalizationRoundTrip
+        */
+       public function testLocalizationRoundTrip( $number, $languageCode, 
$canonical = null ) {
+               if ( $canonical === null ) {
+                       $canonical = "$number";
+               }
+
+               $language = Language::factory( $languageCode );
+
+               $localizer = new MediaWikiNumberLocalizer( $language );
+               $unlocalizer = new MediaWikiNumberUnlocalizer( $language );
+
+               $localized = $localizer->localizeNumber( $number );
+               $unlocalized = $unlocalizer->unlocalizeNumber( $localized );
+
+               $this->assertEquals( $canonical, $unlocalized );
+       }
+
+       public function provideGetNumberRegexMatch() {
+               return array(
+                       array( '5' ),
+                       array( '+3' ),
+                       array( '-15' ),
+
+                       array( '5.3' ),
+                       array( '+3.2' ),
+                       array( '-15.77' ),
+
+                       array( '.3' ),
+                       array( '+.2' ),
+                       array( '-.77' ),
+
+                       array( '1,335.3' ),
+                       array( '+1,333.2' ),
+                       array( '-1,315.77' ),
+
+                       array( '12.345,77', 'de' ),
+                       array( "12\xc2\xa0345,77", 'sv' ), // non-breaking 
space, as generated by the formatter
+                       array( "12 345,77", 'sv' ), // regular space, as might 
be entered by users
+               );
+       }
+
+       /**
+        * @dataProvider provideGetNumberRegexMatch
+        */
+       public function testGetNumberRegexMatch( $value, $lang = 'en' ) {
+               $lang = Language::factory( $lang );
+               $unlocalizer = new MediaWikiNumberUnlocalizer( $lang );
+               $regex = $unlocalizer->getNumberRegex();
+
+               $hex = utf8ToHexSequence( $regex );
+               $this->assertTrue( (bool)preg_match( "/^($regex)$/u", $value ), 
"Hex: $hex" );
+       }
+
+       public function provideGetNumberRegexMismatch() {
+               return array(
+                       array( '' ),
+                       array( ' ' ),
+                       array( '+' ),
+                       array( 'e' ),
+
+                       array( '.-' ),
+
+                       array( '0x20' ),
+                       array( '2x2' ),
+                       array( 'x2' ),
+                       array( '2x' ),
+
+                       array( 'e.' ),
+                       array( '.e' ),
+                       array( '12e' ),
+                       array( 'E17' ),
+
+                       array( '+-3' ),
+                       array( '++7' ),
+                       array( '--5' ),
+               );
+       }
+
+       /**
+        * @dataProvider provideGetNumberRegexMismatch
+        */
+       public function testGetNumberRegexMismatch( $value, $lang = 'en' ) {
+               $unlocalizer = new MediaWikiNumberUnlocalizer( 
Language::factory( $lang ) );
+               $regex = $unlocalizer->getNumberRegex();
+
+               $this->assertFalse( (bool)preg_match( "/^($regex)$/u", $value ) 
);
        }
 }
diff --git a/repo/Wikibase.php b/repo/Wikibase.php
index c65b39a..86ad1d3 100644
--- a/repo/Wikibase.php
+++ b/repo/Wikibase.php
@@ -1,4 +1,5 @@
 <?php
+use ValueParsers\ValueParser;
 
 /**
  * Entry point for the Wikibase Repository extension.
@@ -77,10 +78,9 @@
        };
 
        $wgValueParsers['quantity'] = function( ValueParsers\ParserOptions 
$options ) {
-               $unlocalizer = new Wikibase\Lib\MediaWikiNumberUnlocalizer();
-               return new \ValueParsers\QuantityParser(
-                       new \ValueParsers\DecimalParser( $options, $unlocalizer 
),
-                       $options );
+               $language = Language::factory( $options->getOption( 
ValueParser::OPT_LANG ) );
+               $unlocalizer = new Wikibase\Lib\MediaWikiNumberUnlocalizer( 
$language);
+               return new \ValueParsers\QuantityParser( $options, $unlocalizer 
);
        };
 
        $wgValueParsers['bool'] = 'ValueParsers\BoolParser';

-- 
To view, visit https://gerrit.wikimedia.org/r/116986
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: If2345c45b5da77ca83437aa925e4b36631fafd15
Gerrit-PatchSet: 6
Gerrit-Project: mediawiki/extensions/Wikibase
Gerrit-Branch: master
Gerrit-Owner: Daniel Kinzler <daniel.kinz...@wikimedia.de>
Gerrit-Reviewer: Aude <aude.w...@gmail.com>
Gerrit-Reviewer: Daniel Kinzler <daniel.kinz...@wikimedia.de>
Gerrit-Reviewer: Thiemo Mättig (WMDE) <thiemo.maet...@wikimedia.de>
Gerrit-Reviewer: Tobias Gritschacher <tobias.gritschac...@wikimedia.de>
Gerrit-Reviewer: WikidataJenkins <wikidata-servi...@wikimedia.de>
Gerrit-Reviewer: jenkins-bot <>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to