Nikerabbit has uploaded a new change for review. ( 
https://gerrit.wikimedia.org/r/376506 )

Change subject: Port BCP47 formatter from PHP to JavaScript
......................................................................

Port BCP47 formatter from PHP to JavaScript

It can be accessed via mw.language.bcp47.
To be used in ContentTranslation, see T157212

Change-Id: I37d32ab8a55c7101b903b03869899f00a39afd11
---
M resources/src/mediawiki.language/mediawiki.language.js
M tests/qunit/suites/resources/mediawiki/mediawiki.language.test.js
2 files changed, 129 insertions(+), 1 deletion(-)


  git pull ssh://gerrit.wikimedia.org:29418/mediawiki/core 
refs/changes/06/376506/1

diff --git a/resources/src/mediawiki.language/mediawiki.language.js 
b/resources/src/mediawiki.language/mediawiki.language.js
index 3726a68..618b69c 100644
--- a/resources/src/mediawiki.language/mediawiki.language.js
+++ b/resources/src/mediawiki.language/mediawiki.language.js
@@ -188,6 +188,38 @@
 
                setSpecialCharacters: function ( data ) {
                        this.specialCharacters = data;
+               },
+
+               /**
+                * Format a hyphen-separated language tag using the BCP 47 casing conventions.
+                * @param {string} languageTag Tag to format, in any letter case
+                * @return {string} Tag with each segment re-cased; segments stay joined by '-'
+                */
+               bcp47: function ( languageTag ) {
+                       var isFirstSegment = true,
+                               isPrivate = false;
+
+                       return languageTag.split( '-' ).map( function ( segment ) {
+                               var newSegment;
+
+                               // The segment right after "x" is private use and is always lowercased
+                               if ( isPrivate ) {
+                                       newSegment = segment.toLowerCase();
+                               // ISO 3166 country code
+                               } else if ( segment.length === 2 && !isFirstSegment ) {
+                                       newSegment = segment.toUpperCase();
+                               // ISO 15924 script code
+                               } else if ( segment.length === 4 && !isFirstSegment ) {
+                                       newSegment = segment.charAt( 0 ).toUpperCase() + segment.substring( 1 ).toLowerCase();
+                               // Use lowercase for other cases
+                               } else {
+                                       newSegment = segment.toLowerCase();
+                               }
+
+                               isPrivate = segment.toLowerCase() === 'x';
+                               isFirstSegment = false;
+                               return newSegment;
+                       } ).join( '-' );
                }
        } );
 
diff --git a/tests/qunit/suites/resources/mediawiki/mediawiki.language.test.js 
b/tests/qunit/suites/resources/mediawiki/mediawiki.language.test.js
index b965079..3e91d28 100644
--- a/tests/qunit/suites/resources/mediawiki/mediawiki.language.test.js
+++ b/tests/qunit/suites/resources/mediawiki/mediawiki.language.test.js
@@ -1,7 +1,7 @@
 ( function ( mw, $ ) {
        'use strict';
 
-       var grammarTests;
+       var grammarTests, bcp47Tests;
 
        QUnit.module( 'mediawiki.language', QUnit.newMwEnvironment( {
                setup: function () {
@@ -587,4 +587,100 @@
                assert.equal( mw.language.listToText( [ 'a', 'b' ] ), 'a and 
b', 'Two items' );
                assert.equal( mw.language.listToText( [ 'a', 'b', 'c' ] ), 'a, 
b and c', 'More than two items' );
        } );
+
+       bcp47Tests = [
+               // Pairs of [ input, expected ]. Extracted from BCP 47 (list not exhaustive)
+               // # 2.1.1
+               [ 'en-ca-x-ca', 'en-CA-x-ca' ],
+               [ 'sgn-be-fr', 'sgn-BE-FR' ],
+               [ 'az-latn-x-latn', 'az-Latn-x-latn' ],
+               // # 2.2
+               [ 'sr-Latn-RS', 'sr-Latn-RS' ],
+               [ 'az-arab-ir', 'az-Arab-IR' ],
+
+               // # 2.2.5
+               [ 'sl-nedis', 'sl-nedis' ],
+               [ 'de-ch-1996', 'de-CH-1996' ],
+
+               // # 2.2.6
+               [
+                       'en-latn-gb-boont-r-extended-sequence-x-private',
+                       'en-Latn-GB-boont-r-extended-sequence-x-private'
+               ],
+
+               // Examples from BCP 47 Appendix A
+               // # Simple language subtag:
+               [ 'DE', 'de' ],
+               [ 'fR', 'fr' ],
+               [ 'ja', 'ja' ],
+
+               // # Language subtag plus script subtag:
+               [ 'zh-hans', 'zh-Hans' ],
+               [ 'sr-cyrl', 'sr-Cyrl' ],
+               [ 'sr-latn', 'sr-Latn' ],
+
+               // # Extended language subtags and their primary language subtag
+               // # counterparts:
+               [ 'zh-cmn-hans-cn', 'zh-cmn-Hans-CN' ],
+               [ 'cmn-hans-cn', 'cmn-Hans-CN' ],
+               [ 'zh-yue-hk', 'zh-yue-HK' ],
+               [ 'yue-hk', 'yue-HK' ],
+
+               // # Language-Script-Region:
+               [ 'zh-hans-cn', 'zh-Hans-CN' ],
+               [ 'sr-latn-RS', 'sr-Latn-RS' ],
+
+               // # Language-Variant:
+               [ 'sl-rozaj', 'sl-rozaj' ],
+               [ 'sl-rozaj-biske', 'sl-rozaj-biske' ],
+               [ 'sl-nedis', 'sl-nedis' ],
+
+               // # Language-Region-Variant:
+               [ 'de-ch-1901', 'de-CH-1901' ],
+               [ 'sl-it-nedis', 'sl-IT-nedis' ],
+
+               // # Language-Script-Region-Variant:
+               [ 'hy-latn-it-arevela', 'hy-Latn-IT-arevela' ],
+
+               // # Language-Region:
+               [ 'de-de', 'de-DE' ],
+               [ 'en-us', 'en-US' ],
+               [ 'es-419', 'es-419' ],
+
+               // # Private use subtags:
+               [ 'de-ch-x-phonebk', 'de-CH-x-phonebk' ],
+               [ 'az-arab-x-aze-derbend', 'az-Arab-x-aze-derbend' ],
+               /**
+                       * The expectation above differs from the example given in BCP 47:
+                       *  az-Arab-x-AZE-derbend
+                       * AZE is private use, so the formatter lowercases it. To match the
+                       * specification's example literally, the test above would instead be:
+                       * [ 'az-arab-x-aze-derbend', 'az-Arab-x-AZE-derbend' ],
+                       */
+
+               // # Private use registry values:
+               [ 'x-whatever', 'x-whatever' ],
+               [ 'qaa-qaaa-qm-x-southern', 'qaa-Qaaa-QM-x-southern' ],
+               [ 'de-qaaa', 'de-Qaaa' ],
+               [ 'sr-latn-qm', 'sr-Latn-QM' ],
+               [ 'sr-qaaa-rs', 'sr-Qaaa-RS' ],
+
+               // # Tags that use extensions
+               [ 'en-us-u-islamcal', 'en-US-u-islamcal' ],
+               [ 'zh-cn-a-myext-x-private', 'zh-CN-a-myext-x-private' ],
+               [ 'en-a-myext-b-another', 'en-a-myext-b-another' ],
+
+               // # Invalid:
+               // de-419-DE
+               // a-DE
+               // ar-a-aaa-b-bbb-a-ccc
+       ];
+
+       QUnit.test( 'mw.language.bcp47', function ( assert ) {
+               // Each entry is an [ input, expected ] pair; the message names the failing input
+               bcp47Tests.forEach( function ( data ) {
+                       var input = data[ 0 ], expected = data[ 1 ];
+                       assert.strictEqual( mw.language.bcp47( input ), expected, 'Format of "' + input + '"' );
+               } );
+       } );
 }( mediaWiki, jQuery ) );

-- 
To view, visit https://gerrit.wikimedia.org/r/376506
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I37d32ab8a55c7101b903b03869899f00a39afd11
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/core
Gerrit-Branch: master
Gerrit-Owner: Nikerabbit <niklas.laxst...@gmail.com>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to