Nikerabbit has uploaded a new change for review. ( https://gerrit.wikimedia.org/r/376506 )
Change subject: Port BCP47 formatter from PHP to JavaScript ...................................................................... Port BCP47 formatter from PHP to JavaScript It can be accessed via mw.language.bcp47. To be used in ContentTranslation, see T157212 Change-Id: I37d32ab8a55c7101b903b03869899f00a39afd11 --- M resources/src/mediawiki.language/mediawiki.language.js M tests/qunit/suites/resources/mediawiki/mediawiki.language.test.js 2 files changed, 129 insertions(+), 1 deletion(-) git pull ssh://gerrit.wikimedia.org:29418/mediawiki/core refs/changes/06/376506/1 diff --git a/resources/src/mediawiki.language/mediawiki.language.js b/resources/src/mediawiki.language/mediawiki.language.js index 3726a68..618b69c 100644 --- a/resources/src/mediawiki.language/mediawiki.language.js +++ b/resources/src/mediawiki.language/mediawiki.language.js @@ -188,6 +188,38 @@ setSpecialCharacters: function ( data ) { this.specialCharacters = data; + }, + + bcp47: function ( languageTag ) { + var formatted, + isFirstSegment = true, + isPrivate = false, + segments = languageTag.split( '-' ); + + formatted = segments.map( function ( segment ) { + var newSegment; + + // when previous segment is x, it is a private segment and should be lc + if ( isPrivate ) { + newSegment = segment.toLowerCase(); + // ISO 3166 country code + } else if ( segment.length === 2 && !isFirstSegment ) { + newSegment = segment.toUpperCase(); + // ISO 15924 script code + } else if ( segment.length == 4 && !isFirstSegment ) { + newSegment = segment.charAt(0).toUpperCase() + segment.substring(1).toLowerCase() + // Use lowercase for other cases + } else { + newSegment = segment.toLowerCase(); + } + + isPrivate = segment.toLowerCase() === 'x'; + isFirstSegment = false; + + return newSegment; + } ); + + return formatted.join( '-' ); } } ); diff --git a/tests/qunit/suites/resources/mediawiki/mediawiki.language.test.js b/tests/qunit/suites/resources/mediawiki/mediawiki.language.test.js index b965079..3e91d28 100644 
--- a/tests/qunit/suites/resources/mediawiki/mediawiki.language.test.js +++ b/tests/qunit/suites/resources/mediawiki/mediawiki.language.test.js @@ -1,7 +1,7 @@ ( function ( mw, $ ) { 'use strict'; - var grammarTests; + var grammarTests, bcp47Tests; QUnit.module( 'mediawiki.language', QUnit.newMwEnvironment( { setup: function () { @@ -587,4 +587,100 @@ assert.equal( mw.language.listToText( [ 'a', 'b' ] ), 'a and b', 'Two items' ); assert.equal( mw.language.listToText( [ 'a', 'b', 'c' ] ), 'a, b and c', 'More than two items' ); } ); + + bcp47Tests = [ + // Extracted from BCP 47 (list not exhaustive) + // # 2.1.1 + [ 'en-ca-x-ca', 'en-CA-x-ca' ], + [ 'sgn-be-fr', 'sgn-BE-FR' ], + [ 'az-latn-x-latn', 'az-Latn-x-latn' ], + // # 2.2 + [ 'sr-Latn-RS', 'sr-Latn-RS' ], + [ 'az-arab-ir', 'az-Arab-IR' ], + + // # 2.2.5 + [ 'sl-nedis', 'sl-nedis' ], + [ 'de-ch-1996', 'de-CH-1996' ], + + // # 2.2.6 + [ + 'en-latn-gb-boont-r-extended-sequence-x-private', + 'en-Latn-GB-boont-r-extended-sequence-x-private' + ], + + // Examples from BCP 47 Appendix A + // # Simple language subtag: + [ 'DE', 'de' ], + [ 'fR', 'fr' ], + [ 'ja', 'ja' ], + + // # Language subtag plus script subtag: + [ 'zh-hans', 'zh-Hans' ], + [ 'sr-cyrl', 'sr-Cyrl' ], + [ 'sr-latn', 'sr-Latn' ], + + // # Extended language subtags and their primary language subtag + // # counterparts: + [ 'zh-cmn-hans-cn', 'zh-cmn-Hans-CN' ], + [ 'cmn-hans-cn', 'cmn-Hans-CN' ], + [ 'zh-yue-hk', 'zh-yue-HK' ], + [ 'yue-hk', 'yue-HK' ], + + // # Language-Script-Region: + [ 'zh-hans-cn', 'zh-Hans-CN' ], + [ 'sr-latn-RS', 'sr-Latn-RS' ], + + // # Language-Variant: + [ 'sl-rozaj', 'sl-rozaj' ], + [ 'sl-rozaj-biske', 'sl-rozaj-biske' ], + [ 'sl-nedis', 'sl-nedis' ], + + // # Language-Region-Variant: + [ 'de-ch-1901', 'de-CH-1901' ], + [ 'sl-it-nedis', 'sl-IT-nedis' ], + + // # Language-Script-Region-Variant: + [ 'hy-latn-it-arevela', 'hy-Latn-IT-arevela' ], + + // # Language-Region: + [ 'de-de', 'de-DE' ], + [ 'en-us', 'en-US' ], + [ 
'es-419', 'es-419' ], + + // # Private use subtags: + [ 'de-ch-x-phonebk', 'de-CH-x-phonebk' ], + [ 'az-arab-x-aze-derbend', 'az-Arab-x-aze-derbend' ], + /** + * Previous test does not reflect the BCP 47 which states: + * az-Arab-x-AZE-derbend + * AZE being private, it should be lower case, hence the test above + * should probably be: + * [ 'az-arab-x-aze-derbend', 'az-Arab-x-AZE-derbend' ], + */ + + // # Private use registry values: + [ 'x-whatever', 'x-whatever' ], + [ 'qaa-qaaa-qm-x-southern', 'qaa-Qaaa-QM-x-southern' ], + [ 'de-qaaa', 'de-Qaaa' ], + [ 'sr-latn-qm', 'sr-Latn-QM' ], + [ 'sr-qaaa-rs', 'sr-Qaaa-RS' ], + + // # Tags that use extensions + [ 'en-us-u-islamcal', 'en-US-u-islamcal' ], + [ 'zh-cn-a-myext-x-private', 'zh-CN-a-myext-x-private' ], + [ 'en-a-myext-b-another', 'en-a-myext-b-another' ], + + // # Invalid: + // de-419-DE + // a-DE + // ar-a-aaa-b-bbb-a-ccc + ]; + + QUnit.test( 'mw.language.bcp47', function ( assert ) { + bcp47Tests.forEach( function ( data ) { + var input = data[0], + expected = data[1]; + assert.equal( mw.language.bcp47( input ), expected ); + } ); + } ); }( mediaWiki, jQuery ) ); -- To view, visit https://gerrit.wikimedia.org/r/376506 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: newchange Gerrit-Change-Id: I37d32ab8a55c7101b903b03869899f00a39afd11 Gerrit-PatchSet: 1 Gerrit-Project: mediawiki/core Gerrit-Branch: master Gerrit-Owner: Nikerabbit <niklas.laxst...@gmail.com> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits