jenkins-bot has submitted this change and it was merged. Change subject: Make grammar data loadable as an RL module and usable in JS ......................................................................
Make grammar data loadable as an RL module and usable in JS * Load the data of this variable from a JSON file to the same data structure that ResourceLoader uses for digitTransformTable, pluralRules, etc. * Change the JSON structure to ensure the order of the rules. Otherwise JavaScript processes the keys in a random order. * Delete the grammar code from JS and replace it with the same logic that is used in PHP for processing the data. For now this is done only for Russian. The next step will be to make the PHP and JS data processing logic reusable. Bug: T115217 Change-Id: I6b9b29b7017f958d62611671be017f97cee73415 --- M includes/resourceloader/ResourceLoaderLanguageDataModule.php M languages/Language.php M languages/classes/LanguageRu.php D languages/classes/data/grammar.ru.json A languages/data/grammarTransformations/ru.json M resources/src/mediawiki.language/languages/ru.js 6 files changed, 139 insertions(+), 129 deletions(-) Approvals: Nikerabbit: Looks good to me, approved jenkins-bot: Verified diff --git a/includes/resourceloader/ResourceLoaderLanguageDataModule.php b/includes/resourceloader/ResourceLoaderLanguageDataModule.php index 1630269..ef942fa 100644 --- a/includes/resourceloader/ResourceLoaderLanguageDataModule.php +++ b/includes/resourceloader/ResourceLoaderLanguageDataModule.php @@ -41,6 +41,7 @@ 'digitTransformTable' => $language->digitTransformTable(), 'separatorTransformTable' => $language->separatorTransformTable(), 'grammarForms' => $language->getGrammarForms(), + 'grammarTransformations' => $language->getGrammarTransformations(), 'pluralRules' => $language->getPluralRules(), 'digitGroupingPattern' => $language->digitGroupingPattern(), 'fallbackLanguages' => $language->getFallbackLanguages(), diff --git a/languages/Language.php b/languages/Language.php index 7ef2eff..4628812 100644 --- a/languages/Language.php +++ b/languages/Language.php @@ -138,6 +138,12 @@ static private $fallbackLanguageCache = []; /** + * Cache for grammar rules data + * @var MapCacheLRU|null + */ + static private $grammarTransformations; + + /** * Cache for language names * @var HashBagOStuff|null */ @@ -3730,6 +3736,7 @@ return $word; } + /** * Get the grammar forms for the content language * @return array Array of grammar forms @@ -3745,6 +3752,45 @@ return []; } + + /** + * Get the grammar transformations data for the language. + * Used like grammar forms, with {{GRAMMAR}} and cases, + * but uses pairs of regexes and replacements instead of code. + * + * @return array[] Array of grammar transformations. + * @since 1.28 + */ + public function getGrammarTransformations() { + $languageCode = $this->getCode(); + + if ( self::$grammarTransformations === null ) { + self::$grammarTransformations = new MapCacheLRU( 10 ); + } + + if ( self::$grammarTransformations->has( $languageCode ) ) { + return self::$grammarTransformations->get( $languageCode ); + } + + $data = []; + + $grammarDataFile = __DIR__ . "/data/grammarTransformations/$languageCode.json"; + if ( is_readable( $grammarDataFile ) ) { + $data = FormatJson::decode( + file_get_contents( $grammarDataFile ), + true + ); + if ( $data === null ) { + throw new MWException( "Invalid grammar data for \"$languageCode\"." ); + $data = []; + } + + self::$grammarTransformations->set( $languageCode, $data ); + } + + return $data; + } + /** * Provides an alternative text depending on specified gender. * Usage {{gender:username|masculine|feminine|unknown}}. diff --git a/languages/classes/LanguageRu.php b/languages/classes/LanguageRu.php index c2560a4..62de390 100644 --- a/languages/classes/LanguageRu.php +++ b/languages/classes/LanguageRu.php @@ -31,7 +31,6 @@ * @ingroup Language */ class LanguageRu extends Language { - /** * Convert from the nominative form of a noun to some other case * Invoked with {{grammar:case|word}} @@ -46,19 +45,22 @@ return $wgGrammarForms['ru'][$case][$word]; } - $grammarDataFile = __DIR__ . '/data/grammar.ru.json'; - $grammarData = FormatJson::decode( file_get_contents( $grammarDataFile ), true ); + $grammarTransformations = $this->getGrammarTransformations(); - if ( array_key_exists( $case, $grammarData ) ) { - foreach ( array_keys( $grammarData[$case] ) as $form ) { + if ( isset( $grammarTransformations[$case] ) ) { + foreach ( array_values( $grammarTransformations[$case] ) as $rule ) { + $form = $rule[0]; + if ( $form === '@metadata' ) { continue; } + $replacement = $rule[1]; + $regex = "/$form/"; if ( preg_match( $regex, $word ) ) { - $word = preg_replace( $regex, $grammarData[$case][$form], $word ); + $word = preg_replace( $regex, $replacement, $word ); break; } diff --git a/languages/classes/data/grammar.ru.json b/languages/classes/data/grammar.ru.json deleted file mode 100644 index 446163b..0000000 --- a/languages/classes/data/grammar.ru.json +++ /dev/null @@ -1,51 +0,0 @@ -{ - "@metadata": { - "authors": [ - "Alexander Sigachov (alexander.sigachov at Googgle Mail)", - "Amir E. Aharoni (amir.ahar...@mail.huji.ac.il)" - ], - "comment": "These rules don't cover the whole grammar of the language, and are intended only for names of languages and Wikimedia projects." - }, - "genitive": { - "(.+)ь$": "$1я", - "(.+)ия$": "$1ии", - "(.+)ка$": "$1ки", - "(.+)ти$": "$1тей", - "(.+)ды$": "$1дов", - "(.+)д$": "$1да", - "(.+)ник$": "$1ника", - "(.+)ные$": "$1ных" - }, - "prepositional": { - "(.+)ь$": "$1е", - "(.+)ия$": "$1ии", - "(.+)ка$": "$1ке", - "(.+)ти$": "$1тях", - "(.+)ды$": "$1дах", - "(.+)д$": "$1де", - "(.+)ник$": "$1нике", - "(.+)ные$": "$1ных" - }, - "languagegen": { - "@metadata": "язык в родительном падеже: '(с) русского'", - "(.+)кий$": "$1кого", - "иврит$": "иврита", - "идиш$": "идиша", - "(.+)$": "$1" - }, - "languageprep": { - "@metadata": "язык в предложном падеже: '(на) русском'", - "(.+)кий$": "$1ком", - "иврит$": "иврите", - "идиш$": "идише", - "(.+)$": "$1" - }, - "languageadverb": { - "@metadata": "наречие с названием языка: 'по-русски'", - "(.+)кий$": "по-$1ки", - "иврит$": "на иврите", - "идиш$": "на идише", - "(идо|урду|хинди|эсперанто)$": "на $1", - "(.+)$": "на языке $1" - } -} diff --git a/languages/data/grammarTransformations/ru.json b/languages/data/grammarTransformations/ru.json new file mode 100644 index 0000000..deb58b7 --- /dev/null +++ b/languages/data/grammarTransformations/ru.json @@ -0,0 +1,57 @@ +{ + "@metadata": { + "authors": [ + "Alexander Sigachov (alexander.sigachov at Googgle Mail)", + "Amir E. Aharoni (amir.ahar...@mail.huji.ac.il)" + ], + "comment": "These rules don't cover the whole grammar of the language, and are intended only for names of languages and Wikimedia projects." + }, + "genitive": [ + [ "(.+)ь$", "$1я" ], + [ "(.+)ия$", "$1ии" ], + [ "(.+)ка$", "$1ки" ], + [ "(.+)ти$", "$1тей" ], + [ "(.+)ды$", "$1дов" ], + [ "(.+)д$", "$1да" ], + [ "(.+)ник$", "$1ника" ], + [ "(.+)ные$", "$1ных" ] + ], + "prepositional": [ + [ "(.+)ь$", "$1е" ], + [ "(.+)ия$", "$1ии" ], + [ "(.+)ка$", "$1ке" ], + [ "(.+)ти$", "$1тях" ], + [ "(.+)ды$", "$1дах" ], + [ "(.+)д$", "$1де" ], + [ "(.+)ник$", "$1нике" ], + [ "(.+)ные$", "$1ных" ] + ], + "languagegen": [ + [ "@metadata", [ + "comment", "язык в родительном падеже: '(с) русского'" + ] ], + [ "(.+)кий$", "$1кого" ], + [ "иврит$", "иврита" ], + [ "идиш$", "идиша" ], + [ "(.+)$", "$1" ] + ], + "languageprep": [ + [ "@metadata", [ + "comment", "язык в предложном падеже: '(на) русском'" + ] ], + [ "(.+)кий$", "$1ком" ], + [ "иврит$", "иврите" ], + [ "идиш$", "идише" ], + [ "(.+)$", "$1" ] + ], + "languageadverb": [ + [ "@metadata", [ + "comment", "наречие с названием языка: 'по-русски'" + ] ], + [ "(.+)кий$", "по-$1ки" ], + [ "иврит$", "на иврите" ], + [ "идиш$", "на идише" ], + [ "(идо|урду|хинди|эсперанто)$", "на $1" ], + [ "(.+)$", "на языке $1" ] + ] +} diff --git a/resources/src/mediawiki.language/languages/ru.js b/resources/src/mediawiki.language/languages/ru.js index ccc68f1..09d7c0b 100644 --- a/resources/src/mediawiki.language/languages/ru.js +++ b/resources/src/mediawiki.language/languages/ru.js @@ -2,82 +2,37 @@ * Russian (Русский) language functions */ -// These tests were originally made for names of Wikimedia -// websites, so they don't currently cover all the possible -// cases. - mediaWiki.language.convertGrammar = function ( word, form ) { - /*global $ */ 'use strict'; - var grammarForms = mediaWiki.language.getData( 'ru', 'grammarForms' ); - if ( grammarForms && grammarForms[ form ] ) { - return grammarForms[ form ][ word ]; + var forms, transformations, i, rule, sourcePattern, regexp, replacement; + + forms = mediaWiki.language.getData( 'ru', 'grammarForms' ); + if ( forms && forms[ form ] ) { + return forms[ form ][ word ]; } - switch ( form ) { - case 'genitive': // родительный падеж - if ( word.slice( -1 ) === 'ь' ) { - word = word.slice( 0, -1 ) + 'я'; - } else if ( word.slice( -2 ) === 'ия' ) { - word = word.slice( 0, -2 ) + 'ии'; - } else if ( word.slice( -2 ) === 'ка' ) { - word = word.slice( 0, -2 ) + 'ки'; - } else if ( word.slice( -2 ) === 'ти' ) { - word = word.slice( 0, -2 ) + 'тей'; - } else if ( word.slice( -2 ) === 'ды' ) { - word = word.slice( 0, -2 ) + 'дов'; - } else if ( word.slice( -1 ) === 'д' ) { - word = word.slice( 0, -1 ) + 'да'; - } else if ( word.slice( -3 ) === 'ные' ) { - word = word.slice( 0, -3 ) + 'ных'; - } else if ( word.slice( -3 ) === 'ник' ) { - word = word.slice( 0, -3 ) + 'ника'; - } - break; - case 'prepositional': // предложный падеж - if ( word.slice( -1 ) === 'ь' ) { - word = word.slice( 0, -1 ) + 'е'; - } else if ( word.slice( -2 ) === 'ия' ) { - word = word.slice( 0, -2 ) + 'ии'; - } else if ( word.slice( -2 ) === 'ка' ) { - word = word.slice( 0, -2 ) + 'ке'; - } else if ( word.slice( -2 ) === 'ти' ) { - word = word.slice( 0, -2 ) + 'тях'; - } else if ( word.slice( -2 ) === 'ды' ) { - word = word.slice( 0, -2 ) + 'дах'; - } else if ( word.slice( -1 ) === 'д' ) { - word = word.slice( 0, -1 ) + 'де'; - } else if ( word.slice( -3 ) === 'ные' ) { - word = word.slice( 0, -3 ) + 'ных'; - } else if ( word.slice( -3 ) === 'ник' ) { - word = word.slice( 0, -3 ) + 'нике'; - } - break; - case 'languagegen': // язык в родительном падеже ("(с) русского") - if ( word.slice( -3 ) === 'кий' ) { - word = word.slice( 0, -2 ) + 'ого'; - } else if ( $.inArray( word, [ 'иврит', 'идиш' ] ) > -1 ) { - word = word + 'а'; - } - break; - case 'languageprep': // язык в предложном падеже ("(на) русском") - if ( word.slice( -3 ) === 'кий' ) { - word = word.slice( 0, -2 ) + 'ом'; - } else if ( $.inArray( word, [ 'иврит', 'идиш' ] ) > -1 ) { - word = word + 'е'; - } - break; - case 'languageadverb': // наречие с названием языка ("по-русски") - if ( word.slice( -3 ) === 'кий' ) { - word = 'по-' + word.slice( 0, -1 ); - } else if ( $.inArray( word, [ 'иврит', 'идиш' ] ) > -1 ) { - word = 'на ' + word + 'е'; - } else if ( $.inArray( word, [ 'идо', 'урду', 'хинди', 'эсперанто' ] ) > -1 ) { - word = 'на ' + word; - } else { - word = 'на языке ' + word; - } - break; + + transformations = mediaWiki.language.getData( 'ru', 'grammarTransformations' ); + + if ( !transformations[ form ] ) { + return word; } + + for ( i = 0; i < transformations[ form ].length; i++ ) { + rule = transformations[ form ][ i ]; + sourcePattern = rule[ 0 ]; + + if ( sourcePattern === '@metadata' ) { + continue; + } + + regexp = new RegExp( sourcePattern ); + replacement = rule[ 1 ]; + + if ( word.match( regexp ) ) { + return word.replace( regexp, replacement ); + } + } + return word; }; -- To view, visit https://gerrit.wikimedia.org/r/241499 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: merged Gerrit-Change-Id: I6b9b29b7017f958d62611671be017f97cee73415 Gerrit-PatchSet: 22 Gerrit-Project: mediawiki/core Gerrit-Branch: master Gerrit-Owner: Amire80 <amir.ahar...@mail.huji.ac.il> Gerrit-Reviewer: Amire80 <amir.ahar...@mail.huji.ac.il> Gerrit-Reviewer: Edokter <er...@darcoury.nl> Gerrit-Reviewer: Jack Phoenix <j...@countervandalism.net> Gerrit-Reviewer: Legoktm <legoktm.wikipe...@gmail.com> Gerrit-Reviewer: Nikerabbit <niklas.laxst...@gmail.com> Gerrit-Reviewer: jenkins-bot <> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits