Mwalker has uploaded a new change for review.
https://gerrit.wikimedia.org/r/56345
Change subject: Language Fallback Chain is Request Fallback Aware
......................................................................
Language Fallback Chain is Request Fallback Aware
What this means is that the fallback chain behaviour will now be
different if the requested language is the content language.
If this is the case we will look in MW:Msg/A and MW:Msg before
looking in the CDB cache. Otherwise we will only look in MW:Msg if
there was nothing in the CDB cache.
Something else that happened here: it did not make sense to inject
languages into the fallback chain after the content language, and
it did not make sense to always have English in an on-wiki fallback
chain. This means that the terminal language for on-wiki messages
will always be the content language, and the on-wiki fallback
chain might not include English! (My justification is that if you're
on dewiki browsing in fr, you're going to want to fall back directly
to de and not have en in the way, because why do we assume they
speak en on a non-en wiki!?)
Bug: 46579
Change-Id: If88923119179924a5ec091394ccab000ade16b3e
---
M includes/cache/MessageCache.php
M languages/Language.php
M tests/phpunit/includes/cache/MessageCacheTest.php
3 files changed, 167 insertions(+), 92 deletions(-)
git pull ssh://gerrit.wikimedia.org:29418/mediawiki/core
refs/changes/45/56345/1
diff --git a/includes/cache/MessageCache.php b/includes/cache/MessageCache.php
index 7425978..7bcc8aa 100644
--- a/includes/cache/MessageCache.php
+++ b/includes/cache/MessageCache.php
@@ -587,34 +587,39 @@
/**
* Get a message from either the content language or the user language.
The fallback
- * language order is the users language fallback union the content
language fallback.
+ * language order is the users language fallback down to the content
language. The
+ * content language is appended to the end of the list if it did not
exist in it
+ * originally
+ *
* This list is then applied to find keys in the following order
- * 1) MediaWiki:$key/$langcode (for every language except the content
language where
- * we look at MediaWiki:$key)
+ * 1) MediaWiki:$key/$langcode
* 2) Built-in messages via the l10n cache which is also in fallback
order
+ * 3) MediaWiki:$key
+ *
+ * The order of (2) and (3) are reversed if $langcode ===
$wgContentLanguage
*
* @param string $key the message cache key
- * @param $useDB Boolean: If true will look for the message in the DB,
false only
- * get the message from the DB, false to use only the compiled
l10n cache.
+ * @param $useDB Boolean: If true will look for the message on wiki (in
the database)
+ * otherwise use only the compiled l10n cache.
* @param bool|string|object $langcode Code of the language to get the
message for.
* - If string and a valid code, will create a standard language
object
* - If string but not a valid code, will create a basic
language object
* - If boolean and false, create object from the current users
language
* - If boolean and true, create object from the wikis content
language
* - If language object, use it as given
- * @param $isFullKey Boolean: specifies whether $key is a two part key
- * "msg/lang".
+ * @param $isFullKey Boolean: specifies whether $key is a two part key:
"msg/lang".
*
* @throws MWException
* @return string|bool False if the message doesn't exist, otherwise
the message
*/
function get( $key, $useDB = true, $langcode = true, $isFullKey = false
) {
- global $wgLanguageCode, $wgContLang;
+ global $wgContLang;
wfProfileIn( __METHOD__ );
if ( is_int( $key ) ) {
- // "Non-string key given" exception sometimes happens
for numerical strings that become ints somewhere on their way here
+ // "Non-string key given" exception sometimes happens
for numerical strings that
+ // become ints somewhere on their way here
$key = strval( $key );
}
@@ -629,7 +634,6 @@
return false;
}
-
# Obtain the initial language object
if ( $isFullKey ) {
$keyParts = explode( '/', $key );
@@ -642,14 +646,12 @@
}
# Obtain a language object for the requested language from the
passed language code
- # Note that the language code could in fact be a language
object already but we assume
- # it's a string further below.
+ # Note that the $langcode could actually be a language object
already!
$requestedLangObj = wfGetLangObj( $langcode );
if ( !$requestedLangObj ) {
wfProfileOut( __METHOD__ );
throw new MWException( "Bad lang code $langcode given"
);
}
- $langcode = $requestedLangObj->getCode();
# Normalise title-case input (with some inlining)
$lckey = str_replace( ' ', '_', $key );
@@ -661,49 +663,13 @@
$uckey = $wgContLang->ucfirst( $lckey );
}
- # Loop through each language in the fallback list until we find
something useful
- $message = false;
-
- # Try the MediaWiki namespace
- if ( !$this->mDisable && $useDB ) {
- $fallbackChain =
Language::getFallbacksIncludingSiteLanguage( $langcode );
- array_unshift( $fallbackChain, $langcode );
-
- foreach ( $fallbackChain as $langcode ) {
- if ( $langcode === $wgLanguageCode ) {
- # Messages created in the content
language will not have the /lang extension
- $message = $this->getMsgFromNamespace(
$uckey, $langcode );
- } else {
- $message = $this->getMsgFromNamespace(
"$uckey/$langcode", $langcode );
- }
-
- if ( $message !== false ) {
- break;
- }
- }
- }
-
- # Try the array in the language object
- if ( $message === false ) {
- $message = $requestedLangObj->getMessage( $lckey );
- if ( is_null ( $message ) ) {
- $message = false;
- }
- }
-
- # If we still have no message, maybe the key was in fact a full
key so try that
- if( $message === false ) {
- $parts = explode( '/', $lckey );
- # We may get calls for things that are http-urls from
sidebar
- # Let's not load nonexistent languages for those
- # They usually have more than one slash.
- if ( count( $parts ) == 2 && $parts[1] !== '' ) {
- $message = Language::getMessageFor( $parts[0],
$parts[1] );
- if ( is_null( $message ) ) {
- $message = false;
- }
- }
- }
+ # Try all the normal fallbacks to get the message
+ $message = $this->getMessageFromFallbackChain(
+ $requestedLangObj,
+ $uckey,
+ $lckey,
+ ( !$this->mDisable && $useDB )
+ );
# Final fallback
if( $message === false ) {
@@ -726,6 +692,89 @@
}
/**
+ * Obtains the text of a message from any one of a number of fallback
sources.
+ *
+ * If $lookupOnWiki is true we will look in on-wiki sources; otherwise
we will only
+ * look in the language object which pulls from a static source.
+ *
+ * When looking up on-wiki messages we will always look in every
$UMessageName/code
+ * down the fallback chain.
+ *
+ * When looking up on-wiki messages the behaviour changes depending on
if the
+ * $requestedLangObj->code() is the $wgLanguageCode. We will always
look in
+ * $UMessageName/code first; but the order in which we then lookup just
$UMessageName
+ * vs look in the language object will vary on if the requested
language is the
+ * site content language. If it is we do the $UMessageName lookup first
then
+ * the language object. This is to allow customization of local text as
per:
+ * https://bugzilla.wikimedia.org/show_bug.cgi?id=46579
+ *
+ * @param Language $requestedLangObj Language to start the fallback
chain at.
+ * @param string $UMessageName On-wiki name of the message
+ * @param string $LMessageName Language object cache name of the
message
+ * @param bool $lookupOnWiki True if wiki (database) lookups
are allowed.
+ *
+ * @return bool|string The message content if found; otherwise false.
+ */
+ protected function getMessageFromFallbackChain( $requestedLangObj,
$UMessageName, $LMessageName, $lookupOnWiki ) {
+ global $wgLanguageCode;
+
+ $requestedLangCode = $requestedLangObj->getCode();
+ $requestIsForContentLang = ( $requestedLangCode ===
$wgLanguageCode );
+ $message = false;
+
+ # Loop through each language in the fallback list on-wiki until
we find something useful
+ if ( $lookupOnWiki ) {
+ $fallbackChain = Language::getOnWikiFallbackLanguages(
$requestedLangCode );
+ array_unshift( $fallbackChain, $requestedLangCode );
+
+ foreach ( $fallbackChain as $lookupLang ) {
+ $message = $this->getMsgFromNamespace(
"$UMessageName/$lookupLang", $lookupLang );
+
+ # If we have no message; but the request came
in for a message in the content
+ # language we will look in the root page. This
allows for customizations that
+ # will not take precedence in the chain.
+ if ( ( $message === false ) && (
$requestIsForContentLang ) ) {
+ $message = $this->getMsgFromNamespace(
$UMessageName, $requestedLangCode );
+ }
+
+ if ( $message !== false ) {
+ return $message;
+ }
+ }
+ }
+
+ # Try the array in the language object (static messages from
CDB file)
+ $message = $requestedLangObj->getMessage( $LMessageName );
+ if ( !is_null( $message ) ) {
+ return $message;
+ }
+
+ # If we were not requesting originally in the content language;
and still do not
+ # have a valid message -- let's look one last time on-wiki;
except this time in
+ # the root page.
+ if ( !$requestIsForContentLang ) {
+ $message = $this->getMsgFromNamespace( $UMessageName,
$requestedLangCode );
+ if( $message !== false ) {
+ return $message;
+ }
+ }
+
+ # If we still have no message, maybe the key was in fact a full
key so try that.
+ # We may get calls for things that are http-urls from sidebar:
let's not load
+ # nonexistent languages for those. They usually have more than
one slash.
+ $parts = explode( '/', $LMessageName );
+ if ( count( $parts ) == 2 && $parts[1] !== '' ) {
+ $message = Language::getMessageFor( $parts[0],
$parts[1] );
+ if ( !is_null( $message ) ) {
+ return $message;
+ }
+ }
+
+ # All lookups failed :(
+ return false;
+ }
+
+ /**
* Get a message from the MediaWiki namespace, with caching. The key
must
* first be converted to two-part lang/msg form if necessary.
*
diff --git a/languages/Language.php b/languages/Language.php
index 16de816..86ed199 100644
--- a/languages/Language.php
+++ b/languages/Language.php
@@ -3984,14 +3984,19 @@
}
/**
- * Get the ordered list of fallback languages, ending with the fallback
- * language chain for the site language.
+ * Get the ordered list of fallback languages, ending with the wiki's
content
+ * language.
+ *
+ * Note: English is not necessarily part of this chain! The
authoritative language
+ * for a wiki should be the content language, so if a language
has a terminal
+ * fallback of english (and the origin language was not english)
we instead
+ * terminate with the content language.
*
* @since 1.21
* @param $code string Language code
* @return array
*/
- public static function getFallbacksIncludingSiteLanguage( $code ) {
+ public static function getOnWikiFallbackLanguages( $code ) {
global $wgLanguageCode;
// Usually, we will only store a tiny number of fallback
chains, so we
@@ -4000,25 +4005,31 @@
$cacheKey = "{$code}-{$wgLanguageCode}";
if ( !array_key_exists( $cacheKey, $fallbackLanguageCache ) ) {
- $fallbacks = self::getFallbacksFor( $code );
+ $siteFallbacks = array();
- // Take the final 'en' off of the array before splicing
- if ( end( $fallbacks ) === 'en' ) {
- array_pop( $fallbacks );
- }
- // Append the site's fallback chain, including the site
language itself
- $siteFallbacks = self::getFallbacksFor( $wgLanguageCode
);
- array_unshift( $siteFallbacks, $wgLanguageCode );
+ // Shortcut if the $code is the contentLanguage; we
don't return anything
+ // in that case
+ if ( $code !== $wgLanguageCode ) {
+ $fallbacks = self::getFallbacksFor( $code );
- // Eliminate any languages already included in the chain
- $siteFallbacks = array_intersect( array_diff(
$siteFallbacks, $fallbacks ), $siteFallbacks );
- if ( $siteFallbacks ) {
- $fallbacks = array_merge( $fallbacks,
$siteFallbacks );
+ // Normalize the fallback chain by removing the
ending 'en' unless we
+ // are actually looking at an 'en' fallback
chain!
+ if ( ( substr( reset( $fallbacks ), 0, 2 ) !==
'en' ) && ( end( $fallbacks ) === 'en' ) ) {
+ array_pop( $fallbacks );
+ }
+
+ // Now construct the authoritative chain;
ending with the content language
+ foreach ( $fallbacks as $lang ) {
+ if ( $lang !== $wgLanguageCode ) {
+ $siteFallbacks[] = $lang;
+ } else {
+ break;
+ }
+ }
+ $siteFallbacks[] = $wgLanguageCode;
}
- if ( end( $fallbacks ) !== 'en' ) {
- $fallbacks[] = 'en';
- }
- $fallbackLanguageCache[$cacheKey] = $fallbacks;
+
+ $fallbackLanguageCache[$cacheKey] = $siteFallbacks;
}
return $fallbackLanguageCache[$cacheKey];
}
diff --git a/tests/phpunit/includes/cache/MessageCacheTest.php
b/tests/phpunit/includes/cache/MessageCacheTest.php
index ada453c..757a9ee 100644
--- a/tests/phpunit/includes/cache/MessageCacheTest.php
+++ b/tests/phpunit/includes/cache/MessageCacheTest.php
@@ -31,49 +31,61 @@
function addDBData() {
$this->configureLanguages();
- // Set up messages and fallbacks ab -> ru -> de -> en
+ // Set up messages and fallbacks ab -> ru -> de
$this->makePage( 'FallbackLanguageTest-Full', 'ab' );
$this->makePage( 'FallbackLanguageTest-Full', 'ru' );
$this->makePage( 'FallbackLanguageTest-Full', 'de' );
- $this->makePage( 'FallbackLanguageTest-Full', 'en' );
// Fallbacks where ab does not exist
$this->makePage( 'FallbackLanguageTest-Partial', 'ru' );
$this->makePage( 'FallbackLanguageTest-Partial', 'de' );
- $this->makePage( 'FallbackLanguageTest-Partial', 'en' );
- // Fallback to the content language
+ // Fallback to the content language (for both ab and en)
$this->makePage( 'FallbackLanguageTest-ContLang', 'de' );
- $this->makePage( 'FallbackLanguageTest-ContLang', 'en' );
- // Fallback to english
+ // Fallback chain explicitly including english
$this->makePage( 'FallbackLanguageTest-English', 'en' );
+ $this->makePage( 'FallbackLanguageTest-English', 'de' );
// Full key tests -- always want russian
$this->makePage( 'MessageCacheTest-FullKeyTest', 'ab' );
$this->makePage( 'MessageCacheTest-FullKeyTest', 'ru' );
+
+ // In content language -- get derivative
+ $this->makePage( 'FallbackLanguageTest-DervContLang', 'de',
'de/de' );
+ $this->makePage( 'FallbackLanguageTest-DervContLang', 'de',
'de/none', false );
+
+ // In content language -- get base if no derivative
+ $this->makePage( 'FallbackLanguageTest-NoDervContLang', 'de',
'de/none', false );
}
/**
* Helper function for addDBData -- adds a simple page to the database
*
- * @param string $title Title of page to be created
- * @param string $lang Language and content of the created page
+ * @param string $title Title of page to be created
+ * @param string $lang Language and content of the created page
+ * @param string $content Content of the created page, if null
will be a generic string
+ * @param bool $createSubPage Set to false if the page should be
created at the root
*/
- protected function makePage( $title, $lang ) {
+ protected function makePage( $title, $lang, $content = null,
$createSubPage = true ) {
global $wgContLang;
- $title = Title::newFromText(
- ( $lang == $wgContLang->getCode() ) ? $title :
"$title/$lang",
- NS_MEDIAWIKI
- );
+ if ( is_null( $content ) ) {
+ $content = $lang;
+ }
+
+ if ( ( $lang !== $wgContLang->getCode() ) || $createSubPage ) {
+ $title = "$title/$lang";
+ }
+
+ $title = Title::newFromText( $title, NS_MEDIAWIKI );
$wikiPage = new WikiPage( $title );
- $content = ContentHandler::makeContent( $lang, $title );
- $wikiPage->doEditContent( $content, "$lang translation test
case" );
+ $contentHandler = ContentHandler::makeContent( $content, $title
);
+ $wikiPage->doEditContent( $contentHandler, "$lang translation
test case" );
}
/**
- * Test message fallbacks, bug #1495
+ * Test message fallbacks, bug #1495 & #46579
*
* @dataProvider provideMessagesForFallback
*/
@@ -87,8 +99,11 @@
array( 'FallbackLanguageTest-Full', 'ab', 'ab' ),
array( 'FallbackLanguageTest-Partial', 'ab', 'ru' ),
array( 'FallbackLanguageTest-ContLang', 'ab', 'de' ),
- array( 'FallbackLanguageTest-English', 'ab', 'en' ),
+ array( 'FallbackLanguageTest-ContLang', 'en', 'de' ),
+ array( 'FallbackLanguageTest-English', 'en', 'en' ),
array( 'FallbackLanguageTest-None', 'ab', false ),
+ array( 'FallbackLanguageTest-DervContLang', 'de',
'de/de' ), #46579
+ array( 'FallbackLanguageTest-NoDervContLang', 'de',
'de/none' ), #46579
);
}
--
To view, visit https://gerrit.wikimedia.org/r/56345
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: If88923119179924a5ec091394ccab000ade16b3e
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/core
Gerrit-Branch: master
Gerrit-Owner: Mwalker <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits