Author: Takuya Aramaki (takaram) Committer: GitHub (web-flow) Pusher: saundefined Date: 2024-10-14T18:48:22+05:00
Commit: https://github.com/php/web-php/commit/38ef33f923321519476d353aabf3d9e19fb1d0d9 Raw diff: https://github.com/php/web-php/commit/38ef33f923321519476d353aabf3d9e19fb1d0d9.diff Move `language_choose_code()` into new `LangChooser` class (#1081) Changed paths: A src/LangChooser.php A tests/Unit/LangChooserTest.php M include/langchooser.inc Diff: diff --git a/include/langchooser.inc b/include/langchooser.inc index 40790ad9d2..230a3d707d 100644 --- a/include/langchooser.inc +++ b/include/langchooser.inc @@ -28,170 +28,20 @@ */ +use phpweb\LangChooser; + +require_once __DIR__ . '/../src/autoload.php'; + // Default STRIPPED_URI $_SERVER['STRIPPED_URI'] = htmlspecialchars($_SERVER['REQUEST_URI'], ENT_QUOTES, 'UTF-8'); // The code is encapsulated in a function, // so the variable namespace is not polluted -list($LANG, $EXPL_LANG, $UA_LANGS) = language_choose_code(); +list($LANG, $EXPL_LANG) = (new LangChooser($LANGUAGES, $INACTIVE_ONLINE_LANGUAGES, myphpnet_language(), default_language() ?: ''))->chooseCode( + $_REQUEST['lang'] ?? null, + $_SERVER['REQUEST_URI'], + $_SERVER['HTTP_ACCEPT_LANGUAGE'] ?? null, +); // Compatibility if ($EXPL_LANG == '') { unset($EXPL_LANG); } - -function language_choose_code() -{ - // Contains all the languages picked up by the - // process in priority order (without repeating codes) - $languages = []; - - // Default values for languages - $explicitly_specified = ''; $selected = ''; - - // Specified for the request (GET/POST parameter) - if (!empty($_REQUEST['lang']) && is_string($_REQUEST['lang'])) { - $explicitly_specified = language_add(htmlspecialchars($_REQUEST['lang'], ENT_QUOTES, 'UTF-8'), $languages); - } - - // Specified in a shortcut URL (eg. /en/echo or /pt_br/echo) - if (preg_match("!^/(\\w{2}(_\\w{2})?)/!", htmlspecialchars($_SERVER['REQUEST_URI'],ENT_QUOTES, 'UTF-8'), $flang)) { - - // Put language into preference list - $rlang = language_add($flang[1], $languages); - - // Set explicity specified language - if (empty($explicitly_specified)) { - $explicitly_specified = $rlang; - } - - // Drop out langauge specification from URL, as this is already handled - $_SERVER['STRIPPED_URI'] = preg_replace( - "!^/$flang[1]/!", "/", htmlspecialchars($_SERVER['REQUEST_URI'], ENT_QUOTES, 'UTF-8'), - ); - - } - - // Specified in a manual URL (eg. manual/en/ or manual/pt_br/) - if (preg_match("!^/manual/(\\w{2}(_\\w{2})?)(/|$)!", htmlspecialchars($_SERVER['REQUEST_URI'], ENT_QUOTES, 'UTF-8'), $flang)) { - - $flang = language_add($flang[1], $languages); - - // Set explicity specified language - if (empty($explicitly_specified)) { - $explicitly_specified = $flang; - } - } - - // Honor the users own language setting (if available) - if (myphpnet_language()) { - language_add(myphpnet_language(), $languages); - } - - // Specified by the user via the browser's Accept Language setting - // Samples: "hu, en-us;q=0.66, en;q=0.33", "hu,en-us;q=0.5" - $browser_langs = []; $parsed_langs = []; - - // Check if we have $_SERVER['HTTP_ACCEPT_LANGUAGE'] set and - // it no longer breaks if you only have one language set :) - if (isset($_SERVER['HTTP_ACCEPT_LANGUAGE'])) { - $browser_accept = explode(",", $_SERVER['HTTP_ACCEPT_LANGUAGE']); - - // Go through all language preference specs - foreach ($browser_accept as $value) { - // The language part is either a code or a code with a quality - // We cannot do anything with a * code, so it is skipped - // If the quality is missing, it is assumed to be 1 according to the RFC - if (preg_match("!([a-z-]+)(;q=([0-9\\.]+))?!", trim($value), $found)) { - $quality = (isset($found[3]) ? (float) $found[3] : 1.0); - $browser_langs[] = [$found[1], $quality]; - } - unset($found); - } - } - - // Order the codes by quality - usort($browser_langs, "language_accept_order"); - - // For all languages found in the accept-language - foreach ($browser_langs as $langdata) { - - // Translation table for accept-language codes and phpdoc codes - switch ($langdata[0]) { - case "pt-br": - $langdata[0] = 'pt_br'; - break; - case "zh-cn": - $langdata[0] = 'zh'; - break; - case "zh-hk": - $langdata[0] = 'hk'; - break; - case "zh-tw": - $langdata[0] = 'tw'; - break; - } - - // We do not support flavors of languages (except the ones above) - // This is not in conformance to the RFC, but it here for user - // convinience reasons - if (preg_match("!^(.+)-!", $langdata[0], $match)) { - $langdata[0] = $match[1]; - } - - // Add language to priority order - $parsed_langs[] = language_add($langdata[0], $languages); - } - - // Language preferred by this mirror site - language_add(default_language(), $languages); - - // Last default language is English - language_add("en", $languages); - - // Try to find out what language is available on this mirror. - // As most of the language dependant operations involve manual - // page display (lookup, search, shortcuts), we will check for - // the index file of manuals. -/* - foreach ($languages as $language) { - if (file_exists($_SERVER['DOCUMENT_ROOT'] . "/manual/$language/index.php")) { - $selected = $language; - break; - } - } -*/ - $selected = $languages[0]; - - // Return with all found data - return [$selected, $explicitly_specified, $parsed_langs]; -} - -// Add a language to the possible languages' list -function language_add($langcode, &$langs) -{ - global $LANGUAGES, $INACTIVE_ONLINE_LANGUAGES; - - // Make language code lowercase, html encode special chars and remove slashes - $langcode = strtolower(htmlspecialchars($langcode)); - - // The Brazilian Portuguese code needs special attention - if ($langcode == 'pt_br') { $langcode = 'pt_BR'; } - - // Append language code in priority order if it is not - // there already and supported by the PHP site. Try to - // lower number of file_exists() calls to the minumum... - if (!in_array($langcode, $langs, false) && isset($LANGUAGES[$langcode]) - && !isset($INACTIVE_ONLINE_LANGUAGES[$langcode])) { - $langs[] = $langcode; - } - - // Return with language code - return $langcode; -} - -// Order the array of compiled -// accept-language codes by quality value -function language_accept_order($a, $b) -{ - if ($a[1] == $b[1]) { return 0; } - return ($a[1] > $b[1]) ? -1 : 1; -} diff --git a/src/LangChooser.php b/src/LangChooser.php new file mode 100644 index 0000000000..92ddb09de5 --- /dev/null +++ b/src/LangChooser.php @@ -0,0 +1,168 @@ +<?php + +namespace phpweb; + +class LangChooser +{ + private readonly string $preferredLanguage; + + private readonly string $defaultLanguage; + + /** + * @param array<string, string> $availableLanguages + * @param array<string, string> $inactiveLanguages + */ + public function __construct( + private readonly array $availableLanguages, + private readonly array $inactiveLanguages, + string $preferredLanguage, + string $defaultLanguage, + ) + { + $this->defaultLanguage = $this->normalize($defaultLanguage); + $this->preferredLanguage = $this->normalize($preferredLanguage); + } + + /** + * @return array{string, string} + */ + public function chooseCode( + string|array|null $langParam, + string $requestUri, + ?string $acceptLanguageHeader, + ): array + { + // Default values for languages + $explicitly_specified = ''; + + // Specified for the request (GET/POST parameter) + if (is_string($langParam)) { + $langCode = $this->normalize(htmlspecialchars($langParam, ENT_QUOTES, 'UTF-8')); + $explicitly_specified = $langCode; + if ($this->isAvailableLanguage($langCode)) { + return [$langCode, $explicitly_specified]; + } + } + + // Specified in a shortcut URL (eg. /en/echo or /pt_br/echo) + if (preg_match("!^/(\\w{2}(_\\w{2})?)/!", htmlspecialchars($requestUri,ENT_QUOTES, 'UTF-8'), $flang)) { + // Put language into preference list + $rlang = $this->normalize($flang[1]); + + // Set explicitly specified language + if (empty($explicitly_specified)) { + $explicitly_specified = $rlang; + } + + // Drop out language specification from URL, as this is already handled + $_SERVER['STRIPPED_URI'] = preg_replace( + "!^/$flang[1]/!", "/", htmlspecialchars($requestUri, ENT_QUOTES, 'UTF-8'), + ); + + if ($this->isAvailableLanguage($rlang)) { + return [$rlang, $explicitly_specified]; + } + } + + // Specified in a manual URL (eg. manual/en/ or manual/pt_br/) + if (preg_match("!^/manual/(\\w{2}(_\\w{2})?)(/|$)!", htmlspecialchars($requestUri, ENT_QUOTES, 'UTF-8'), $flang)) { + $flang = $this->normalize($flang[1]); + + // Set explicitly specified language + if (empty($explicitly_specified)) { + $explicitly_specified = $flang; + } + + if ($this->isAvailableLanguage($flang)) { + return [$flang, $explicitly_specified]; + } + } + + // Honor the users own language setting (if available) + if ($this->isAvailableLanguage($this->preferredLanguage)) { + return [$this->preferredLanguage, $explicitly_specified]; + } + + // Specified by the user via the browser's Accept Language setting + // Samples: "hu, en-us;q=0.66, en;q=0.33", "hu,en-us;q=0.5" + $browser_langs = []; + + // Check if we have $_SERVER['HTTP_ACCEPT_LANGUAGE'] set and + // it no longer breaks if you only have one language set :) + if (isset($acceptLanguageHeader)) { + $browser_accept = explode(",", $acceptLanguageHeader); + + // Go through all language preference specs + foreach ($browser_accept as $value) { + // The language part is either a code or a code with a quality + // We cannot do anything with a * code, so it is skipped + // If the quality is missing, it is assumed to be 1 according to the RFC + if (preg_match("!([a-z-]+)(;q=([0-9\\.]+))?!", trim($value), $found)) { + $quality = (isset($found[3]) ? (float) $found[3] : 1.0); + $browser_langs[] = [$found[1], $quality]; + } + unset($found); + } + } + + // Order the codes by quality + usort($browser_langs, fn ($a, $b) => $b[1] <=> $a[1]); + + // For all languages found in the accept-language + foreach ($browser_langs as $langdata) { + + // Translation table for accept-language codes and phpdoc codes + switch ($langdata[0]) { + case "pt-br": + $langdata[0] = 'pt_br'; + break; + case "zh-cn": + $langdata[0] = 'zh'; + break; + case "zh-hk": + $langdata[0] = 'hk'; + break; + case "zh-tw": + $langdata[0] = 'tw'; + break; + } + + // We do not support flavors of languages (except the ones above) + // This is not in conformance to the RFC, but it here for user + // convenience reasons + if (preg_match("!^(.+)-!", $langdata[0], $match)) { + $langdata[0] = $match[1]; + } + + $lang = $this->normalize($langdata[0]); + if ($this->isAvailableLanguage($lang)) { + return [$lang, $explicitly_specified]; + } + } + + // Language preferred by this mirror site + if ($this->isAvailableLanguage($this->defaultLanguage)) { + return [$this->defaultLanguage, $explicitly_specified]; + } + + // Last default language is English + return ["en", $explicitly_specified]; + } + + private function normalize(string $langCode): string + { + // Make language code lowercase, html encode special chars and remove slashes + $langCode = strtolower(htmlspecialchars($langCode)); + + // The Brazilian Portuguese code needs special attention + if ($langCode == 'pt_br') { + return 'pt_BR'; + } + return $langCode; + } + + private function isAvailableLanguage(string $langCode): bool + { + return isset($this->availableLanguages[$langCode]) && !isset($this->inactiveLanguages[$langCode]); + } +} diff --git a/tests/Unit/LangChooserTest.php b/tests/Unit/LangChooserTest.php new file mode 100644 index 0000000000..9706a67b50 --- /dev/null +++ b/tests/Unit/LangChooserTest.php @@ -0,0 +1,133 @@ +<?php + +declare(strict_types=1); + +namespace phpweb\Test\Unit; + +use phpweb\LangChooser; +use PHPUnit\Framework; +use phpweb\UserPreferences; + +#[Framework\Attributes\CoversClass(LangChooser::class)] +class LangChooserTest extends Framework\TestCase +{ + private const DEFAULT_LANGUAGE_LIST = [ + 'en' => 'English', + 'de' => 'German', + 'ja' => 'Japanese', + 'pt_BR' => 'Brazilian Portuguese', + 'zh' => 'Chinese (Simplified)', + ]; + + public function testChooseCodeWithLangParameter(): void + { + $langChooser = new LangChooser(self::DEFAULT_LANGUAGE_LIST, [], '', '', 'en'); + $result = $langChooser->chooseCode('de', '/', null); + + self::assertSame(['de', 'de'], $result); + } + + public function testChooseCodeWithShortcutPath(): void + { + $langChooser = new LangChooser(self::DEFAULT_LANGUAGE_LIST, [], '', 'en'); + $result = $langChooser->chooseCode('', '/de/echo', null); + + self::assertSame(['de', 'de'], $result); + } + + #[Framework\Attributes\TestWith(['de', 'de'])] + #[Framework\Attributes\TestWith(['pt_BR', 'pt_BR'])] + public function testChooseCodeWithManualPath(string $pathLang, string $expected): void + { + $langChooser = new LangChooser(self::DEFAULT_LANGUAGE_LIST, [], '', 'en'); + $result = $langChooser->chooseCode('', "/manual/$pathLang", null); + + self::assertSame([$expected, $expected], $result); + } + + public function testChooseCodeWithUserPreference(): void + { + $langChooser = new LangChooser(self::DEFAULT_LANGUAGE_LIST, [], 'de', 'en'); + $result = $langChooser->chooseCode('', '/', null); + + self::assertSame(['de', ''], $result); + } + + public function testChooseCodeWithAcceptLanguage(): void + { + $langChooser = new LangChooser(self::DEFAULT_LANGUAGE_LIST, [], '', 'en'); + $result = $langChooser->chooseCode('', '/', 'de,ja,en'); + + self::assertSame(['de', ''], $result); + } + + public function testChooseCodeWithAcceptLanguageQuality(): void + { + $langChooser = new LangChooser(self::DEFAULT_LANGUAGE_LIST, [], '', 'en'); + $result = $langChooser->chooseCode('', '/', 'de;q=0.8,ja,en'); + + self::assertSame(['ja', ''], $result); + } + + #[Framework\Attributes\TestWith(['de-at', 'de'])] + #[Framework\Attributes\TestWith(['pt-br', 'pt_BR'])] + #[Framework\Attributes\TestWith(['zh-cn', 'zh'])] + #[Framework\Attributes\TestWith(['zh-tw', 'en'])] + public function testChooseCodeWithAcceptLanguageFollowedByCountryCode(string $acceptLanguage, string $expected): void + { + $langChooser = new LangChooser(self::DEFAULT_LANGUAGE_LIST, [], '', 'en'); + $result = $langChooser->chooseCode('', '/', $acceptLanguage); + + self::assertSame([$expected, ''], $result); + } + + public function testChooseCodeWithMirrorDefaultLanguage(): void + { + $langChooser = new LangChooser(self::DEFAULT_LANGUAGE_LIST, [], '', 'de'); + $result = $langChooser->chooseCode('', '/', null); + + self::assertSame(['de', ''], $result); + } + + public function testChooseCodeWithDefaultLanguage(): void + { + $langChooser = new LangChooser(self::DEFAULT_LANGUAGE_LIST, [], '', 'fr'); + $result = $langChooser->chooseCode('', '/', null); + + self::assertSame(['en', ''], $result); + } + + public function testChooseCodeWithLangParameterAndManualPath(): void + { + $langChooser = new LangChooser(self::DEFAULT_LANGUAGE_LIST, [], '', 'en'); + $result = $langChooser->chooseCode('de', '/manual/en', null); + + self::assertSame(['de', 'de'], $result); + } + + public function testChooseCodeWithManualPathAndUserPreference(): void + { + UserPreferences::$languageCode = 'en'; + + $langChooser = new LangChooser(self::DEFAULT_LANGUAGE_LIST, [], '', 'en'); + $result = $langChooser->chooseCode('', '/manual/de', null); + + self::assertSame(['de', 'de'], $result); + } + + public function testChooseCodeWithManualPathAndAcceptLanguage(): void + { + $langChooser = new LangChooser(self::DEFAULT_LANGUAGE_LIST, [], '', 'en'); + $result = $langChooser->chooseCode('', '/manual/de', 'en'); + + self::assertSame(['de', 'de'], $result); + } + + public function testChooseCodeInactiveLanguageIsNotChosen(): void + { + $langChooser = new LangChooser(['en' => 'English', 'de' => 'German', 'pl' => 'Polish'], ['pl' => 'Polish'], '', ''); + $result = $langChooser->chooseCode('pl', '/manual/pl', 'pl'); + + self::assertSame(['en', 'pl'], $result); + } +}