Author: Takuya Aramaki (takaram)
Committer: GitHub (web-flow)
Pusher: saundefined
Date: 2024-10-14T18:48:22+05:00

Commit: 
https://github.com/php/web-php/commit/38ef33f923321519476d353aabf3d9e19fb1d0d9
Raw diff: 
https://github.com/php/web-php/commit/38ef33f923321519476d353aabf3d9e19fb1d0d9.diff

Move `language_choose_code()` into new `LangChooser` class (#1081)

Changed paths:
  A  src/LangChooser.php
  A  tests/Unit/LangChooserTest.php
  M  include/langchooser.inc


Diff:

diff --git a/include/langchooser.inc b/include/langchooser.inc
index 40790ad9d2..230a3d707d 100644
--- a/include/langchooser.inc
+++ b/include/langchooser.inc
@@ -28,170 +28,20 @@
 
 */
 
+use phpweb\LangChooser;
+
+require_once __DIR__ . '/../src/autoload.php';
+
 // Default STRIPPED_URI
 $_SERVER['STRIPPED_URI'] = htmlspecialchars($_SERVER['REQUEST_URI'], 
ENT_QUOTES, 'UTF-8');
 
 // The code is encapsulated in a function,
 // so the variable namespace is not polluted
-list($LANG, $EXPL_LANG, $UA_LANGS) = language_choose_code();
+list($LANG, $EXPL_LANG) = (new LangChooser($LANGUAGES, 
$INACTIVE_ONLINE_LANGUAGES, myphpnet_language(), default_language() ?: 
''))->chooseCode(
+    $_REQUEST['lang'] ?? null,
+    $_SERVER['REQUEST_URI'],
+    $_SERVER['HTTP_ACCEPT_LANGUAGE'] ?? null,
+);
 
 // Compatibility
 if ($EXPL_LANG == '') { unset($EXPL_LANG); }
-
-function language_choose_code()
-{
-    // Contains all the languages picked up by the
-    // process in priority order (without repeating codes)
-    $languages = [];
-
-    // Default values for languages
-    $explicitly_specified = ''; $selected = '';
-
-    // Specified for the request (GET/POST parameter)
-    if (!empty($_REQUEST['lang']) && is_string($_REQUEST['lang'])) {
-        $explicitly_specified = 
language_add(htmlspecialchars($_REQUEST['lang'], ENT_QUOTES, 'UTF-8'), 
$languages);
-    }
-
-    // Specified in a shortcut URL (eg. /en/echo or /pt_br/echo)
-    if (preg_match("!^/(\\w{2}(_\\w{2})?)/!", 
htmlspecialchars($_SERVER['REQUEST_URI'],ENT_QUOTES, 'UTF-8'), $flang)) {
-
-        // Put language into preference list
-        $rlang = language_add($flang[1], $languages);
-
-        // Set explicity specified language
-        if (empty($explicitly_specified)) {
-            $explicitly_specified = $rlang;
-        }
-
-        // Drop out langauge specification from URL, as this is already handled
-        $_SERVER['STRIPPED_URI'] = preg_replace(
-            "!^/$flang[1]/!", "/", htmlspecialchars($_SERVER['REQUEST_URI'], 
ENT_QUOTES, 'UTF-8'),
-        );
-
-    }
-
-    // Specified in a manual URL (eg. manual/en/ or manual/pt_br/)
-    if (preg_match("!^/manual/(\\w{2}(_\\w{2})?)(/|$)!", 
htmlspecialchars($_SERVER['REQUEST_URI'], ENT_QUOTES, 'UTF-8'), $flang)) {
-
-        $flang = language_add($flang[1], $languages);
-
-        // Set explicity specified language
-        if (empty($explicitly_specified)) {
-            $explicitly_specified = $flang;
-        }
-    }
-
-    // Honor the users own language setting (if available)
-    if (myphpnet_language()) {
-        language_add(myphpnet_language(), $languages);
-    }
-
-    // Specified by the user via the browser's Accept Language setting
-    // Samples: "hu, en-us;q=0.66, en;q=0.33", "hu,en-us;q=0.5"
-    $browser_langs = []; $parsed_langs = [];
-
-    // Check if we have $_SERVER['HTTP_ACCEPT_LANGUAGE'] set and
-    // it no longer breaks if you only have one language set :)
-    if (isset($_SERVER['HTTP_ACCEPT_LANGUAGE'])) {
-        $browser_accept = explode(",", $_SERVER['HTTP_ACCEPT_LANGUAGE']);
-
-        // Go through all language preference specs
-        foreach ($browser_accept as $value) {
-            // The language part is either a code or a code with a quality
-            // We cannot do anything with a * code, so it is skipped
-            // If the quality is missing, it is assumed to be 1 according to 
the RFC
-            if (preg_match("!([a-z-]+)(;q=([0-9\\.]+))?!", trim($value), 
$found)) {
-                $quality = (isset($found[3]) ? (float) $found[3] : 1.0);
-                $browser_langs[] = [$found[1], $quality];
-            }
-            unset($found);
-        }
-    }
-
-    // Order the codes by quality
-    usort($browser_langs, "language_accept_order");
-
-    // For all languages found in the accept-language
-    foreach ($browser_langs as $langdata) {
-
-        // Translation table for accept-language codes and phpdoc codes
-        switch ($langdata[0]) {
-            case "pt-br":
-                $langdata[0] = 'pt_br';
-                break;
-            case "zh-cn":
-                $langdata[0] = 'zh';
-                break;
-            case "zh-hk":
-                $langdata[0] = 'hk';
-                break;
-            case "zh-tw":
-                $langdata[0] = 'tw';
-                break;
-        }
-
-        // We do not support flavors of languages (except the ones above)
-        // This is not in conformance to the RFC, but it here for user
-        // convinience reasons
-        if (preg_match("!^(.+)-!", $langdata[0], $match)) {
-            $langdata[0] = $match[1];
-        }
-
-        // Add language to priority order
-        $parsed_langs[] = language_add($langdata[0], $languages);
-    }
-
-    // Language preferred by this mirror site
-    language_add(default_language(), $languages);
-
-    // Last default language is English
-    language_add("en", $languages);
-
-    // Try to find out what language is available on this mirror.
-    // As most of the language dependant operations involve manual
-    // page display (lookup, search, shortcuts), we will check for
-    // the index file of manuals.
-/*
-    foreach ($languages as $language) {
-        if (file_exists($_SERVER['DOCUMENT_ROOT'] . 
"/manual/$language/index.php")) {
-            $selected = $language;
-            break;
-        }
-    }
-*/
-    $selected = $languages[0];
-
-    // Return with all found data
-    return [$selected, $explicitly_specified, $parsed_langs];
-}
-
-// Add a language to the possible languages' list
-function language_add($langcode, &$langs)
-{
-    global $LANGUAGES, $INACTIVE_ONLINE_LANGUAGES;
-
-    // Make language code lowercase, html encode special chars and remove 
slashes
-    $langcode = strtolower(htmlspecialchars($langcode));
-
-    // The Brazilian Portuguese code needs special attention
-    if ($langcode == 'pt_br') { $langcode = 'pt_BR'; }
-
-    // Append language code in priority order if it is not
-    // there already and supported by the PHP site. Try to
-    // lower number of file_exists() calls to the minumum...
-    if (!in_array($langcode, $langs, false) && isset($LANGUAGES[$langcode])
-        && !isset($INACTIVE_ONLINE_LANGUAGES[$langcode])) {
-        $langs[] = $langcode;
-    }
-
-    // Return with language code
-    return $langcode;
-}
-
-// Order the array of compiled
-// accept-language codes by quality value
-function language_accept_order($a, $b)
-{
-    if ($a[1] == $b[1]) { return 0; }
-    return ($a[1] > $b[1]) ? -1 : 1;
-}
diff --git a/src/LangChooser.php b/src/LangChooser.php
new file mode 100644
index 0000000000..92ddb09de5
--- /dev/null
+++ b/src/LangChooser.php
@@ -0,0 +1,211 @@
+<?php
+
+namespace phpweb;
+
+/**
+ * Chooses the language code to serve a request in.
+ *
+ * Candidates are tried in a fixed priority order (see chooseCode()); a candidate
+ * is only chosen when it is a key of the available languages and not a key of
+ * the inactive languages.
+ */
+class LangChooser
+{
+    private readonly string $preferredLanguage;
+
+    private readonly string $defaultLanguage;
+
+    /**
+     * @param array<string, string> $availableLanguages known languages, keyed by language code
+     * @param array<string, string> $inactiveLanguages languages that must not be chosen, keyed by language code
+     * @param string $preferredLanguage the user's own language setting ('' for none)
+     * @param string $defaultLanguage language preferred by this mirror site ('' for none)
+     */
+    public function __construct(
+        private readonly array $availableLanguages,
+        private readonly array $inactiveLanguages,
+        string $preferredLanguage,
+        string $defaultLanguage,
+    )
+    {
+        $this->defaultLanguage = $this->normalize($defaultLanguage);
+        $this->preferredLanguage = $this->normalize($preferredLanguage);
+    }
+
+    /**
+     * Picks the language for a request.
+     *
+     * The first available candidate wins, in this order: the request parameter,
+     * a shortcut URL (/en/echo), a manual URL (/manual/en/), the user's preferred
+     * language, the Accept-Language header (highest quality first), the default
+     * language and finally "en".
+     *
+     * Side effect: when the URI starts with a shortcut language prefix,
+     * $_SERVER['STRIPPED_URI'] is set to the HTML-escaped URI without that prefix.
+     *
+     * @param string|array|null $langParam language request parameter; ignored unless it is a string
+     * @param string $requestUri the request URI
+     * @param string|null $acceptLanguageHeader Accept-Language header value, null when absent
+     *
+     * @return array{string, string} the chosen code, followed by the code named by the
+     *                               parameter or URL ('' if none; it may be unavailable)
+     */
+    public function chooseCode(
+        string|array|null $langParam,
+        string $requestUri,
+        ?string $acceptLanguageHeader,
+    ): array
+    {
+        // Default values for languages
+        $explicitly_specified = '';
+
+        // Specified for the request (GET/POST parameter)
+        if (is_string($langParam)) {
+            $langCode = $this->normalize(htmlspecialchars($langParam, ENT_QUOTES, 'UTF-8'));
+            $explicitly_specified = $langCode;
+            if ($this->isAvailableLanguage($langCode)) {
+                return [$langCode, $explicitly_specified];
+            }
+        }
+
+        // Both URL patterns and STRIPPED_URI work on the HTML-escaped URI
+        $escapedUri = htmlspecialchars($requestUri, ENT_QUOTES, 'UTF-8');
+
+        // Specified in a shortcut URL (eg. /en/echo or /pt_br/echo)
+        if (preg_match("!^/(\\w{2}(_\\w{2})?)/!", $escapedUri, $flang)) {
+            $rlang = $this->normalize($flang[1]);
+
+            // Set explicitly specified language
+            if (empty($explicitly_specified)) {
+                $explicitly_specified = $rlang;
+            }
+
+            // Drop out language specification from URL, as this is already handled
+            $_SERVER['STRIPPED_URI'] = preg_replace("!^/$flang[1]/!", "/", $escapedUri);
+
+            if ($this->isAvailableLanguage($rlang)) {
+                return [$rlang, $explicitly_specified];
+            }
+        }
+
+        // Specified in a manual URL (eg. manual/en/ or manual/pt_br/)
+        if (preg_match("!^/manual/(\\w{2}(_\\w{2})?)(/|$)!", $escapedUri, $flang)) {
+            $mlang = $this->normalize($flang[1]);
+
+            // Set explicitly specified language
+            if (empty($explicitly_specified)) {
+                $explicitly_specified = $mlang;
+            }
+
+            if ($this->isAvailableLanguage($mlang)) {
+                return [$mlang, $explicitly_specified];
+            }
+        }
+
+        // Honor the users own language setting (if available)
+        if ($this->isAvailableLanguage($this->preferredLanguage)) {
+            return [$this->preferredLanguage, $explicitly_specified];
+        }
+
+        // Specified by the user via the browser's Accept Language setting
+        foreach ($this->parseAcceptLanguage($acceptLanguageHeader) as $lang) {
+            if ($this->isAvailableLanguage($lang)) {
+                return [$lang, $explicitly_specified];
+            }
+        }
+
+        // Language preferred by this mirror site
+        if ($this->isAvailableLanguage($this->defaultLanguage)) {
+            return [$this->defaultLanguage, $explicitly_specified];
+        }
+
+        // Last default language is English
+        return ["en", $explicitly_specified];
+    }
+
+    /**
+     * Converts an Accept-Language header into normalized language codes,
+     * ordered from the highest quality value to the lowest.
+     *
+     * Samples: "hu, en-us;q=0.66, en;q=0.33", "hu,en-US;q=0.5"
+     *
+     * @return list<string>
+     */
+    private function parseAcceptLanguage(?string $acceptLanguageHeader): array
+    {
+        if ($acceptLanguageHeader === null) {
+            return [];
+        }
+
+        $browser_langs = [];
+
+        // Go through all language preference specs
+        foreach (explode(",", $acceptLanguageHeader) as $value) {
+            // Language tags are case-insensitive and browsers send mixed case
+            // ("pt-BR", "en-US"). The pattern and the translation table below are
+            // lowercase, so fold the case first; otherwise the match stops at the
+            // first capital and both the region and the quality are lost.
+            $spec = strtolower(trim($value));
+
+            // The language part is either a code or a code with a quality
+            // We cannot do anything with a * code, so it is skipped (the pattern
+            // is anchored so that the "q" of "*;q=0.5" is not taken for a code)
+            // If the quality is missing, it is assumed to be 1 according to the RFC
+            if (preg_match("!^([a-z-]+)(;q=([0-9\\.]+))?!", $spec, $found)) {
+                $quality = (isset($found[3]) ? (float) $found[3] : 1.0);
+                $browser_langs[] = [$found[1], $quality];
+            }
+        }
+
+        // Order the codes by quality
+        usort($browser_langs, fn ($a, $b) => $b[1] <=> $a[1]);
+
+        $codes = [];
+        foreach ($browser_langs as $langdata) {
+            // Translation table for accept-language codes and phpdoc codes
+            $code = match ($langdata[0]) {
+                'pt-br' => 'pt_br',
+                'zh-cn' => 'zh',
+                'zh-hk' => 'hk',
+                'zh-tw' => 'tw',
+                default => $langdata[0],
+            };
+
+            // We do not support flavors of languages (except the ones above):
+            // only the primary subtag is kept ("de-at" -> "de", "zh-hant-tw" -> "zh").
+            // This is not in conformance to the RFC, but it is here for user
+            // convenience reasons
+            if (preg_match("!^([^-]+)-!", $code, $match)) {
+                $code = $match[1];
+            }
+
+            $codes[] = $this->normalize($code);
+        }
+
+        return $codes;
+    }
+
+    /**
+     * Brings a language code into the form used as array key: lowercase and
+     * HTML-escaped, except for Brazilian Portuguese, which is spelled "pt_BR".
+     */
+    private function normalize(string $langCode): string
+    {
+        // Make language code lowercase and html encode special chars
+        $langCode = strtolower(htmlspecialchars($langCode));
+
+        // The Brazilian Portuguese code needs special attention
+        if ($langCode === 'pt_br') {
+            return 'pt_BR';
+        }
+        return $langCode;
+    }
+
+    /**
+     * Tells whether a (normalized) code is a known language that is not inactive.
+     */
+    private function isAvailableLanguage(string $langCode): bool
+    {
+        return isset($this->availableLanguages[$langCode]) && !isset($this->inactiveLanguages[$langCode]);
+    }
+}
diff --git a/tests/Unit/LangChooserTest.php b/tests/Unit/LangChooserTest.php
new file mode 100644
index 0000000000..9706a67b50
--- /dev/null
+++ b/tests/Unit/LangChooserTest.php
@@ -0,0 +1,139 @@
+<?php
+
+declare(strict_types=1);
+
+namespace phpweb\Test\Unit;
+
+use phpweb\LangChooser;
+use PHPUnit\Framework;
+use phpweb\UserPreferences;
+
+/**
+ * Covers the order in which LangChooser::chooseCode() tries its language sources.
+ *
+ * Constructor arguments are: available languages, inactive languages,
+ * the user's preferred language and the mirror's default language.
+ */
+#[Framework\Attributes\CoversClass(LangChooser::class)]
+class LangChooserTest extends Framework\TestCase
+{
+    private const DEFAULT_LANGUAGE_LIST = [
+        'en' => 'English',
+        'de' => 'German',
+        'ja' => 'Japanese',
+        'pt_BR' => 'Brazilian Portuguese',
+        'zh' => 'Chinese (Simplified)',
+    ];
+
+    public function testChooseCodeWithLangParameter(): void
+    {
+        $langChooser = new LangChooser(self::DEFAULT_LANGUAGE_LIST, [], '', 'en');
+        $result = $langChooser->chooseCode('de', '/', null);
+
+        self::assertSame(['de', 'de'], $result);
+    }
+
+    public function testChooseCodeWithShortcutPath(): void
+    {
+        $langChooser = new LangChooser(self::DEFAULT_LANGUAGE_LIST, [], '', 'en');
+        $result = $langChooser->chooseCode('', '/de/echo', null);
+
+        self::assertSame(['de', 'de'], $result);
+    }
+
+    #[Framework\Attributes\TestWith(['de', 'de'])]
+    #[Framework\Attributes\TestWith(['pt_BR', 'pt_BR'])]
+    public function testChooseCodeWithManualPath(string $pathLang, string $expected): void
+    {
+        $langChooser = new LangChooser(self::DEFAULT_LANGUAGE_LIST, [], '', 'en');
+        $result = $langChooser->chooseCode('', "/manual/$pathLang", null);
+
+        self::assertSame([$expected, $expected], $result);
+    }
+
+    public function testChooseCodeWithUserPreference(): void
+    {
+        $langChooser = new LangChooser(self::DEFAULT_LANGUAGE_LIST, [], 'de', 'en');
+        $result = $langChooser->chooseCode('', '/', null);
+
+        self::assertSame(['de', ''], $result);
+    }
+
+    public function testChooseCodeWithAcceptLanguage(): void
+    {
+        $langChooser = new LangChooser(self::DEFAULT_LANGUAGE_LIST, [], '', 'en');
+        $result = $langChooser->chooseCode('', '/', 'de,ja,en');
+
+        self::assertSame(['de', ''], $result);
+    }
+
+    public function testChooseCodeWithAcceptLanguageQuality(): void
+    {
+        $langChooser = new LangChooser(self::DEFAULT_LANGUAGE_LIST, [], '', 'en');
+        $result = $langChooser->chooseCode('', '/', 'de;q=0.8,ja,en');
+
+        self::assertSame(['ja', ''], $result);
+    }
+
+    #[Framework\Attributes\TestWith(['de-at', 'de'])]
+    #[Framework\Attributes\TestWith(['pt-br', 'pt_BR'])]
+    #[Framework\Attributes\TestWith(['zh-cn', 'zh'])]
+    #[Framework\Attributes\TestWith(['zh-tw', 'en'])]
+    public function testChooseCodeWithAcceptLanguageFollowedByCountryCode(string $acceptLanguage, string $expected): void
+    {
+        $langChooser = new LangChooser(self::DEFAULT_LANGUAGE_LIST, [], '', 'en');
+        $result = $langChooser->chooseCode('', '/', $acceptLanguage);
+
+        self::assertSame([$expected, ''], $result);
+    }
+
+    public function testChooseCodeWithMirrorDefaultLanguage(): void
+    {
+        $langChooser = new LangChooser(self::DEFAULT_LANGUAGE_LIST, [], '', 'de');
+        $result = $langChooser->chooseCode('', '/', null);
+
+        self::assertSame(['de', ''], $result);
+    }
+
+    public function testChooseCodeWithDefaultLanguage(): void
+    {
+        $langChooser = new LangChooser(self::DEFAULT_LANGUAGE_LIST, [], '', 'fr');
+        $result = $langChooser->chooseCode('', '/', null);
+
+        self::assertSame(['en', ''], $result);
+    }
+
+    public function testChooseCodeWithLangParameterAndManualPath(): void
+    {
+        $langChooser = new LangChooser(self::DEFAULT_LANGUAGE_LIST, [], '', 'en');
+        $result = $langChooser->chooseCode('de', '/manual/en', null);
+
+        self::assertSame(['de', 'de'], $result);
+    }
+
+    public function testChooseCodeWithManualPathAndUserPreference(): void
+    {
+        // The user preference is a constructor argument of LangChooser; it is set
+        // to a language other than the URL's so the test shows the URL wins.
+        $langChooser = new LangChooser(self::DEFAULT_LANGUAGE_LIST, [], 'en', 'en');
+        $result = $langChooser->chooseCode('', '/manual/de', null);
+
+        self::assertSame(['de', 'de'], $result);
+    }
+
+    public function testChooseCodeWithManualPathAndAcceptLanguage(): void
+    {
+        $langChooser = new LangChooser(self::DEFAULT_LANGUAGE_LIST, [], '', 'en');
+        $result = $langChooser->chooseCode('', '/manual/de', 'en');
+
+        self::assertSame(['de', 'de'], $result);
+    }
+
+    public function testChooseCodeInactiveLanguageIsNotChosen(): void
+    {
+        $langChooser = new LangChooser(['en' => 'English', 'de' => 'German', 'pl' => 'Polish'], ['pl' => 'Polish'], '', '');
+        $result = $langChooser->chooseCode('pl', '/manual/pl', 'pl');
+
+        self::assertSame(['en', 'pl'], $result);
+    }
+}

Reply via email to