jenkins-bot has submitted this change. ( 
https://gerrit.wikimedia.org/r/c/pywikibot/core/+/789774 )

Change subject: [bugfix] Improve get_charset_from_content_type function
......................................................................

[bugfix] Improve get_charset_from_content_type function

- remove delimiter in front of the code number
- replace win/windows with cp
- remove language code in front of win/windows like in sr-win1250

Bug: T307760
Change-Id: I4b13dee432b947dbd4db4846ef435d8b41d7a2b1
---
M pywikibot/comms/http.py
M tests/http_tests.py
2 files changed, 15 insertions(+), 2 deletions(-)

Approvals:
  Rubin: Looks good to me, but someone else must approve
  Xqt: Looks good to me, approved
  jenkins-bot: Verified



diff --git a/pywikibot/comms/http.py b/pywikibot/comms/http.py
index 792a44d..a828e41 100644
--- a/pywikibot/comms/http.py
+++ b/pywikibot/comms/http.py
@@ -415,8 +415,13 @@
     if re.sub(r'[ _\-]', '', charset) == 'xeucjp':
         charset = 'euc_jp'
     else:
-        # fix cp encodings (T304830)
-        charset = re.sub(r'\Acp[ _\-](\d{3,4})', r'cp\1', charset)
+        # fix cp encodings (T304830, T307760)
+        # remove delimiter in front of the code number
+        # replace win/windows with cp
+        # remove language code in front of win/windows
+        charset = re.sub(
+            r'\A(?:cp[ _\-]|(?:[a-z]+[_\-]?)?win(?:dows[_\-]?)?)(\d{3,4})',
+            r'cp\1', charset)
     return charset


diff --git a/tests/http_tests.py b/tests/http_tests.py
index 491eea5..fddad67 100755
--- a/tests/http_tests.py
+++ b/tests/http_tests.py
@@ -477,6 +477,14 @@
                                                 resp.apparent_encoding,
                                                 errors='replace'))

+    def test_get_charset_from_content_type(self):
+        """Test get_charset_from_content_type function."""
+        self.assertEqual(
+            http.get_charset_from_content_type('charset="cp-1251"'), 'cp1251')
+        self.assertEqual(
+            http.get_charset_from_content_type('charset="ru-win1251"'),
+            'cp1251')
+

 class BinaryTestCase(TestCase):


--
To view, visit https://gerrit.wikimedia.org/r/c/pywikibot/core/+/789774
To unsubscribe, or for help writing mail filters, visit 
https://gerrit.wikimedia.org/r/settings

Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Change-Id: I4b13dee432b947dbd4db4846ef435d8b41d7a2b1
Gerrit-Change-Number: 789774
Gerrit-PatchSet: 3
Gerrit-Owner: Xqt <[email protected]>
Gerrit-Reviewer: JJMC89 <[email protected]>
Gerrit-Reviewer: Rubin <[email protected]>
Gerrit-Reviewer: Xqt <[email protected]>
Gerrit-Reviewer: jenkins-bot
Gerrit-MessageType: merged
_______________________________________________
Pywikibot-commits mailing list -- [email protected]
To unsubscribe send an email to [email protected]

Reply via email to