Script 'mail_helper' called by obssrc
Hello community,
here is the log from the commit of package python-charset-normalizer for
openSUSE:Factory checked in at 2021-05-20 19:25:29
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Comparing /work/SRC/openSUSE:Factory/python-charset-normalizer (Old)
and /work/SRC/openSUSE:Factory/.python-charset-normalizer.new.2988 (New)
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Package is "python-charset-normalizer"
Thu May 20 19:25:29 2021 rev:7 rq:894589 version:1.3.9
Changes:
--------
--- /work/SRC/openSUSE:Factory/python-charset-normalizer/python-charset-normalizer.changes  2021-03-30 21:03:02.624892186 +0200
+++ /work/SRC/openSUSE:Factory/.python-charset-normalizer.new.2988/python-charset-normalizer.changes  2021-05-20 19:25:59.841711911 +0200
@@ -1,0 +2,8 @@
+Thu May 20 09:46:56 UTC 2021 - [email protected]
+
+- version update to 1.3.9
+ * Bugfix: In some very rare cases, you may end up getting encode/decode errors due to a bad bytes payload #40
+ * Bugfix: An empty payload given for detection may cause an exception when trying to access the alphabets property. #39
+ * Bugfix: The legacy detect function should return UTF-8-SIG if a sig is present in the payload. #38
+
+-------------------------------------------------------------------
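A quick way to exercise the empty-payload fix (#39) listed above is a sketch along these lines; the CnM alias and the alphabets property are taken from the test diff further down, and charset-normalizer 1.3.9 is assumed to be installed:

  from charset_normalizer import CharsetNormalizerMatches as CnM

  # An empty payload now yields a usable fallback match instead of raising
  # when its alphabets property is accessed.
  r = CnM.from_bytes(b'').best().first()
  print(r.encoding)        # 'utf-8' (fallback), per test_empty_bytes below
  print(len(r.alphabets))  # 0, no exception on 1.3.9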
Old:
----
charset_normalizer-1.3.6.tar.gz
New:
----
charset_normalizer-1.3.9.tar.gz
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Other differences:
------------------
++++++ python-charset-normalizer.spec ++++++
--- /var/tmp/diff_new_pack.bpcUDn/_old 2021-05-20 19:26:00.337709876 +0200
+++ /var/tmp/diff_new_pack.bpcUDn/_new 2021-05-20 19:26:00.341709859 +0200
@@ -19,7 +19,7 @@
%{?!python_module:%define python_module() python-%{**} python3-%{**}}
%define skip_python2 1
Name: python-charset-normalizer
-Version: 1.3.6
+Version: 1.3.9
Release: 0
Summary: Python Universal Charset detector
License: MIT
@@ -35,7 +35,7 @@
Requires: python-loguru >= 0.5
Requires: python-zhon
Requires(post): update-alternatives
-Requires(postun): update-alternatives
+Requires(postun):update-alternatives
Suggests: python-requests
Suggests: python-requests-html
Suggests: python-unicodedata2
++++++ charset_normalizer-1.3.6.tar.gz -> charset_normalizer-1.3.9.tar.gz ++++++
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/charset_normalizer-1.3.6/PKG-INFO new/charset_normalizer-1.3.9/PKG-INFO
--- old/charset_normalizer-1.3.6/PKG-INFO  2021-02-09 01:05:13.511735400 +0100
+++ new/charset_normalizer-1.3.9/PKG-INFO  2021-05-13 22:39:08.152140000 +0200
@@ -1,6 +1,6 @@
Metadata-Version: 2.1
Name: charset_normalizer
-Version: 1.3.6
+Version: 1.3.9
Summary: The Real First Universal Charset Detector. No Cpp Bindings, Using
Voodoo and Magical Artifacts.
Home-page: https://github.com/ousret/charset_normalizer
Author: Ahmed TAHRI @Ousret
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/charset_normalizer-1.3.6/charset_normalizer/legacy.py new/charset_normalizer-1.3.9/charset_normalizer/legacy.py
--- old/charset_normalizer-1.3.6/charset_normalizer/legacy.py  2021-02-09 01:05:05.000000000 +0100
+++ new/charset_normalizer-1.3.9/charset_normalizer/legacy.py  2021-05-13 22:38:57.000000000 +0200
@@ -19,8 +19,17 @@
     r = CnM.from_bytes(byte_str).best().first()
+    encoding = r.encoding if r is not None else None
+    language = r.language if r is not None and r.language != 'Unknown' else ''
+    confidence = 1. - r.chaos if r is not None else None
+
+    # Note: CharsetNormalizer does not return 'UTF-8-SIG' as the sig get stripped in the detection/normalization process
+    # but chardet does return 'utf-8-sig' and it is a valid codec name.
+    if encoding == 'utf_8' and r.bom:
+        encoding += '_sig'
+
     return {
-        'encoding': r.encoding if r is not None else None,
-        'language': r.language if r is not None and r.language != 'Unknown' else '',
-        'confidence': 1. - r.chaos if r is not None else None
+        'encoding': encoding,
+        'language': language,
+        'confidence': confidence
     }
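For reference, the behaviour added above can be checked with a short sketch like the following; it assumes the legacy detect helper is importable from the charset_normalizer.legacy module shown in this diff, and it mirrors the new test_utf8_sig_not_striped test further down:

  from charset_normalizer.legacy import detect  # chardet-compatible helper from the diff above

  result = detect("Hello World".encode('utf-8-sig'))
  print(result['encoding'])    # expected 'utf_8_sig' as of 1.3.9
  print(result['confidence'])  # derived from 1. - chaos, or None if nothing matched
  print(result['language'])    # '' when the language is unknown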
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/charset_normalizer-1.3.6/charset_normalizer/normalizer.py new/charset_normalizer-1.3.9/charset_normalizer/normalizer.py
--- old/charset_normalizer-1.3.6/charset_normalizer/normalizer.py  2021-02-09 01:05:05.000000000 +0100
+++ new/charset_normalizer-1.3.9/charset_normalizer/normalizer.py  2021-05-13 22:38:57.000000000 +0200
@@ -264,7 +264,7 @@
         :return:
         :rtype: bytes
         """
-        return str(self).encode(encoding)
+        return str(self).encode(encoding, 'replace')
 class CharsetNormalizerMatches:
@@ -353,7 +353,7 @@
                 sequences,
                 'utf-8',
                 0.,
-                []
+                {}
             )
         too_small_sequence = len(sequences) < 24
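The switch to the 'replace' error handler above appears to be what addresses the rare encode/decode errors from #40: unencodable characters are substituted with '?' instead of raising. In plain standard-library terms, roughly:

  # Standard-library behaviour of the 'replace' error handler.
  text = 'h\xe9llo \u4e16\u754c'
  print(text.encode('ascii', 'replace'))  # b'h?llo ??'
  # text.encode('ascii') would raise UnicodeEncodeError instead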
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/charset_normalizer-1.3.6/charset_normalizer/version.py new/charset_normalizer-1.3.9/charset_normalizer/version.py
--- old/charset_normalizer-1.3.6/charset_normalizer/version.py  2021-02-09 01:05:05.000000000 +0100
+++ new/charset_normalizer-1.3.9/charset_normalizer/version.py  2021-05-13 22:38:57.000000000 +0200
@@ -1,6 +1,6 @@
-"""
-Expose version
-"""
-
-__version__ = "1.3.6"
-VERSION = __version__.split('.')
+"""
+Expose version
+"""
+
+__version__ = "1.3.9"
+VERSION = __version__.split('.')
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/charset_normalizer-1.3.6/charset_normalizer.egg-info/PKG-INFO new/charset_normalizer-1.3.9/charset_normalizer.egg-info/PKG-INFO
--- old/charset_normalizer-1.3.6/charset_normalizer.egg-info/PKG-INFO  2021-02-09 01:05:13.000000000 +0100
+++ new/charset_normalizer-1.3.9/charset_normalizer.egg-info/PKG-INFO  2021-05-13 22:39:07.000000000 +0200
@@ -1,6 +1,6 @@
Metadata-Version: 2.1
Name: charset-normalizer
-Version: 1.3.6
+Version: 1.3.9
Summary: The Real First Universal Charset Detector. No Cpp Bindings, Using
Voodoo and Magical Artifacts.
Home-page: https://github.com/ousret/charset_normalizer
Author: Ahmed TAHRI @Ousret
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/charset_normalizer-1.3.6/test/test_detect_legacy.py new/charset_normalizer-1.3.9/test/test_detect_legacy.py
--- old/charset_normalizer-1.3.6/test/test_detect_legacy.py  2021-02-09 01:05:05.000000000 +0100
+++ new/charset_normalizer-1.3.9/test/test_detect_legacy.py  2021-05-13 22:38:57.000000000 +0200
@@ -62,3 +62,14 @@
             r['encoding'],
             'utf_7'
         )
+
+    def test_utf8_sig_not_striped(self):
+        r = detect(
+            "Hello World".encode('utf-8-sig')
+        )
+
+        with self.subTest("Verify that UTF-8-SIG is returned when using legacy detect"):
+            self.assertEqual(
+                r['encoding'],
+                "utf_8_sig"
+            )
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/charset_normalizer-1.3.6/test/test_on_byte.py new/charset_normalizer-1.3.9/test/test_on_byte.py
--- old/charset_normalizer-1.3.6/test/test_on_byte.py  2021-02-09 01:05:05.000000000 +0100
+++ new/charset_normalizer-1.3.9/test/test_on_byte.py  2021-05-13 22:38:57.000000000 +0200
@@ -1,136 +1,141 @@
-import unittest
-
-from charset_normalizer import CharsetNormalizerMatches as CnM
-
-
-class TestBytes(unittest.TestCase):
-
- def test_too_short_none(self):
- self.assertIsNotNone(
- CnM.from_bytes(b'\xfe\xff').best().first()
- )
-
- def test_empty_bytes(self):
- r = CnM.from_bytes(b'').best().first()
-
- self.assertIsNotNone(
- r
- )
-
- self.assertEqual(
- 'utf-8',
- r.encoding
- )
-
- def test_bom_detection(self):
- with self.subTest('GB18030 UNAVAILABLE SIG'):
- self.assertFalse(
- CnM.from_bytes(
-
'????????????????????????????????????????????????'.encode('gb18030')
- ).best().first().byte_order_mark
- )
-
- with self.subTest('GB18030 AVAILABLE SIG'):
- self.assertTrue(
- CnM.from_bytes(
- (u'\uFEFF' +
'????????????????????????????????????????????????').encode('gb18030')
- ).best().first().byte_order_mark
- )
-
- with self.subTest('UTF-7 AVAILABLE BOM'):
- self.assertTrue(
- CnM.from_bytes(
- b'\x2b\x2f\x76\x38' +
'????????????????????????????????????????????????'.encode('utf_7')
- ).best().first().byte_order_mark
- )
-
- with self.subTest('UTF-8 AVAILABLE BOM'):
- self.assertTrue(
- CnM.from_bytes(
- b'\xef\xbb\xbf' +
'????????????????????????????????????????????????'.encode('utf_8')
- ).best().first().byte_order_mark
- )
-
- def test_encode_decode(self):
-
- with self.subTest('Encode & Detect UTF-8 WITHOUT SIG SMALL CONTENT'):
- self.assertEqual(
- CnM.from_bytes(
- 'h\xe9llo world!\n'.encode('utf_8')
- ).best().first().encoding,
- 'utf_8'
- )
-
- with self.subTest('Encode & Detect GB18030 WITHOUT SIG'):
- self.assertEqual(
- CnM.from_bytes(
-
'????????????????????????????????????????????????'.encode('gb18030')
- ).best().first().encoding,
- 'gb18030'
- )
-
- with self.subTest('Encode & Detect GB18030 WITH SIG (CJK)'):
- self.assertEqual(
- CnM.from_bytes(
- (u'\uFEFF' +
'????????????????????????????????????????????????').encode('gb18030')
- ).best().first().encoding,
- 'gb18030'
- )
-
- with self.subTest('Encode & Detect UTF-8 WITHOUT SIG (CJK)'):
- self.assertEqual(
- CnM.from_bytes(
-
'????????????????????????????????????????????????'.encode('utf_8')
- ).best().first().encoding,
- 'utf_8'
- )
-
- with self.subTest('Encode & Detect UTF-7 WITHOUT BOM (CJK)'):
- self.assertEqual(
- CnM.from_bytes(
-
'???????????????,??????????????????????????????'.encode('utf_7')
- ).best().first().encoding,
- 'utf_7'
- )
-
- with self.subTest('Encode & Detect UTF-7 WITH BOM (CJK)'):
- self.assertEqual(
- CnM.from_bytes(
-
b'\x2b\x2f\x76\x38'+'????????????????????????????????????????????????'.encode('utf_7')
- ).best().first().encoding,
- 'utf_7'
- )
-
- with self.subTest('Encode & Detect UTF-7 WITHOUT BOM (CYRILLIC)'):
- self.assertEqual(
- CnM.from_bytes(
- 'B???????? ?????????? ?????? ?????????? ????
??????????????????????. O???????????????????????? ???????????? ???? ????????
??????????????????,'.encode('utf_7')
- ).best().first().encoding,
- 'utf_7'
- )
-
- with self.subTest('Encode & Detect UTF-8 WITH SIG (CJK)'):
- self.assertEqual(
- CnM.from_bytes(
- b'\xef\xbb\xbf' +
'????????????????????????????????????????????????'.encode('utf_8')
- ).best().first().encoding,
- 'utf_8'
- )
-
- with self.subTest('Encode & Detect UTF-8 WITHOUT SIG (CYRILLIC)'):
- self.assertEqual(
- CnM.from_bytes(
- 'B???????? ?????????? ?????? ?????????? ????
??????????????????????. O???????????????????????? ???????????? ???? ????????
??????????????????, '
- '???????? ???? ???? ???????????? ???? ??????????????????
?? ?????????????????? ??????????????????????.'.encode('utf_8')
- ).best().first().encoding,
- 'utf_8'
- )
-
- with self.subTest('Encode & Detect UTF-8 WITHOUT SIG (CYRILLIC)'):
- self.assertEqual(
- CnM.from_bytes(
- 'B???????? ?????????? ?????? ?????????? ????
??????????????????????.'.encode(
- 'utf_8')
- ).best().first().encoding,
- 'utf_8'
- )
+import unittest
+
+from charset_normalizer import CharsetNormalizerMatches as CnM
+
+
+class TestBytes(unittest.TestCase):
+
+ def test_too_short_none(self):
+ self.assertIsNotNone(
+ CnM.from_bytes(b'\xfe\xff').best().first()
+ )
+
+ def test_empty_bytes(self):
+ r = CnM.from_bytes(b'').best().first()
+
+ self.assertIsNotNone(
+ r
+ )
+
+ self.assertEqual(
+ 'utf-8',
+ r.encoding
+ )
+
+ self.assertEqual(
+ 0,
+ len(r.alphabets)
+ )
+
+ def test_bom_detection(self):
+ with self.subTest('GB18030 UNAVAILABLE SIG'):
+ self.assertFalse(
+ CnM.from_bytes(
+
'????????????????????????????????????????????????'.encode('gb18030')
+ ).best().first().byte_order_mark
+ )
+
+ with self.subTest('GB18030 AVAILABLE SIG'):
+ self.assertTrue(
+ CnM.from_bytes(
+ (u'\uFEFF' +
'????????????????????????????????????????????????').encode('gb18030')
+ ).best().first().byte_order_mark
+ )
+
+ with self.subTest('UTF-7 AVAILABLE BOM'):
+ self.assertTrue(
+ CnM.from_bytes(
+ b'\x2b\x2f\x76\x38' +
'????????????????????????????????????????????????'.encode('utf_7')
+ ).best().first().byte_order_mark
+ )
+
+ with self.subTest('UTF-8 AVAILABLE BOM'):
+ self.assertTrue(
+ CnM.from_bytes(
+ b'\xef\xbb\xbf' +
'????????????????????????????????????????????????'.encode('utf_8')
+ ).best().first().byte_order_mark
+ )
+
+ def test_encode_decode(self):
+
+ with self.subTest('Encode & Detect UTF-8 WITHOUT SIG SMALL CONTENT'):
+ self.assertEqual(
+ CnM.from_bytes(
+ 'h\xe9llo world!\n'.encode('utf_8')
+ ).best().first().encoding,
+ 'utf_8'
+ )
+
+ with self.subTest('Encode & Detect GB18030 WITHOUT SIG'):
+ self.assertEqual(
+ CnM.from_bytes(
+
'????????????????????????????????????????????????'.encode('gb18030')
+ ).best().first().encoding,
+ 'gb18030'
+ )
+
+ with self.subTest('Encode & Detect GB18030 WITH SIG (CJK)'):
+ self.assertEqual(
+ CnM.from_bytes(
+ (u'\uFEFF' +
'????????????????????????????????????????????????').encode('gb18030')
+ ).best().first().encoding,
+ 'gb18030'
+ )
+
+ with self.subTest('Encode & Detect UTF-8 WITHOUT SIG (CJK)'):
+ self.assertEqual(
+ CnM.from_bytes(
+
'????????????????????????????????????????????????'.encode('utf_8')
+ ).best().first().encoding,
+ 'utf_8'
+ )
+
+ with self.subTest('Encode & Detect UTF-7 WITHOUT BOM (CJK)'):
+ self.assertEqual(
+ CnM.from_bytes(
+
'???????????????,??????????????????????????????'.encode('utf_7')
+ ).best().first().encoding,
+ 'utf_7'
+ )
+
+ with self.subTest('Encode & Detect UTF-7 WITH BOM (CJK)'):
+ self.assertEqual(
+ CnM.from_bytes(
+
b'\x2b\x2f\x76\x38'+'????????????????????????????????????????????????'.encode('utf_7')
+ ).best().first().encoding,
+ 'utf_7'
+ )
+
+ with self.subTest('Encode & Detect UTF-7 WITHOUT BOM (CYRILLIC)'):
+ self.assertEqual(
+ CnM.from_bytes(
+ 'B???????? ?????????? ?????? ?????????? ????
??????????????????????. O???????????????????????? ???????????? ???? ????????
??????????????????,'.encode('utf_7')
+ ).best().first().encoding,
+ 'utf_7'
+ )
+
+ with self.subTest('Encode & Detect UTF-8 WITH SIG (CJK)'):
+ self.assertEqual(
+ CnM.from_bytes(
+ b'\xef\xbb\xbf' +
'????????????????????????????????????????????????'.encode('utf_8')
+ ).best().first().encoding,
+ 'utf_8'
+ )
+
+ with self.subTest('Encode & Detect UTF-8 WITHOUT SIG (CYRILLIC)'):
+ self.assertEqual(
+ CnM.from_bytes(
+ 'B???????? ?????????? ?????? ?????????? ????
??????????????????????. O???????????????????????? ???????????? ???? ????????
??????????????????, '
+ '???????? ???? ???? ???????????? ???? ??????????????????
?? ?????????????????? ??????????????????????.'.encode('utf_8')
+ ).best().first().encoding,
+ 'utf_8'
+ )
+
+ with self.subTest('Encode & Detect UTF-8 WITHOUT SIG (CYRILLIC)'):
+ self.assertEqual(
+ CnM.from_bytes(
+ 'B???????? ?????????? ?????? ?????????? ????
??????????????????????.'.encode(
+ 'utf_8')
+ ).best().first().encoding,
+ 'utf_8'
+ )