New issue 2938: pypy3: time.strptime(): UnicodeDecodeError: 'utf8' codec can't decode byte 0xb6 in position 0: invalid start byte https://bitbucket.org/pypy/pypy/issues/2938/pypy3-timestrptime-unicodedecodeerror-utf8
Jon Dufresne: ``` $ pypy3 --version Python 3.5.3 (7cafdf4fca72, Aug 27 2018, 22:02:53) [PyPy 6.0.0 with GCC 8.2.1 20180801 (Red Hat 8.2.1-2)] ``` When running the Pillow test suite with pypy3, I receive the error: ``` UnicodeDecodeError: 'utf8' codec can't decode byte 0xb6 in position 0: invalid start byte ``` I do not receive this error with CPython or pypy2. Steps to reproduce: ``` git clone git@github.com:python-pillow/Pillow.git cd Pillow git checkout b62ff510aa90663bbc76ed4d6309b0774875b973 # The latest revision as of today tox -e pypy3 ``` Result: ``` ==================================================================================================== FAILURES ===================================================================================================== ___________________________________________________________________________________________ TestPdfParser.test_parsing ____________________________________________________________________________________________ self = <test_pdfparser.TestPdfParser testMethod=test_parsing> def test_parsing(self): self.assertEqual(PdfParser.interpret_name(b"Name#23Hash"), b"Name#Hash") self.assertEqual(PdfParser.interpret_name( b"Name#23Hash", as_text=True ), "Name#Hash") self.assertEqual(PdfParser.get_value(b"1 2 R ", 0), (IndirectReference(1, 2), 5)) self.assertEqual(PdfParser.get_value(b"true[", 0), (True, 4)) self.assertEqual(PdfParser.get_value(b"false%", 0), (False, 5)) self.assertEqual(PdfParser.get_value(b"null<", 0), (None, 4)) self.assertEqual(PdfParser.get_value(b"%cmt\n %cmt\n 123\n", 0), (123, 15)) self.assertEqual(PdfParser.get_value(b"<901FA3>", 0), (b"\x90\x1F\xA3", 8)) self.assertEqual(PdfParser.get_value(b"asd < 9 0 1 f A > qwe", 3), (b"\x90\x1F\xA0", 17)) self.assertEqual(PdfParser.get_value(b"(asd)", 0), (b"asd", 5)) self.assertEqual(PdfParser.get_value(b"(asd(qwe)zxc)zzz(aaa)", 0), (b"asd(qwe)zxc", 13)) self.assertEqual(PdfParser.get_value(b"(Two \\\nwords.)", 0), (b"Two words.", 14)) 
self.assertEqual(PdfParser.get_value(b"(Two\nlines.)", 0), (b"Two\nlines.", 12)) self.assertEqual(PdfParser.get_value(b"(Two\r\nlines.)", 0), (b"Two\nlines.", 13)) self.assertEqual(PdfParser.get_value(b"(Two\\nlines.)", 0), (b"Two\nlines.", 13)) self.assertEqual(PdfParser.get_value(b"(One\\(paren).", 0), (b"One(paren", 12)) self.assertEqual(PdfParser.get_value(b"(One\\)paren).", 0), (b"One)paren", 12)) self.assertEqual(PdfParser.get_value(b"(\\0053)", 0), (b"\x053", 7)) self.assertEqual(PdfParser.get_value(b"(\\053)", 0), (b"\x2B", 6)) self.assertEqual(PdfParser.get_value(b"(\\53)", 0), (b"\x2B", 5)) self.assertEqual(PdfParser.get_value(b"(\\53a)", 0), (b"\x2Ba", 6)) self.assertEqual(PdfParser.get_value(b"(\\1111)", 0), (b"\x491", 7)) self.assertEqual(PdfParser.get_value(b" 123 (", 0), (123, 4)) self.assertAlmostEqual(PdfParser.get_value(b" 123.4 %", 0)[0], 123.4) self.assertEqual(PdfParser.get_value(b" 123.4 %", 0)[1], 6) self.assertRaises(PdfFormatError, PdfParser.get_value, b"]", 0) d = PdfParser.get_value(b"<</Name (value) /N /V>>", 0)[0] self.assertIsInstance(d, PdfDict) self.assertEqual(len(d), 2) self.assertEqual(d.Name, "value") self.assertEqual(d[b"Name"], b"value") self.assertEqual(d.N, PdfName("V")) a = PdfParser.get_value(b"[/Name (value) /N /V]", 0)[0] self.assertIsInstance(a, list) self.assertEqual(len(a), 4) self.assertEqual(a[0], PdfName("Name")) s = PdfParser.get_value( b"<</Name (value) /Length 5>>\nstream\nabcde\nendstream<<...", 0 )[0] self.assertIsInstance(s, PdfStream) self.assertEqual(s.dictionary.Name, "value") self.assertEqual(s.decode(), b"abcde") for name in ["CreationDate", "ModDate"]: for date, value in { b"20180729214124": "20180729214124", b"D:20180729214124": "20180729214124", b"D:2018072921": "20180729210000", b"D:20180729214124Z": "20180729214124", b"D:20180729214124+08'00'": "20180729134124", b"D:20180729214124-05'00'": "20180730024124" }.items(): d = PdfParser.get_value( b"<</"+name.encode()+b" ("+date+b")>>", 0)[0] 
self.assertEqual( > time.strftime("%Y%m%d%H%M%S", getattr(d, name)), value) Tests/test_pdfparser.py:96: _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ .tox/pypy3/site-packages/PIL/PdfParser.py:298: in __getattr__ value = time.strptime(value[:len(format)+2], format) /usr/lib64/pypy3-6.0/lib-python/3/_strptime.py:504: in _strptime_time tt = _strptime(data_string, format)[0] /usr/lib64/pypy3-6.0/lib-python/3/_strptime.py:318: in _strptime _TimeRE_cache = TimeRE() /usr/lib64/pypy3-6.0/lib-python/3/_strptime.py:194: in __init__ self.locale_time = LocaleTime() /usr/lib64/pypy3-6.0/lib-python/3/_strptime.py:73: in __init__ self.__calc_weekday() /usr/lib64/pypy3-6.0/lib-python/3/_strptime.py:95: in __calc_weekday a_weekday = [calendar.day_abbr[i].lower() for i in range(7)] /usr/lib64/pypy3-6.0/lib-python/3/_strptime.py:95: in <listcomp> a_weekday = [calendar.day_abbr[i].lower() for i in range(7)] /usr/lib64/pypy3-6.0/lib-python/3/calendar.py:80: in __getitem__ return funcs(self.format) /usr/lib64/pypy3-6.0/lib-python/3/datetime.py:754: in strftime return _wrap_strftime(self, format, self.timetuple()) _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ object = datetime.date(2001, 1, 3), format = '%a', timetuple = time.struct_time(tm_year=2001, tm_mon=1, tm_mday=3, tm_hour=0, tm_min=0, tm_sec=0, tm_wday=2, tm_yday=3, tm_isdst=-1) def _wrap_strftime(object, format, timetuple): # Don't call utcoffset() or tzname() unless actually needed. freplace = None # the string to use for %f zreplace = None # the string to use for %z Zreplace = None # the string to use for %Z # Scan format for %z and %Z escapes, replacing as needed. 
newformat = [] push = newformat.append i, n = 0, len(format) while i < n: ch = format[i] i += 1 if ch == '%': if i < n: ch = format[i] i += 1 if ch == 'f': if freplace is None: freplace = '%06d' % getattr(object, 'microsecond', 0) newformat.append(freplace) elif ch == 'z': if zreplace is None: zreplace = "" if hasattr(object, "utcoffset"): offset = object.utcoffset() if offset is not None: sign = '+' if offset.days < 0: offset = -offset sign = '-' h, m = divmod(offset, timedelta(hours=1)) assert not m % timedelta(minutes=1), "whole minute" m //= timedelta(minutes=1) zreplace = '%c%02d%02d' % (sign, h, m) assert '%' not in zreplace newformat.append(zreplace) elif ch == 'Z': if Zreplace is None: Zreplace = "" if hasattr(object, "tzname"): s = object.tzname() if s is not None: # strftime is going to have at this: escape % Zreplace = s.replace('%', '%%') newformat.append(Zreplace) else: push('%') push(ch) else: push('%') else: push(ch) newformat = "".join(newformat) > return _time.strftime(newformat, timetuple) E UnicodeDecodeError: 'utf8' codec can't decode byte 0xb6 in position 0: invalid start byte /usr/lib64/pypy3-6.0/lib-python/3/datetime.py:221: UnicodeDecodeError ``` _______________________________________________ pypy-issue mailing list pypy-issue@python.org https://mail.python.org/mailman/listinfo/pypy-issue