Author: Philip Jenvey <[email protected]>
Branch: 
Changeset: r60109:1804a92ff657
Date: 2013-01-16 11:35 -0800
http://bitbucket.org/pypy/pypy/changeset/1804a92ff657/

Log:    Emit BOMs when encoding empty strings to UTF-16/UTF-32. A py3k
        incremental encoding test (test_codecs's test_decoder_state)
        actually relies on this.

diff --git a/pypy/rlib/runicode.py b/pypy/rlib/runicode.py
--- a/pypy/rlib/runicode.py
+++ b/pypy/rlib/runicode.py
@@ -460,6 +460,10 @@
                                  errorhandler=None,
                                  byteorder='little'):
     if size == 0:
+        if byteorder == 'native':
+            result = StringBuilder(2)
+            _STORECHAR(result, 0xFEFF, BYTEORDER)
+            return result.build()
         return ""
 
     result = StringBuilder(size * 2 + 2)
@@ -621,6 +625,10 @@
                                  errorhandler=None,
                                  byteorder='little'):
     if size == 0:
+        if byteorder == 'native':
+            result = StringBuilder(4)
+            _STORECHAR32(result, 0xFEFF, BYTEORDER)
+            return result.build()
         return ""
 
     result = StringBuilder(size * 4 + 4)
diff --git a/pypy/rlib/test/test_runicode.py b/pypy/rlib/test/test_runicode.py
--- a/pypy/rlib/test/test_runicode.py
+++ b/pypy/rlib/test/test_runicode.py
@@ -679,6 +679,11 @@
                          "utf-32 utf-32-be utf-32-le").split():
             self.checkencode(uni, encoding)
 
+    def test_empty(self):
+        for encoding in ("utf-8 utf-16 utf-16-be utf-16-le "
+                         "utf-32 utf-32-be utf-32-le").split():
+            self.checkencode(u'', encoding)
+
     def test_single_chars_utf8(self):
         # check every number of bytes per char
         for s in ["\xd7\x90", "\xd6\x96", "\xeb\x96\x95", "\xf0\x90\x91\x93"]:
_______________________________________________
pypy-commit mailing list
[email protected]
http://mail.python.org/mailman/listinfo/pypy-commit

Reply via email to