Author: Amaury Forgeot d'Arc <amaur...@gmail.com>
Branch: py3.3
Changeset: r75163:342524c121fd
Date: 2014-12-29 23:38 +0100
http://bitbucket.org/pypy/pypy/changeset/342524c121fd/

Log:    Update the _multibytecodec C files with the ones from CPython
        version 3.3.5

diff --git a/pypy/module/_multibytecodec/src/cjkcodecs/_codecs_cn.c b/pypy/module/_multibytecodec/src/cjkcodecs/_codecs_cn.c
--- a/pypy/module/_multibytecodec/src/cjkcodecs/_codecs_cn.c
+++ b/pypy/module/_multibytecodec/src/cjkcodecs/_codecs_cn.c
@@ -85,7 +85,7 @@
         TRYMAP_DEC(gb2312, **outbuf, c ^ 0x80, IN2 ^ 0x80) {
             NEXT(2, 1)
         }
-        else return 2;
+        else return 1;
     }
 
     return 0;
@@ -141,7 +141,7 @@
         REQUIRE_INBUF(2)
 
         GBK_DECODE(c, IN2, **outbuf)
-        else return 2;
+        else return 1;
 
         NEXT(2, 1)
     }
@@ -267,7 +267,7 @@
             c3 = IN3;
             c4 = IN4;
             if (c < 0x81 || c3 < 0x81 || c4 < 0x30 || c4 > 0x39)
-                return 4;
+                return 1;
             c -= 0x81;  c2 -= 0x30;
             c3 -= 0x81; c4 -= 0x30;
 
@@ -292,12 +292,12 @@
                     continue;
                 }
             }
-            return 4;
+            return 1;
         }
 
         GBK_DECODE(c, c2, **outbuf)
         else TRYMAP_DEC(gb18030ext, **outbuf, c, c2);
-        else return 2;
+        else return 1;
 
         NEXT(2, 1)
     }
@@ -400,7 +400,7 @@
             else if (c2 == '\n')
                 ; /* line-continuation */
             else
-                return 2;
+                return 1;
             NEXT(2, 0);
             continue;
         }
@@ -419,7 +419,7 @@
                 NEXT(2, 1)
             }
             else
-                return 2;
+                return 1;
         }
     }
 
diff --git a/pypy/module/_multibytecodec/src/cjkcodecs/_codecs_hk.c b/pypy/module/_multibytecodec/src/cjkcodecs/_codecs_hk.c
--- a/pypy/module/_multibytecodec/src/cjkcodecs/_codecs_hk.c
+++ b/pypy/module/_multibytecodec/src/cjkcodecs/_codecs_hk.c
@@ -112,55 +112,56 @@
 
         REQUIRE_INBUF(2)
 
-        if (0xc6 <= c && c <= 0xc8 && (c >= 0xc7 || IN2 >= 0xa1))
-            goto hkscsdec;
+        if (0xc6 > c || c > 0xc8 || (c < 0xc7 && IN2 < 0xa1)) {
+            TRYMAP_DEC(big5, **outbuf, c, IN2) {
+                NEXT(2, 1)
+                continue;
+            }
+        }
 
-        TRYMAP_DEC(big5, **outbuf, c, IN2) {
-            NEXT(2, 1)
+        TRYMAP_DEC(big5hkscs, decoded, c, IN2)
+        {
+            int s = BH2S(c, IN2);
+            const unsigned char *hintbase;
+
+            assert(0x87 <= c && c <= 0xfe);
+            assert(0x40 <= IN2 && IN2 <= 0xfe);
+
+            if (BH2S(0x87, 0x40) <= s && s <= BH2S(0xa0, 0xfe)) {
+                    hintbase = big5hkscs_phint_0;
+                    s -= BH2S(0x87, 0x40);
+            }
+            else if (BH2S(0xc6,0xa1) <= s && s <= BH2S(0xc8,0xfe)){
+                    hintbase = big5hkscs_phint_12130;
+                    s -= BH2S(0xc6, 0xa1);
+            }
+            else if (BH2S(0xf9,0xd6) <= s && s <= BH2S(0xfe,0xfe)){
+                    hintbase = big5hkscs_phint_21924;
+                    s -= BH2S(0xf9, 0xd6);
+            }
+            else
+                    return MBERR_INTERNAL;
+
+            if (hintbase[s >> 3] & (1 << (s & 7))) {
+                    WRITEUCS4(decoded | 0x20000)
+                    NEXT_IN(2)
+            }
+            else {
+                    OUT1(decoded)
+                    NEXT(2, 1)
+            }
+            continue;
         }
-        else
-hkscsdec:       TRYMAP_DEC(big5hkscs, decoded, c, IN2) {
-                        int s = BH2S(c, IN2);
-                        const unsigned char *hintbase;
 
-                        assert(0x87 <= c && c <= 0xfe);
-                        assert(0x40 <= IN2 && IN2 <= 0xfe);
+        switch ((c << 8) | IN2) {
+        case 0x8862: WRITE2(0x00ca, 0x0304); break;
+        case 0x8864: WRITE2(0x00ca, 0x030c); break;
+        case 0x88a3: WRITE2(0x00ea, 0x0304); break;
+        case 0x88a5: WRITE2(0x00ea, 0x030c); break;
+        default: return 1;
+        }
 
-                        if (BH2S(0x87, 0x40) <= s && s <= BH2S(0xa0, 0xfe)) {
-                                hintbase = big5hkscs_phint_0;
-                                s -= BH2S(0x87, 0x40);
-                        }
-                        else if (BH2S(0xc6,0xa1) <= s && s <= BH2S(0xc8,0xfe)){
-                                hintbase = big5hkscs_phint_12130;
-                                s -= BH2S(0xc6, 0xa1);
-                        }
-                        else if (BH2S(0xf9,0xd6) <= s && s <= BH2S(0xfe,0xfe)){
-                                hintbase = big5hkscs_phint_21924;
-                                s -= BH2S(0xf9, 0xd6);
-                        }
-                        else
-                                return MBERR_INTERNAL;
-
-                        if (hintbase[s >> 3] & (1 << (s & 7))) {
-                                WRITEUCS4(decoded | 0x20000)
-                                NEXT_IN(2)
-                        }
-                        else {
-                                OUT1(decoded)
-                                NEXT(2, 1)
-                        }
-                }
-                else {
-                        switch ((c << 8) | IN2) {
-                        case 0x8862: WRITE2(0x00ca, 0x0304); break;
-                        case 0x8864: WRITE2(0x00ca, 0x030c); break;
-                        case 0x88a3: WRITE2(0x00ea, 0x0304); break;
-                        case 0x88a5: WRITE2(0x00ea, 0x030c); break;
-                        default: return 2;
-                        }
-
-                        NEXT(2, 2) /* all decoded codepoints are pairs, above. */
-        }
+        NEXT(2, 2) /* all decoded codepoints are pairs, above. */
     }
 
     return 0;
diff --git a/pypy/module/_multibytecodec/src/cjkcodecs/_codecs_iso2022.c b/pypy/module/_multibytecodec/src/cjkcodecs/_codecs_iso2022.c
--- a/pypy/module/_multibytecodec/src/cjkcodecs/_codecs_iso2022.c
+++ b/pypy/module/_multibytecodec/src/cjkcodecs/_codecs_iso2022.c
@@ -123,7 +123,7 @@
 
 CODEC_INIT(iso2022)
 {
-    const struct iso2022_designation *desig = CONFIG_DESIGNATIONS;
+    const struct iso2022_designation *desig;
     for (desig = CONFIG_DESIGNATIONS; desig->mark; desig++)
         if (desig->initializer != NULL && desig->initializer() != 0)
             return -1;
diff --git a/pypy/module/_multibytecodec/src/cjkcodecs/_codecs_jp.c b/pypy/module/_multibytecodec/src/cjkcodecs/_codecs_jp.c
--- a/pypy/module/_multibytecodec/src/cjkcodecs/_codecs_jp.c
+++ b/pypy/module/_multibytecodec/src/cjkcodecs/_codecs_jp.c
@@ -112,7 +112,7 @@
         TRYMAP_DEC(cp932ext, **outbuf, c, c2);
         else if ((c >= 0x81 && c <= 0x9f) || (c >= 0xe0 && c <= 0xea)){
             if (c2 < 0x40 || (c2 > 0x7e && c2 < 0x80) || c2 > 0xfc)
-                return 2;
+                return 1;
 
             c = (c < 0xe0 ? c - 0x81 : c - 0xc1);
             c2 = (c2 < 0x80 ? c2 - 0x40 : c2 - 0x41);
@@ -120,7 +120,7 @@
             c2 = (c2 < 0x5e ? c2 : c2 - 0x5e) + 0x21;
 
             TRYMAP_DEC(jisx0208, **outbuf, c, c2);
-            else return 2;
+            else return 1;
         }
         else if (c >= 0xf0 && c <= 0xf9) {
             if ((c2 >= 0x40 && c2 <= 0x7e) ||
@@ -128,10 +128,10 @@
                 OUT1(0xe000 + 188 * (c - 0xf0) +
                      (c2 < 0x80 ? c2 - 0x40 : c2 - 0x41))
             else
-                return 2;
+                return 1;
         }
         else
-            return 2;
+            return 1;
 
         NEXT(2, 1)
     }
@@ -256,7 +256,7 @@
                 NEXT(2, 1)
             }
             else
-                return 2;
+                return 1;
         }
         else if (c == 0x8f) {
             unsigned char c2, c3;
@@ -274,7 +274,7 @@
                 continue;
             }
             else TRYMAP_DEC(jisx0212, **outbuf, c2, c3) ;
-            else return 3;
+            else return 1;
             NEXT(3, 1)
         }
         else {
@@ -300,7 +300,7 @@
                 NEXT(2, 2)
                 continue;
             }
-            else return 2;
+            else return 1;
             NEXT(2, 1)
         }
     }
@@ -371,11 +371,11 @@
 
         REQUIRE_OUTBUF(1)
 
-            if (c < 0x80) {
-                OUT1(c)
-                NEXT(1, 1)
-                continue;
-            }
+        if (c < 0x80) {
+            OUT1(c)
+            NEXT(1, 1)
+            continue;
+        }
 
         if (c == 0x8e) {
             /* JIS X 0201 half-width katakana */
@@ -388,7 +388,7 @@
                 NEXT(2, 1)
             }
             else
-                return 2;
+                return 1;
         }
         else if (c == 0x8f) {
             unsigned char c2, c3;
@@ -401,7 +401,7 @@
                 NEXT(3, 1)
             }
             else
-                return 3;
+                return 1;
         }
         else {
             unsigned char c2;
@@ -417,7 +417,7 @@
 #endif
                 TRYMAP_DEC(jisx0208, **outbuf,
                            c ^ 0x80, c2 ^ 0x80) ;
-            else return 2;
+            else return 1;
             NEXT(2, 1)
         }
     }
@@ -502,7 +502,7 @@
             REQUIRE_INBUF(2)
             c2 = IN2;
             if (c2 < 0x40 || (c2 > 0x7e && c2 < 0x80) || c2 > 0xfc)
-                return 2;
+                return 1;
 
             c1 = (c < 0xe0 ? c - 0x81 : c - 0xc1);
             c2 = (c2 < 0x80 ? c2 - 0x40 : c2 - 0x41);
@@ -522,10 +522,10 @@
                 continue;
             }
             else
-                return 2;
+                return 1;
         }
         else
-            return 2;
+            return 1;
 
         NEXT(1, 1) /* JIS X 0201 */
     }
@@ -645,7 +645,7 @@
             REQUIRE_INBUF(2)
             c2 = IN2;
             if (c2 < 0x40 || (c2 > 0x7e && c2 < 0x80) || c2 > 0xfc)
-                return 2;
+                return 1;
 
             c1 = (c < 0xe0 ? c - 0x81 : c - 0xc1);
             c2 = (c2 < 0x80 ? c2 - 0x40 : c2 - 0x41);
@@ -671,7 +671,7 @@
                     NEXT_OUT(2)
                 }
                 else
-                    return 2;
+                    return 1;
                 NEXT_IN(2)
             }
             else { /* Plane 2 */
@@ -689,13 +689,13 @@
                     continue;
                 }
                 else
-                    return 2;
+                    return 1;
                 NEXT(2, 1)
             }
             continue;
         }
         else
-            return 2;
+            return 1;
 
         NEXT(1, 1) /* JIS X 0201 */
     }
diff --git a/pypy/module/_multibytecodec/src/cjkcodecs/_codecs_kr.c b/pypy/module/_multibytecodec/src/cjkcodecs/_codecs_kr.c
--- a/pypy/module/_multibytecodec/src/cjkcodecs/_codecs_kr.c
+++ b/pypy/module/_multibytecodec/src/cjkcodecs/_codecs_kr.c
@@ -123,7 +123,7 @@
             if ((*inbuf)[2] != EUCKR_JAMO_FIRSTBYTE ||
                 (*inbuf)[4] != EUCKR_JAMO_FIRSTBYTE ||
                 (*inbuf)[6] != EUCKR_JAMO_FIRSTBYTE)
-                return 8;
+                return 1;
 
             c = (*inbuf)[3];
             if (0xa1 <= c && c <= 0xbe)
@@ -143,7 +143,7 @@
                 jong = NONE;
 
             if (cho == NONE || jung == NONE || jong == NONE)
-                return 8;
+                return 1;
 
             OUT1(0xac00 + cho*588 + jung*28 + jong);
             NEXT(8, 1)
@@ -152,7 +152,7 @@
             NEXT(2, 1)
         }
         else
-            return 2;
+            return 1;
     }
 
     return 0;
@@ -208,7 +208,7 @@
         REQUIRE_INBUF(2)
         TRYMAP_DEC(ksx1001, **outbuf, c ^ 0x80, IN2 ^ 0x80);
         else TRYMAP_DEC(cp949ext, **outbuf, c, IN2);
-        else return 2;
+        else return 1;
 
         NEXT(2, 1)
     }
@@ -375,7 +375,7 @@
             i_jong = johabidx_jongseong[c_jong];
 
             if (i_cho == NONE || i_jung == NONE || i_jong == NONE)
-                return 2;
+                return 1;
 
             /* we don't use U+1100 hangul jamo yet. */
             if (i_cho == FILL) {
@@ -391,7 +391,7 @@
                         OUT1(0x3100 |
                           johabjamo_jungseong[c_jung])
                     else
-                        return 2;
+                        return 1;
                 }
             } else {
                 if (i_jung == FILL) {
@@ -399,7 +399,7 @@
                         OUT1(0x3100 |
                           johabjamo_choseong[c_cho])
                     else
-                        return 2;
+                        return 1;
                 }
                 else
                     OUT1(0xac00 +
@@ -414,7 +414,7 @@
                 c2 < 0x31 || (c2 >= 0x80 && c2 < 0x91) ||
                 (c2 & 0x7f) == 0x7f ||
                 (c == 0xda && (c2 >= 0xa1 && c2 <= 0xd3)))
-                return 2;
+                return 1;
             else {
                 unsigned char t1, t2;
 
@@ -425,7 +425,7 @@
                 t2 = (t2 < 0x5e ? t2 : t2 - 0x5e) + 0x21;
 
                 TRYMAP_DEC(ksx1001, **outbuf, t1, t2);
-                else return 2;
+                else return 1;
                 NEXT(2, 1)
             }
         }
diff --git a/pypy/module/_multibytecodec/test/test_app_codecs.py b/pypy/module/_multibytecodec/test/test_app_codecs.py
--- a/pypy/module/_multibytecodec/test/test_app_codecs.py
+++ b/pypy/module/_multibytecodec/test/test_app_codecs.py
@@ -30,23 +30,23 @@
         assert e.reason == "incomplete multibyte sequence"
         #
         e = raises(UnicodeDecodeError, codec.decode, b"~{xyz}").value
-        assert e.args == ('hz', b'~{xyz}', 2, 4, 'illegal multibyte sequence')
+        assert e.args == ('hz', b'~{xyz}', 2, 3, 'illegal multibyte sequence')
 
     def test_decode_hz_ignore(self):
         import _codecs_cn
         codec = _codecs_cn.getcodec("hz")
         r = codec.decode(b"def~{}abc", errors='ignore')
-        assert r == ('def\u5fcf', 9)
+        assert r == ('def\u5f95', 9)
         r = codec.decode(b"def~{}abc", 'ignore')
-        assert r == ('def\u5fcf', 9)
+        assert r == ('def\u5f95', 9)
 
     def test_decode_hz_replace(self):
         import _codecs_cn
         codec = _codecs_cn.getcodec("hz")
         r = codec.decode(b"def~{}abc", errors='replace')
-        assert r == ('def\ufffd\u5fcf', 9)
+        assert r == ('def\ufffd\u5f95\ufffd', 9)
         r = codec.decode(b"def~{}abc", 'replace')
-        assert r == ('def\ufffd\u5fcf', 9)
+        assert r == ('def\ufffd\u5f95\ufffd', 9)
 
     def test_decode_custom_error_handler(self):
         import codecs
diff --git a/pypy/module/_multibytecodec/test/test_app_incremental.py b/pypy/module/_multibytecodec/test/test_app_incremental.py
--- a/pypy/module/_multibytecodec/test/test_app_incremental.py
+++ b/pypy/module/_multibytecodec/test/test_app_incremental.py
@@ -21,11 +21,11 @@
             return IncrementalHzEncoder
         """)
         cls.w_IncrementalBig5hkscsEncoder = cls.space.appexec([], """():
-            import _codecs_cn
+            import _codecs_hk
             from _multibytecodec import MultibyteIncrementalEncoder
 
             class IncrementalBig5hkscsEncoder(MultibyteIncrementalEncoder):
-                codec = _codecs_cn.getcodec('big5hkscs')
+                codec = _codecs_hk.getcodec('big5hkscs')
 
             return IncrementalBig5hkscsEncoder
         """)
diff --git a/pypy/module/_multibytecodec/test/test_c_codecs.py b/pypy/module/_multibytecodec/test/test_c_codecs.py
--- a/pypy/module/_multibytecodec/test/test_c_codecs.py
+++ b/pypy/module/_multibytecodec/test/test_c_codecs.py
@@ -80,18 +80,18 @@
     #
     e = py.test.raises(EncodeDecodeError, decode, c, "~{xyz}").value
     assert e.start == 2
-    assert e.end == 4
+    assert e.end == 3
     assert e.reason == "illegal multibyte sequence"
 
 def test_decode_hz_ignore():
     c = getcodec("hz")
     u = decode(c, 'def~{}abc', 'ignore')
-    assert u == u'def\u5fcf'
+    assert u == u'def\u5f95'
 
 def test_decode_hz_replace():
     c = getcodec("hz")
     u = decode(c, 'def~{}abc', 'replace')
-    assert u == u'def\ufffd\u5fcf'
+    assert u == u'def\ufffd\u5f95\ufffd'
 
 def test_encode_hz():
     c = getcodec("hz")
_______________________________________________
pypy-commit mailing list
pypy-commit@python.org
https://mail.python.org/mailman/listinfo/pypy-commit

Reply via email to