Author: Amaury Forgeot d'Arc <amaur...@gmail.com> Branch: py3.3 Changeset: r75163:342524c121fd Date: 2014-12-29 23:38 +0100 http://bitbucket.org/pypy/pypy/changeset/342524c121fd/
Log: Update the _multibytecodec C files with the ones from CPython version 3.3.5 diff --git a/pypy/module/_multibytecodec/src/cjkcodecs/_codecs_cn.c b/pypy/module/_multibytecodec/src/cjkcodecs/_codecs_cn.c --- a/pypy/module/_multibytecodec/src/cjkcodecs/_codecs_cn.c +++ b/pypy/module/_multibytecodec/src/cjkcodecs/_codecs_cn.c @@ -85,7 +85,7 @@ TRYMAP_DEC(gb2312, **outbuf, c ^ 0x80, IN2 ^ 0x80) { NEXT(2, 1) } - else return 2; + else return 1; } return 0; @@ -141,7 +141,7 @@ REQUIRE_INBUF(2) GBK_DECODE(c, IN2, **outbuf) - else return 2; + else return 1; NEXT(2, 1) } @@ -267,7 +267,7 @@ c3 = IN3; c4 = IN4; if (c < 0x81 || c3 < 0x81 || c4 < 0x30 || c4 > 0x39) - return 4; + return 1; c -= 0x81; c2 -= 0x30; c3 -= 0x81; c4 -= 0x30; @@ -292,12 +292,12 @@ continue; } } - return 4; + return 1; } GBK_DECODE(c, c2, **outbuf) else TRYMAP_DEC(gb18030ext, **outbuf, c, c2); - else return 2; + else return 1; NEXT(2, 1) } @@ -400,7 +400,7 @@ else if (c2 == '\n') ; /* line-continuation */ else - return 2; + return 1; NEXT(2, 0); continue; } @@ -419,7 +419,7 @@ NEXT(2, 1) } else - return 2; + return 1; } } diff --git a/pypy/module/_multibytecodec/src/cjkcodecs/_codecs_hk.c b/pypy/module/_multibytecodec/src/cjkcodecs/_codecs_hk.c --- a/pypy/module/_multibytecodec/src/cjkcodecs/_codecs_hk.c +++ b/pypy/module/_multibytecodec/src/cjkcodecs/_codecs_hk.c @@ -112,55 +112,56 @@ REQUIRE_INBUF(2) - if (0xc6 <= c && c <= 0xc8 && (c >= 0xc7 || IN2 >= 0xa1)) - goto hkscsdec; + if (0xc6 > c || c > 0xc8 || (c < 0xc7 && IN2 < 0xa1)) { + TRYMAP_DEC(big5, **outbuf, c, IN2) { + NEXT(2, 1) + continue; + } + } - TRYMAP_DEC(big5, **outbuf, c, IN2) { - NEXT(2, 1) + TRYMAP_DEC(big5hkscs, decoded, c, IN2) + { + int s = BH2S(c, IN2); + const unsigned char *hintbase; + + assert(0x87 <= c && c <= 0xfe); + assert(0x40 <= IN2 && IN2 <= 0xfe); + + if (BH2S(0x87, 0x40) <= s && s <= BH2S(0xa0, 0xfe)) { + hintbase = big5hkscs_phint_0; + s -= BH2S(0x87, 0x40); + } + else if (BH2S(0xc6,0xa1) <= s && s <= BH2S(0xc8,0xfe)){ + hintbase = big5hkscs_phint_12130; + s -= BH2S(0xc6, 0xa1); + } + else if (BH2S(0xf9,0xd6) <= s && s <= BH2S(0xfe,0xfe)){ + hintbase = big5hkscs_phint_21924; + s -= BH2S(0xf9, 0xd6); + } + else + return MBERR_INTERNAL; + + if (hintbase[s >> 3] & (1 << (s & 7))) { + WRITEUCS4(decoded | 0x20000) + NEXT_IN(2) + } + else { + OUT1(decoded) + NEXT(2, 1) + } + continue; } - else -hkscsdec: TRYMAP_DEC(big5hkscs, decoded, c, IN2) { - int s = BH2S(c, IN2); - const unsigned char *hintbase; - assert(0x87 <= c && c <= 0xfe); - assert(0x40 <= IN2 && IN2 <= 0xfe); + switch ((c << 8) | IN2) { + case 0x8862: WRITE2(0x00ca, 0x0304); break; + case 0x8864: WRITE2(0x00ca, 0x030c); break; + case 0x88a3: WRITE2(0x00ea, 0x0304); break; + case 0x88a5: WRITE2(0x00ea, 0x030c); break; + default: return 1; + } - if (BH2S(0x87, 0x40) <= s && s <= BH2S(0xa0, 0xfe)) { - hintbase = big5hkscs_phint_0; - s -= BH2S(0x87, 0x40); - } - else if (BH2S(0xc6,0xa1) <= s && s <= BH2S(0xc8,0xfe)){ - hintbase = big5hkscs_phint_12130; - s -= BH2S(0xc6, 0xa1); - } - else if (BH2S(0xf9,0xd6) <= s && s <= BH2S(0xfe,0xfe)){ - hintbase = big5hkscs_phint_21924; - s -= BH2S(0xf9, 0xd6); - } - else - return MBERR_INTERNAL; - - if (hintbase[s >> 3] & (1 << (s & 7))) { - WRITEUCS4(decoded | 0x20000) - NEXT_IN(2) - } - else { - OUT1(decoded) - NEXT(2, 1) - } - } - else { - switch ((c << 8) | IN2) { - case 0x8862: WRITE2(0x00ca, 0x0304); break; - case 0x8864: WRITE2(0x00ca, 0x030c); break; - case 0x88a3: WRITE2(0x00ea, 0x0304); break; - case 0x88a5: WRITE2(0x00ea, 0x030c); break; - default: return 2; - } - - NEXT(2, 2) /* all decoded codepoints are pairs, above. */ - } + NEXT(2, 2) /* all decoded codepoints are pairs, above. */ } return 0; diff --git a/pypy/module/_multibytecodec/src/cjkcodecs/_codecs_iso2022.c b/pypy/module/_multibytecodec/src/cjkcodecs/_codecs_iso2022.c --- a/pypy/module/_multibytecodec/src/cjkcodecs/_codecs_iso2022.c +++ b/pypy/module/_multibytecodec/src/cjkcodecs/_codecs_iso2022.c @@ -123,7 +123,7 @@ CODEC_INIT(iso2022) { - const struct iso2022_designation *desig = CONFIG_DESIGNATIONS; + const struct iso2022_designation *desig; for (desig = CONFIG_DESIGNATIONS; desig->mark; desig++) if (desig->initializer != NULL && desig->initializer() != 0) return -1; diff --git a/pypy/module/_multibytecodec/src/cjkcodecs/_codecs_jp.c b/pypy/module/_multibytecodec/src/cjkcodecs/_codecs_jp.c --- a/pypy/module/_multibytecodec/src/cjkcodecs/_codecs_jp.c +++ b/pypy/module/_multibytecodec/src/cjkcodecs/_codecs_jp.c @@ -112,7 +112,7 @@ TRYMAP_DEC(cp932ext, **outbuf, c, c2); else if ((c >= 0x81 && c <= 0x9f) || (c >= 0xe0 && c <= 0xea)){ if (c2 < 0x40 || (c2 > 0x7e && c2 < 0x80) || c2 > 0xfc) - return 2; + return 1; c = (c < 0xe0 ? c - 0x81 : c - 0xc1); c2 = (c2 < 0x80 ? c2 - 0x40 : c2 - 0x41); @@ -120,7 +120,7 @@ c2 = (c2 < 0x5e ? c2 : c2 - 0x5e) + 0x21; TRYMAP_DEC(jisx0208, **outbuf, c, c2); - else return 2; + else return 1; } else if (c >= 0xf0 && c <= 0xf9) { if ((c2 >= 0x40 && c2 <= 0x7e) || @@ -128,10 +128,10 @@ OUT1(0xe000 + 188 * (c - 0xf0) + (c2 < 0x80 ? c2 - 0x40 : c2 - 0x41)) else - return 2; + return 1; } else - return 2; + return 1; NEXT(2, 1) } @@ -256,7 +256,7 @@ NEXT(2, 1) } else - return 2; + return 1; } else if (c == 0x8f) { unsigned char c2, c3; @@ -274,7 +274,7 @@ continue; } else TRYMAP_DEC(jisx0212, **outbuf, c2, c3) ; - else return 3; + else return 1; NEXT(3, 1) } else { @@ -300,7 +300,7 @@ NEXT(2, 2) continue; } - else return 2; + else return 1; NEXT(2, 1) } } @@ -371,11 +371,11 @@ REQUIRE_OUTBUF(1) - if (c < 0x80) { - OUT1(c) - NEXT(1, 1) - continue; - } + if (c < 0x80) { + OUT1(c) + NEXT(1, 1) + continue; + } if (c == 0x8e) { /* JIS X 0201 half-width katakana */ @@ -388,7 +388,7 @@ NEXT(2, 1) } else - return 2; + return 1; } else if (c == 0x8f) { unsigned char c2, c3; @@ -401,7 +401,7 @@ NEXT(3, 1) } else - return 3; + return 1; } else { unsigned char c2; @@ -417,7 +417,7 @@ #endif TRYMAP_DEC(jisx0208, **outbuf, c ^ 0x80, c2 ^ 0x80) ; - else return 2; + else return 1; NEXT(2, 1) } } @@ -502,7 +502,7 @@ REQUIRE_INBUF(2) c2 = IN2; if (c2 < 0x40 || (c2 > 0x7e && c2 < 0x80) || c2 > 0xfc) - return 2; + return 1; c1 = (c < 0xe0 ? c - 0x81 : c - 0xc1); c2 = (c2 < 0x80 ? c2 - 0x40 : c2 - 0x41); @@ -522,10 +522,10 @@ continue; } else - return 2; + return 1; } else - return 2; + return 1; NEXT(1, 1) /* JIS X 0201 */ } @@ -645,7 +645,7 @@ REQUIRE_INBUF(2) c2 = IN2; if (c2 < 0x40 || (c2 > 0x7e && c2 < 0x80) || c2 > 0xfc) - return 2; + return 1; c1 = (c < 0xe0 ? c - 0x81 : c - 0xc1); c2 = (c2 < 0x80 ? c2 - 0x40 : c2 - 0x41); @@ -671,7 +671,7 @@ NEXT_OUT(2) } else - return 2; + return 1; NEXT_IN(2) } else { /* Plane 2 */ @@ -689,13 +689,13 @@ continue; } else - return 2; + return 1; NEXT(2, 1) } continue; } else - return 2; + return 1; NEXT(1, 1) /* JIS X 0201 */ } diff --git a/pypy/module/_multibytecodec/src/cjkcodecs/_codecs_kr.c b/pypy/module/_multibytecodec/src/cjkcodecs/_codecs_kr.c --- a/pypy/module/_multibytecodec/src/cjkcodecs/_codecs_kr.c +++ b/pypy/module/_multibytecodec/src/cjkcodecs/_codecs_kr.c @@ -123,7 +123,7 @@ if ((*inbuf)[2] != EUCKR_JAMO_FIRSTBYTE || (*inbuf)[4] != EUCKR_JAMO_FIRSTBYTE || (*inbuf)[6] != EUCKR_JAMO_FIRSTBYTE) - return 8; + return 1; c = (*inbuf)[3]; if (0xa1 <= c && c <= 0xbe) @@ -143,7 +143,7 @@ jong = NONE; if (cho == NONE || jung == NONE || jong == NONE) - return 8; + return 1; OUT1(0xac00 + cho*588 + jung*28 + jong); NEXT(8, 1) @@ -152,7 +152,7 @@ NEXT(2, 1) } else - return 2; + return 1; } return 0; @@ -208,7 +208,7 @@ REQUIRE_INBUF(2) TRYMAP_DEC(ksx1001, **outbuf, c ^ 0x80, IN2 ^ 0x80); else TRYMAP_DEC(cp949ext, **outbuf, c, IN2); - else return 2; + else return 1; NEXT(2, 1) } @@ -375,7 +375,7 @@ i_jong = johabidx_jongseong[c_jong]; if (i_cho == NONE || i_jung == NONE || i_jong == NONE) - return 2; + return 1; /* we don't use U+1100 hangul jamo yet. */ if (i_cho == FILL) { @@ -391,7 +391,7 @@ OUT1(0x3100 | johabjamo_jungseong[c_jung]) else - return 2; + return 1; } } else { if (i_jung == FILL) { @@ -399,7 +399,7 @@ OUT1(0x3100 | johabjamo_choseong[c_cho]) else - return 2; + return 1; } else OUT1(0xac00 + @@ -414,7 +414,7 @@ c2 < 0x31 || (c2 >= 0x80 && c2 < 0x91) || (c2 & 0x7f) == 0x7f || (c == 0xda && (c2 >= 0xa1 && c2 <= 0xd3))) - return 2; + return 1; else { unsigned char t1, t2; @@ -425,7 +425,7 @@ t2 = (t2 < 0x5e ? t2 : t2 - 0x5e) + 0x21; TRYMAP_DEC(ksx1001, **outbuf, t1, t2); - else return 2; + else return 1; NEXT(2, 1) } } diff --git a/pypy/module/_multibytecodec/test/test_app_codecs.py b/pypy/module/_multibytecodec/test/test_app_codecs.py --- a/pypy/module/_multibytecodec/test/test_app_codecs.py +++ b/pypy/module/_multibytecodec/test/test_app_codecs.py @@ -30,23 +30,23 @@ assert e.reason == "incomplete multibyte sequence" # e = raises(UnicodeDecodeError, codec.decode, b"~{xyz}").value - assert e.args == ('hz', b'~{xyz}', 2, 4, 'illegal multibyte sequence') + assert e.args == ('hz', b'~{xyz}', 2, 3, 'illegal multibyte sequence') def test_decode_hz_ignore(self): import _codecs_cn codec = _codecs_cn.getcodec("hz") r = codec.decode(b"def~{}abc", errors='ignore') - assert r == ('def\u5fcf', 9) + assert r == ('def\u5f95', 9) r = codec.decode(b"def~{}abc", 'ignore') - assert r == ('def\u5fcf', 9) + assert r == ('def\u5f95', 9) def test_decode_hz_replace(self): import _codecs_cn codec = _codecs_cn.getcodec("hz") r = codec.decode(b"def~{}abc", errors='replace') - assert r == ('def\ufffd\u5fcf', 9) + assert r == ('def\ufffd\u5f95\ufffd', 9) r = codec.decode(b"def~{}abc", 'replace') - assert r == ('def\ufffd\u5fcf', 9) + assert r == ('def\ufffd\u5f95\ufffd', 9) def test_decode_custom_error_handler(self): import codecs diff --git a/pypy/module/_multibytecodec/test/test_app_incremental.py b/pypy/module/_multibytecodec/test/test_app_incremental.py --- a/pypy/module/_multibytecodec/test/test_app_incremental.py +++ b/pypy/module/_multibytecodec/test/test_app_incremental.py @@ -21,11 +21,11 @@ return IncrementalHzEncoder """) cls.w_IncrementalBig5hkscsEncoder = cls.space.appexec([], """(): - import _codecs_cn + import _codecs_hk from _multibytecodec import MultibyteIncrementalEncoder class IncrementalBig5hkscsEncoder(MultibyteIncrementalEncoder): - codec = _codecs_cn.getcodec('big5hkscs') + codec = _codecs_hk.getcodec('big5hkscs') return IncrementalBig5hkscsEncoder """) diff --git a/pypy/module/_multibytecodec/test/test_c_codecs.py b/pypy/module/_multibytecodec/test/test_c_codecs.py --- a/pypy/module/_multibytecodec/test/test_c_codecs.py +++ b/pypy/module/_multibytecodec/test/test_c_codecs.py @@ -80,18 +80,18 @@ # e = py.test.raises(EncodeDecodeError, decode, c, "~{xyz}").value assert e.start == 2 - assert e.end == 4 + assert e.end == 3 assert e.reason == "illegal multibyte sequence" def test_decode_hz_ignore(): c = getcodec("hz") u = decode(c, 'def~{}abc', 'ignore') - assert u == u'def\u5fcf' + assert u == u'def\u5f95' def test_decode_hz_replace(): c = getcodec("hz") u = decode(c, 'def~{}abc', 'replace') - assert u == u'def\ufffd\u5fcf' + assert u == u'def\ufffd\u5f95\ufffd' def test_encode_hz(): c = getcodec("hz") _______________________________________________ pypy-commit mailing list pypy-commit@python.org https://mail.python.org/mailman/listinfo/pypy-commit