Author: Amaury Forgeot d'Arc <[email protected]>
Branch: py3.3
Changeset: r75163:342524c121fd
Date: 2014-12-29 23:38 +0100
http://bitbucket.org/pypy/pypy/changeset/342524c121fd/
Log: Update the _multibytecodec C files with the ones from CPython version 3.3.5
diff --git a/pypy/module/_multibytecodec/src/cjkcodecs/_codecs_cn.c b/pypy/module/_multibytecodec/src/cjkcodecs/_codecs_cn.c
--- a/pypy/module/_multibytecodec/src/cjkcodecs/_codecs_cn.c
+++ b/pypy/module/_multibytecodec/src/cjkcodecs/_codecs_cn.c
@@ -85,7 +85,7 @@
TRYMAP_DEC(gb2312, **outbuf, c ^ 0x80, IN2 ^ 0x80) {
NEXT(2, 1)
}
- else return 2;
+ else return 1;
}
return 0;
@@ -141,7 +141,7 @@
REQUIRE_INBUF(2)
GBK_DECODE(c, IN2, **outbuf)
- else return 2;
+ else return 1;
NEXT(2, 1)
}
@@ -267,7 +267,7 @@
c3 = IN3;
c4 = IN4;
if (c < 0x81 || c3 < 0x81 || c4 < 0x30 || c4 > 0x39)
- return 4;
+ return 1;
c -= 0x81; c2 -= 0x30;
c3 -= 0x81; c4 -= 0x30;
@@ -292,12 +292,12 @@
continue;
}
}
- return 4;
+ return 1;
}
GBK_DECODE(c, c2, **outbuf)
else TRYMAP_DEC(gb18030ext, **outbuf, c, c2);
- else return 2;
+ else return 1;
NEXT(2, 1)
}
@@ -400,7 +400,7 @@
else if (c2 == '\n')
; /* line-continuation */
else
- return 2;
+ return 1;
NEXT(2, 0);
continue;
}
@@ -419,7 +419,7 @@
NEXT(2, 1)
}
else
- return 2;
+ return 1;
}
}
diff --git a/pypy/module/_multibytecodec/src/cjkcodecs/_codecs_hk.c b/pypy/module/_multibytecodec/src/cjkcodecs/_codecs_hk.c
--- a/pypy/module/_multibytecodec/src/cjkcodecs/_codecs_hk.c
+++ b/pypy/module/_multibytecodec/src/cjkcodecs/_codecs_hk.c
@@ -112,55 +112,56 @@
REQUIRE_INBUF(2)
- if (0xc6 <= c && c <= 0xc8 && (c >= 0xc7 || IN2 >= 0xa1))
- goto hkscsdec;
+ if (0xc6 > c || c > 0xc8 || (c < 0xc7 && IN2 < 0xa1)) {
+ TRYMAP_DEC(big5, **outbuf, c, IN2) {
+ NEXT(2, 1)
+ continue;
+ }
+ }
- TRYMAP_DEC(big5, **outbuf, c, IN2) {
- NEXT(2, 1)
+ TRYMAP_DEC(big5hkscs, decoded, c, IN2)
+ {
+ int s = BH2S(c, IN2);
+ const unsigned char *hintbase;
+
+ assert(0x87 <= c && c <= 0xfe);
+ assert(0x40 <= IN2 && IN2 <= 0xfe);
+
+ if (BH2S(0x87, 0x40) <= s && s <= BH2S(0xa0, 0xfe)) {
+ hintbase = big5hkscs_phint_0;
+ s -= BH2S(0x87, 0x40);
+ }
+ else if (BH2S(0xc6,0xa1) <= s && s <= BH2S(0xc8,0xfe)){
+ hintbase = big5hkscs_phint_12130;
+ s -= BH2S(0xc6, 0xa1);
+ }
+ else if (BH2S(0xf9,0xd6) <= s && s <= BH2S(0xfe,0xfe)){
+ hintbase = big5hkscs_phint_21924;
+ s -= BH2S(0xf9, 0xd6);
+ }
+ else
+ return MBERR_INTERNAL;
+
+ if (hintbase[s >> 3] & (1 << (s & 7))) {
+ WRITEUCS4(decoded | 0x20000)
+ NEXT_IN(2)
+ }
+ else {
+ OUT1(decoded)
+ NEXT(2, 1)
+ }
+ continue;
}
- else
-hkscsdec: TRYMAP_DEC(big5hkscs, decoded, c, IN2) {
- int s = BH2S(c, IN2);
- const unsigned char *hintbase;
- assert(0x87 <= c && c <= 0xfe);
- assert(0x40 <= IN2 && IN2 <= 0xfe);
+ switch ((c << 8) | IN2) {
+ case 0x8862: WRITE2(0x00ca, 0x0304); break;
+ case 0x8864: WRITE2(0x00ca, 0x030c); break;
+ case 0x88a3: WRITE2(0x00ea, 0x0304); break;
+ case 0x88a5: WRITE2(0x00ea, 0x030c); break;
+ default: return 1;
+ }
- if (BH2S(0x87, 0x40) <= s && s <= BH2S(0xa0, 0xfe)) {
- hintbase = big5hkscs_phint_0;
- s -= BH2S(0x87, 0x40);
- }
- else if (BH2S(0xc6,0xa1) <= s && s <= BH2S(0xc8,0xfe)){
- hintbase = big5hkscs_phint_12130;
- s -= BH2S(0xc6, 0xa1);
- }
- else if (BH2S(0xf9,0xd6) <= s && s <= BH2S(0xfe,0xfe)){
- hintbase = big5hkscs_phint_21924;
- s -= BH2S(0xf9, 0xd6);
- }
- else
- return MBERR_INTERNAL;
-
- if (hintbase[s >> 3] & (1 << (s & 7))) {
- WRITEUCS4(decoded | 0x20000)
- NEXT_IN(2)
- }
- else {
- OUT1(decoded)
- NEXT(2, 1)
- }
- }
- else {
- switch ((c << 8) | IN2) {
- case 0x8862: WRITE2(0x00ca, 0x0304); break;
- case 0x8864: WRITE2(0x00ca, 0x030c); break;
- case 0x88a3: WRITE2(0x00ea, 0x0304); break;
- case 0x88a5: WRITE2(0x00ea, 0x030c); break;
- default: return 2;
- }
-
- NEXT(2, 2) /* all decoded codepoints are pairs, above. */
- }
+ NEXT(2, 2) /* all decoded codepoints are pairs, above. */
}
return 0;
diff --git a/pypy/module/_multibytecodec/src/cjkcodecs/_codecs_iso2022.c b/pypy/module/_multibytecodec/src/cjkcodecs/_codecs_iso2022.c
--- a/pypy/module/_multibytecodec/src/cjkcodecs/_codecs_iso2022.c
+++ b/pypy/module/_multibytecodec/src/cjkcodecs/_codecs_iso2022.c
@@ -123,7 +123,7 @@
CODEC_INIT(iso2022)
{
- const struct iso2022_designation *desig = CONFIG_DESIGNATIONS;
+ const struct iso2022_designation *desig;
for (desig = CONFIG_DESIGNATIONS; desig->mark; desig++)
if (desig->initializer != NULL && desig->initializer() != 0)
return -1;
diff --git a/pypy/module/_multibytecodec/src/cjkcodecs/_codecs_jp.c b/pypy/module/_multibytecodec/src/cjkcodecs/_codecs_jp.c
--- a/pypy/module/_multibytecodec/src/cjkcodecs/_codecs_jp.c
+++ b/pypy/module/_multibytecodec/src/cjkcodecs/_codecs_jp.c
@@ -112,7 +112,7 @@
TRYMAP_DEC(cp932ext, **outbuf, c, c2);
else if ((c >= 0x81 && c <= 0x9f) || (c >= 0xe0 && c <= 0xea)){
if (c2 < 0x40 || (c2 > 0x7e && c2 < 0x80) || c2 > 0xfc)
- return 2;
+ return 1;
c = (c < 0xe0 ? c - 0x81 : c - 0xc1);
c2 = (c2 < 0x80 ? c2 - 0x40 : c2 - 0x41);
@@ -120,7 +120,7 @@
c2 = (c2 < 0x5e ? c2 : c2 - 0x5e) + 0x21;
TRYMAP_DEC(jisx0208, **outbuf, c, c2);
- else return 2;
+ else return 1;
}
else if (c >= 0xf0 && c <= 0xf9) {
if ((c2 >= 0x40 && c2 <= 0x7e) ||
@@ -128,10 +128,10 @@
OUT1(0xe000 + 188 * (c - 0xf0) +
(c2 < 0x80 ? c2 - 0x40 : c2 - 0x41))
else
- return 2;
+ return 1;
}
else
- return 2;
+ return 1;
NEXT(2, 1)
}
@@ -256,7 +256,7 @@
NEXT(2, 1)
}
else
- return 2;
+ return 1;
}
else if (c == 0x8f) {
unsigned char c2, c3;
@@ -274,7 +274,7 @@
continue;
}
else TRYMAP_DEC(jisx0212, **outbuf, c2, c3) ;
- else return 3;
+ else return 1;
NEXT(3, 1)
}
else {
@@ -300,7 +300,7 @@
NEXT(2, 2)
continue;
}
- else return 2;
+ else return 1;
NEXT(2, 1)
}
}
@@ -371,11 +371,11 @@
REQUIRE_OUTBUF(1)
- if (c < 0x80) {
- OUT1(c)
- NEXT(1, 1)
- continue;
- }
+ if (c < 0x80) {
+ OUT1(c)
+ NEXT(1, 1)
+ continue;
+ }
if (c == 0x8e) {
/* JIS X 0201 half-width katakana */
@@ -388,7 +388,7 @@
NEXT(2, 1)
}
else
- return 2;
+ return 1;
}
else if (c == 0x8f) {
unsigned char c2, c3;
@@ -401,7 +401,7 @@
NEXT(3, 1)
}
else
- return 3;
+ return 1;
}
else {
unsigned char c2;
@@ -417,7 +417,7 @@
#endif
TRYMAP_DEC(jisx0208, **outbuf,
c ^ 0x80, c2 ^ 0x80) ;
- else return 2;
+ else return 1;
NEXT(2, 1)
}
}
@@ -502,7 +502,7 @@
REQUIRE_INBUF(2)
c2 = IN2;
if (c2 < 0x40 || (c2 > 0x7e && c2 < 0x80) || c2 > 0xfc)
- return 2;
+ return 1;
c1 = (c < 0xe0 ? c - 0x81 : c - 0xc1);
c2 = (c2 < 0x80 ? c2 - 0x40 : c2 - 0x41);
@@ -522,10 +522,10 @@
continue;
}
else
- return 2;
+ return 1;
}
else
- return 2;
+ return 1;
NEXT(1, 1) /* JIS X 0201 */
}
@@ -645,7 +645,7 @@
REQUIRE_INBUF(2)
c2 = IN2;
if (c2 < 0x40 || (c2 > 0x7e && c2 < 0x80) || c2 > 0xfc)
- return 2;
+ return 1;
c1 = (c < 0xe0 ? c - 0x81 : c - 0xc1);
c2 = (c2 < 0x80 ? c2 - 0x40 : c2 - 0x41);
@@ -671,7 +671,7 @@
NEXT_OUT(2)
}
else
- return 2;
+ return 1;
NEXT_IN(2)
}
else { /* Plane 2 */
@@ -689,13 +689,13 @@
continue;
}
else
- return 2;
+ return 1;
NEXT(2, 1)
}
continue;
}
else
- return 2;
+ return 1;
NEXT(1, 1) /* JIS X 0201 */
}
diff --git a/pypy/module/_multibytecodec/src/cjkcodecs/_codecs_kr.c b/pypy/module/_multibytecodec/src/cjkcodecs/_codecs_kr.c
--- a/pypy/module/_multibytecodec/src/cjkcodecs/_codecs_kr.c
+++ b/pypy/module/_multibytecodec/src/cjkcodecs/_codecs_kr.c
@@ -123,7 +123,7 @@
if ((*inbuf)[2] != EUCKR_JAMO_FIRSTBYTE ||
(*inbuf)[4] != EUCKR_JAMO_FIRSTBYTE ||
(*inbuf)[6] != EUCKR_JAMO_FIRSTBYTE)
- return 8;
+ return 1;
c = (*inbuf)[3];
if (0xa1 <= c && c <= 0xbe)
@@ -143,7 +143,7 @@
jong = NONE;
if (cho == NONE || jung == NONE || jong == NONE)
- return 8;
+ return 1;
OUT1(0xac00 + cho*588 + jung*28 + jong);
NEXT(8, 1)
@@ -152,7 +152,7 @@
NEXT(2, 1)
}
else
- return 2;
+ return 1;
}
return 0;
@@ -208,7 +208,7 @@
REQUIRE_INBUF(2)
TRYMAP_DEC(ksx1001, **outbuf, c ^ 0x80, IN2 ^ 0x80);
else TRYMAP_DEC(cp949ext, **outbuf, c, IN2);
- else return 2;
+ else return 1;
NEXT(2, 1)
}
@@ -375,7 +375,7 @@
i_jong = johabidx_jongseong[c_jong];
if (i_cho == NONE || i_jung == NONE || i_jong == NONE)
- return 2;
+ return 1;
/* we don't use U+1100 hangul jamo yet. */
if (i_cho == FILL) {
@@ -391,7 +391,7 @@
OUT1(0x3100 |
johabjamo_jungseong[c_jung])
else
- return 2;
+ return 1;
}
} else {
if (i_jung == FILL) {
@@ -399,7 +399,7 @@
OUT1(0x3100 |
johabjamo_choseong[c_cho])
else
- return 2;
+ return 1;
}
else
OUT1(0xac00 +
@@ -414,7 +414,7 @@
c2 < 0x31 || (c2 >= 0x80 && c2 < 0x91) ||
(c2 & 0x7f) == 0x7f ||
(c == 0xda && (c2 >= 0xa1 && c2 <= 0xd3)))
- return 2;
+ return 1;
else {
unsigned char t1, t2;
@@ -425,7 +425,7 @@
t2 = (t2 < 0x5e ? t2 : t2 - 0x5e) + 0x21;
TRYMAP_DEC(ksx1001, **outbuf, t1, t2);
- else return 2;
+ else return 1;
NEXT(2, 1)
}
}
diff --git a/pypy/module/_multibytecodec/test/test_app_codecs.py b/pypy/module/_multibytecodec/test/test_app_codecs.py
--- a/pypy/module/_multibytecodec/test/test_app_codecs.py
+++ b/pypy/module/_multibytecodec/test/test_app_codecs.py
@@ -30,23 +30,23 @@
assert e.reason == "incomplete multibyte sequence"
#
e = raises(UnicodeDecodeError, codec.decode, b"~{xyz}").value
- assert e.args == ('hz', b'~{xyz}', 2, 4, 'illegal multibyte sequence')
+ assert e.args == ('hz', b'~{xyz}', 2, 3, 'illegal multibyte sequence')
def test_decode_hz_ignore(self):
import _codecs_cn
codec = _codecs_cn.getcodec("hz")
r = codec.decode(b"def~{}abc", errors='ignore')
- assert r == ('def\u5fcf', 9)
+ assert r == ('def\u5f95', 9)
r = codec.decode(b"def~{}abc", 'ignore')
- assert r == ('def\u5fcf', 9)
+ assert r == ('def\u5f95', 9)
def test_decode_hz_replace(self):
import _codecs_cn
codec = _codecs_cn.getcodec("hz")
r = codec.decode(b"def~{}abc", errors='replace')
- assert r == ('def\ufffd\u5fcf', 9)
+ assert r == ('def\ufffd\u5f95\ufffd', 9)
r = codec.decode(b"def~{}abc", 'replace')
- assert r == ('def\ufffd\u5fcf', 9)
+ assert r == ('def\ufffd\u5f95\ufffd', 9)
def test_decode_custom_error_handler(self):
import codecs
diff --git a/pypy/module/_multibytecodec/test/test_app_incremental.py b/pypy/module/_multibytecodec/test/test_app_incremental.py
--- a/pypy/module/_multibytecodec/test/test_app_incremental.py
+++ b/pypy/module/_multibytecodec/test/test_app_incremental.py
@@ -21,11 +21,11 @@
return IncrementalHzEncoder
""")
cls.w_IncrementalBig5hkscsEncoder = cls.space.appexec([], """():
- import _codecs_cn
+ import _codecs_hk
from _multibytecodec import MultibyteIncrementalEncoder
class IncrementalBig5hkscsEncoder(MultibyteIncrementalEncoder):
- codec = _codecs_cn.getcodec('big5hkscs')
+ codec = _codecs_hk.getcodec('big5hkscs')
return IncrementalBig5hkscsEncoder
""")
diff --git a/pypy/module/_multibytecodec/test/test_c_codecs.py b/pypy/module/_multibytecodec/test/test_c_codecs.py
--- a/pypy/module/_multibytecodec/test/test_c_codecs.py
+++ b/pypy/module/_multibytecodec/test/test_c_codecs.py
@@ -80,18 +80,18 @@
#
e = py.test.raises(EncodeDecodeError, decode, c, "~{xyz}").value
assert e.start == 2
- assert e.end == 4
+ assert e.end == 3
assert e.reason == "illegal multibyte sequence"
def test_decode_hz_ignore():
c = getcodec("hz")
u = decode(c, 'def~{}abc', 'ignore')
- assert u == u'def\u5fcf'
+ assert u == u'def\u5f95'
def test_decode_hz_replace():
c = getcodec("hz")
u = decode(c, 'def~{}abc', 'replace')
- assert u == u'def\ufffd\u5fcf'
+ assert u == u'def\ufffd\u5f95\ufffd'
def test_encode_hz():
c = getcodec("hz")
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit