Module Name: src Committed By: tnozaki Date: Mon Mar 15 15:00:58 UTC 2010
Modified Files: src/lib/libc/citrus/modules: citrus_utf1632.c Log Message: 1. fix wrong byte order mark of utf-16, reported by NARUSE Yui -san. patch provided by tshiozak@ -san. 2. don't eat 0xfeff/0xfffe if they don't appear at the start of the bytestream. noticed by tshiozak@ -san, patch provided by me. thanks a lot. To generate a diff of this commit: cvs rdiff -u -r1.9 -r1.10 src/lib/libc/citrus/modules/citrus_utf1632.c Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files.
Modified files: Index: src/lib/libc/citrus/modules/citrus_utf1632.c diff -u src/lib/libc/citrus/modules/citrus_utf1632.c:1.9 src/lib/libc/citrus/modules/citrus_utf1632.c:1.10 --- src/lib/libc/citrus/modules/citrus_utf1632.c:1.9 Sat Jun 14 16:01:08 2008 +++ src/lib/libc/citrus/modules/citrus_utf1632.c Mon Mar 15 15:00:58 2010 @@ -1,4 +1,4 @@ -/* $NetBSD: citrus_utf1632.c,v 1.9 2008/06/14 16:01:08 tnozaki Exp $ */ +/* $NetBSD: citrus_utf1632.c,v 1.10 2010/03/15 15:00:58 tnozaki Exp $ */ /*- * Copyright (c)2003 Citrus Project, @@ -28,7 +28,7 @@ #include <sys/cdefs.h> #if defined(LIBC_SCCS) && !defined(lint) -__RCSID("$NetBSD: citrus_utf1632.c,v 1.9 2008/06/14 16:01:08 tnozaki Exp $"); +__RCSID("$NetBSD: citrus_utf1632.c,v 1.10 2010/03/15 15:00:58 tnozaki Exp $"); #endif /* LIBC_SCCS and not lint */ #include <assert.h> @@ -127,37 +127,39 @@ result++; } - /* judge endian marker */ - if ((ei->mode & _MODE_UTF32) == 0) { - /* UTF16 */ - if (psenc->ch[0]==0xFE && psenc->ch[1]==0xFF) { - psenc->current_endian = _ENDIAN_BIG; - chlenbak = 0; - goto refetch; - } else if (psenc->ch[0]==0xFF && psenc->ch[1]==0xFE) { - psenc->current_endian = _ENDIAN_LITTLE; - chlenbak = 0; - goto refetch; - } - } else { - /* UTF32 */ - if (psenc->ch[0]==0x00 && psenc->ch[1]==0x00 && - psenc->ch[2]==0xFE && psenc->ch[3]==0xFF) { - psenc->current_endian = _ENDIAN_BIG; - chlenbak = 0; - goto refetch; - } else if (psenc->ch[0]==0xFF && psenc->ch[1]==0xFE && - psenc->ch[2]==0x00 && psenc->ch[3]==0x00) { - psenc->current_endian = _ENDIAN_LITTLE; - chlenbak = 0; - goto refetch; + if (psenc->current_endian == _ENDIAN_UNKNOWN) { + if ((ei->mode & _MODE_FORCE_ENDIAN) == 0) { + /* judge endian marker */ + if ((ei->mode & _MODE_UTF32) == 0) { + /* UTF16 */ + if (psenc->ch[0]==0xFE && psenc->ch[1]==0xFF) { + psenc->current_endian = _ENDIAN_BIG; + chlenbak = 0; + goto refetch; + } else if (psenc->ch[0]==0xFF && psenc->ch[1]==0xFE) { + psenc->current_endian = _ENDIAN_LITTLE; + chlenbak = 0; + goto refetch; + } 
+ } else { + /* UTF32 */ + if (psenc->ch[0]==0x00 && psenc->ch[1]==0x00 && + psenc->ch[2]==0xFE && psenc->ch[3]==0xFF) { + psenc->current_endian = _ENDIAN_BIG; + chlenbak = 0; + goto refetch; + } else if (psenc->ch[0]==0xFF && psenc->ch[1]==0xFE && + psenc->ch[2]==0x00 && psenc->ch[3]==0x00) { + psenc->current_endian = _ENDIAN_LITTLE; + chlenbak = 0; + goto refetch; + } + } + } else { + psenc->current_endian = ei->preffered_endian; } } - if ((ei->mode & _MODE_FORCE_ENDIAN) != 0 || - psenc->current_endian == _ENDIAN_UNKNOWN) - endian = ei->preffered_endian; - else - endian = psenc->current_endian; + endian = psenc->current_endian; /* get wc */ if ((ei->mode & _MODE_UTF32) == 0) { @@ -186,13 +188,13 @@ wc <<= 10; switch (endian) { case _ENDIAN_LITTLE: - if (psenc->ch[2]<0xDC || psenc->ch[2]>0xDF) + if (psenc->ch[3]<0xDC || psenc->ch[3]>0xDF) goto ilseq; wc |= psenc->ch[2]; wc |= (wchar_t)(psenc->ch[3] & 3) << 8; break; case _ENDIAN_BIG: - if (psenc->ch[3]<0xDC || psenc->ch[3]>0xDF) + if (psenc->ch[2]<0xDC || psenc->ch[2]>0xDF) goto ilseq; wc |= psenc->ch[3]; wc |= (wchar_t)(psenc->ch[2] & 3) << 8;