Package: jless
Version: 382-iso262-2
Severity: wishlist
Tags: patch
Please apply the UTF-8 patch.
less-382-iso262-utf8.dpatch: jless-utf8.patch
less-382-iso262-utf8-2.dpatch: bug fix (assertion failure when viewing a binary file)
Original:
http://nijino.homelinux.net/diary/200605.shtml
http://nijino.homelinux.net/tmp/jless-utf8.patch
ITO Keisuke
#! /bin/sh /usr/share/dpatch/dpatch-run
## less-382-iso262-utf8.dpatch
##
## All lines beginning with `## DP:' are a description of the patch.
## DP: Add a "japanese-utf-8" charset entry and iconv-based conversion to UTF-8 (guarded by ARI_UTF8).
@DPATCH@
diff -urNad jless-382-iso262~/charset.c jless-382-iso262/charset.c
--- jless-382-iso262~/charset.c 2010-07-04 12:43:53.000000000 +0900
+++ jless-382-iso262/charset.c 2010-07-04 12:44:58.000000000 +0900
@@ -130,6 +130,11 @@
/* recoginize all JIS except supplement */
SCSASCII | SCSALLSJIS,
ESISO7, ESALLJA, ESSJIS },
+#ifdef ARI_UTF8
+ { "japanese-utf-8", NULL, "8bcccb4c11bc4b95.b127.b",
+ SCSASCII | SCSALLJIS,
+ ESISO7, ESALLJA, ESUTF8 },
+#endif
/* read all KANJI before 1983 */
{ "japanese83-iso7", NULL, "8bcccb4c11bc4b95.b127.b",
diff -urNad jless-382-iso262~/multi.c jless-382-iso262/multi.c
--- jless-382-iso262~/multi.c 2010-07-04 12:43:53.000000000 +0900
+++ jless-382-iso262/multi.c 2010-07-04 12:44:58.000000000 +0900
@@ -753,6 +753,8 @@
mp->startpos = mp->lastpos + 1;
return 1;
}
+#ifdef ARI_UTF8
+#else
} else if (ISUJISKANJI(c0, c1)) {
if (mp->io.scs & SCSJISX0213_2004) {
mp->icharset = UJIS2004;
@@ -785,6 +787,7 @@
}
/* data are recognized as kanji or wrong data, so return 1 */
return 1;
+#endif
} else if (ISUTF8_HEAD(c0) && ISUTF8_REST(c1)) {
/* do nothing. return 1 to get next byte */
return 1;
@@ -1087,28 +1090,28 @@
return;
}
}
- if (mp->io.inputr & ESUJIS) {
+ if (mp->io.inputr & ESUTF8) {
if (internalize_utf8(mp)) {
mp->priority = PUTF8;
return;
}
}
} else if (mp->lastpos - mp->startpos + 1 == 4) {
- if (mp->io.inputr & ESUJIS) {
+ if (mp->io.inputr & ESUTF8) {
if (internalize_utf8(mp)) {
mp->priority = PUTF8;
return;
}
}
} else if (mp->lastpos - mp->startpos + 1 == 5) {
- if (mp->io.inputr & ESUJIS) {
+ if (mp->io.inputr & ESUTF8) {
if (internalize_utf8(mp)) {
mp->priority = PUTF8;
return;
}
}
} else if (mp->lastpos - mp->startpos + 1 == 6) {
- if (mp->io.inputr & ESUJIS) {
+ if (mp->io.inputr & ESUTF8) {
if (internalize_utf8(mp)) {
mp->priority = PUTF8;
return;
@@ -2276,6 +2279,43 @@
}
#endif
+#ifdef ARI_UTF8
+#include <iconv.h>
+#ifndef ICONV_EUC_JP
+#define ICONV_EUC_JP ("eucJP")
+#endif
+//#ifndef ICONV_SHIFT_JIS
+//#define ICONV_SHIFT_JIS ("ms932")
+//#endif
+
+static int iconv_to_utf8(const char* fromcode)
+{
+ /*
+ * Convert the NUL-terminated bytes in cvbuffer from the iconv encoding
+ * named by fromcode to UTF-8 and copy the result (at most 4 bytes)
+ * back over the start of cvbuffer.  Returns TRUE on success and FALSE
+ * when iconv() reports an error; an unknown fromcode trips the assert.
+ */
+ iconv_t cd = iconv_open("UTF-8", fromcode);
+ assert(cd != (iconv_t)(-1));
+
+ size_t inbytesleft = strlen(cvbuffer);
+ char outbuf[4];
+ size_t outbytesleft = 4;
+ char* inptr = cvbuffer;
+ char* outptr = outbuf;
+ size_t ret = iconv(cd, &inptr, &inbytesleft, &outptr, &outbytesleft);
+ iconv_close(cd);
+ /* iconv() returns size_t and signals failure as (size_t)(-1). */
+ if (ret == (size_t)(-1))
+ {
+ return FALSE;
+ }
+ assert(inbytesleft == 0);
+ strncpy(cvbuffer, outbuf, 4 - outbytesleft);
+ return TRUE;
+}
+#endif
static char *convert_to_utf8(c, cs)
int c;
int cs;
@@ -2296,12 +2329,46 @@
cs = CS2CHARSET(cs);
+#ifndef ARI_UTF8
assert(0);
+#endif
if (cs == ASCII || cs == JISX0201ROMAN)
{
assert(cvindex == 1);
cvindex = 0;
return (cvbuffer);
+#ifdef ARI_UTF8
+ } else if (cs == UTF8)
+ {
+ cvindex = 0;
+ return (cvbuffer);
+#ifdef ICONV_SHIFT_JIS
+ } else if (cs == JISX0208_78KANJI || cs == JISX0208KANJI ||
+ cs == JISX0208_90KANJI || cs == JISX0213KANJI1 ||
+ cs == JISX02132004KANJI1)
+ {
+ cvindex--;
+ if (convert_to_sjis(c, cs) == nullcvbuffer)
+ return (nullcvbuffer);
+ iconv_to_utf8(ICONV_SHIFT_JIS);
+ return (cvbuffer);
+ } else if (cs == JISX0201KANA || cs == JISX02132004KANJI2 ||
+ cs == JISX0212KANJISUP)
+#else
+ } else if (cs == JISX0208_78KANJI || cs == JISX0208KANJI ||
+ cs == JISX0208_90KANJI || cs == JISX0213KANJI1 ||
+ cs == JISX02132004KANJI1 ||
+ cs == JISX0201KANA || cs == JISX02132004KANJI2 ||
+ cs == JISX0212KANJISUP)
+#endif
+ {
+ cvindex--;
+ if (convert_to_ujis(c, cs) == nullcvbuffer)
+ return (nullcvbuffer);
+ iconv_to_utf8(ICONV_EUC_JP);
+ return (cvbuffer);
+ }
+#else
} else if (cs == JISX0201KANA)
{
assert(cvindex == 1);
@@ -2324,6 +2391,7 @@
cvindex = 0;
return (nullcvbuffer);
}
+#endif
assert(0);
cvindex = 0;
return (cvbuffer);
#! /bin/sh /usr/share/dpatch/dpatch-run
## less-382-iso262-utf8-2.dpatch
##
## All lines beginning with `## DP:' are a description of the patch.
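## DP: Enable the UTF-8 code unconditionally (drop ARI_UTF8), use EUCJP-MS/ms932 as iconv names, and remove the trailing assert(0) in convert_to_utf8 (bug fix for binary files).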
@DPATCH@
diff -urNad jless-382-iso262~/charset.c jless-382-iso262/charset.c
--- jless-382-iso262~/charset.c 2010-07-18 01:28:13.000000000 +0900
+++ jless-382-iso262/charset.c 2010-07-18 01:28:31.000000000 +0900
@@ -130,11 +130,9 @@
/* recoginize all JIS except supplement */
SCSASCII | SCSALLSJIS,
ESISO7, ESALLJA, ESSJIS },
-#ifdef ARI_UTF8
{ "japanese-utf-8", NULL, "8bcccb4c11bc4b95.b127.b",
SCSASCII | SCSALLJIS,
ESISO7, ESALLJA, ESUTF8 },
-#endif
/* read all KANJI before 1983 */
{ "japanese83-iso7", NULL, "8bcccb4c11bc4b95.b127.b",
diff -urNad jless-382-iso262~/multi.c jless-382-iso262/multi.c
--- jless-382-iso262~/multi.c 2010-07-18 01:28:13.000000000 +0900
+++ jless-382-iso262/multi.c 2010-07-18 01:30:12.000000000 +0900
@@ -753,41 +753,6 @@
mp->startpos = mp->lastpos + 1;
return 1;
}
-#ifdef ARI_UTF8
-#else
- } else if (ISUJISKANJI(c0, c1)) {
- if (mp->io.scs & SCSJISX0213_2004) {
- mp->icharset = UJIS2004;
- mp->cs = JISX02132004KANJI1;
- } else if (mp->io.scs & SCSJISX0213_2000) {
- mp->icharset = UJIS2000;
- mp->cs = JISX0213KANJI1;
- } else {
- mp->icharset = UJIS;
- mp->cs = JISX0208KANJI;
- }
- mp->multiint[mp->intindex] = c0;
- mp->multics[mp->intindex] = mp->icharset;
- mp->multiint[mp->intindex + 1] = c1;
- mp->multics[mp->intindex + 1] = REST_MASK | mp->icharset;
-
- /* Check character whether it has defined glyph or not */
- if (chisvalid_cs(&mp->multiint[mp->intindex],
- &mp->multics[mp->intindex])) {
- /* defined */
- mp->multiint[mp->intindex] = c0 & 0x7f;
- mp->multics[mp->intindex] = mp->cs;
- mp->multiint[mp->intindex + 1] = c1 & 0x7f;
- mp->multics[mp->intindex + 1] = REST_MASK | mp->cs;
- mp->intindex += 2;
- mp->startpos = mp->lastpos + 1;
- } else {
- /* undefined. less ignore them */
- wrongchar(mp);
- }
- /* data are recognized as kanji or wrong data, so return 1 */
- return 1;
-#endif
} else if (ISUTF8_HEAD(c0) && ISUTF8_REST(c1)) {
/* do nothing. return 1 to get next byte */
return 1;
@@ -2279,14 +2244,13 @@
}
#endif
-#ifdef ARI_UTF8
#include <iconv.h>
#ifndef ICONV_EUC_JP
-#define ICONV_EUC_JP ("eucJP")
+#define ICONV_EUC_JP ("EUCJP-MS")
+#endif
+#ifndef ICONV_SHIFT_JIS
+#define ICONV_SHIFT_JIS ("ms932")
#endif
-//#ifndef ICONV_SHIFT_JIS
-//#define ICONV_SHIFT_JIS ("ms932")
-//#endif
static int iconv_to_utf8(const char* fromcode)
{
@@ -2308,7 +2272,6 @@
strncpy(cvbuffer, outbuf, 4 - outbytesleft);
return TRUE;
}
-#endif
static char *convert_to_utf8(c, cs)
int c;
int cs;
@@ -2329,20 +2292,15 @@
cs = CS2CHARSET(cs);
-#ifndef ARI_UTF8
- assert(0);
-#endif
if (cs == ASCII || cs == JISX0201ROMAN)
{
assert(cvindex == 1);
cvindex = 0;
return (cvbuffer);
-#ifdef ARI_UTF8
} else if (cs == UTF8)
{
cvindex = 0;
return (cvbuffer);
-#ifdef ICONV_SHIFT_JIS
} else if (cs == JISX0208_78KANJI || cs == JISX0208KANJI ||
cs == JISX0208_90KANJI || cs == JISX0213KANJI1 ||
cs == JISX02132004KANJI1)
@@ -2354,13 +2312,6 @@
return (cvbuffer);
} else if (cs == JISX0201KANA || cs == JISX02132004KANJI2 ||
cs == JISX0212KANJISUP)
-#else
- } else if (cs == JISX0208_78KANJI || cs == JISX0208KANJI ||
- cs == JISX0208_90KANJI || cs == JISX0213KANJI1 ||
- cs == JISX02132004KANJI1 ||
- cs == JISX0201KANA || cs == JISX02132004KANJI2 ||
- cs == JISX0212KANJISUP)
-#endif
{
cvindex--;
if (convert_to_ujis(c, cs) == nullcvbuffer)
@@ -2368,31 +2319,6 @@
iconv_to_utf8(ICONV_EUC_JP);
return (cvbuffer);
}
-#else
- } else if (cs == JISX0201KANA)
- {
- assert(cvindex == 1);
- cvbuffer[0] |= 0x80;
- cvindex = 0;
- return (cvbuffer);
- } else if (cs == JISX0208_78KANJI || cs == JISX0208KANJI ||
- cs == JISX0208_90KANJI || cs == JISX0213KANJI1 ||
- cs == JISX02132004KANJI1)
- {
- cvindex = 0;
- return (cvbuffer);
- } else if (cs == JISX0213KANJI2)
- {
- cvindex = 0;
- return (cvbuffer);
- } else if (cs == UTF8)
- {
- /* ? */
- cvindex = 0;
- return (nullcvbuffer);
- }
-#endif
- assert(0);
cvindex = 0;
return (cvbuffer);
}