On Wed, Aug 08, 2007 at 01:57:34PM -0400, Frédéric Brière wrote:
> I was thinking of giving this issue a try when a little Googling turned
> up a very thorough patch from Phillip Vandry that makes xjdic
> locale-aware: <http://tzone.org/~vandry/xjdic/xjdic-24.locale.patch>.
There's actually a bug in the original patch which can trigger a
segfault on amd64: iconv() takes its byte counts as size_t *, but the
patch declared them as int, which is narrower than size_t there.
Here's a corrected version, refreshed to apply to 24-10; I'm also
including a stand-alone patch for the bugfix itself.
diff --git a/xjdfrontend.c b/xjdfrontend.c
index 58cc95f..c4135a3 100644
--- a/xjdfrontend.c
+++ b/xjdfrontend.c
@@ -30,6 +30,15 @@
#include <signal.h>
#include <errno.h>
#include <unistd.h>
+
+#define HAVE_LOCALE
+#ifdef HAVE_LOCALE
+#include <locale.h>
+#include <iconv.h>
+#include <langinfo.h>
+#include <wchar.h>
+#endif
+
#include "xjdic.h"
/* Paul Burchard supplied a patch to provide BSD compatibility for xjdic
@@ -100,7 +109,13 @@ int ShiftJIS = FALSE,NoSJIS=FALSE;
unsigned char instr[256],radkanj[250][2];
int radnos[250];
unsigned char kanatab[NRKANA*2][7];
-int Omode = 0,Smode = 0,Dmode = 0,AKanaMode;
+#ifdef HAVE_LOCALE
+int Omode = 3;
+int new_input_mode = 1;
+#else
+int Omode = 0;
+#endif
+int Smode = 0,Dmode = 0,AKanaMode;
int DRow,DCol,MaxY=MAXLINES,MaxX=MAXCOLS-1,KFlushRes,nok;
unsigned long hittab[NOHITS];
int verblen,DispHit,ksp,hitind,FirstKanj = 0,prieng = FALSE,Extopen=FALSE,NoSkip;
@@ -178,6 +193,13 @@ int RVACTIVE = TRUE;
int DicNum;
long DicLoc;
+#define GETKBSTR_SPACE_AFTER_PROMPT 1
+#define GETKBSTR_ALLOW_HELP 2
+#define GETKBSTR_ALLOW_ROMAJI 4
+#define GETKBSTR_CTRLD 8
+#define GETKBSTR_CTRLZ 16
+#define GETKBSTR_ROMAJI_DEFAULT 32
+
/*====== Prototypes========================================================*/
FILE *xfopen(char *file_name, char *file_mode, int *xfilelen);
@@ -627,6 +649,104 @@ void jis2sjis(unsigned char *p1,unsigned char *p2) /* courtesy of Ken Lunde */
*p2 = c2 + cellOffset;
}
+#ifdef HAVE_LOCALE
+/*
+ * Locale support:
+ *
+ * Because xjdic uses EUC internally extensively, we only convert
+ * from EUC to the locale character set on output and convert to
+ * EUC on input. iconv is used for this. The input code (see
+ * locale_GetKBStr) keeps the entire input string in a wide
+ * character array and converts it to EUC-JP before returning it.
+ * -pkv Tue Sep 23 15:45:59 EDT 2003
+ */
+/*
+ * get_locale_charset: return the name of the character set used by the
+ * current locale (as reported by nl_langinfo(CODESET)), or NULL if none
+ * could be obtained.
+ *
+ * setlocale(LC_CTYPE, "") is called on the first invocation only.
+ * The name is copied into static storage because the pointer returned by
+ * nl_langinfo() may be invalidated by a later setlocale() or
+ * nl_langinfo() call; the copy stays valid for the life of the program.
+ */
+static char *get_locale_charset(void)
+{
+static int locale_initted = 0;
+static char charset_buf[64];
+static char *locale_charset = NULL;
+char *codeset;
+
+    if (!locale_initted) {
+        setlocale(LC_CTYPE, "");
+        locale_initted = 1;
+    }
+    if (!locale_charset) {
+        codeset = nl_langinfo(CODESET);
+        if (codeset) {
+            snprintf(charset_buf, sizeof(charset_buf), "%s", codeset);
+            locale_charset = charset_buf;
+        }
+    }
+
+    return locale_charset;
+}
+
+/*======locale_output (convert EUC to current locale's charset) =======*/
+/*
+ * Write one EUC-JP character to stdout in the locale's character set.
+ *
+ * length     - number of bytes that make up the character
+ * c1, c2, c3 - the bytes themselves; only the first `length` are used
+ *
+ * The bytes are written unconverted (pass-through) when the locale names
+ * no charset, when the charset is already EUC-JP, or when iconv cannot
+ * convert from EUC-JP to it.  Once pass-through has been selected it
+ * stays selected for all later calls.
+ */
+void locale_output(unsigned int length, unsigned char c1, unsigned char c2, unsigned char c3)
+{
+static int have_descr = 0;
+static int passthrough = 0;
+static iconv_t cd;
+const char *codeset;
+char euc[4];
+char converted[64]; /* this better be big enough */
+char *src, *dst;
+size_t src_left, dst_left;
+
+    euc[0] = c1;
+    euc[1] = c2;
+    euc[2] = c3;
+
+    if (!passthrough) {
+        codeset = get_locale_charset();
+        if (!codeset) {
+            fprintf(stderr, "locale does not specify a target charset, using EUC-JP!\n");
+            passthrough = 1;
+        } else if (strcmp(codeset, "EUC-JP") == 0) {
+            /* No conversion is required, so pretend conversion failed:
+               the data is passed straight through, which may be more
+               efficient than calling iconv with an identity descriptor */
+            passthrough = 1;
+        } else if (!have_descr) {
+            cd = iconv_open(codeset, "EUC-JP");
+            if (cd == (iconv_t)-1) {
+                fprintf(stderr, "conversion from EUC-JP to %s not supported. using EUC-JP.\n", codeset);
+                passthrough = 1;
+            } else {
+                have_descr = 1;
+            }
+        }
+    }
+
+    if (passthrough) {
+        fwrite(euc, 1, length, stdout);
+        return;
+    }
+
+    /* The caller is supposed to provide a valid, complete multibyte
+       sequence, so errors about invalid input are ignored; an output
+       buffer that is too small is likewise not checked for. */
+    src = euc;
+    src_left = length;
+    dst = converted;
+    dst_left = sizeof(converted);
+    iconv(cd, &src, &src_left, &dst, &dst_left);
+    fwrite(converted, 1, dst - converted, stdout);
+
+    /* This function might not be called again to output the next
+       character, so return the output to the initial shift state.
+       Wasteful for charsets that use shift sequences to enter and exit
+       Kanji mode (ISO-2022), but a no-op for encodings such as UTF-8. */
+    src_left = 0;
+    dst = converted;
+    dst_left = sizeof(converted);
+    iconv(cd, NULL, &src_left, &dst, &dst_left);
+    if (dst != converted)
+        fwrite(converted, 1, dst - converted, stdout);
+}
+#endif /* HAVE_LOCALE */
+
/*====KEOS===End of screen processing for KFlush==================*/
int KEOS (unsigned char *msg)
{
@@ -654,6 +774,7 @@ int KEOS (unsigned char *msg)
int KFlush(unsigned char *msg)
{
unsigned char *kptr,ktemp[512];
+ unsigned char *p;
int retf,it,j;
int Test;
@@ -677,6 +798,20 @@ int KFlush(unsigned char *msg)
strcpy(ktemp,ktemp+1);
}
it = strlen(ktemp);
+
+ /* Look for instances of RVon and RVoff inside the string. */
+ /* These do not consume any columns -pkv */
+ p = ktemp;
+ while (p && (*p) && (p = strstr(p, RVon))) {
+ p += strlen(RVon);
+ it -= strlen(RVon);
+ }
+ p = ktemp;
+ while (p && (*p) && (p = strstr(p, RVoff))) {
+ p += strlen(RVoff);
+ it -= strlen(RVoff);
+ }
+
if (DCol+it < Test)
{
DCol = DCol+it+1;
@@ -690,7 +825,9 @@ int KFlush(unsigned char *msg)
if (!retf) return (FALSE);
}
KOut(ktemp);
- if (DCol <= MAXCOLS) KOut(" ");
+ /* if (DCol <= MAXCOLS) KOut(" "); */
+ /* -pkv */
+ if (DCol <= MaxX) KOut(" ");
kptr = (unsigned char *)strtok(NULL," ");
}
KOut("\n");
@@ -757,6 +894,19 @@ void KOut(unsigned char *sout)
printf("%c%c",c1,c2);
i++;
break;
+
+#ifdef HAVE_LOCALE
+ case 3 : /* locale's character set */
+ if (c1 == 0x8f)
+ {
+ locale_output(3, c1, c2, sout[i+2]);
+ i+=2;
+ break;
+ }
+ locale_output(2, c1, c2, 0);
+ i++;
+ break;
+#endif /* HAVE_LOCALE */
}
}
}
@@ -1949,7 +2099,7 @@ void DoJIS()
/*===== GetKBStr=== Collect ASCII or JIS string from keyboard=========*/
-void GetKBStr(unsigned char *prompt)
+void legacy_GetKBStr(unsigned char *prompt)
{
int ShowIt,escf,bit8,i;
unsigned char c;
@@ -2019,6 +2169,200 @@ void GetKBStr(unsigned char *prompt)
printf("\n\r");
}
+#ifdef HAVE_LOCALE
+/*
+ * locale_GetKBStr: collect one line of keyboard input typed in the
+ * locale's multibyte encoding.
+ *
+ * Bytes from getcharxx() are assembled into wide characters with
+ * mbrtowc() and echoed as they are accepted.  On Enter (or a NUL) the
+ * collected line is converted to EUC-JP (through iconv when the locale
+ * charset is not already EUC-JP) and stored, NUL-terminated, in fbuff.
+ *
+ * prompt   - prompt text, redrawn after a backspace or line kill
+ * specials - wide characters that end input at once; the character is
+ *            returned narrowed to char
+ * flags    - bitwise OR of the GETKBSTR_* values
+ *
+ * Returns 0 when a line was read (fbuff may be empty), after the help
+ * text was shown, or after romaji input was handed to DoRomaji() and
+ * GetEUC(); 4 or 26 for Ctrl-D / Ctrl-Z when the matching flag is set;
+ * otherwise the special character that was typed.
+ *
+ * Input beyond the capacity of the internal wide-character buffer is
+ * silently dropped rather than written past the end of the buffer.
+ */
+char locale_GetKBStr(unsigned char *prompt, const wchar_t *specials, int flags)
+{
+char c;
+char *source_charset;
+iconv_t descr;
+int length = 0;
+int done = 0;
+int i;
+mbstate_t instate;
+size_t result;
+int use_iconv;
+char *convert_buffer;
+wchar_t wbuf[512];
+const int wbuf_max = (int)(sizeof(wbuf) / sizeof(wbuf[0]));
+char *inbuf_p, *outbuf_p;
+size_t inbytesleft, outbytesleft;
+
+    fbuff[0] = 0;
+
+    memset(&instate, 0, sizeof(instate));
+
+    /* the following calls setlocale() if it has not been done already */
+    source_charset = get_locale_charset();
+
+    while (!done) {
+        /* See if we can get a character */
+        c = getcharxx();
+        result = mbrtowc(&(wbuf[length]), &c, 1, &instate);
+        if (result == (size_t)-1) {
+            /* illegal byte sequence */
+            memset(&instate, 0, sizeof(instate)); /* reset state */
+            /* skip byte */
+            continue;
+        } else if (result == (size_t)-2) {
+            /* incomplete multibyte character: wait for more bytes */
+            continue;
+        } else if (result == 0) {
+            /* Got NUL character */
+            done = 1;
+            break;
+        } else {
+            if (wcschr(specials, wbuf[length])) {
+                /* XXX this is not a proper cast. It is a bug that I
+                   depend on this working */
+                return (char)(wbuf[length]);
+            } else if ((wbuf[length] == L'\n') || (wbuf[length] == L'\r')) {
+                done = 1;
+                break;
+            } else if ((wbuf[length] == L'\004') && (flags & GETKBSTR_CTRLD)) {
+                return 4;
+            } else if ((wbuf[length] == L'\032') && (flags & GETKBSTR_CTRLZ)) {
+                return 26;
+            } else if ((wbuf[length] == L'?') && (flags & GETKBSTR_ALLOW_HELP)) {
+                DRow = 0;
+                for (i = 0; strcmp(Help[i], "$$$")!=0;i++) {
+                    strcpy(KLine, Help[i]);
+                    if (!KFlush("Continue Help Display? (y/n)")) break;
+                }
+                return 0;
+            } else if ((wbuf[length] == L'@') && (flags & GETKBSTR_ALLOW_ROMAJI)) {
+                DoRomaji('@');
+                GetEUC(fbuff);
+                return 0;
+            } else if ((wbuf[length] == L'#') && (flags & GETKBSTR_ALLOW_ROMAJI)) {
+                DoRomaji('#');
+                GetEUC(fbuff);
+                return 0;
+            } else if ((wbuf[length] == L'\010') || (wbuf[length] == L'\177')) {
+                /* backspace */
+                if (length) length--;
+                wbuf[length] = L'\0';
+
+                /* redraw once with a trailing blank to wipe the erased
+                   character, then again without it to place the cursor */
+                printf("\r%s%s%s%s%ls ", RVon, prompt, RVoff, (flags &
+                    GETKBSTR_SPACE_AFTER_PROMPT) ?
+                    " " : "", wbuf);
+                fflush(stdout);
+                printf("\r%s%s%s%s%ls", RVon, prompt, RVoff, (flags &
+                    GETKBSTR_SPACE_AFTER_PROMPT) ? " " : "", wbuf);
+                fflush(stdout);
+            } else if (wbuf[length] == L'\025') {
+                /* line kill */
+                /* send more erase sequences than we have to, to
+                   make sure wide characters get erased even on buggy
+                   terminals */
+                while (length--) printf("\b\b  \b\b");
+                length = 0;
+                printf("\r%s%s%s%s", RVon, prompt, RVoff, (flags &
+                    GETKBSTR_SPACE_AFTER_PROMPT) ? " " : "");
+            } else if ((flags & GETKBSTR_ROMAJI_DEFAULT) &&
+                (((wbuf[length] >= L'a') && (wbuf[length] <= L'z')) ||
+                ((wbuf[length] >= L'A') && (wbuf[length] <= L'Z')))) {
+                /* romaji mode by default, and character is a letter */
+                if ((wbuf[length] == L'L') || (wbuf[length] == L'l')) {
+                    /* back to normal mode */
+                    flags &= ~GETKBSTR_ROMAJI_DEFAULT;
+                } else {
+                    ungetc((char)(wbuf[length]), stdin); /* XXX */
+                    DoRomaji('@');
+                    GetEUC(fbuff);
+                    return 0;
+                }
+            } else if (iswprint(wbuf[length]) && (length + 1 < wbuf_max)) {
+                /* accept and echo the character; the bound check keeps
+                   wbuf[length+1] (the terminator) inside the array */
+                wbuf[length+1] = L'\0';
+                printf("%ls", wbuf+(length++));
+            }
+        }
+    }
+
+    if (source_charset && (strcmp(source_charset, "EUC-JP"))) {
+        descr = iconv_open("EUC-JP", source_charset);
+        if (descr == (iconv_t)-1) use_iconv = 0;
+        else use_iconv = 1;
+    } else {
+        use_iconv = 0;
+    }
+
+    convert_buffer = malloc(MB_CUR_MAX+1);
+    if (!convert_buffer) {
+        /* out of memory: report an empty line, and do not leak the
+           conversion descriptor opened above */
+        if (use_iconv) iconv_close(descr);
+        fbuff[0] = 0;
+        return 0;
+    }
+
+    memset(&instate, 0, sizeof(instate));
+
+    outbuf_p = fbuff;
+    outbytesleft = sizeof(fbuff)-1;    /* keep one byte for the NUL */
+
+    for (i = 0; i < length; i++) {
+        result = wcrtomb(convert_buffer, wbuf[i], &instate);
+        if (result == (size_t)-1)
+            break;    /* not representable: never use -1 as a byte count */
+
+        if (use_iconv) {
+            inbuf_p = convert_buffer;
+            inbytesleft = result;
+            if (iconv(descr, &inbuf_p, &inbytesleft, &outbuf_p, &outbytesleft) == (size_t)-1)
+                break;
+        } else {
+            /* no conversion: copy the bytes if they still fit */
+            if (outbytesleft < result) break;
+            memcpy(outbuf_p, convert_buffer, result);
+            outbuf_p += result;
+            outbytesleft -= result;
+        }
+    }
+    *outbuf_p = 0;
+
+    if (use_iconv) iconv_close(descr);
+    free(convert_buffer);
+
+    return 0;
+}
+
+/*
+ * convert_to_euc: convert the NUL-terminated string `in`, encoded in the
+ * locale's character set, to EUC-JP.
+ *
+ * in     - source string
+ * out    - destination buffer
+ * outlen - size of `out` in bytes, including room for the terminating NUL
+ *
+ * When the locale charset is unknown, is already EUC-JP, or cannot be
+ * converted by iconv, the input is copied unchanged.  The result is
+ * always NUL-terminated and never exceeds outlen bytes; input that does
+ * not fit is truncated.  Nothing is written when outlen is 0.
+ */
+void convert_to_euc(char *in, char *out, size_t outlen)
+{
+iconv_t descr;
+char *source_charset;
+size_t inbytesleft;
+size_t copylen;
+
+    if (!outlen) return;
+
+    source_charset = get_locale_charset();
+    descr = (iconv_t)-1;
+    if (source_charset && (strcmp(source_charset, "EUC-JP")))
+        descr = iconv_open("EUC-JP", source_charset);
+
+    if (descr == (iconv_t)-1) {
+        /* no conversion possible or needed: bounded, terminated copy */
+        copylen = strlen(in);
+        if (copylen >= outlen) copylen = outlen - 1;
+        memcpy(out, in, copylen);
+        out[copylen] = 0;
+        return;
+    }
+
+    outlen--;    /* reserve the last byte for the terminating NUL */
+    inbytesleft = strlen(in);
+    /* iconv advances `out` past the bytes it produced */
+    iconv(descr, &in, &inbytesleft, &out, &outlen);
+    *out = 0;
+    iconv_close(descr);
+}
+#endif /* HAVE_LOCALE */
+
+/*
+ * GetKBStr: read a string from the keyboard, using the locale-aware
+ * reader when new_input_mode is set (and locale support is compiled in)
+ * and the legacy reader otherwise, then move to the start of a new line.
+ */
+void GetKBStr(unsigned char *prompt)
+{
+#ifdef HAVE_LOCALE
+    if (!new_input_mode) {
+        legacy_GetKBStr(prompt);
+    } else {
+        locale_GetKBStr(prompt, L"", 0);
+    }
+#else
+    legacy_GetKBStr(prompt);
+#endif
+    fputs("\r\n", stdout);
+}
+
/*===== OneShot === Collect and set single filter=============*/
void OneShot()
@@ -2690,6 +3034,13 @@ main(int argc,char **argv)
Omode = 1;
printf("Output mode set to EUC\n");
}
+#ifdef HAVE_LOCALE
+ if (strtmp[0] == 'l')
+ {
+ Omode = 3;
+ printf("Output mode set to locale dependant\n");
+ }
+#endif
continue;
}
#ifdef XJDCLSERV
@@ -2802,6 +3153,16 @@ main(int argc,char **argv)
NoSJIS = TRUE;
printf("EUC (No Shift-JIS) operation enforced\n");
}
+ if ((xap[0] == '-') && (xap[1] == 'O'))
+ {
+#ifdef HAVE_LOCALE
+ new_input_mode = 0;
+ if (Omode == 3) Omode = 0;
+ printf("Legacy input/output mode selected (no locale support)\n");
+#else
+ printf("Locale support not compiled; -O ignored\n");
+#endif
+ }
if ((xap[0] == '-') && (xap[1] == 'v'))
{
Jverb = FALSE;
@@ -2853,7 +3214,7 @@ exit(0);
{
GetWinSize(); /* Just in case the screen has changed */
sprintf(kbprompt,"%sXJDIC [%d:%s] SEARCH KEY:%s ",RVon,CurrDic,DicName(CurrDic),RVoff);
- sprintf(kbprompt2,"XJDIC [%d:%s] SEARCH KEY: ",CurrDic,DicName(CurrDic));
+ sprintf(kbprompt2,"XJDIC [%d:%s] SEARCH KEY:",CurrDic,DicName(CurrDic));
if (GDmode)
{
sprintf(kbprompt,"%sXJDIC [GLOBAL] SEARCH KEY:%s ",RVon,RVoff);
@@ -2862,6 +3223,9 @@ exit(0);
printf("\n\r%s",kbprompt);
c = 0;
cmdmode = FALSE;
+#ifdef HAVE_LOCALE
+if (!new_input_mode) {
+#endif
strf = FALSE;
escf = FALSE;
bit8 = FALSE;
@@ -2970,8 +3334,33 @@ exit(0);
if ((instr[i] == 'B')&&(instr[i-1] == '(')&&(instr[i-2] == 0x1b)) break;
}
fseek(stdin,0L,SEEK_END); /*kill any leftovers*/
+ GetEUC(fbuff);
+#ifdef HAVE_LOCALE
+} else { /* if (!new_input_mode) */
+ /* new locale based code */
+ if (!clipmode) {
+ c = locale_GetKBStr(kbprompt2, L"!{}$%*&^=/-:\'+\\;][|_`",
+ GETKBSTR_SPACE_AFTER_PROMPT|GETKBSTR_ALLOW_HELP|
+ ((KImode == 0) ? GETKBSTR_ROMAJI_DEFAULT : 0)|
+ GETKBSTR_ALLOW_ROMAJI|GETKBSTR_CTRLD|GETKBSTR_CTRLZ);
+
+ if (c > 0) cmdmode = TRUE;
+ if (c == 4) {
+ cbreakoff();
+ exit(0);
+ } else if (c == 26) {
+ cbreakoff();
+ printf("\nSuspending XJDIC. Type `fg' to resume.\n");
+ pid = getpid();
+ kill(pid,sig);
+ cbreakon();
+ cmdmode = FALSE;
+ }
+ }
+}
+#endif /* HAVE_LOCALE */
/* "bye" is the end of the run */
- if ((instr[2] == 'e')&&(instr[1] == 'y')&&(instr[0] == 'b'))
+ if ((fbuff[2] == 'e')&&(fbuff[1] == 'y')&&(fbuff[0] == 'b'))
{
cbreakoff();
exit(0);
@@ -2986,7 +3375,7 @@ exit(0);
clipmode = TRUE;
continue;
}
- if (c == '}') /* matching { */
+ if /* { */ (c == '}')
{
printf("\r \r");
RVtoggle();
@@ -3192,6 +3581,7 @@ exit(0);
DoKANJI();
break;
}
+ continue;
}
if (clipmode)
{
@@ -3210,12 +3600,6 @@ exit(0);
fgets(clipstring1,50,fclip);
fclose(fclip);
if (clipstring1[strlen(clipstring1)-1] < 32) clipstring1[strlen(clipstring1)-1] = 0;
- if (strcmp(clipstring1,"quit") == 0)
- {
- clipmode = FALSE;
- printf("\nLeaving Clipboard mode\n");
- break;
- }
if (strcmp(clipstring1,clipstring2) == 0)
{
continue;
@@ -3224,13 +3608,30 @@ exit(0);
{
strcpy(clipstring2,clipstring1);
strcpy(instr,clipstring1);
+#ifdef HAVE_LOCALE
+ if (new_input_mode)
+ convert_to_euc(instr, fbuff, sizeof(fbuff));
+ else
+#endif
+ GetEUC(fbuff);
+ if (strcmp(fbuff, "quit") == 0) {
+ clipmode = FALSE;
+ printf("\nLeaving Clipboard mode\n");
+ fbuff[0] = 0;
+ break;
+ }
break;
}
}
}
- if(strlen(instr) < 2) continue;
- GetEUC(fbuff);
- if (escf) KOut(fbuff);
+#ifdef HAVE_LOCALE
+ if (!new_input_mode) {
+#endif
+ if (escf) KOut(fbuff);
+#ifdef HAVE_LOCALE
+ }
+#endif
+ if(strlen(fbuff) < 2) continue;
snprintf(tempout,sizeof(tempout),"\nSearching for: %s%s%s\n",RVon,fbuff,RVoff);
KOut(tempout);
Dmode = 0;
diff --git a/xjdic.1 b/xjdic.1
index 3a812ac..4de9f15 100644
--- a/xjdic.1
+++ b/xjdic.1
@@ -115,11 +115,22 @@ specify a dictionary file to use (up to 9 may be specified.)
specify a kanji data file to use.
.At
-.B -j j/e/s
+.B -j j/e/s/l
[CL,SA]
.Ap
Specify the output coding for Japanese text (j=JIS, e=EUC, s=Shift-JIS)
+l=Locale-based output: text is written in the character set
+specified by the current system locale.
+
+.At
+.B -O
+[CL,SA]
+.Ap
+Request the old input code. Also selects -j j (which controls output) unless
+overridden. The old code does not respect the current locale but it does
+EUC/JIS detection on input.
+
.At
.B -P port_no
[CL,SV]
commit 47f2736e9114fc1ac9e7074181374b56ee29f20c
Author: Frédéric Brière <[email protected]>
Date: Tue Oct 27 11:30:11 2015 -0400
iconv() requires size_t, not int
diff --git a/xjdfrontend.c b/xjdfrontend.c
index 3b3e86d..200a065 100644
--- a/xjdfrontend.c
+++ b/xjdfrontend.c
@@ -2162,7 +2162,7 @@ int use_iconv;
char *convert_buffer;
wchar_t wbuf[512];
char *inbuf_p, *outbuf_p;
-int inbytesleft, outbytesleft;
+size_t inbytesleft, outbytesleft;
fbuff[0] = 0;
@@ -2301,13 +2301,13 @@ int inbytesleft, outbytesleft;
}
/* output in fbuff */
-void convert_to_euc(char *in, char *out, int outlen)
+void convert_to_euc(char *in, char *out, size_t outlen)
{
iconv_t descr;
int use_iconv;
char *source_charset;
char *inbuf_p;
-int inbytesleft;
+size_t inbytesleft;
source_charset = get_locale_charset();
if (source_charset && (strcmp(source_charset, "EUC-JP"))) {