Hi Ludovic, Jim!

I was thinking of giving this issue a try when a little Googling turned
up a very thorough patch from Phillip Vandry that makes xjdic
locale-aware: <http://tzone.org/~vandry/xjdic/xjdic-24.locale.patch>.
(Turns out I was wrong in assuming this wouldn't be too hard to do.)

This patch also includes some cleanup and bugfixes, which you may or may
not feel comfortable with.  I thought of splitting it in two, but it's
basically one big patch over one big file, so this is not easily doable.

I'm attaching a version that's slightly modified to apply to 24-6.  I'm
also attaching a cheap hack I wrote to disable locale support when
TERM=kterm, in an attempt to have xjdic behave correctly when started in
a kterm when LANG is still set to a legacy encoding like latin-1.  (As I
used to do until recently.)

After applying both patches (in order), all that remains is to pass
-DHAVE_LOCALE to gcc.  No further build-deps are required.


-- 
< nobse> bleh... last night I had a dream... someone NMU'ed vim...
         nightmare
                -- in #debian-devel
--- xjdfrontend.c	2003/09/14 15:25:30	1.1
+++ xjdfrontend.c	2004/04/25 17:19:18
@@ -26,7 +26,15 @@
 #include <stdlib.h>
 #include <signal.h>
 #include <errno.h>
 #include <unistd.h>
+
+#ifdef HAVE_LOCALE
+#include <locale.h>
+#include <iconv.h>
+#include <langinfo.h>
+#include <wchar.h>
+#endif
+
 #include "xjdic.h"
 
 /*    Paul Burchard supplied a patch to provide BSD compatibility for xjdic
@@ -90,7 +99,13 @@
 unsigned char instr[256],radkanj[250][2];
 int radnos[250];
 unsigned char kanatab[NRKANA*2][7];
-int Omode = 0,Smode = 0,Dmode = 0,AKanaMode;
+#ifdef HAVE_LOCALE
+int Omode = 3;
+int new_input_mode = 1;
+#else
+int Omode = 0;
+#endif
+int Smode = 0,Dmode = 0,AKanaMode;
 int DRow,DCol,MaxY=MAXLINES,MaxX=MAXCOLS-1,KFlushRes,nok;
 unsigned long hittab[NOHITS];
 int verblen,DispHit,ksp,hitind,FirstKanj = 0,prieng = FALSE,Extopen=FALSE,NoSkip;
@@ -168,6 +183,13 @@
 int DicNum;
 long DicLoc;
 
+#define GETKBSTR_SPACE_AFTER_PROMPT 1
+#define GETKBSTR_ALLOW_HELP 2
+#define GETKBSTR_ALLOW_ROMAJI 4
+#define GETKBSTR_CTRLD 8
+#define GETKBSTR_CTRLZ 16
+#define GETKBSTR_ROMAJI_DEFAULT 32
+
 /*====== Prototypes========================================================*/
 
 FILE  *xfopen(char *file_name, char *file_mode, int *xfilelen);
@@ -605,6 +627,104 @@
     *p2 = c2 + cellOffset;
 }
 
+#ifdef HAVE_LOCALE
+/*
+ * Locale support:
+ *
+ * Because xjdic uses EUC internally extensively, we only convert
+ * from EUC to the locale character set on output and convert to
+ * EUC on input. iconv is used for this. The input code (see
+ * locale_GetKBStr) keeps the entire input string in a wide
+ * character array and converts it to EUC-JP before returning it.
+ * -pkv Tue Sep 23 15:45:59 EDT 2003
+ */
+static char *get_locale_charset(void)
+{
+static int locale_initted = 0;
+static char *locale_charset = NULL;
+
+	if (!locale_initted) {
+		setlocale(LC_CTYPE, "");
+		locale_initted = 1;
+	}
+	if (!locale_charset)
+		locale_charset = nl_langinfo(CODESET);
+
+	return locale_charset;
+}
+
+/*======locale_output  (convert EUC to current locale's charset) =======*/
+void locale_output(unsigned int length, unsigned char c1, unsigned char c2, unsigned char c3)
+{
+static int iconv_initted = 0;
+static int conversion_failed = 0;
+static iconv_t descr;
+char *target_charset;
+char inbuf[4];
+char outbuf[64]; /* this better be big enough */
+char *inbuf_p, *outbuf_p;
+size_t inbytesleft, outbytesleft;
+
+	inbuf[0] = c1;
+	inbuf[1] = c2;
+	inbuf[2] = c3;
+	if (conversion_failed) {
+		fwrite(&(inbuf[0]), 1, length, stdout);
+		return;
+	}
+	if (!(target_charset = get_locale_charset())) {
+		fprintf(stderr, "locale does not specify a target charset, using EUC-JP!\n");
+		conversion_failed = 1;
+		locale_output(length, c1, c2, c3);
+		return;
+	}
+
+	if (!strcmp(target_charset, "EUC-JP")) {
+		/* if no conversion is required, then pretend conversion failed. This
+		   will cause the data to be passed straight through and may be more
+		   efficient than calling iconv with an identity descriptor */
+		conversion_failed = 1;
+		locale_output(length, c1, c2, c3);
+		return;
+	}
+	if (!iconv_initted) {
+		descr = iconv_open(target_charset, "EUC-JP");
+		if (descr == (iconv_t)-1) {
+			fprintf(stderr, "conversion from EUC-JP to %s not supported. using EUC-JP.\n", target_charset);
+			conversion_failed = 1;
+			locale_output(length, c1, c2, c3);
+			return;
+		}
+		iconv_initted = 1;
+	}
+
+	inbuf_p = &(inbuf[0]);
+	outbuf_p = &(outbuf[0]);
+	inbytesleft = length;
+	outbytesleft = sizeof(outbuf);
+	/* The caller is supposed to provide a valid, complete multibyte sequence as */
+	/* input so we will ignore errors concerning invalid input. And if the output */
+	/* buffer is not big enough, let's just fail, so don't check for that either */
+
+	iconv(descr, &inbuf_p, &inbytesleft, &outbuf_p, &outbytesleft);
+
+	fwrite(&(outbuf[0]), 1, outbuf_p - &(outbuf[0]), stdout);
+
+	/* This function might not be called again to output the next character
+	   so put the output back into the initial state. This is wasteful for
+	   character sets that need to use shift sequences to enter and exit
+	   Kanji mode (ISO-2022) but xjdic already has that problem, and besides,
+	   this has no effect if the character encoding is something like UTF-8 */
+
+	inbytesleft = 0;
+	outbytesleft = sizeof(outbuf);
+	outbuf_p = &(outbuf[0]);
+	iconv(descr, NULL, &inbytesleft, &outbuf_p, &outbytesleft);
+	if (outbuf_p - &(outbuf[0]))
+		fwrite(&(outbuf[0]), 1, outbuf_p - &(outbuf[0]), stdout);
+}
+#endif /* HAVE_LOCALE */
+
 /*====KEOS===End of screen processing for KFlush==================*/
 int KEOS (unsigned char *msg)
 {
@@ -632,6 +752,7 @@
 int KFlush(unsigned char *msg)
 {
 	unsigned char *kptr,ktemp[512];
+	unsigned char *p;
 	int retf,it,j;
 	int Test;
 
@@ -655,6 +776,20 @@
 			strcpy(ktemp,ktemp+1);
 		}
 		it = strlen(ktemp);
+
+		/* Look for instances of RVon and RVoff inside the string. */
+		/* These do not consume any columns -pkv */
+		p = ktemp;
+		while (p && (*p) && (p = strstr(p, RVon))) {
+			p += strlen(RVon);
+			it -= strlen(RVon);
+		}
+		p = ktemp;
+		while (p && (*p) && (p = strstr(p, RVoff))) {
+			p += strlen(RVoff);
+			it -= strlen(RVoff);
+		}
+
 		if (DCol+it < Test)
 		{
 			DCol = DCol+it+1;
@@ -668,7 +803,9 @@
 			if (!retf) return (FALSE);
 		}
 		KOut(ktemp);
-		if (DCol <= MAXCOLS) KOut(" ");
+		/* if (DCol <= MAXCOLS) KOut(" "); */
+		/* -pkv */
+		if (DCol <= MaxX) KOut(" ");
 		kptr = (unsigned char *)strtok(NULL," ");
 	}
 	KOut("\n");
@@ -735,6 +872,19 @@
 			printf("%c%c",c1,c2);
 			i++;
 			break;
+
+#ifdef HAVE_LOCALE
+		case 3 : /* locale's character set */
+			if (c1 == 0x8f)
+			{
+				locale_output(3, c1, c2, sout[i+2]);
+				i+=2;
+				break;
+			}
+			locale_output(2, c1, c2, 0);
+			i++;
+			break;
+#endif /* HAVE_LOCALE */
 		}
 	}
 }
@@ -1927,7 +2077,7 @@
 
 /*=====  GetKBStr=== Collect ASCII or JIS string from keyboard=========*/
 
-void GetKBStr(unsigned char *prompt)
+void legacy_GetKBStr(unsigned char *prompt)
 {
 	int ShowIt,escf,bit8,i;
 	unsigned char c;
@@ -1997,6 +2147,200 @@
 	printf("\n\r");
 }
 
+#ifdef HAVE_LOCALE
+char locale_GetKBStr(unsigned char *prompt, const wchar_t *specials, int flags)
+{
+char c;
+char *source_charset;
+iconv_t descr;
+int length = 0;
+int done = 0;
+int i;
+mbstate_t instate;
+size_t result;
+int use_iconv;
+char *convert_buffer;
+wchar_t wbuf[512];
+char *inbuf_p, *outbuf_p;
+int inbytesleft, outbytesleft;
+
+	fbuff[0] = 0;
+
+	memset(&instate, 0, sizeof(instate));
+
+	/* the following calls setlocale() if that has not been done already */
+	source_charset = get_locale_charset();
+
+	while (!done) {
+		/* See if we can get a character */
+		c = getcharxx();
+		result = mbrtowc(&(wbuf[length]), &c, 1, &instate);
+		if (result == -1) {
+			/* illegal byte sequence */
+			memset(&instate, 0, sizeof(instate)); /* reset state */
+			/* skip byte */
+			continue;
+		} else if (result == -2) {
+			continue;
+		} else if (result == 0) {
+			/* Got NULL character */
+			done = 1;
+			break;
+		} else {
+			if (wcschr(specials, wbuf[length])) {
+				/* XXX this is not a proper cast. It is a bug that I
+				   depend on this working */
+				return (char)(wbuf[length]);
+			} else if ((wbuf[length] == L'\n') || (wbuf[length] == L'\r')) {
+				done = 1;
+				break;
+			} else if ((wbuf[length] == L'\004') && (flags & GETKBSTR_CTRLD)) {
+				return 4;
+			} else if ((wbuf[length] == L'\032') && (flags & GETKBSTR_CTRLZ)) {
+				return 26;
+			} else if ((wbuf[length] == L'?') && (flags & GETKBSTR_ALLOW_HELP)) {
+				DRow = 0;
+				for (i = 0; strcmp(Help[i], "$$$")!=0;i++) {
+					strcpy(KLine, Help[i]);
+					if (!KFlush("Continue Help Display? (y/n)")) break;
+				}
+				return 0;
+			} else if ((wbuf[length] == L'@') && (flags & GETKBSTR_ALLOW_ROMAJI)) {
+				DoRomaji('@');
+				GetEUC(fbuff);
+				return 0;
+			} else if ((wbuf[length] == L'#') && (flags & GETKBSTR_ALLOW_ROMAJI)) {
+				DoRomaji('#');
+				GetEUC(fbuff);
+				return 0;
+			} else if ((wbuf[length] == L'\010') || (wbuf[length] == L'\177')) {
+				/* backspace */
+				if (length) length--;
+				wbuf[length] = L'\0';
+
+				printf("\r%s%s%s%s%ls  ", RVon, prompt, RVoff, (flags &
+					GETKBSTR_SPACE_AFTER_PROMPT) ?
+					" " : "", wbuf);
+				fflush(stdout);
+				printf("\r%s%s%s%s%ls", RVon, prompt, RVoff, (flags &
+					GETKBSTR_SPACE_AFTER_PROMPT) ? " " : "", wbuf);
+				fflush(stdout);
+			} else if (wbuf[length] == L'\025') {
+				/* line kill */
+				/* send more erase sequences than we have to, to
+				   make sure wide characters get erased even on buggy
+				   terminals */
+				while (length--) printf("\b\b  \b\b");
+				length = 0;
+				printf("\r%s%s%s%s", RVon, prompt, RVoff, (flags &
+					GETKBSTR_SPACE_AFTER_PROMPT) ? " " : "");
+			} else if ((flags & GETKBSTR_ROMAJI_DEFAULT) &&
+				(((wbuf[length] >= L'a') && (wbuf[length] <= L'z')) ||
+				((wbuf[length] >= L'A') && (wbuf[length] <= L'Z')))) {
+					/* romaji mode by default, and character is a letter */
+					if ((wbuf[length] == L'L') || (wbuf[length] == L'l')) {
+						/* back to normal mode */
+						flags &= ~GETKBSTR_ROMAJI_DEFAULT;
+					} else {
+						ungetc((char)(wbuf[length]), stdin); /* XXX */
+						DoRomaji('@');
+						GetEUC(fbuff);
+						return 0;
+					}
+			} else if (iswprint(wbuf[length])) {
+				wbuf[length+1] = L'\0';
+				printf("%ls", wbuf+(length++));
+			}
+		}
+	}
+
+	if (source_charset && (strcmp(source_charset, "EUC-JP"))) {
+		descr = iconv_open("EUC-JP", source_charset);
+		if (descr == (iconv_t)-1) use_iconv = 0;
+		else use_iconv = 1;
+	} else {
+		use_iconv = 0;
+	}
+
+	convert_buffer = malloc(MB_CUR_MAX+1);
+	if (!convert_buffer) {
+		strcpy(fbuff, "");	/* oops memory */
+		return 0;
+	}
+
+	memset(&instate, 0, sizeof(instate));
+
+	outbuf_p = fbuff;
+	outbytesleft = sizeof(fbuff)-1;
+
+	done = 0;
+	for (i = 0; i < length; i++) {
+		result = wcrtomb(convert_buffer, wbuf[i], &instate);
+
+		if (use_iconv) {
+			inbuf_p = convert_buffer;
+			inbytesleft = result;
+			if (iconv(descr, &inbuf_p, &inbytesleft, &outbuf_p, &outbytesleft) == -1)
+				break;
+		} else {
+			if (!outbytesleft) break;
+			if (outbytesleft >= result) {
+				memcpy(outbuf_p, convert_buffer, result);
+				outbuf_p += result;
+				outbytesleft -= result;
+			} else {
+				break;
+			}
+		}
+	}
+	*outbuf_p = 0;
+
+	if (use_iconv) iconv_close(descr);
+
+	return 0;
+}
+
+/* Convert in (locale charset) to EUC-JP in out (outlen bytes); plain copy if no conversion applies. */
+void convert_to_euc(char *in, char *out, int outlen)
+{
+iconv_t descr;
+int use_iconv;
+char *source_charset;
+char *inbuf_p;
+int inbytesleft;
+
+	source_charset = get_locale_charset();
+	if (source_charset && (strcmp(source_charset, "EUC-JP"))) {
+		descr = iconv_open("EUC-JP", source_charset);
+		if (descr == (iconv_t)-1) use_iconv = 0;
+		else use_iconv = 1;
+	} else {
+		use_iconv = 0;
+	}
+	if (!use_iconv) {
+		strncpy(out, in, outlen);
+		return;
+	}
+
+	inbytesleft = strlen(in);
+	iconv(descr, &in, &inbytesleft, &out, &outlen);
+	*out = 0;
+	iconv_close(descr);
+	return;
+}
+#endif /* HAVE_LOCALE */
+
+void GetKBStr(unsigned char *prompt)
+{
+#ifdef HAVE_LOCALE
+	if (new_input_mode)
+		locale_GetKBStr(prompt, L"", 0);
+	else
+#endif
+		legacy_GetKBStr(prompt);
+	printf("\r\n");
+}
+
 /*=====  OneShot === Collect and set single filter=============*/
 
 void OneShot()
@@ -2669,6 +3013,13 @@
 					Omode = 1;
 					printf("Output mode set to EUC\n");
 				}
+#ifdef HAVE_LOCALE
+				if (strtmp[0] == 'l')
+				{
+					Omode = 3;
+					printf("Output mode set to locale dependant\n");
+				}
+#endif
 				continue;
 			}
 #ifdef XJDCLSERV
@@ -2776,6 +3127,16 @@
 				NoSJIS = TRUE;
 				printf("EUC (No Shift-JIS) operation enforced\n");
 			}
+			if ((xap[0] == '-') && (xap[1] == 'O'))
+			{
+#ifdef HAVE_LOCALE
+				new_input_mode = 0;
+				if (Omode == 3) Omode = 0;
+				printf("Legacy input/output mode selected (no locale support)\n");
+#else
+				printf("Locale support not compiled; -O ignored\n");
+#endif
+			}
 			if ((xap[0] == '-') && (xap[1] == 'v'))
 			{
 				Jverb = FALSE;
@@ -2827,7 +3188,7 @@
 	{
 		GetWinSize(); /* Just in case the screen has changed  */
 		sprintf(kbprompt,"%sXJDIC [%d:%s] SEARCH KEY:%s ",RVon,CurrDic,DicName(CurrDic),RVoff);
-		sprintf(kbprompt2,"XJDIC [%d:%s] SEARCH KEY: ",CurrDic,DicName(CurrDic));
+		sprintf(kbprompt2,"XJDIC [%d:%s] SEARCH KEY:",CurrDic,DicName(CurrDic));
 		if (GDmode)
 		{
 			sprintf(kbprompt,"%sXJDIC [GLOBAL] SEARCH KEY:%s ",RVon,RVoff);
@@ -2836,6 +3197,9 @@
 		printf("\n\r%s",kbprompt);
 		c = 0;
 		cmdmode = FALSE;
+#ifdef HAVE_LOCALE
+if (!new_input_mode) {
+#endif
 		strf = FALSE;
 		escf = FALSE;
 		bit8 = FALSE;
@@ -2944,8 +3308,33 @@
 			if ((instr[i] == 'B')&&(instr[i-1] == '(')&&(instr[i-2] == 0x1b)) break;
 		}
 		fseek(stdin,0L,SEEK_END); /*kill any leftovers*/
+		GetEUC(fbuff);
+#ifdef HAVE_LOCALE
+} else {	/* if (!new_input_mode) */
+		/* new locale based code */
+		if (!clipmode) {
+			c = locale_GetKBStr(kbprompt2, L"!{}$%*&^=/-:\'+\\;][|_`",
+				GETKBSTR_SPACE_AFTER_PROMPT|GETKBSTR_ALLOW_HELP|
+				((KImode == 0) ? GETKBSTR_ROMAJI_DEFAULT : 0)|
+				GETKBSTR_ALLOW_ROMAJI|GETKBSTR_CTRLD|GETKBSTR_CTRLZ);
+
+			if (c > 0) cmdmode = TRUE;
+			if (c == 4) {
+				cbreakoff();
+				exit(0);
+			} else if (c == 26) {
+				cbreakoff();
+				printf("\nSuspending XJDIC. Type `fg' to resume.\n");
+				pid = getpid();
+				kill(pid,sig);
+				cbreakon();
+				cmdmode = FALSE;
+			}
+		}
+}
+#endif /* HAVE_LOCALE */
 		/* "bye" is the end of the run			*/
-		if ((instr[2] == 'e')&&(instr[1] == 'y')&&(instr[0] == 'b')) 
+		if ((fbuff[2] == 'e')&&(fbuff[1] == 'y')&&(fbuff[0] == 'b')) 
 		{
 			cbreakoff();
 			exit(0);
@@ -2960,7 +3349,7 @@
 				clipmode = TRUE;
 				continue;
 			}
-			if (c == '}')   /* matching {  */
+			if /* { */ (c == '}')
 			{
 				printf("\r                                      \r");
 				RVtoggle();
@@ -3166,6 +3555,7 @@
 					DoKANJI();
 					break;
 			}
+			continue;
 		}
  		if (clipmode)
  		{
@@ -3184,12 +3574,6 @@
  				fgets(clipstring1,50,fclip);
  				fclose(fclip);
  				if (clipstring1[strlen(clipstring1)-1] < 32) clipstring1[strlen(clipstring1)-1] = 0;
- 				if (strcmp(clipstring1,"quit") == 0)
- 				{
- 					clipmode = FALSE;
- 					printf("\nLeaving Clipboard mode\n");
- 					break;
- 				}
  				if (strcmp(clipstring1,clipstring2) == 0)
  				{
  					continue;
@@ -3198,13 +3582,30 @@
  				{
  					strcpy(clipstring2,clipstring1);
  					strcpy(instr,clipstring1);
+#ifdef HAVE_LOCALE
+					if (new_input_mode)
+						convert_to_euc(instr, fbuff, sizeof(fbuff));
+					else
+#endif
+						GetEUC(fbuff);
+ 					if (strcmp(fbuff, "quit") == 0) {
+ 						clipmode = FALSE;
+ 						printf("\nLeaving Clipboard mode\n");
+						fbuff[0] = 0;
+ 						break;
+ 					}
  					break;
  				}
 			}
 		}
-		if(strlen(instr) < 2) continue;
-		GetEUC(fbuff);
-		if (escf) KOut(fbuff);
+#ifdef HAVE_LOCALE
+		if (!new_input_mode) {
+#endif
+			if (escf) KOut(fbuff);
+#ifdef HAVE_LOCALE
+		}
+#endif
+		if(strlen(fbuff) < 2) continue;
 		sprintf(tempout,"\nSearching for: %s%s%s\n",RVon,fbuff,RVoff);
 		KOut(tempout);
 		Dmode = 0;
--- xjdic.1	2003/09/23 19:36:54	1.1
+++ xjdic.1	2003/09/23 19:40:48
@@ -115,11 +115,22 @@
 specify a kanji data file to use.
 
 .At
-.B -j j/e/s 
+.B -j j/e/s/l 
 [CL,SA]
 .Ap
 Specify the output coding for Japanese text (j=JIS, e=EUC, s=Shift-JIS)
 
+l=Locale based output. Output will be according to the character set
+specified by the current system locale.
+
+.At
+.B -O
+[CL,SA]
+.Ap
+Request the old input code. Also selects -j j (which controls output) unless
+overridden. The old code does not respect the current locale but it does
+EUC/JIS detection on input.
+
 .At
 .B -P port_no 
 [CL,SV]
--- xjdfrontend.c.orig	2007-08-08 12:33:23.000000000 -0400
+++ xjdfrontend.c	2007-08-08 12:48:41.000000000 -0400
@@ -2928,6 +2928,9 @@
 	unsigned char xap[50];
 	unsigned char kbprompt[99];
 	unsigned char kbprompt2[99];
+#ifdef HAVE_LOCALE
+	char *term_env;
+#endif
 	
   	printf("XJDIC Version %s (Japanese Dictionary) Copyright J.W.Breen 2003.\n",sver);
 #ifdef XJDDIC
@@ -2936,6 +2939,16 @@
 	printf("   Client mode\n");
 #endif
 
+#ifdef HAVE_LOCALE
+	if (term_env = getenv("TERM")) {
+		if (strcmp(term_env, "kterm") == 0) {
+			printf("Kterm detected -- locale support disabled\n");
+			new_input_mode = 0;
+			Omode = 0;
+		}
+	}
+#endif
+
 	for (i=0;i<NOFILT;i++)
 	{
 		filtact[i] = FALSE;

Reply via email to