On Wed, Mar 13, 2019 at 08:16:05PM +0200, Lauri Tirkkonen wrote: > Changing worddelimiters to wchar_t * allows to use wcschr() instead of > implementing utf8strchr(). > > Also, since there are so many different spaces and punctuation in > unicode, I have another diff that uses iswspace()/iswpunct() to figure > out delimiters, but I expect that to be a bit more controversial than > this patch so I'll send it separately. > > From cb65b11944147f3ea43b9b19abf068e0dea7a562 Mon Sep 17 00:00:00 2001 > From: Lauri Tirkkonen <[email protected]> > Date: Wed, 13 Mar 2019 19:40:52 +0200 > Subject: [PATCH] replace utf8strchr with wcschr > > --- > config.def.h | 4 ++-- > st.c | 20 +------------------- > st.h | 2 +- > 3 files changed, 4 insertions(+), 22 deletions(-) > > diff --git a/config.def.h b/config.def.h > index 0e01717..482901e 100644 > --- a/config.def.h > +++ b/config.def.h > @@ -30,9 +30,9 @@ static float chscale = 1.0; > /* > * word delimiter string > * > - * More advanced example: " `'\"()[]{}" > + * More advanced example: L" `'\"()[]{}" > */ > -char *worddelimiters = " "; > +wchar_t *worddelimiters = L" "; > > /* selection timeouts (in milliseconds) */ > static unsigned int doubleclicktimeout = 300; > diff --git a/st.c b/st.c > index d35f89d..812f30c 100644 > --- a/st.c > +++ b/st.c > @@ -41,7 +41,7 @@ > #define ISCONTROLC0(c) (BETWEEN(c, 0, 0x1f) || (c) == '\177') > #define ISCONTROLC1(c) (BETWEEN(c, 0x80, 0x9f)) > #define ISCONTROL(c) (ISCONTROLC0(c) || ISCONTROLC1(c)) > -#define ISDELIM(u) (utf8strchr(worddelimiters, u) != NULL) > +#define ISDELIM(u) (u != 0 && wcschr(worddelimiters, u) != NULL) > > enum term_mode { > MODE_WRAP = 1 << 0, > @@ -210,7 +210,6 @@ static void selsnap(int *, int *, int); > static size_t utf8decode(const char *, Rune *, size_t); > static Rune utf8decodebyte(char, size_t *); > static char utf8encodebyte(Rune, size_t); > -static char *utf8strchr(char *, Rune); > static size_t utf8validate(Rune *, size_t); > > static char *base64dec(const 
char *); > @@ -337,23 +336,6 @@ utf8encodebyte(Rune u, size_t i) > return utfbyte[i] | (u & ~utfmask[i]); > } > > -char * > -utf8strchr(char *s, Rune u) > -{ > - Rune r; > - size_t i, j, len; > - > - len = strlen(s); > - for (i = 0, j = 0; i < len; i += j) { > - if (!(j = utf8decode(&s[i], &r, len - i))) > - break; > - if (r == u) > - return &(s[i]); > - } > - > - return NULL; > -} > - > size_t > utf8validate(Rune *u, size_t i) > { > diff --git a/st.h b/st.h > index 38c61c4..4da3051 100644 > --- a/st.h > +++ b/st.h > @@ -114,7 +114,7 @@ char *xstrdup(char *); > extern char *utmp; > extern char *stty_args; > extern char *vtiden; > -extern char *worddelimiters; > +extern wchar_t *worddelimiters; > extern int allowaltscreen; > extern char *termname; > extern unsigned int tabspaces; > -- > 2.20.1 > > -- > Lauri Tirkkonen | lotheac @ IRCnet >
I don't like mixing the existing functions with wchar_t. I think st should (at the very least internally) use UTF-8. I won't apply this patch. -- Kind regards, Hiltjo
