Hi,
I was having some trouble with utf8 chars in my Swedish dictionary for
the Illume keyboard, so I took a look at the code and fixed the issue.
I was also able to speed up the searching.
As hinted by the FIXME on the function _e_kbd_dict_normalized_strcpy,
that function was not multibyte safe, which broke words containing
multibyte chars.
As for the speedup, the _e_kbd_dict_normalized_strncmp did a lot of
unnecessary conversions, so this should be faster (and equivalent, I
hope).
Best regards,
Olof Sjöbergh
Index: e/src/modules/illume/e_kbd_dict.c
===================================================================
--- e/src/modules/illume/e_kbd_dict.c (revision 38274)
+++ e/src/modules/illume/e_kbd_dict.c (working copy)
@@ -137,19 +137,21 @@ static void _e_kbd_dict_string_normalise(wchar_t *
static int
_e_kbd_dict_normalized_strncmp(const char *a, const char *b, int len)
{
- mbstate_t shiftState; memset(&shiftState, 0, sizeof(mbstate_t));
- // Calculate the size of the wchar buffer we will need to convert a and b (the number of codepoints in a/b)
- size_t n_codep_a = len > 0 ? mbsnrtowcs(NULL, &a, len, 0, &shiftState) : mbsrtowcs(NULL, &a, 0, &shiftState);
- size_t n_codep_b = len > 0 ? mbsnrtowcs(NULL, &a, len, 0, &shiftState) : mbsrtowcs(NULL, &a, 0, &shiftState);
- wchar_t awc[n_codep_a+1]; awc[n_codep_a] = '\0';
- wchar_t bwc[n_codep_b+1]; bwc[n_codep_a] = '\0';
- // Convert a and b to wchar strings so we can nomalise them with the lookup table
- len > 0 ? mbsnrtowcs(awc, &a, len, n_codep_a, &shiftState) : mbsrtowcs(awc, &a, n_codep_a, &shiftState);
- len > 0 ? mbsnrtowcs(bwc, &b, len, n_codep_b, &shiftState) : mbsrtowcs(bwc, &b, n_codep_b, &shiftState);
+ // A multibyte string never decodes to more wchars than it has bytes, so the byte count (len, or strlen when len <= 0) is used as the buffer size
+ size_t n_codep_a = len > 0 ? len : strlen(a);
+ size_t n_codep_b = len > 0 ? len : strlen(b);
+ wchar_t awc[n_codep_a+1];
+ wchar_t bwc[n_codep_b+1];
+
+ // Convert a and b to wchar strings so we can normalise them with the lookup table. NOTE(review): mbstowcs() returns (size_t)-1 on an invalid sequence and that is not checked here
+ n_codep_a = mbstowcs(awc, a, n_codep_a); awc[n_codep_a] = '\0';
+ n_codep_b = mbstowcs(bwc, b, n_codep_b); bwc[n_codep_b] = '\0';
+
_e_kbd_dict_string_normalise(awc);
_e_kbd_dict_string_normalise(bwc);
- if(len > 0) return wcsncasecmp(awc, bwc, n_codep_a > n_codep_b ? n_codep_b : n_codep_a);
- return wcscasecmp(awc, bwc);
+
+ if (len > 0) return wcsncmp(awc, bwc, n_codep_a > n_codep_b ? n_codep_b : n_codep_a);
+ return wcscmp(awc, bwc);
}
static int
@@ -158,18 +160,26 @@ _e_kbd_dict_normalized_strcmp(const char *a, const
return _e_kbd_dict_normalized_strncmp(a, b, -1);
}
-// FIXME: Does not support multi byte UTF8, does it?
static void
_e_kbd_dict_normalized_strcpy(char *dst, const char *src)
{
const char *p;
char *d;
-
- for (p = src, d = dst; *p; p++, d++)
- {
- *d = _e_kbd_dict_letter_normalise(*p);
- }
- *d = 0;
+ wchar_t *wtword;
+ size_t n_codep;
+ mbstate_t shiftState;
+
+ // Convert word to wide characters and normalise it; the buffer holds one wchar_t per source byte (an upper bound on the codepoint count) plus a terminator
+ wtword = alloca((strlen(src) + 1) * sizeof(wchar_t));
+ n_codep = mbstowcs(wtword, src, strlen(src));
+ wtword[n_codep] = '\0';
+ _e_kbd_dict_string_normalise(wtword);
+
+ // Convert it back to a multibyte string: first measure the byte length, then write (dst must be large enough for the result plus its terminator)
+ memset(&shiftState, 0, sizeof(mbstate_t));
+ n_codep = wcsrtombs(NULL, (const wchar_t**)&wtword, 0, &shiftState);
+ dst[n_codep] = '\0';
+ wcsrtombs(dst, (const wchar_t**)&wtword, n_codep, &shiftState);
}
static int
@@ -563,26 +573,6 @@ _e_kbd_dict_find(E_Kbd_Dict *kd, const char *word)
tword = alloca(strlen(word) + 1);
_e_kbd_dict_normalized_strcpy(tword, word);
-/*
- printf("search: %s\n", word);
- // Convert word to wide character and normalise it
- wchar_t *wtword;
- mbstate_t shiftState; memset(&shiftState, 0, sizeof(mbstate_t));
- size_t n_codep = mbsrtowcs(NULL, &word, 0, &shiftState);
- printf("cp: %d\n", n_codep);
- wtword = alloca(n_codep + 1);
- wtword[n_codep] = '\0';
- mbsrtowcs(wtword, &word, n_codep, &shiftState);
- _e_kbd_dict_string_normalise(wtword);
- printf("wchar: %ls\n", wtword);
- // Convert it back to multi byte string
- n_codep = wcsrtombs(NULL, (const wchar_t**)&wtword, 0, &shiftState);
- printf("cp: %d\n", n_codep);
- tword = alloca(n_codep + 1);
- tword[n_codep] = '\0';
- wcsrtombs(tword, (const wchar_t**)&wtword, n_codep, &shiftState);
- printf("after conv: %s\n", tword);
-*/
p = eina_hash_find(kd->matches.leads, tword);
if (p) return p;
p2 = strlen(tword);
------------------------------------------------------------------------------
Check out the new SourceForge.net Marketplace.
It is the best place to buy or sell services for
just about anything Open Source.
http://p.sf.net/sfu/Xq1LFB
_______________________________________________
enlightenment-devel mailing list
enlightenment-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/enlightenment-devel