Hi,

I was having some trouble with utf8 chars in my Swedish dictionary for
the Illume keyboard, so I took a look at the code and fixed the issue.
I was also able to speed up the searching.

As hinted by the FIXME on the function _e_kbd_dict_normalized_strcpy,
that function was not multibyte safe, which broke words containing
multibyte chars.

As for the speedup, _e_kbd_dict_normalized_strncmp did a lot of
unnecessary conversions, so the new version should be faster (and
equivalent, I hope).

Best regards,

Olof Sjöbergh
Index: e/src/modules/illume/e_kbd_dict.c
===================================================================
--- e/src/modules/illume/e_kbd_dict.c	(revision 38274)
+++ e/src/modules/illume/e_kbd_dict.c	(working copy)
@@ -137,19 +137,21 @@ static void _e_kbd_dict_string_normalise(wchar_t *
 static int
 _e_kbd_dict_normalized_strncmp(const char *a, const char *b, int len)
 {
-   mbstate_t shiftState; memset(&shiftState, 0, sizeof(mbstate_t));
-   // Calculate the size of the wchar buffer we will need to convert a and b (the number of codepoints in a/b)
-   size_t n_codep_a = len > 0 ? mbsnrtowcs(NULL, &a, len, 0, &shiftState) : mbsrtowcs(NULL, &a, 0, &shiftState);
-   size_t n_codep_b = len > 0 ? mbsnrtowcs(NULL, &a, len, 0, &shiftState) : mbsrtowcs(NULL, &a, 0, &shiftState);
-   wchar_t awc[n_codep_a+1]; awc[n_codep_a] = '\0';
-   wchar_t bwc[n_codep_b+1]; bwc[n_codep_a] = '\0';
-   // Convert a and b to wchar strings so we can nomalise them with the lookup table
-   len > 0 ? mbsnrtowcs(awc, &a, len, n_codep_a, &shiftState) : mbsrtowcs(awc, &a, n_codep_a, &shiftState);
-   len > 0 ? mbsnrtowcs(bwc, &b, len, n_codep_b, &shiftState) : mbsrtowcs(bwc, &b, n_codep_b, &shiftState);
+   // Compare a and b after normalisation; when len > 0 at most len characters are compared.
+   // There should never be more wchars than chars, so set initial size to multibyte size
+   size_t n_codep_a = len > 0 ? len : strlen(a);
+   size_t n_codep_b = len > 0 ? len : strlen(b);
+   wchar_t awc[n_codep_a+1];
+   wchar_t bwc[n_codep_b+1];
+   // Convert a and b to wchar strings so we can normalise them with the lookup table.
+   // mbstowcs returns (size_t)-1 on invalid input: compare the raw bytes then, never index with it
+   n_codep_a = mbstowcs(awc, a, n_codep_a);
+   n_codep_b = mbstowcs(bwc, b, n_codep_b);
+   if (n_codep_a == (size_t)-1 || n_codep_b == (size_t)-1) return len > 0 ? strncmp(a, b, len) : strcmp(a, b);
+   awc[n_codep_a] = '\0'; bwc[n_codep_b] = '\0';
    _e_kbd_dict_string_normalise(awc);
    _e_kbd_dict_string_normalise(bwc);
-   if(len > 0) return wcsncasecmp(awc, bwc, n_codep_a > n_codep_b ? n_codep_b : n_codep_a);
-   return wcscasecmp(awc, bwc);
+   return len > 0 ? wcsncmp(awc, bwc, n_codep_a > n_codep_b ? n_codep_b : n_codep_a) : wcscmp(awc, bwc);
 }
 
 static int
@@ -158,18 +160,26 @@ _e_kbd_dict_normalized_strcmp(const char *a, const
    return _e_kbd_dict_normalized_strncmp(a, b, -1);
 }
 
-// FIXME: Does not support multi byte UTF8, does it?
 static void
 _e_kbd_dict_normalized_strcpy(char *dst, const char *src)
 {
-   const char *p;
-   char *d;
-   
-   for (p = src, d = dst; *p; p++, d++)
-     {
-	*d = _e_kbd_dict_letter_normalise(*p);
-     }
-   *d = 0;
+   wchar_t *wtword;
+   size_t n_wc, n_bytes;
+
+   // Writes the normalised form of src to dst. A string never has more codepoints
+   // than bytes, so strlen(src) + 1 wide chars always hold the word and its terminator
+   n_wc = strlen(src) + 1;
+   wtword = alloca(n_wc * sizeof(wchar_t));
+
+   // Convert word to wide character and normalise it; input that is not a valid
+   // multibyte string cannot be normalised and is copied unchanged
+   if (mbstowcs(wtword, src, n_wc) == (size_t)-1) { strcpy(dst, src); return; }
+   _e_kbd_dict_string_normalise(wtword);
+
+   // Convert it back to multi byte string (dst needs be large enough)
+   n_bytes = wcstombs(NULL, wtword, 0);
+   if (n_bytes == (size_t)-1) { strcpy(dst, src); return; }
+   wcstombs(dst, wtword, n_bytes + 1);
 }
 
 static int
@@ -563,26 +573,6 @@ _e_kbd_dict_find(E_Kbd_Dict *kd, const char *word)
    tword = alloca(strlen(word) + 1);
    _e_kbd_dict_normalized_strcpy(tword, word);
 
-/*
-   printf("search: %s\n", word);
-   // Convert word to wide character and normalise it
-   wchar_t *wtword;
-   mbstate_t shiftState; memset(&shiftState, 0, sizeof(mbstate_t));
-   size_t n_codep = mbsrtowcs(NULL, &word, 0, &shiftState);
-   printf("cp: %d\n", n_codep);
-   wtword = alloca(n_codep + 1);
-   wtword[n_codep] = '\0';
-   mbsrtowcs(wtword, &word, n_codep, &shiftState);
-   _e_kbd_dict_string_normalise(wtword);
-   printf("wchar: %ls\n", wtword);
-   // Convert it back to multi byte string
-   n_codep = wcsrtombs(NULL, (const wchar_t**)&wtword, 0, &shiftState);
-   printf("cp: %d\n", n_codep);
-   tword = alloca(n_codep + 1);
-   tword[n_codep] = '\0';
-   wcsrtombs(tword, (const wchar_t**)&wtword, n_codep, &shiftState);
-   printf("after conv: %s\n", tword);
-*/  
    p = eina_hash_find(kd->matches.leads, tword);
    if (p) return p;
    p2 = strlen(tword);
------------------------------------------------------------------------------
Check out the new SourceForge.net Marketplace.
It is the best place to buy or sell services for
just about anything Open Source.
http://p.sf.net/sfu/Xq1LFB
_______________________________________________
enlightenment-devel mailing list
enlightenment-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/enlightenment-devel

Reply via email to