Re: [E-devel] [Patch] Illume keyboard dictionary fix and speedup

Olof Sjobergh Sat, 10 Jan 2009 07:31:40 -0800

On Wed, Jan 7, 2009 at 9:34 PM, Olof Sjobergh <[email protected]> wrote:
> I was having some trouble with utf8 chars in my Swedish dictionary for
> the Illume keyboard, so I took a look at the code and fixed the issue.
> I was also able to speed up the searching.
>
> As hinted by the FIXME on the function _e_kbd_dict_normalized_strcpy,
> this was not multibyte safe, which made words with multibyte chars
> broken.
>
> As for the speedup, the _e_kbd_dict_normalized_strncmp did a lot of
> unnecessary conversions, so this should be faster (and equivalent, I
> hope).


Hi again,

I made a mistake in the last patch, so here's a new one that should
work better. It fixes the problems I mentioned above, plus:

- E no longer segfaults on entering a € (euro sign) from the Numbers
keyboard. This used to segfault because the euro sign is not in the
normalisation table and thus isn't normalised to a char < 128. To fix
this, we now check that the normalised char is < 128 before looking
for tuples.

- When entering long words (~25 chars) there was a segfault because
alloca did not allocate enough memory, so we now use malloc instead
(words probably won't be that long, but it's bad to segfault on the
user).

- Fixes typo loolup -> lookup.

Best regards,

Olof Sjöbergh

Index: e/src/modules/illume/e_kbd_dict.c
===================================================================
--- e/src/modules/illume/e_kbd_dict.c	(revision 38274)
+++ e/src/modules/illume/e_kbd_dict.c	(working copy)
@@ -137,19 +137,21 @@ static void _e_kbd_dict_string_normalise(wchar_t *
 static int
 _e_kbd_dict_normalized_strncmp(const char *a, const char *b, int len)
 {
-   mbstate_t shiftState; memset(&shiftState, 0, sizeof(mbstate_t));
-   // Calculate the size of the wchar buffer we will need to convert a and b (the number of codepoints in a/b)
-   size_t n_codep_a = len > 0 ? mbsnrtowcs(NULL, &a, len, 0, &shiftState) : mbsrtowcs(NULL, &a, 0, &shiftState);
-   size_t n_codep_b = len > 0 ? mbsnrtowcs(NULL, &a, len, 0, &shiftState) : mbsrtowcs(NULL, &a, 0, &shiftState);
-   wchar_t awc[n_codep_a+1]; awc[n_codep_a] = '\0';
-   wchar_t bwc[n_codep_b+1]; bwc[n_codep_a] = '\0';
-   // Convert a and b to wchar strings so we can nomalise them with the lookup table
-   len > 0 ? mbsnrtowcs(awc, &a, len, n_codep_a, &shiftState) : mbsrtowcs(awc, &a, n_codep_a, &shiftState);
-   len > 0 ? mbsnrtowcs(bwc, &b, len, n_codep_b, &shiftState) : mbsrtowcs(bwc, &b, n_codep_b, &shiftState);
+   // There should never be more wchars than chars, so set initial size to multi byte size
+   size_t n_codep_a = len > 0 ? len : strlen(a);
+   size_t n_codep_b = len > 0 ? len : strlen(b);
+   wchar_t awc[n_codep_a+1];
+   wchar_t bwc[n_codep_b+1];
+
+   // Convert a and b to wchar strings so we can normalise them with the lookup table
+   n_codep_a = mbstowcs(awc, a, n_codep_a); awc[n_codep_a] = '\0';
+   n_codep_b = mbstowcs(bwc, b, n_codep_b); bwc[n_codep_b] = '\0';
+
    _e_kbd_dict_string_normalise(awc);
    _e_kbd_dict_string_normalise(bwc);
-   if(len > 0) return wcsncasecmp(awc, bwc, n_codep_a > n_codep_b ? n_codep_b : n_codep_a);
-   return wcscasecmp(awc, bwc);
+
+   if (len > 0) return wcsncmp(awc, bwc, n_codep_a > n_codep_b ? n_codep_b : n_codep_a);
+   return wcscmp(awc, bwc);
 }
 
 static int
@@ -158,22 +160,30 @@ _e_kbd_dict_normalized_strcmp(const char *a, const
    return _e_kbd_dict_normalized_strncmp(a, b, -1);
 }
 
-// FIXME: Does not support multi byte UTF8, does it?
 static void
 _e_kbd_dict_normalized_strcpy(char *dst, const char *src)
 {
    const char *p;
    char *d;
-   
-   for (p = src, d = dst; *p; p++, d++)
-     {
-	*d = _e_kbd_dict_letter_normalise(*p);
-     }
-   *d = 0;
+   wchar_t *wtword;
+   size_t size, w_size;
+   mbstate_t shiftState;
+
+   // Convert word to wchar string and normalise it
+   size = strlen(src);
+   wtword = malloc((size + 1)*sizeof(wchar_t));
+   w_size = mbstowcs(wtword, src, size);
+   wtword[w_size] = '\0';
+   _e_kbd_dict_string_normalise(wtword);
+
+   // Convert it back to multi byte string (dst must be large enough)
+   wcstombs(dst, wtword, size);
+   dst[size] = '\0';
+   free(wtword);
 }
 
 static int
-_e_kbd_dict_matches_loolup_cb_sort(const void *d1, const void *d2)
+_e_kbd_dict_matches_lookup_cb_sort(const void *d1, const void *d2)
 {
    const E_Kbd_Dict_Word *kw1, *kw2;
    
@@ -560,29 +570,9 @@ _e_kbd_dict_find(E_Kbd_Dict *kd, const char *word)
     * go
     * g
     */
-   tword = alloca(strlen(word) + 1);
+   tword = malloc((strlen(word) + 1)*sizeof(char));
    _e_kbd_dict_normalized_strcpy(tword, word);
 
-/*
-   printf("search: %s\n", word);
-   // Convert word to wide character and normalise it
-   wchar_t *wtword;
-   mbstate_t shiftState; memset(&shiftState, 0, sizeof(mbstate_t));
-   size_t n_codep = mbsrtowcs(NULL, &word, 0, &shiftState);
-   printf("cp: %d\n", n_codep);
-   wtword = alloca(n_codep + 1);
-   wtword[n_codep] = '\0';
-   mbsrtowcs(wtword, &word, n_codep, &shiftState);
-   _e_kbd_dict_string_normalise(wtword);
-   printf("wchar: %ls\n", wtword);
-   // Convert it back to multi byte string
-   n_codep = wcsrtombs(NULL, (const wchar_t**)&wtword, 0, &shiftState);
-   printf("cp: %d\n", n_codep);
-   tword = alloca(n_codep + 1);
-   tword[n_codep] = '\0';
-   wcsrtombs(tword, (const wchar_t**)&wtword, n_codep, &shiftState);
-   printf("after conv: %s\n", tword);
-*/  
    p = eina_hash_find(kd->matches.leads, tword);
    if (p) return p;
    p2 = strlen(tword);
@@ -595,6 +585,7 @@ _e_kbd_dict_find(E_Kbd_Dict *kd, const char *word)
 	if (p)
 	  return _e_kbd_dict_find_pointer(kd, p, p2, word);
      }
+   free(tword);
    /* looking at leads going back letters didn't work */
    p = kd->file.dict;
    if ((p[0] == '\n') && (kd->file.size <= 1)) return NULL;
@@ -603,20 +594,24 @@ _e_kbd_dict_find(E_Kbd_Dict *kd, const char *word)
    if ((p2 > 0) && (glyphs[0] > 0))
      p2 = evas_string_char_next_get(word, p2, &(glyphs[1]));
    v1 = _e_kbd_dict_letter_normalise(glyphs[0]);
-   if (glyphs[1] != 0)
+   v2 = _e_kbd_dict_letter_normalise(glyphs[1]);
+   if (v1 < 128)
      {
-	v2 = _e_kbd_dict_letter_normalise(glyphs[1]);
-	p = kd->lookup.tuples[v1][v2];
-     }
-   else
-     {
-	for (i = 0; i < 128; i++)
+	if (glyphs[1] != 0 && v2 < 128)
 	  {
-	     p = kd->lookup.tuples[v1][i];
-	     if (p) break;
+	     p = kd->lookup.tuples[v1][v2];
 	  }
+	else
+	  {
+	     for (i = 0; i < 128; i++)
+	       {
+		  p = kd->lookup.tuples[v1][i];
+		  if (p) break;
+	       }
+	  }
+	return _e_kbd_dict_find_pointer(kd, p, p2, word);
      }
-   return _e_kbd_dict_find_pointer(kd, p, p2, word);
+   return NULL;
 }
 
 static const char *
@@ -903,7 +898,7 @@ e_kbd_dict_matches_lookup(E_Kbd_Dict *kd)
      _e_kbd_dict_matches_lookup_iter(kd, NULL, kd->word.letters);
    kd->matches.list = eina_list_sort(kd->matches.list,
 				     eina_list_count(kd->matches.list),
-				     _e_kbd_dict_matches_loolup_cb_sort);
+				     _e_kbd_dict_matches_lookup_cb_sort);
 }
 
 EAPI void

------------------------------------------------------------------------------
Check out the new SourceForge.net Marketplace.
It is the best place to buy or sell services for
just about anything Open Source.
http://p.sf.net/sfu/Xq1LFB

_______________________________________________
enlightenment-devel mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/enlightenment-devel

Re: [E-devel] [Patch] Illume keyboard dictionary fix and speedup

Reply via email to