On Wed, Jan 7, 2009 at 9:34 PM, Olof Sjobergh <olo...@gmail.com> wrote:
> I was having some trouble with utf8 chars in my Swedish dictionary for
> the Illume keyboard, so I took a look at the code and fixed the issue.
> I was also able to speed up the searching.
>
> As hinted by the FIXME on the function _e_kbd_dict_normalized_strcpy,
> this was not multibyte safe, which made words with multibyte chars
> broken.
>
> As for the speedup, the _e_kbd_dict_normalized_strncmp did a lot of
> unnecessary conversions, so this should be faster (and equivalent, I
> hope).
Hi again,
I made a mistake in the last patch, so here's a new one that should
work better. It fixes the problems I mentioned above, plus:
- E no longer segfaults on entering a € (euro sign) from the Numbers
keyboard. This used to segfault because the euro sign is not in the
normalisation table and thus is not normalised to a char < 128, yet it
was still used as an index into the tuples lookup table. To fix this we
now check that a normalised char is < 128 before looking up tuples.
- When entering long words (~25 chars) there was a segfault because
alloca did not allocate enough memory, so malloc is used instead
(words probably won't be that long, but it's bad to segfault on
the user).
- Fixes typo loolup -> lookup.
Best regards,
Olof Sjöbergh
Index: e/src/modules/illume/e_kbd_dict.c
===================================================================
--- e/src/modules/illume/e_kbd_dict.c (revision 38274)
+++ e/src/modules/illume/e_kbd_dict.c (working copy)
@@ -137,19 +137,21 @@ static void _e_kbd_dict_string_normalise(wchar_t *
static int
_e_kbd_dict_normalized_strncmp(const char *a, const char *b, int len)
{
- mbstate_t shiftState; memset(&shiftState, 0, sizeof(mbstate_t));
- // Calculate the size of the wchar buffer we will need to convert a and b (the number of codepoints in a/b)
- size_t n_codep_a = len > 0 ? mbsnrtowcs(NULL, &a, len, 0, &shiftState) : mbsrtowcs(NULL, &a, 0, &shiftState);
- size_t n_codep_b = len > 0 ? mbsnrtowcs(NULL, &a, len, 0, &shiftState) : mbsrtowcs(NULL, &a, 0, &shiftState);
- wchar_t awc[n_codep_a+1]; awc[n_codep_a] = '\0';
- wchar_t bwc[n_codep_b+1]; bwc[n_codep_a] = '\0';
- // Convert a and b to wchar strings so we can nomalise them with the lookup table
- len > 0 ? mbsnrtowcs(awc, &a, len, n_codep_a, &shiftState) : mbsrtowcs(awc, &a, n_codep_a, &shiftState);
- len > 0 ? mbsnrtowcs(bwc, &b, len, n_codep_b, &shiftState) : mbsrtowcs(bwc, &b, n_codep_b, &shiftState);
+ // There should never be more wchars than chars, so set initial size to multi byte size
+ size_t n_codep_a = len > 0 ? len : strlen(a);
+ size_t n_codep_b = len > 0 ? len : strlen(b);
+ wchar_t awc[n_codep_a+1];
+ wchar_t bwc[n_codep_b+1];
+
+ // Convert a and b to wchar strings so we can normalise them with the lookup table
+ n_codep_a = mbstowcs(awc, a, n_codep_a); awc[n_codep_a] = '\0';
+ n_codep_b = mbstowcs(bwc, b, n_codep_b); bwc[n_codep_b] = '\0';
+
_e_kbd_dict_string_normalise(awc);
_e_kbd_dict_string_normalise(bwc);
- if(len > 0) return wcsncasecmp(awc, bwc, n_codep_a > n_codep_b ? n_codep_b : n_codep_a);
- return wcscasecmp(awc, bwc);
+
+ if (len > 0) return wcsncmp(awc, bwc, n_codep_a > n_codep_b ? n_codep_b : n_codep_a);
+ return wcscmp(awc, bwc);
}
static int
@@ -158,22 +160,30 @@ _e_kbd_dict_normalized_strcmp(const char *a, const
return _e_kbd_dict_normalized_strncmp(a, b, -1);
}
-// FIXME: Does not support multi byte UTF8, does it?
static void
_e_kbd_dict_normalized_strcpy(char *dst, const char *src)
{
const char *p;
char *d;
-
- for (p = src, d = dst; *p; p++, d++)
- {
- *d = _e_kbd_dict_letter_normalise(*p);
- }
- *d = 0;
+ wchar_t *wtword;
+ size_t size, w_size;
+ mbstate_t shiftState;
+
+ // Convert word to wchar string and normalise it
+ size = strlen(src);
+ wtword = malloc((size + 1)*sizeof(wchar_t));
+ w_size = mbstowcs(wtword, src, size);
+ wtword[w_size] = '\0';
+ _e_kbd_dict_string_normalise(wtword);
+
+ // Convert it back to multi byte string (dst must be large enough)
+ wcstombs(dst, wtword, size);
+ dst[size] = '\0';
+ free(wtword);
}
static int
-_e_kbd_dict_matches_loolup_cb_sort(const void *d1, const void *d2)
+_e_kbd_dict_matches_lookup_cb_sort(const void *d1, const void *d2)
{
const E_Kbd_Dict_Word *kw1, *kw2;
@@ -560,29 +570,9 @@ _e_kbd_dict_find(E_Kbd_Dict *kd, const char *word)
* go
* g
*/
- tword = alloca(strlen(word) + 1);
+ tword = malloc((strlen(word) + 1)*sizeof(char));
_e_kbd_dict_normalized_strcpy(tword, word);
-/*
- printf("search: %s\n", word);
- // Convert word to wide character and normalise it
- wchar_t *wtword;
- mbstate_t shiftState; memset(&shiftState, 0, sizeof(mbstate_t));
- size_t n_codep = mbsrtowcs(NULL, &word, 0, &shiftState);
- printf("cp: %d\n", n_codep);
- wtword = alloca(n_codep + 1);
- wtword[n_codep] = '\0';
- mbsrtowcs(wtword, &word, n_codep, &shiftState);
- _e_kbd_dict_string_normalise(wtword);
- printf("wchar: %ls\n", wtword);
- // Convert it back to multi byte string
- n_codep = wcsrtombs(NULL, (const wchar_t**)&wtword, 0, &shiftState);
- printf("cp: %d\n", n_codep);
- tword = alloca(n_codep + 1);
- tword[n_codep] = '\0';
- wcsrtombs(tword, (const wchar_t**)&wtword, n_codep, &shiftState);
- printf("after conv: %s\n", tword);
-*/
p = eina_hash_find(kd->matches.leads, tword);
if (p) return p;
p2 = strlen(tword);
@@ -595,6 +585,7 @@ _e_kbd_dict_find(E_Kbd_Dict *kd, const char *word)
if (p)
return _e_kbd_dict_find_pointer(kd, p, p2, word);
}
+ free(tword);
/* looking at leads going back letters didn't work */
p = kd->file.dict;
if ((p[0] == '\n') && (kd->file.size <= 1)) return NULL;
@@ -603,20 +594,24 @@ _e_kbd_dict_find(E_Kbd_Dict *kd, const char *word)
if ((p2 > 0) && (glyphs[0] > 0))
p2 = evas_string_char_next_get(word, p2, &(glyphs[1]));
v1 = _e_kbd_dict_letter_normalise(glyphs[0]);
- if (glyphs[1] != 0)
+ v2 = _e_kbd_dict_letter_normalise(glyphs[1]);
+ if (v1 < 128)
{
- v2 = _e_kbd_dict_letter_normalise(glyphs[1]);
- p = kd->lookup.tuples[v1][v2];
- }
- else
- {
- for (i = 0; i < 128; i++)
+ if (glyphs[1] != 0 && v2 < 128)
{
- p = kd->lookup.tuples[v1][i];
- if (p) break;
+ p = kd->lookup.tuples[v1][v2];
}
+ else
+ {
+ for (i = 0; i < 128; i++)
+ {
+ p = kd->lookup.tuples[v1][i];
+ if (p) break;
+ }
+ }
+ return _e_kbd_dict_find_pointer(kd, p, p2, word);
}
- return _e_kbd_dict_find_pointer(kd, p, p2, word);
+ return NULL;
}
static const char *
@@ -903,7 +898,7 @@ e_kbd_dict_matches_lookup(E_Kbd_Dict *kd)
_e_kbd_dict_matches_lookup_iter(kd, NULL, kd->word.letters);
kd->matches.list = eina_list_sort(kd->matches.list,
eina_list_count(kd->matches.list),
- _e_kbd_dict_matches_loolup_cb_sort);
+ _e_kbd_dict_matches_lookup_cb_sort);
}
EAPI void
------------------------------------------------------------------------------
Check out the new SourceForge.net Marketplace.
It is the best place to buy or sell services for
just about anything Open Source.
http://p.sf.net/sfu/Xq1LFB
_______________________________________________
enlightenment-devel mailing list
enlightenment-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/enlightenment-devel