Modified: trunk/Source/WebKit/gtk/ChangeLog (117627 => 117628)
--- trunk/Source/WebKit/gtk/ChangeLog 2012-05-18 21:24:51 UTC (rev 117627)
+++ trunk/Source/WebKit/gtk/ChangeLog 2012-05-18 21:30:52 UTC (rev 117628)
@@ -1,5 +1,39 @@
2012-05-18 Martin Robinson <[email protected]>
+ Spell checker doesn't recognize contractions (apostrophes)
+ https://bugs.webkit.org/show_bug.cgi?id=86118
+
+ Reviewed by Gustavo Noronha Silva.
+
+ The Enchant spell checker was breaking words on apostrophes, because
+ apparently they were always being detected as Pango word-end
+ characters. Knowing whether or not an apostrophe is a
+ word-end character requires looking at a string with a larger
+ granularity than one character. In reality, WebCore has already
+ ensured that any apostrophes in the string belong to contractions by
+ using the WordBreakIterator.
+
+ Simplify the way we break strings, by just manually trimming off
+ all non-graphable characters from the string and then finding the end
+ of the first word by looking for the next non-graphable character.
+ This has the side effect of removing the dependency on Pango and
+ eliminating one copy.
+
+ This change also cleans up some misbehavior on the part of the
+ WebCoreSupport layer which was not converting from Unicode character
+ offsets to UTF-16. These offsets can be different if any of the
+ characters in the UTF-16 string are surrogate pairs (non-BMP
+ characters).
+
+ * WebCoreSupport/TextCheckerClientGtk.cpp:
+ (WebKit::TextCheckerClientGtk::checkSpellingOfString): Properly
+ convert from Unicode offsets to UTF-16 offsets.
+ * webkit/webkitspellcheckerenchant.cpp:
+ (findByteOffsetToFirstNonGraphableCharacter): Added this helper.
+ (checkSpellingOfString): Don't split words on apostrophes.
+
+2012-05-18 Martin Robinson <[email protected]>
+
OOM running webgl/sdk/tests/conformance/context/context-creation-and-destruction.html
https://bugs.webkit.org/show_bug.cgi?id=80509
Modified: trunk/Source/WebKit/gtk/WebCoreSupport/TextCheckerClientGtk.cpp (117627 => 117628)
--- trunk/Source/WebKit/gtk/WebCoreSupport/TextCheckerClientGtk.cpp 2012-05-18 21:24:51 UTC (rev 117627)
+++ trunk/Source/WebKit/gtk/WebCoreSupport/TextCheckerClientGtk.cpp 2012-05-18 21:30:52 UTC (rev 117628)
@@ -63,6 +63,27 @@
{
GOwnPtr<gchar> utf8Text(g_utf16_to_utf8(const_cast<gunichar2*>(text), length, 0, 0, 0));
webkit_spell_checker_check_spelling_of_string(m_spellChecker.get(), utf8Text.get(), misspellingLocation, misspellingLength);
+
+ // We have the offset and length in Unicode characters, but we need to convert them to UTF-16 offsets.
+ // Unfortunately there doesn't seem to be a simple way to do this.
+ if (!*misspellingLength)
+ return;
+
+ bool pastStartOfWord = false;
+ for (int i = 0; i < length; i++) {
+ if (i >= *misspellingLocation + *misspellingLength)
+ return;
+ if (!pastStartOfWord && i > *misspellingLocation)
+ pastStartOfWord = true;
+
+ // If this character is part of a surrogate pair, we need to skip the next character (the trail)
+ // and to increase our offsets.
+ if (!U16_IS_SINGLE(text[i])) {
+ i++;
+ (*misspellingLength)++;
+ *misspellingLocation += pastStartOfWord ? 0 : 1;
+ }
+ }
}
String TextCheckerClientGtk::getAutoCorrectSuggestionForMisspelledWord(const String& inputWord)
Modified: trunk/Source/WebKit/gtk/webkit/webkitspellcheckerenchant.cpp (117627 => 117628)
--- trunk/Source/WebKit/gtk/webkit/webkitspellcheckerenchant.cpp 2012-05-18 21:24:51 UTC (rev 117627)
+++ trunk/Source/WebKit/gtk/webkit/webkitspellcheckerenchant.cpp 2012-05-18 21:30:52 UTC (rev 117628)
@@ -88,6 +88,14 @@
priv->enchantDicts = 0;
}
+static size_t findByteOffsetToFirstNonGraphableCharacter(const char* utf8String) // Returns the byte length of the leading run of graphable characters in utf8String.
+{
+ const char* firstNonGraphableCharacter = utf8String;
+ while (firstNonGraphableCharacter && g_unichar_isgraph(g_utf8_get_char(firstNonGraphableCharacter))) // Keep scanning while the character at the cursor is graphable.
+ firstNonGraphableCharacter = g_utf8_find_next_char(firstNonGraphableCharacter, 0); // Advance the cursor to the next UTF-8 character.
+ return firstNonGraphableCharacter - utf8String; // Pointer difference: an offset in bytes, not in characters.
+}
+
static void checkSpellingOfString(WebKitSpellChecker* checker, const char* string, int* misspellingLocation, int* misspellingLength)
{
WebKitSpellCheckerEnchantPrivate* priv = WEBKIT_SPELL_CHECKER_ENCHANT(checker)->priv;
@@ -96,50 +104,33 @@
if (!dicts)
return;
- int length = g_utf8_strlen(string, -1);
+ // At the time this code was written, WebCore only sends us one word at a
+ // time during spellchecking, with a chance of having some small amount of
+ // leading and trailing whitespace. For this reason we can merely chop off
+ // the whitespace and send the word directly to Enchant.
+ const char* firstWord = string;
+ while (firstWord && !g_unichar_isgraph(g_utf8_get_char(firstWord)))
+ firstWord = g_utf8_find_next_char(firstWord, NULL);
- PangoLanguage* language(pango_language_get_default());
- GOwnPtr<PangoLogAttr> attrs(g_new(PangoLogAttr, length + 1));
+ // Either the string only had whitespace characters or no characters at all.
+ if (!firstWord)
+ return;
- // pango_get_log_attrs uses an aditional position at the end of the text.
- pango_get_log_attrs(string, -1, -1, language, attrs.get(), length + 1);
+ size_t byteOffsetToEndOfFirstWord = findByteOffsetToFirstNonGraphableCharacter(firstWord);
+ for (; dicts; dicts = dicts->next) {
+ EnchantDict* dict = static_cast<EnchantDict*>(dicts->data);
+ int result = enchant_dict_check(dict, firstWord, byteOffsetToEndOfFirstWord);
- for (int i = 0; i < length + 1; i++) {
- // We go through each character until we find an is_word_start,
- // then we get into an inner loop to find the is_word_end corresponding
- // to it.
- if (attrs.get()[i].is_word_start) {
- int start = i;
- int end = i;
- int wordLength;
-
- while (attrs.get()[end].is_word_end < 1)
- end++;
-
- wordLength = end - start;
- // Set the iterator to be at the current word end, so we don't
- // check characters twice.
- i = end;
-
- gchar* cstart = g_utf8_offset_to_pointer(string, start);
- gint bytes = static_cast<gint>(g_utf8_offset_to_pointer(string, end) - cstart);
- GOwnPtr<gchar> word(g_new0(gchar, bytes + 1));
-
- g_utf8_strncpy(word.get(), cstart, wordLength);
-
- for (; dicts; dicts = dicts->next) {
- EnchantDict* dict = static_cast<EnchantDict*>(dicts->data);
- if (enchant_dict_check(dict, word.get(), wordLength)) {
- *misspellingLocation = start;
- *misspellingLength = wordLength;
- } else {
- // Stop checking, this word is ok in at least one dict.
- *misspellingLocation = -1;
- *misspellingLength = 0;
- break;
- }
- }
+ if (result < 0) // Error during checking.
+ continue;
+ if (!result) { // Stop checking, as this word is correct for at least one dictionary.
+ *misspellingLocation = -1;
+ *misspellingLength = 0;
+ return;
}
+
+ *misspellingLocation = g_utf8_pointer_to_offset(string, firstWord);
+ *misspellingLength = g_utf8_pointer_to_offset(string, firstWord + byteOffsetToEndOfFirstWord) - *misspellingLocation;
}
}