In February, I wrote about my intention to code a patch to implement an inline 
(on the fly) spellchecker for LyX (see bug 718).

This is a preliminary version of the patch. There are still a lot of things
missing, but before I continue I would like your opinion and advice.

1. My main concern is document loading. It is slow (very slow!) because I'm
checking every word in the document on the main thread (not in the
background).

The next step for me will be to run the spellchecker in the background using
multi-processing or multi-threading. Is there any problem with using pthreads
on Windows? Is there any other place where LyX already uses threads?

2. I've copied the getSpeller function from ControlSpellchecker.C, and I'm
calling it each time I want to check a single word. It's certainly not
optimized!

But I don't really know what the best common place for this function would
be, so that it can be used both by my code (the Paragraph methods) and by the
GUI SpellChecker. Any advice?

Thanks a lot,
-- 
Félix-Antoine Bourbonnais
http://www.rubico.info/

.....................................................................
   This message contains a digital signature that can be used to 
validate the integrity of its contents and to authenticate the sender.

PGP Key:
   http://www.rubico.info/gpg/
Fingerprint:
   D5C0 5316 7D4F 36E4 CD58 52E4 D9C7 C8CE FC3C EA60
.....................................................................
Index: src/BufferView.C
===================================================================
--- src/BufferView.C	(revision 13875)
+++ src/BufferView.C	(working copy)
@@ -375,9 +375,11 @@
 		cursor().inset().notifyCursorLeaves(cursor());
 
 	// do the dEPM magic if needed
-	if (cursor().inTexted())
+	if (cursor().inTexted()) {
+    	cursor().text()->updateSpelling(cursor(), cur, false);
 		cursor().text()->deleteEmptyParagraphMechanism(cur, cursor());
-
+	}
+	
 	cursor() = cur;
 	cursor().clearSelection();
 	cursor().setTargetX();
Index: src/CutAndPaste.C
===================================================================
--- src/CutAndPaste.C	(revision 13875)
+++ src/CutAndPaste.C	(working copy)
@@ -129,6 +129,11 @@
 	// Make a copy of the CaP paragraphs.
 	ParagraphList insertion = parlist;
 	textclass_type const tc = buffer.params().textclass;
+	
+	// Launch the spell checker on each inserted paragraph
+	ParagraphList::iterator it = insertion.begin();
+	for (; it != insertion.end(); it++)
+	    it->checkAllSpelling(buffer.params());
 
 	// Now remove all out of the pars which is NOT allowed in the
 	// new environment and set also another font if that is required.
Index: src/LColor.C
===================================================================
--- src/LColor.C	(revision 13875)
+++ src/LColor.C	(working copy)
@@ -112,6 +112,7 @@
 	{ greyedoutbg, N_("greyedout inset background"), "greyedoutbg", "linen", "greyedoutbg" },
 	{ depthbar, N_("depth bar"), "depthbar", "IndianRed", "depthbar" },
 	{ language, N_("language"), "language", "Blue", "language" },
+	{ spelling, N_("spelling"), "spelling", "Red", "spelling" },
 	{ command, N_("command inset"), "command", "black", "command" },
 	{ commandbg, N_("command inset background"), "commandbg", "azure", "commandbg" },
 	{ commandframe, N_("command inset frame"), "commandframe", "black", "commandframe" },
Index: src/LColor.h
===================================================================
--- src/LColor.h	(revision 13875)
+++ src/LColor.h	(working copy)
@@ -99,6 +99,8 @@
 		depthbar,
 		/// Color for marking foreign language words
 		language,
+		/// Color for marking misspelled words
+		spelling,
 
 		/// Text color for command insets
 		command,
Index: src/buffer.C
===================================================================
--- src/buffer.C	(revision 13875)
+++ src/buffer.C	(working copy)
@@ -508,6 +508,10 @@
 			if (autobreakrows && (!par.empty() || par.allowEmpty())) {
 				breakParagraph(params(), pars, pit, pos,
 					       par.layout()->isEnvironment());
+			    
+			    //Checking the spelling of the par.
+			    par.checkAllSpelling(params());
+			    
 				++pit;
 				pos = 0;
 				space_inserted = true;
@@ -543,6 +547,9 @@
 		}
 
 	}
+	
+    //Checking the spelling of the last par.
+    pars[pit].checkAllSpelling(params());
 }
 
 
Index: src/lyxtext.h
===================================================================
--- src/lyxtext.h	(revision 13875)
+++ src/lyxtext.h	(working copy)
@@ -171,7 +171,13 @@
 		 pos_type pos, bool setfont = true, bool boundary = false);
 	///
 	void setCurrentFont(LCursor & cur);
-
+	
+	/// Called when the cursor has moved.
+	/**
+	 * Returns true if the row needs to be updated.
+	 */
+	bool updateSpelling(LCursor & oldc, LCursor & newc, bool force = false);
+	
 	///
 	void recUndo(pit_type first, pit_type last) const;
 	///
Index: src/paragraph.C
===================================================================
--- src/paragraph.C	(revision 13875)
+++ src/paragraph.C	(working copy)
@@ -45,6 +45,20 @@
 #include "support/textutils.h"
 #include "support/convert.h"
 
+#include "WordLangTuple.h"
+
+#if defined(USE_ASPELL)
+# include "aspell_local.h"
+#elif defined(USE_PSPELL)
+# include "pspell.h"
+#endif
+
+#if defined(USE_ISPELL)
+# include "ispell.h"
+#else
+# include "SpellBase.h"
+#endif
+
 #include <boost/tuple/tuple.hpp>
 #include <boost/bind.hpp>
 
@@ -56,6 +70,7 @@
 using lyx::pos_type;
 
 using lyx::support::subst;
+using lyx::support::contains;
 
 using std::distance;
 using std::endl;
@@ -544,6 +559,190 @@
 	}
 }
 
+namespace {
+
+//Copied from ControlSpellchecker.C
+//Should probably not be here and should be grouped
+//with the ControlSpellchecker::getSpeller method.
+#warning "Duplicate from ControlSpellchecker.C"
+SpellBase * getSpeller(BufferParams const & bp)
+{
+	string lang = (lyxrc.isp_use_alt_lang)
+	              ? lyxrc.isp_alt_lang
+		      : bp.language->code();
+
+#if defined(USE_ASPELL)
+	if (lyxrc.use_spell_lib)
+		return new ASpell(bp, lang);
+#elif defined(USE_PSPELL)
+	if (lyxrc.use_spell_lib)
+		return new PSpell(bp, lang);
+#endif
+
+#if defined(USE_ISPELL)
+	lang = (lyxrc.isp_use_alt_lang) ?
+		lyxrc.isp_alt_lang : bp.language->lang();
+
+	return new ISpell(bp, lang);
+#else
+	return new SpellBase;
+#endif
+}
+
+
+
+}
+
+bool Paragraph::checkWordSpelling(BufferParams const & bparams, 
+				lyx::pos_type const from, lyx::pos_type to)
+{
+	//lyxerr[Debug::INFO] << "CHECK WORD SPELLING" << std::endl;
+	
+	//Get word
+	std::string word;
+	lyx::pos_type pos = from;
+	for(; pos < to; pos++)
+		word += getChar(pos);
+		
+	//Tests
+	if (word.length() == 0) {
+		setSpelling(from, 0, true); //Word must be removed from the table
+		return false; //Nothing to show
+	}
+		
+	if (word.length() == 1) {
+		setSpelling(from, 1, true);
+		return true; //A single letter is always consider as good
+	}
+	
+	//Extract the language code
+	std::string lang_code = getFontSettings(bparams, from).language()->code();
+	WordLangTuple wordTuple = WordLangTuple(word, lang_code);
+	
+	//Check spelling
+	SpellBase* speller = getSpeller(bparams);
+#warning "It can be better to do not create a new speller object each time"
+	SpellBase::Result res = SpellBase::OK;
+	res = speller->check(wordTuple);
+	delete speller;
+	if (res != SpellBase::OK && res != SpellBase::IGNORED_WORD) {
+		setSpelling(from, to-from, false);
+	} else {
+		setSpelling(from, to-from, true);
+	}
+	
+	return true;
+}
+
+std::string Paragraph::getWordAtPos(lyx::pos_type const pos, 
+				lyx::pos_type & from, lyx::pos_type & to) const
+{
+	std::string word;
+	
+	//Get current word position
+	from = ( pos > 0 ) ? pos - 1 : 0;
+	for (; from > 0; from--) {
+		if (!isSpellcheckerLetter(from)) {
+			from++;
+			break;
+		}
+	}
+	
+	to = from;
+	for (; to < size(); to++) {
+		if (!isSpellcheckerLetter(to))
+			break;
+		if (!isInset(to)) {
+			const char c = getChar(to);
+			word += c;
+		}
+	}
+	
+	return word;
+}
+
+void Paragraph::checkAllSpelling(BufferParams const & bparams)
+{
+	//lyxerr[Debug::INFO] << "CHECK SPELLING (PAR)" << std::endl;
+	
+	pimpl_->spellinglist.clear();
+	
+	lyx::pos_type pos = 0;
+	lyx::pos_type from = 0;
+	lyx::pos_type to = 0;
+	bool inword = false;
+	
+	for(; pos < size(); pos++) {
+		if (isSpellcheckerLetter(pos)) {
+			if (!inword) {
+				inword = true;
+				from = to = pos;
+			}
+			if (!isInset(pos)) {
+				to++;
+			}
+		} else {
+			if (inword)
+				if (to - from > 1)
+					checkWordSpelling(bparams, from, to);
+			inword = false;
+		}
+	}
+
+}
+
+bool Paragraph::isMisspelled(lyx::pos_type cursor_pos) const
+{
+	Pimpl::SpellingList& list = pimpl_->spellinglist;
+	Pimpl::SpellingList::iterator it = list.begin();
+	
+	for (; it != list.end() && it->pos() <= cursor_pos; it++) {
+		if (cursor_pos >= it->pos() && cursor_pos <= it->pos()+it->len()) {
+			return true;
+		}
+	}
+	return false;
+}
+
+void Paragraph::setSpelling(lyx::pos_type word_pos, lyx::pos_type word_len, bool correct)
+{
+	Pimpl::SpellingList& list = pimpl_->spellinglist;
+	
+	if (!correct) {
+		//Add the position to the list
+		Pimpl::SpellingList::iterator it;
+		Pimpl::SpellingList::iterator ins = list.end();
+		for (it=list.begin(); it != list.end(); it++){
+			if (word_pos >= it->pos() && word_pos <= it->pos()+it->len()) {
+				//Already in the list
+				it->pos(word_pos);
+				it->len(word_len);
+				//lyxerr[Debug::INFO] << "UPDATING word to SpellList(" << word_pos << "," 
+				//	<< word_len << ")" << endl;
+				return;
+			} else if (word_pos < it->pos()) {
+				//Not in the list
+				ins = it;
+				break;
+			}
+		}	
+		
+		list.insert(ins, Pimpl::SpellingTable(word_pos, word_len));
+		//lyxerr[Debug::INFO] << "ADDING word to SpellList(" << word_pos << "," 
+		//	<< word_len << ")" << endl;
+
+	} else {
+		//Remove from the list
+		Pimpl::SpellingList::iterator it = list.begin();
+		for (; it != list.end() && it->pos() <= word_pos; it++) {
+			if (word_pos <= it->pos()+it->len()) {
+				list.erase(it);
+				//lyxerr[Debug::INFO] << "REMOVING word from SpellList" << endl;
+				break;
+			}
+		}
+	}
+}
 
 void Paragraph::makeSameLayout(Paragraph const & par)
 {
@@ -1478,6 +1677,13 @@
 	}
 }
 
+/// Used by the inline spellchecker
+bool Paragraph::isSpellcheckerLetter(lyx::pos_type pos) const
+{
+	return (isLetter(pos) || contains(lyxrc.isp_esc_chars + '\'', 
+				getChar(pos))
+		) && !isDeletedText(*this, pos);
+}
 
 Language const *
 Paragraph::getParLanguage(BufferParams const & bparams) const
Index: src/paragraph.h
===================================================================
--- src/paragraph.h	(revision 13875)
+++ src/paragraph.h	(working copy)
@@ -364,6 +364,10 @@
 	// Note that digits in particular are considered as letters
 	bool isLetter(lyx::pos_type pos) const;
 
+	/// True if it is a letter (isLetter) or if the character is escaped or
+	/// must be ignored by the spellchecker.
+	bool isSpellcheckerLetter(lyx::pos_type pos) const;
+
 	/// returns -1 if inset not found
 	int getPositionOfInset(InsetBase const * inset) const;
 
@@ -409,6 +413,21 @@
 
 	/// dump some information to lyxerr
 	void dump() const;
+	
+	/// Return the whole word at pos. Used by the inline spell checker
+	std::string getWordAtPos(lyx::pos_type const pos, 
+		lyx::pos_type & from, lyx::pos_type & to) const;
+	
+	/// return true if the word at this position is misspelled
+	bool isMisspelled(lyx::pos_type cursor_pos) const;
+	/// Indicate if the word beginning at word_pos is 
+	/// correctly spelled (correct=false if misspelled)
+	void setSpelling(lyx::pos_type word_pos, lyx::pos_type word_len, bool correct);
+	// Check the spelling of the word at pos
+	bool checkWordSpelling(BufferParams const & bparams, 
+			lyx::pos_type const from, lyx::pos_type const to);
+	/// Launch the spellchecker on the entire paragraph
+	void checkAllSpelling(BufferParams const & bparams);
 
 public:
 	///
Index: src/paragraph_pimpl.C
===================================================================
--- src/paragraph_pimpl.C	(revision 13875)
+++ src/paragraph_pimpl.C	(working copy)
@@ -74,6 +74,7 @@
 {
 	inset_owner = p.inset_owner;
 	fontlist = p.fontlist;
+	spellinglist = p.spellinglist;
 	id_ = paragraph_id++;
 
 	if (p.tracking())
@@ -301,6 +302,13 @@
 	{
 		it->pos(it->pos() + 1);
 	}
+	
+	// Update the spelling table
+	SpellingList::iterator it = spellinglist.begin();
+	for (; it != spellinglist.end(); it++) {
+		if (it->pos() > pos)
+			it->pos(it->pos() + 1);
+	}
 
 	// Update the insets
 	owner_->insetlist.increasePosAfterPos(pos);
@@ -359,7 +367,37 @@
 	FontList::iterator fend = fontlist.end();
 	for (; it != fend; ++it)
 		it->pos(it->pos() - 1);
-
+		
+	// Update Spelling table
+	
+	SpellingList::iterator sit = spellinglist.begin();
+	SpellingList::iterator prev = spellinglist.begin();
+	for (unsigned int i = 0; i < spellinglist.size(); i++) {
+		sit = spellinglist.begin() + i;
+		prev = sit;
+    	if (i != 0)
+    		prev = spellinglist.begin() + i - 1;
+		
+		if (pos >= sit->pos() && pos <= sit->pos()+sit->len()) {
+		    sit->len(sit->len() - 1);
+		    if (sit->len() == 0) {
+		        spellinglist.erase(sit);
+		        continue;
+		    }
+		}
+		
+		if (sit->pos() > pos)
+			sit->pos(sit->pos() - 1);
+		
+		//When two words are combined
+    	if (prev->pos()+prev->len() == sit->pos()) {
+    	    prev->len(prev->len() + sit->len());
+    	    spellinglist.erase(sit);
+    	    continue;
+    	}
+    	    
+	}
+			
 	// Update the insetlist.
 	owner_->insetlist.decreasePosAfterPos(pos);
 }
Index: src/paragraph_pimpl.h
===================================================================
--- src/paragraph_pimpl.h	(revision 13875)
+++ src/paragraph_pimpl.h	(working copy)
@@ -134,6 +134,33 @@
 	typedef std::vector<FontTable> FontList;
 	///
 	FontList fontlist;
+	
+	/** A spelling entry is a block of words (>1) identified as misspelled
+	 *  by the inline-spellchecker.
+	 */
+	class SpellingTable {
+	public:
+		///
+		SpellingTable(lyx::pos_type p, lyx::pos_type l)
+			: pos_(p), len_(l)
+		{}
+		///
+		lyx::pos_type pos() const { return pos_; }
+		///
+		void pos(lyx::pos_type p) { pos_ = p; }
+		///
+		lyx::pos_type len() const { return len_; }
+		///
+		void len(lyx::pos_type l) { len_ = l; }
+	private:
+		lyx::pos_type pos_;
+		lyx::pos_type len_;
+	};
+	
+	///
+	typedef std::vector<SpellingTable> SpellingList;
+	//
+	SpellingList spellinglist;
 
 	///
 	void simpleTeXBlanks(std::ostream &, TexRow & texrow,
Index: src/rowpainter.C
===================================================================
--- src/rowpainter.C	(revision 13875)
+++ src/rowpainter.C	(working copy)
@@ -73,6 +73,7 @@
 
 private:
 	void paintForeignMark(double orig_x, LyXFont const & font, int desc = 0);
+	void paintSpellMark(double orig_x, lyx::pos_type pos, lyx::pos_type lastpos, int desc = 0);
 	void paintHebrewComposeChar(lyx::pos_type & vpos, LyXFont const & font);
 	void paintArabicComposeChar(lyx::pos_type & vpos, LyXFont const & font);
 	void paintChars(lyx::pos_type & vpos, LyXFont font,
@@ -312,6 +313,50 @@
 }
 
 
+void RowPainter::paintSpellMark(double orig_x, lyx::pos_type pos, lyx::pos_type lastpos, int desc)
+{
+	//The text to draw can be composed by multiple words
+	//but without any space or non-printable char.
+	//
+	//We must extract each word (separated by a non Letter)
+	
+	int xo = int(orig_x);
+	int x = xo;
+	int xn = xo;
+	pos_type orig_pos;
+	
+    while (pos < lastpos) {
+    	
+    	xo = xn;
+    	orig_pos = pos;
+    	
+    	//Goto the end of the word (first non-letter)
+    	while (pos < lastpos) {
+    		LyXFont font = text_.getFont(par_, pos);
+            const int width =
+	        	text_.singleWidth(par_, pos, par_.getChar(pos), font);
+	        	
+	        xn += width;
+	        pos++;
+
+    		if (!par_.isSpellcheckerLetter(pos - 1))
+    		    break;
+    		    
+    	    x = xn;
+       	}
+    	
+        //Check the word
+    	if (!par_.isMisspelled(orig_pos))
+	    	continue;
+	
+	    //If misspelled, underline it
+    	int const y = yo_ + 1 + desc;
+    	pain_.line(xo, y, x, y, 
+    	    LColor::spelling, Painter::line_onoffdash);
+    }
+}
+
+
 void RowPainter::paintFromPos(pos_type & vpos)
 {
 	pos_type const pos = text_.bidi.vis2log(vpos);
@@ -349,6 +394,11 @@
 	}
 
 	paintForeignMark(orig_x, orig_font);
+	
+  	pos_type last_pos = text_.bidi.vis2log(vpos);
+  	if (last_pos == -1)
+  	    last_pos = row_.endpos();
+	paintSpellMark(orig_x, pos, last_pos);
 }
 
 
Index: src/text.C
===================================================================
--- src/text.C	(revision 13875)
+++ src/text.C	(working copy)
@@ -384,6 +384,7 @@
 
 	// Initialize begin_of_body_ on load; redoParagraph maintains
 	par.setBeginOfBody();
+	par.checkAllSpelling(buf.params());
 }
 
 
Index: src/text2.C
===================================================================
--- src/text2.C	(revision 13875)
+++ src/text2.C	(working copy)
@@ -503,7 +503,10 @@
 		else
 			--end;
 	}
-	return setCursor(cur, cur.pit(), end, true, boundary);
+	LCursor old = cur;
+	bool needsUpdate = setCursor(cur, cur.pit(), end, true, boundary);
+	needsUpdate |= updateSpelling(old, cur, false);
+	return needsUpdate;
 }
 
 
@@ -510,7 +513,10 @@
 bool LyXText::cursorTop(LCursor & cur)
 {
 	BOOST_ASSERT(this == cur.text());
-	return setCursor(cur, 0, 0);
+	LCursor old = cur;
+	bool needsUpdate = setCursor(cur, 0, 0);
+	needsUpdate = updateSpelling(old, cur, false);
+	return needsUpdate;
 }
 
 
@@ -517,7 +523,47 @@
 bool LyXText::cursorBottom(LCursor & cur)
 {
 	BOOST_ASSERT(this == cur.text());
-	return setCursor(cur, cur.lastpit(), boost::prior(paragraphs().end())->size());
+	LCursor old = cur;
+	bool needsUpdate = setCursor(cur, cur.lastpit(), boost::prior(paragraphs().end())->size());
+	needsUpdate = updateSpelling(old, cur, false);
+	return needsUpdate;
+}
+
+
+bool LyXText::updateSpelling(LCursor & oldc, LCursor & newc, bool force)
+{
+	if (!oldc.inTexted())
+		return false;
+	
+	//Strategy: We check the word if the cursor is not still on the same word.
+	Paragraph & oldPar = oldc.paragraph();
+	lyx::pos_type from_new, from_old, to_new, to_old;
+	
+	oldPar.getWordAtPos(oldc.pos(), from_old, to_old);
+	
+	bool check = true;
+	if (!force) {
+	    check = false;
+    	if (&newc.top().inset() != &oldc.top().inset() || newc.top().pit() != oldc.top().pit()) {
+    		check = true;
+    	} else {
+			Paragraph const & newPar = newc.paragraph();
+    	    newPar.getWordAtPos(newc.pos(), from_new, to_new);
+        	
+        	if (from_new != from_old)
+        		check = true;
+        	if (from_old == to_old) //Word doesn't exist anymore
+        		check = true; //We must update the spelling table
+    	}
+	}
+        	 
+    if (check) {
+    	if(!oldPar.empty() && to_old <= oldPar.size()) {
+			return oldPar.checkWordSpelling(
+				oldc.buffer().params(), from_old, to_old);
+    	}
+    }
+	return false;
 }
 
 
@@ -661,7 +707,8 @@
 {
 	LCursor old = cur;
 	setCursorIntern(cur, par, pos, setfont, boundary);
-	return deleteEmptyParagraphMechanism(cur, old);
+	return updateSpelling(old, cur, false) | 
+		deleteEmptyParagraphMechanism(cur, old);
 }
 
 
@@ -1004,6 +1051,8 @@
 		// Steps into the paragraph above
 		return setCursor(cur, cur.pit() - 1, getPar(cur.pit() - 1).size());
 	}
+	
+	updateSpelling(cur, cur, true); //Force a spelling update.
 	return false;
 }
 
@@ -1033,6 +1082,8 @@
 
 	if (cur.pit() != cur.lastpit())
 		return setCursor(cur, cur.pit() + 1, 0);
+	
+	updateSpelling(cur, cur, true); //Force a spelling update.
 	return false;
 }
 
@@ -1062,7 +1113,8 @@
 		if (dummy == old)
 			++dummy.pos();
 
-		return deleteEmptyParagraphMechanism(dummy, old);
+		return updateSpelling(old, cur, false) || 
+			deleteEmptyParagraphMechanism(dummy, old);
 	}
 
 	bool updateNeeded = false;
@@ -1107,8 +1159,8 @@
 		LCursor dummy = cur;
 		if (dummy == old)
 			++dummy.pos();
-
-		bool const changed = deleteEmptyParagraphMechanism(dummy, old);
+			
+		bool changed = deleteEmptyParagraphMechanism(dummy, old);
 
 		// Make sure that cur gets back whatever happened to dummy(Lgb)
 		if (changed)
@@ -1114,6 +1166,7 @@
 		if (changed)
 			cur = dummy;
 
+		changed |= updateSpelling(old, cur, false);
 		return changed;
 	}
 
Index: src/text3.C
===================================================================
--- src/text3.C	(revision 13875)
+++ src/text3.C	(working copy)
@@ -116,6 +116,7 @@
 
 	void moveCursor(LCursor & cur, bool selecting)
 	{
+		LCursor old = cur;
 		if (selecting || cur.mark())
 			cur.setSelection();
 		if (!cur.selection())
@@ -272,6 +273,8 @@
 
 	if (gotsel && pastesel)
 		cur.bv().owner()->dispatch(FuncRequest(LFUN_PASTE));
+		
+	(*cur.bv().getLyXText()).updateSpelling(cur, cur, true);
 	return true;
 }
 

Attachment: pgph03pV68Fqr.pgp
Description: PGP signature

Reply via email to