Revision: 7660
          
http://languagetool.svn.sourceforge.net/languagetool/?rev=7660&view=rev
Author:   milek_pl
Date:     2012-07-07 09:48:09 +0000 (Sat, 07 Jul 2012)
Log Message:
-----------
remove hack and make class safer for different tokenizers that result in 
variable-length separators

Modified Paths:
--------------
    
trunk/JLanguageTool/src/java/org/languagetool/rules/br/MorfologikBretonSpellerRule.java
    
trunk/JLanguageTool/src/java/org/languagetool/rules/spelling/morfologik/MorfologikSpellerRule.java

Modified: 
trunk/JLanguageTool/src/java/org/languagetool/rules/br/MorfologikBretonSpellerRule.java
===================================================================
--- 
trunk/JLanguageTool/src/java/org/languagetool/rules/br/MorfologikBretonSpellerRule.java
     2012-07-07 09:01:46 UTC (rev 7659)
+++ 
trunk/JLanguageTool/src/java/org/languagetool/rules/br/MorfologikBretonSpellerRule.java
     2012-07-07 09:48:09 UTC (rev 7660)
@@ -51,9 +51,4 @@
     return BRETON_TOKENIZING_CHARS;
   }
 
-  @Override
-  public int separatorLength() {
-    return 1;
-  }
-
 }

Modified: 
trunk/JLanguageTool/src/java/org/languagetool/rules/spelling/morfologik/MorfologikSpellerRule.java
===================================================================
--- 
trunk/JLanguageTool/src/java/org/languagetool/rules/spelling/morfologik/MorfologikSpellerRule.java
  2012-07-07 09:01:46 UTC (rev 7659)
+++ 
trunk/JLanguageTool/src/java/org/languagetool/rules/spelling/morfologik/MorfologikSpellerRule.java
  2012-07-07 09:48:09 UTC (rev 7660)
@@ -26,6 +26,7 @@
 import java.util.List;
 import java.util.Locale;
 import java.util.ResourceBundle;
+import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 
 import morfologik.speller.Speller;
@@ -73,7 +74,7 @@
     
     @Override
     public RuleMatch[] match(AnalyzedSentence text) throws IOException {
-        
+
         final List<RuleMatch> ruleMatches = new ArrayList<RuleMatch>();
         final AnalyzedTokenReadings[] tokens = 
text.getTokensWithoutWhitespace();
         //lazy init
@@ -93,14 +94,22 @@
                 if (tokenizingPattern() == null) {
                     ruleMatches.addAll(getRuleMatch(word, 
token.getStartPos()));
                 } else {
-                    int i = 0;
-                    for (final String internalSplit : 
tokenizingPattern().split(word)) {
-                        ruleMatches.addAll(getRuleMatch(internalSplit, 
token.getStartPos() + i));
-                        i += internalSplit.length() + separatorLength();
+                    int index = 0;
+                    final Matcher m = tokenizingPattern().matcher(word);
+                    while(m.find()) {
+                        final String match = word.subSequence(index, 
m.start()).toString();
+                        index = m.end();
+                        ruleMatches.addAll(getRuleMatch(match, 
token.getStartPos() + index));
                     }
+                    if (index == 0) { // tokenizing char not found
+                        ruleMatches.addAll(getRuleMatch(word, 
token.getStartPos()));
+                    } else {
+                        ruleMatches.addAll(getRuleMatch(word.subSequence(
+                                index, word.length()).toString(), 
token.getStartPos() + index)); 
+                    }
                 }
             }
-        }
+        }        
         return toRuleMatchArray(ruleMatches);
     }
     
@@ -142,13 +151,16 @@
         }
         return false;
     }
-    
+               
+    /**
+     * Get the regular expression pattern used to tokenize
+     * the words as in the source dictionary. For example,
+     * it may contain a hyphen, if the words with hyphens are
+     * not included in the dictionary
+     * @return A compiled {@link #Pattern} that is used to tokenize words. 
+     */
     public Pattern tokenizingPattern() {
         return null;
     }
     
-    public int separatorLength() {
-        return 0;
-    }
-
 }

This was sent by the SourceForge.net collaborative development platform, the 
world's largest Open Source development site.


------------------------------------------------------------------------------
Live Security Virtual Conference
Exclusive live event will cover all the ways today's security and 
threat landscape has changed and how IT managers can respond. Discussions 
will include endpoint security, mobile security and the latest in malware 
threats. http://www.accelacomm.com/jaw/sfrnl04242012/114/50122263/
_______________________________________________
Languagetool-cvs mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/languagetool-cvs

Reply via email to