[LanguageTool] SF.net SVN: languagetool:[7206] trunk/JLanguageTool

milek_pl Sun, 03 Jun 2012 05:21:22 -0700

Revision: 7206
          
http://languagetool.svn.sourceforge.net/languagetool/?rev=7206&view=rev
Author:   milek_pl
Date:     2012-06-03 12:21:08 +0000 (Sun, 03 Jun 2012)
Log Message:
-----------
new feature: suppress misspelled suggestions


Modified Paths:
--------------
    trunk/JLanguageTool/CHANGES.txt
    trunk/JLanguageTool/src/java/org/languagetool/rules/patterns/Match.java
    
trunk/JLanguageTool/src/java/org/languagetool/rules/patterns/PatternRuleHandler.java
    
trunk/JLanguageTool/src/java/org/languagetool/rules/patterns/PatternRuleMatcher.java
    
trunk/JLanguageTool/src/java/org/languagetool/rules/patterns/XMLRuleHandler.java
    trunk/JLanguageTool/src/java/org/languagetool/synthesis/Synthesizer.java
    
trunk/JLanguageTool/src/java/org/languagetool/tagging/disambiguation/rules/DisambiguationPatternRule.java
    
trunk/JLanguageTool/src/java/org/languagetool/tagging/disambiguation/rules/DisambiguationRuleLoader.java
    trunk/JLanguageTool/src/rules/en/grammar.xml
    trunk/JLanguageTool/src/rules/pattern.xsd
    trunk/JLanguageTool/src/rules/rules.xsd
    trunk/JLanguageTool/src/test/org/languagetool/rules/patterns/MatchTest.java

Modified: trunk/JLanguageTool/CHANGES.txt
===================================================================
--- trunk/JLanguageTool/CHANGES.txt     2012-06-03 11:35:42 UTC (rev 7205)
+++ trunk/JLanguageTool/CHANGES.txt     2012-06-03 12:21:08 UTC (rev 7206)
@@ -78,7 +78,14 @@
 
  -The XML format for rules has been changed to use <marker>...</marker> tags 
instead
   of mark_from and mark_to attributes
-
+  
+ -Now it is possible to suppress misspelled suggestions altogether in XML 
rules by applying 
+  an attribute suppress_misspelled="yes" on the <suggestion> element, AND on 
the <match>
+  element. If only the <match> element has this attribute set to "yes", then the 
suggestion is
+  displayed, but the content of <match> is left out of it (this might be a 
conditional part
+  of the suggestion). Note: for this to work, the tagger dictionary needs to 
be fairly complete;
+  words without lemmas and POS tags are considered to be misspelled.   
+  
  -GUI: made the result of "Tag Text" more readable
 
  -Improved startup speed (Jarek Lipski)
@@ -88,9 +95,10 @@
  -In the profiling rules' mode on the command-line, you can now enable 
   and disable rules.
   
- -Some internal bug fixing in disambiguation and pattern rules.  
+ -Some internal bug fixing in disambiguation and pattern rules.
+ 
+ 
 
-
 1.7 (2012-03-25)
 
  -English:

Modified: 
trunk/JLanguageTool/src/java/org/languagetool/rules/patterns/Match.java
===================================================================
--- trunk/JLanguageTool/src/java/org/languagetool/rules/patterns/Match.java     
2012-06-03 11:35:42 UTC (rev 7205)
+++ trunk/JLanguageTool/src/java/org/languagetool/rules/patterns/Match.java     
2012-06-03 12:21:08 UTC (rev 7206)
@@ -28,6 +28,7 @@
 import org.languagetool.AnalyzedToken;
 import org.languagetool.AnalyzedTokenReadings;
 import org.languagetool.JLanguageTool;
+import org.languagetool.Language;
 import org.languagetool.synthesis.Synthesizer;
 import org.languagetool.tools.StringTools;
 
@@ -80,6 +81,7 @@
 
   private final String posTag;
   private boolean postagRegexp;
+  private final boolean suppressMisspelled;
   private final String regexReplace;
   private final String posTagReplace;
   private final CaseConversion caseConversionType;
@@ -121,6 +123,7 @@
       final boolean postagRegexp, final String regexMatch,
       final String regexReplace, final CaseConversion caseConversionType,
       final boolean setPOS,
+      final boolean suppressMisspelled,
       final IncludeRange includeSkipped) {
     this.posTag = posTag;
     this.postagRegexp = postagRegexp;
@@ -137,6 +140,7 @@
     this.posTagReplace = posTagReplace;
     this.setPos = setPOS;
     this.includeSkipped = includeSkipped;
+    this.suppressMisspelled = suppressMisspelled;
   }
 
   /**
@@ -234,14 +238,24 @@
     synthesizer = synth;
   }
 
+  
   /**
+   * Used to tell whether the Match class will spell-check the result.
+   * @return True if this is so.
+   */
+  public final boolean checksSpelling() {
+      return suppressMisspelled;
+  }
+  
+  /**
    * Gets all strings formatted using the match element.
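+ * @param lang language whose tagger is used to drop misspelled results when suppress_misspelled is set; null skips that check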
    * 
    * @return array of strings
    * @throws IOException
    *           in case of synthesizer-related disk problems.
    */
-  public final String[] toFinalString() throws IOException {
+  public final String[] toFinalString(Language lang) throws IOException {
     String[] formattedString = new String[1];
     if (formattedToken != null) {
       final int readingCount = formattedToken.getReadingsLength();
@@ -287,8 +301,12 @@
               }
             }
           }
-          if (wordForms.isEmpty()) {
-            formattedString[0] = "(" + formattedToken.getToken() + ")";
+          if (wordForms.isEmpty()) {            
+              if (this.suppressMisspelled) {
+                  formattedString[0] = "";
+              } else {
+                  formattedString[0] = "(" + formattedToken.getToken() + ")";
+              }
           } else {
             formattedString = wordForms.toArray(new String[wordForms.size()]);
           }
@@ -324,8 +342,24 @@
         }
         helper[i] = formattedString[i] + skippedTokens;  
       }
-      formattedString = helper;
+      
+          formattedString = helper;      
+    
     }
+    if (this.suppressMisspelled && lang != null) {
+      List<String> formattedStringElements = new 
ArrayList<String>(formattedString.length);
+      for (final String str : formattedString) {
+          formattedStringElements.add(str);
+      }
+      //tagger-based speller
+      List<AnalyzedTokenReadings> analyzed = 
lang.getTagger().tag(formattedStringElements);      
+      for (int i = 0; i < formattedString.length; i++) {
+          if (analyzed.get(i).getAnalyzedToken(0).getLemma() == null
+                  && analyzed.get(i).getAnalyzedToken(0).hasNoTag()) {
+              formattedString[i] = "";
+          }
+      }
+    }
     return formattedString;
   }
 
@@ -393,7 +427,7 @@
    */
   public final String toTokenString() throws IOException {
     final StringBuilder output = new StringBuilder();
-    final String[] stringToFormat = toFinalString();
+    final String[] stringToFormat = toFinalString(null);
     for (int i = 0; i < stringToFormat.length; i++) {
       output.append(stringToFormat[i]);
       if (i + 1 < stringToFormat.length) {

Modified: 
trunk/JLanguageTool/src/java/org/languagetool/rules/patterns/PatternRuleHandler.java
===================================================================
--- 
trunk/JLanguageTool/src/java/org/languagetool/rules/patterns/PatternRuleHandler.java
        2012-06-03 11:35:42 UTC (rev 7205)
+++ 
trunk/JLanguageTool/src/java/org/languagetool/rules/patterns/PatternRuleHandler.java
        2012-06-03 12:21:08 UTC (rev 7206)
@@ -141,7 +141,10 @@
       defaultOn = "on".equals(attrs.getValue(DEFAULT));
       inRuleGroup = true;
       subId = 0;
-    } else if ("suggestion".equals(qName) && inMessage) {
+    } else if ("suggestion".equals(qName) && inMessage) {      
+      if (YES.equals(attrs.getValue("suppress_misspelled"))) {
+          message.append("<pleasespellme/>");
+      }
       message.append("<suggestion>");
       inSuggestion = true;
     } else if (MATCH.equals(qName)) {
@@ -250,7 +253,7 @@
     } else if (RULEGROUP.equals(qName)) {
       inRuleGroup = false;
     } else if ("suggestion".equals(qName) && inMessage) {
-      message.append("</suggestion>");
+      message.append("</suggestion>");      
       inSuggestion = false;
     } else if (MARKER.equals(qName) && inCorrectExample) {
       correctExample.append("</marker>");

Modified: 
trunk/JLanguageTool/src/java/org/languagetool/rules/patterns/PatternRuleMatcher.java
===================================================================
--- 
trunk/JLanguageTool/src/java/org/languagetool/rules/patterns/PatternRuleMatcher.java
        2012-06-03 11:35:42 UTC (rev 7205)
+++ 
trunk/JLanguageTool/src/java/org/languagetool/rules/patterns/PatternRuleMatcher.java
        2012-06-03 12:21:08 UTC (rev 7206)
@@ -155,8 +155,16 @@
       + tokens[lastMatchToken + correctedEndPos].getToken().length();
       if (fromPos < toPos) { // this can happen with some skip="-1" when the 
last
         // token is not matched
+
+         //now do some spell-checking:
+        if (!(errMessage.contains("<pleasespellme/>") && 
+                errMessage.contains("<mistake/>"))) {
+            //remove stupid markers
+        errMessage.replace("<pleasespellme/>", "");
+        errMessage.replace("<mistake/>","");
         return new RuleMatch(rule, fromPos, toPos,
             errMessage, rule.getShortMessage(), startsWithUppercase);
+        }
       } // failed to create any rule match...
       return null;
     }
@@ -360,7 +368,14 @@
         final int skippedTokens = nextTokenPos - tokenIndex;
         suggestionMatches.get(start).setToken(tokens, tokenIndex - 1, 
skippedTokens);
         suggestionMatches.get(start).setSynthesizer(language.getSynthesizer());
-        finalMatch = suggestionMatches.get(start).toFinalString();
+        finalMatch = suggestionMatches.get(start).toFinalString(language);
+        if (suggestionMatches.get(start).checksSpelling()
+                && finalMatch.length == 1
+                && "".equals(finalMatch[0])) {
+            finalMatch = new String[1];
+            finalMatch[0] = "<mistake/>";
+        }
+                
       } else {
         final List<String[]> matchList = new ArrayList<String[]>();
         for (int i = 0; i < len; i++) {
@@ -368,7 +383,7 @@
           suggestionMatches.get(start).setToken(tokens, tokenIndex - 1 + i, 
skippedTokens);
           suggestionMatches.get(start)
                   .setSynthesizer(language.getSynthesizer());
-          matchList.add(suggestionMatches.get(start).toFinalString());
+          matchList.add(suggestionMatches.get(start).toFinalString(language));
         }
         return combineLists(matchList.toArray(new String[matchList.size()][]),
             new String[matchList.size()], 0, language);
@@ -407,25 +422,26 @@
    * @return Combined array of @String.
    */
   private static String[] combineLists(final String[][] input,
-      final String[] output, final int r, final Language lang) {
-    final List<String> outputList = new ArrayList<String>();
-    if (r == input.length) {
-      final StringBuilder sb = new StringBuilder();
-      for (int k = 0; k < output.length; k++) {
-        sb.append(output[k]);
-        if (k < output.length - 1) {
-          sb.append(StringTools.addSpace(output[k + 1], lang));
-        }
+          final String[] output, final int r, final Language lang) {
+      final List<String> outputList = new ArrayList<String>();
+      if (r == input.length) {
+          final StringBuilder sb = new StringBuilder();
+          for (int k = 0; k < output.length; k++) {
+              sb.append(output[k]);
+              if (k < output.length - 1) {
+                  sb.append(StringTools.addSpace(output[k + 1], lang));
+              }
+
+          }
+          outputList.add(sb.toString());
+      } else {
+          for (int c = 0; c < input[r].length; c++) {
+              output[r] = input[r][c];
+              final String[] sList = combineLists(input, output, r + 1, lang);
+              outputList.addAll(Arrays.asList(sList));              
+          }
       }
-      outputList.add(sb.toString());
-    } else {
-      for (int c = 0; c < input[r].length; c++) {
-        output[r] = input[r][c];
-        final String[] sList = combineLists(input, output, r + 1, lang);
-        outputList.addAll(Arrays.asList(sList));
-      }
-    }
-    return outputList.toArray(new String[outputList.size()]);
+      return outputList.toArray(new String[outputList.size()]);
   }
 
 }

Modified: 
trunk/JLanguageTool/src/java/org/languagetool/rules/patterns/XMLRuleHandler.java
===================================================================
--- 
trunk/JLanguageTool/src/java/org/languagetool/rules/patterns/XMLRuleHandler.java
    2012-06-03 11:35:42 UTC (rev 7205)
+++ 
trunk/JLanguageTool/src/java/org/languagetool/rules/patterns/XMLRuleHandler.java
    2012-06-03 12:21:08 UTC (rev 7206)
@@ -325,6 +325,7 @@
         .equals(attrs.getValue(POSTAG_REGEXP)), attrs
         .getValue("regexp_match"), attrs.getValue("regexp_replace"),
         caseConversion, YES.equals(attrs.getValue("setpos")),
+        YES.equals(attrs.getValue("suppress_misspelled")),
         includeRange);
     mWorker.setInMessageOnly(!inSuggestion);
     if (inMessage) {
@@ -460,7 +461,7 @@
         if (Character.isDigit(messageStr.charAt(pos + 1))) {
           if (pos == 1 || messageStr.charAt(pos - 1) != '\u0001') {
             final Match mWorker = new Match(null, null, false, null, 
-                null, Match.CaseConversion.NONE, false, 
Match.IncludeRange.NONE);
+                null, Match.CaseConversion.NONE, false, false, 
Match.IncludeRange.NONE);
             mWorker.setInMessageOnly(true);
             sugMatch.add(mWorker);
           } else if (messageStr.charAt(pos - 1) == '\u0001') { // real 
suggestion marker

Modified: 
trunk/JLanguageTool/src/java/org/languagetool/synthesis/Synthesizer.java
===================================================================
--- trunk/JLanguageTool/src/java/org/languagetool/synthesis/Synthesizer.java    
2012-06-03 11:35:42 UTC (rev 7205)
+++ trunk/JLanguageTool/src/java/org/languagetool/synthesis/Synthesizer.java    
2012-06-03 12:21:08 UTC (rev 7206)
@@ -40,8 +40,8 @@
   /** Generates a form of the word with a given POS tag for a given lemma.
    * POS tag can be specified using regular expressions. 
    * @param token the token to be used for synthesis
-   * @param posTag POS tag of the form to be generated.
-   * @param posTagRegExp Specifies whether the posTag string is a 
+ * @param posTag POS tag of the form to be generated.
+ * @param posTagRegExp Specifies whether the posTag string is a 
    *  regular expression. 
    **/
   public String[] synthesize(AnalyzedToken token, String posTag, boolean 
posTagRegExp) throws IOException;

Modified: 
trunk/JLanguageTool/src/java/org/languagetool/tagging/disambiguation/rules/DisambiguationPatternRule.java
===================================================================
--- 
trunk/JLanguageTool/src/java/org/languagetool/tagging/disambiguation/rules/DisambiguationPatternRule.java
   2012-06-03 11:35:42 UTC (rev 7205)
+++ 
trunk/JLanguageTool/src/java/org/languagetool/tagging/disambiguation/rules/DisambiguationPatternRule.java
   2012-06-03 12:21:08 UTC (rev 7206)
@@ -280,7 +280,7 @@
       if (matchElement == null) { // same as REPLACE if using <match>
         final Match tmpMatchToken = new Match(disambiguatedPOS, null, true,
             disambiguatedPOS, null, Match.CaseConversion.NONE, 
-            false, Match.IncludeRange.NONE);
+            false, false, Match.IncludeRange.NONE);
         tmpMatchToken.setToken(whTokens[fromPos]);        
         whTokens[fromPos] = tmpMatchToken.filterReadings();        
         filtered = true;

Modified: 
trunk/JLanguageTool/src/java/org/languagetool/tagging/disambiguation/rules/DisambiguationRuleLoader.java
===================================================================
--- 
trunk/JLanguageTool/src/java/org/languagetool/tagging/disambiguation/rules/DisambiguationRuleLoader.java
    2012-06-03 11:35:42 UTC (rev 7205)
+++ 
trunk/JLanguageTool/src/java/org/languagetool/tagging/disambiguation/rules/DisambiguationRuleLoader.java
    2012-06-03 12:21:08 UTC (rev 7206)
@@ -173,6 +173,7 @@
           .equals(attrs.getValue(POSTAG_REGEXP)), attrs
           .getValue("regexp_match"), attrs.getValue("regexp_replace"),
           caseConversion, YES.equals(attrs.getValue("setpos")),
+          YES.equals(attrs.getValue("supress_mispelled")),
           includeRange);
       if (inDisambiguation) {
         if (attrs.getValue(NO) != null) {

Modified: trunk/JLanguageTool/src/rules/en/grammar.xml
===================================================================
--- trunk/JLanguageTool/src/rules/en/grammar.xml        2012-06-03 11:35:42 UTC 
(rev 7205)
+++ trunk/JLanguageTool/src/rules/en/grammar.xml        2012-06-03 12:21:08 UTC 
(rev 7206)
@@ -7798,7 +7798,7 @@
                 </marker>                
                 <token><exception postag="NN.*" 
postag_regexp="yes"></exception><exception>'</exception></token>                
             </pattern>
-            <message>Possible agreement error. The noun <match no="2"></match> 
seems to be countable, so probably you should use: <suggestion><match 
no="1"></match> <match no="2" postag="NNS|NNPS" 
postag_regexp="yes"></match></suggestion>.</message>
+            <message>Possible agreement error. The noun <match no="2"></match> 
seems to be countable, so probably you should use: <suggestion 
suppress_misspelled="yes" ><match no="1"></match> <match no="2" 
suppress_misspelled="yes" postag="NNS|NNPS" 
postag_regexp="yes"></match></suggestion>.</message>
             <short>Grammatical problem</short>
             <example correction="five books" type="incorrect">I have 
<marker>five book</marker>.</example>
             <example type="correct">I have <marker>ten 
books</marker>.</example>
@@ -7813,6 +7813,7 @@
             <example type="correct">$800 billion economy was envisioned for 
the 1970s</example>
             <example type="correct">Middle East peace after Israel's 1956 
invasion of Egypt</example>
             <example type="correct">The Apollo 8 mission was well covered in 
the British documentary.</example>
+            <example type="correct">This is 3 H2O.</example>   
         </rule>
         <rule id="MANY_NN" name="Possible agreement error: 'many/several/few' 
+ singular countable noun">
             <pattern>

Modified: trunk/JLanguageTool/src/rules/pattern.xsd
===================================================================
--- trunk/JLanguageTool/src/rules/pattern.xsd   2012-06-03 11:35:42 UTC (rev 
7205)
+++ trunk/JLanguageTool/src/rules/pattern.xsd   2012-06-03 12:21:08 UTC (rev 
7206)
@@ -105,34 +105,36 @@
                <xs:documentation xml:lang="en">Reference to the pattern 
element.
                Might be used in a suggestion or in the 
pattern.</xs:documentation>
        </xs:annotation>
-       <xs:element name="match">       
+       <xs:element name="match">
        <xs:complexType mixed="true">
-                       <xs:attribute name="regexp_match" type="xs:string" 
use="optional" />
-                       <xs:attribute name="postag_regexp" type="binaryYesNo"
-                               use="optional" default="no" />
-                       <xs:attribute name="setpos" type="binaryYesNo" 
use="optional"
-                               default="no" />
-                       <xs:attribute name="case_conversion" use="optional">
-                               <xs:simpleType>
-                                       <xs:restriction base="xs:NMTOKEN">
-                                               <xs:enumeration 
value="startlower" />
-                                               <xs:enumeration 
value="startupper" />
-                                               <xs:enumeration 
value="allupper" />
-                                               <xs:enumeration 
value="alllower" />
-                                               <xs:enumeration 
value="preserve" />
-                                       </xs:restriction>
-                               </xs:simpleType>
-                       </xs:attribute>
-                       <xs:attribute name="regexp_replace" type="xs:string"
-                               use="optional" />
-                       <xs:attribute name="postag_replace" type="xs:string"
-                               use="optional" />
-                       <xs:attribute name="postag" type="xs:string" 
use="optional" />
-                       <xs:attribute name="no" type="xs:nonNegativeInteger"
-                               use="required" />
-                       <xs:attribute name="include_skipped" 
type="includeSelector" use="optional"
-                               default="none"/> 
-               </xs:complexType>
+               <xs:attribute name="regexp_match" type="xs:string"
+                       use="optional" />
+               <xs:attribute name="postag_regexp" type="binaryYesNo"
+                       use="optional" default="no" />
+               <xs:attribute name="setpos" type="binaryYesNo" use="optional"
+                       default="no" />
+               <xs:attribute name="case_conversion" use="optional">
+                       <xs:simpleType>
+                               <xs:restriction base="xs:NMTOKEN">
+                                       <xs:enumeration value="startlower" />
+                                       <xs:enumeration value="startupper" />
+                                       <xs:enumeration value="allupper" />
+                                       <xs:enumeration value="alllower" />
+                                       <xs:enumeration value="preserve" />
+                               </xs:restriction>
+                       </xs:simpleType>
+               </xs:attribute>
+               <xs:attribute name="suppress_misspelled" type="binaryYesNo" 
use="optional"/>
+               <xs:attribute name="regexp_replace" type="xs:string"
+                       use="optional" />
+               <xs:attribute name="postag_replace" type="xs:string"
+                       use="optional" />
+               <xs:attribute name="postag" type="xs:string" use="optional" />
+               <xs:attribute name="no" type="xs:nonNegativeInteger"
+                       use="required" />
+               <xs:attribute name="include_skipped" type="includeSelector"
+                       use="optional" default="none" />
+       </xs:complexType>
        </xs:element>
 
        <xs:element name="token">               

Modified: trunk/JLanguageTool/src/rules/rules.xsd
===================================================================
--- trunk/JLanguageTool/src/rules/rules.xsd     2012-06-03 11:35:42 UTC (rev 
7205)
+++ trunk/JLanguageTool/src/rules/rules.xsd     2012-06-03 12:21:08 UTC (rev 
7206)
@@ -169,11 +169,12 @@
                <xs:documentation xml:lang="en"> Suggestion displayed to
                        the user. </xs:documentation>
        </xs:annotation>
-       <xs:element name="suggestion">
-               <xs:complexType mixed="true">
+       <xs:element name="suggestion">  
+               <xs:complexType mixed="true">           
                        <xs:sequence minOccurs='0' maxOccurs='unbounded'>
                                <xs:element ref="match" />
                        </xs:sequence>
+                       <xs:attribute name="suppress_misspelled" 
type="binaryYesNo" use="optional"/>
                </xs:complexType>
        </xs:element>
 

Modified: 
trunk/JLanguageTool/src/test/org/languagetool/rules/patterns/MatchTest.java
===================================================================
--- trunk/JLanguageTool/src/test/org/languagetool/rules/patterns/MatchTest.java 
2012-06-03 11:35:42 UTC (rev 7205)
+++ trunk/JLanguageTool/src/test/org/languagetool/rules/patterns/MatchTest.java 
2012-06-03 12:21:08 UTC (rev 7206)
@@ -28,6 +28,7 @@
 import org.languagetool.AnalyzedToken;
 import org.languagetool.AnalyzedTokenReadings;
 import org.languagetool.JLanguageTool;
+import org.languagetool.Language;
 import org.languagetool.language.Demo;
 import org.languagetool.rules.patterns.Match.CaseConversion;
 import org.languagetool.rules.patterns.Match.IncludeRange;
@@ -69,13 +70,23 @@
        }
 
        private Match getMatch(String posTag, String posTagReplace, 
CaseConversion caseConversion) throws UnsupportedEncodingException, IOException 
{
-               Match match = new Match(posTag, posTagReplace, true, null, 
null, caseConversion, false, IncludeRange.NONE);
+               Match match = new Match(posTag, posTagReplace, true, null, 
null, caseConversion, false, false, IncludeRange.NONE);
                match.setSynthesizer(synthesizer);
                return match;
        }
+       
+       private Match getMatch(String posTag, String posTagReplace, boolean 
spell) throws UnsupportedEncodingException, IOException {
+        Match match = new Match(posTag, posTagReplace, true, null, null, 
CaseConversion.NONE, false, spell, IncludeRange.NONE);        
+        return match;
+    }
+       
+       private Match getTextMatch(String regexMatch, String regexpReplace, 
boolean spell) throws UnsupportedEncodingException, IOException {
+        Match match = new Match(null, null, false, regexMatch, regexpReplace, 
CaseConversion.NONE, false, spell, IncludeRange.NONE);        
+        return match;
+    }
 
        private Match getMatch(String posTag, String posTagReplace, 
IncludeRange includeRange) throws UnsupportedEncodingException, IOException {
-               Match match = new Match(posTag, posTagReplace, true, null, 
null, CaseConversion.NONE, false, includeRange);
+               Match match = new Match(posTag, posTagReplace, true, null, 
null, CaseConversion.NONE, false, false, includeRange);
                match.setSynthesizer(synthesizer);
                return match;
        }
@@ -110,93 +121,93 @@
        public void testStartUpper() throws Exception {
                Match match = getMatch("POS1", "POS2", 
Match.CaseConversion.STARTUPPER);
                match.setToken(getAnalyzedTokenReadings("inflectedform11", 
"POS1", "Lemma1"));
-               assertEquals("[Inflectedform121, Inflectedform122]", 
Arrays.toString( match.toFinalString()));
+               assertEquals("[Inflectedform121, Inflectedform122]", 
Arrays.toString( match.toFinalString(null)));
        }
 
        public void testStartLower() throws Exception {
                Match match = getMatch("POS1", "POS2", 
Match.CaseConversion.STARTLOWER);
                match.setToken(getAnalyzedTokenReadings("InflectedForm11", 
"POS1", "Lemma1"));
-               assertEquals("[inflectedform121, inflectedform122]", 
Arrays.toString(match.toFinalString()));
+               assertEquals("[inflectedform121, inflectedform122]", 
Arrays.toString(match.toFinalString(null)));
        }
 
        public void testAllUpper() throws Exception {
                Match match = getMatch("POS1", "POS2", 
Match.CaseConversion.ALLUPPER);
                match.setToken(getAnalyzedTokenReadings("InflectedForm11", 
"POS1", "Lemma1"));
-               assertEquals("[INFLECTEDFORM121, INFLECTEDFORM122]", 
Arrays.toString(match.toFinalString()));
+               assertEquals("[INFLECTEDFORM121, INFLECTEDFORM122]", 
Arrays.toString(match.toFinalString(null)));
        }
 
        public void testAllLower() throws Exception {
                Match match = getMatch("POS1", "POS2", 
Match.CaseConversion.ALLLOWER);
                match.setToken(getAnalyzedTokenReadings("InflectedForm11", 
"POS1", "Lemma1"));
-               assertEquals("[inflectedform121, inflectedform122]", 
Arrays.toString(match.toFinalString()));
+               assertEquals("[inflectedform121, inflectedform122]", 
Arrays.toString(match.toFinalString(null)));
        }
 
        public void testPreserveStartUpper() throws Exception {
                Match match = getMatch("POS1", "POS2", 
Match.CaseConversion.PRESERVE);
                match.setToken(getAnalyzedTokenReadings("InflectedForm11", 
"POS1", "Lemma1"));
-               assertEquals("[Inflectedform121, Inflectedform122]", 
Arrays.toString(match.toFinalString()));
+               assertEquals("[Inflectedform121, Inflectedform122]", 
Arrays.toString(match.toFinalString(null)));
        }
        
        public void testStaticLemmaPreserveStartLower() throws Exception {
                Match match = getMatch("POS2", "POS1", 
Match.CaseConversion.PRESERVE);
                match.setLemmaString("lemma2");
                match.setToken(getAnalyzedTokenReadings("inflectedform121", 
"POS2", "Lemma1"));
-               assertEquals("[inflectedform2]", 
Arrays.toString(match.toFinalString()));
+               assertEquals("[inflectedform2]", 
Arrays.toString(match.toFinalString(null)));
        }
        public void testStaticLemmaPreserveStartUpper() throws Exception {
                Match match = getMatch("POS2", "POS1", 
Match.CaseConversion.PRESERVE);
                match.setLemmaString("lemma2");
                match.setToken(getAnalyzedTokenReadings("InflectedForm121", 
"POS2", "Lemma1"));
-               assertEquals("[Inflectedform2]", 
Arrays.toString(match.toFinalString()));
+               assertEquals("[Inflectedform2]", 
Arrays.toString(match.toFinalString(null)));
        }
        public void testStaticLemmaPreserveAllUpper() throws Exception {
                Match match = getMatch("POS2", "POS1", 
Match.CaseConversion.PRESERVE);
                match.setLemmaString("lemma2");
                match.setToken(getAnalyzedTokenReadings("INFLECTEDFORM121", 
"POS2", "Lemma1"));
-               assertEquals("[INFLECTEDFORM2]", 
Arrays.toString(match.toFinalString()));
+               assertEquals("[INFLECTEDFORM2]", 
Arrays.toString(match.toFinalString(null)));
        }
        public void testStaticLemmaPreserveMixed() throws Exception {
                Match match = getMatch("POS2", "POS1", 
Match.CaseConversion.PRESERVE);
                match.setLemmaString("lemma2");
                match.setToken(getAnalyzedTokenReadings("infleCtedForm121", 
"POS2", "Lemma1"));
-               assertEquals("[inflectedform2]", 
Arrays.toString(match.toFinalString()));
+               assertEquals("[inflectedform2]", 
Arrays.toString(match.toFinalString(null)));
        }
        
        public void testPreserveStartLower() throws Exception {
                Match match = getMatch("POS1", "POS2", 
Match.CaseConversion.PRESERVE);
                match.setToken(getAnalyzedTokenReadings("inflectedForm11", 
"POS1", "Lemma1"));
-               assertEquals("[inflectedform121, inflectedform122]", 
Arrays.toString(match.toFinalString()));
+               assertEquals("[inflectedform121, inflectedform122]", 
Arrays.toString(match.toFinalString(null)));
        }
        
        public void testPreserveAllUpper() throws Exception {
                Match match = getMatch("POS1", "POS2", 
Match.CaseConversion.PRESERVE);
                match.setToken(getAnalyzedTokenReadings("INFLECTEDFORM11", 
"POS1", "Lemma1"));
-               assertEquals("[INFLECTEDFORM121, INFLECTEDFORM122]", 
Arrays.toString(match.toFinalString()));
+               assertEquals("[INFLECTEDFORM121, INFLECTEDFORM122]", 
Arrays.toString(match.toFinalString(null)));
        }
        
        public void testPreserveMixed() throws Exception {
                Match match = getMatch("POS1", "POS2", 
Match.CaseConversion.PRESERVE);
                match.setToken(getAnalyzedTokenReadings("inflecTedForm11", 
"POS1", "Lemma1"));
-               assertEquals("[inflectedform121, inflectedform122]", 
Arrays.toString(match.toFinalString()));
+               assertEquals("[inflectedform121, inflectedform122]", 
Arrays.toString(match.toFinalString(null)));
                
        }
 
        public void testPreserveNoneUpper() throws Exception {
                Match match = getMatch("POS1", "POS2", 
Match.CaseConversion.NONE);
                match.setToken(getAnalyzedTokenReadings("INFLECTEDFORM11", 
"POS1", "Lemma1"));
-               assertEquals("[inflectedform121, inflectedform122]", 
Arrays.toString(match.toFinalString()));
+               assertEquals("[inflectedform121, inflectedform122]", 
Arrays.toString(match.toFinalString(null)));
        }
        
        public void testPreserveNoneLower() throws Exception {
                Match match = getMatch("POS1", "POS2", 
Match.CaseConversion.NONE);
                match.setToken(getAnalyzedTokenReadings("inflectedform11", 
"POS1", "Lemma1"));
-               assertEquals("[inflectedform121, inflectedform122]", 
Arrays.toString(match.toFinalString()));
+               assertEquals("[inflectedform121, inflectedform122]", 
Arrays.toString(match.toFinalString(null)));
        }
        
        public void testPreserveNoneMixed() throws Exception {
                Match match = getMatch("POS1", "POS2", 
Match.CaseConversion.NONE);
                match.setToken(getAnalyzedTokenReadings("inFLectedFOrm11", 
"POS1", "Lemma1"));
-               assertEquals("[inflectedform121, inflectedform122]", 
Arrays.toString(match.toFinalString()));
+               assertEquals("[inflectedform121, inflectedform122]", 
Arrays.toString(match.toFinalString(null)));
        }
 
                //-- INCLUDE RANGE 
@@ -204,31 +215,63 @@
        public void testSimpleIncludeFollowing() throws Exception {
                Match match = getMatch(null, null, 
Match.IncludeRange.FOLLOWING);
                match.setToken(getAnalyzedTokenReadings("inflectedform11 
inflectedform2 inflectedform122 inflectedform122"), 1, 3);
-               assertEquals("[inflectedform2 inflectedform122]", 
Arrays.toString(match.toFinalString()));
+               assertEquals("[inflectedform2 inflectedform122]", 
Arrays.toString(match.toFinalString(null)));
        }
 
        public void testPOSIncludeFollowing() throws Exception {
                // POS is ignored when using IncludeRange.Following
                Match match = getMatch("POS2", "POS33", 
Match.IncludeRange.FOLLOWING); 
                match.setToken(getAnalyzedTokenReadings("inflectedform11 
inflectedform2 inflectedform122 inflectedform122"), 1, 3);
-               assertEquals("[inflectedform2 inflectedform122]", 
Arrays.toString(match.toFinalString()));
+               assertEquals("[inflectedform2 inflectedform122]", 
Arrays.toString(match.toFinalString(null)));
        }
        
        public void testIncludeAll() throws Exception {
                Match match = getMatch(null, null, Match.IncludeRange.ALL);
                match.setToken(getAnalyzedTokenReadings("inflectedform11 
inflectedform2 inflectedform122 inflectedform122"), 1, 3);
-               assertEquals("[inflectedform11inflectedform2 
inflectedform122]", Arrays.toString(match.toFinalString()));
-               // the first two tokens come together, it a known issue
+               assertEquals("[inflectedform11inflectedform2 
inflectedform122]", Arrays.toString(match.toFinalString(null)));
+               // the first two tokens come together, it is a known issue
        }
 
        public void testPOSIncludeAll() throws Exception {
                Match match = getMatch("POS1", "POS3", Match.IncludeRange.ALL); 
                match.setToken(getAnalyzedTokenReadings("inflectedform11 
inflectedform2 inflectedform122 inflectedform122"), 1, 3);
-               assertEquals("[inflectedform123inflectedform2 
inflectedform122]", Arrays.toString(match.toFinalString()));
+               assertEquals("[inflectedform123inflectedform2 
inflectedform122]", Arrays.toString(match.toFinalString(null)));
                // Note that in this case the first token has the requested POS 
(POS3 replaces POS1)
-               // the first two tokens come together, it a known issue. 
+               // the first two tokens come together, it is a known issue. 
        }
        
        // TODO ad tests for using Match.IncludeRange with {@link 
Match#staticLemma}
        
+       public void testSpeller() throws Exception {
+           //tests with synthesizer
+        Match match = getMatch("POS1", "POS2", true);
+        match.setSynthesizer(Language.POLISH.getSynthesizer());
+        match.setToken(getAnalyzedTokenReadings("inflectedform11", "POS1", 
"Lemma1"));
+        //getting empty strings, which is what we want
+        assertEquals("[]", Arrays.toString( 
match.toFinalString(Language.POLISH)));
+        
+        // contrast with a speller = false!
+        match = getMatch("POS1", "POS2", false);
+        match.setSynthesizer(Language.POLISH.getSynthesizer());
+        match.setToken(getAnalyzedTokenReadings("inflectedform11", "POS1", 
"Lemma1"));        
+        assertEquals("[(inflectedform11)]", Arrays.toString( 
match.toFinalString(Language.POLISH)));
+        
+        //and now a real word - we should get something
+        match = getMatch("subst:sg:acc.nom:m3", "subst:sg:gen:m3", true);
+        match.setSynthesizer(Language.POLISH.getSynthesizer());
+        match.setToken(getAnalyzedTokenReadings("AON", "subst:sg:acc.nom:m3", 
"AON"));
+        assertEquals("[AON-u]", Arrays.toString( 
match.toFinalString(Language.POLISH)));
+        
+        //and now pure text changes        
+        match = getTextMatch("^(.*)$", "$0-u", true);
+        match.setSynthesizer(Language.POLISH.getSynthesizer());
+        match.setLemmaString("AON");
+        assertEquals("[AON-u]", Arrays.toString( 
match.toFinalString(Language.POLISH)));
+        match.setLemmaString("batalion");
+        //should be empty
+        assertEquals("[]", Arrays.toString( 
match.toFinalString(Language.POLISH)));
+        match.setLemmaString("ASEAN");
+        //and this one not
+        assertEquals("[ASEAN-u]", Arrays.toString( 
match.toFinalString(Language.POLISH)));
+    }
 }

This was sent by the SourceForge.net collaborative development platform, the 
world's largest Open Source development site.


------------------------------------------------------------------------------
Live Security Virtual Conference
Exclusive live event will cover all the ways today's security and 
threat landscape has changed and how IT managers can respond. Discussions 
will include endpoint security, mobile security and the latest in malware 
threats. http://www.accelacomm.com/jaw/sfrnl04242012/114/50122263/
_______________________________________________
Languagetool-cvs mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/languagetool-cvs

[LanguageTool] SF.net SVN: languagetool:[7206] trunk/JLanguageTool

Reply via email to