Revision: 7320
          
http://languagetool.svn.sourceforge.net/languagetool/?rev=7320&view=rev
Author:   milek_pl
Date:     2012-06-12 09:41:31 +0000 (Tue, 12 Jun 2012)
Log Message:
-----------
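Fix some outstanding bugs in the disambiguator log: setHistoricalAnnotations() now assigns instead of implicitly appending, and callers prepend the previous annotations themselves. Add tests.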

Modified Paths:
--------------
    trunk/JLanguageTool/src/java/org/languagetool/AnalyzedTokenReadings.java
    
trunk/JLanguageTool/src/java/org/languagetool/tagging/disambiguation/MultiWordChunker.java
    
trunk/JLanguageTool/src/java/org/languagetool/tagging/disambiguation/rules/DisambiguationPatternRule.java
    trunk/JLanguageTool/src/test/org/languagetool/JLanguageToolTest.java
    trunk/JLanguageTool/src/test/org/languagetool/LanguageTest.java
    
trunk/JLanguageTool/src/test/org/languagetool/tagging/disambiguation/rules/fr/FrenchRuleDisambiguatorTest.java

Modified: 
trunk/JLanguageTool/src/java/org/languagetool/AnalyzedTokenReadings.java
===================================================================
--- trunk/JLanguageTool/src/java/org/languagetool/AnalyzedTokenReadings.java    
2012-06-11 17:39:10 UTC (rev 7319)
+++ trunk/JLanguageTool/src/java/org/languagetool/AnalyzedTokenReadings.java    
2012-06-12 09:41:31 UTC (rev 7320)
@@ -401,7 +401,7 @@
  * @param historicalAnnotations the historicalAnnotations to set
  */
 public void setHistoricalAnnotations(String historicalAnnotations) {
-    this.historicalAnnotations = this.historicalAnnotations + "\n" + 
historicalAnnotations;
+    this.historicalAnnotations = historicalAnnotations;
 }
   
 }

Modified: 
trunk/JLanguageTool/src/java/org/languagetool/tagging/disambiguation/MultiWordChunker.java
===================================================================
--- 
trunk/JLanguageTool/src/java/org/languagetool/tagging/disambiguation/MultiWordChunker.java
  2012-06-11 17:39:10 UTC (rev 7319)
+++ 
trunk/JLanguageTool/src/java/org/languagetool/tagging/disambiguation/MultiWordChunker.java
  2012-06-12 09:41:31 UTC (rev 7320)
@@ -139,8 +139,9 @@
                       anTokens[finalLen].getToken(), "</"
                               + mFull.get(tokens.toString()) + ">", 
tokens.toString());
               oldReading = output[finalLen].toString();
+              final String prevAnot = 
output[finalLen].getHistoricalAnnotations();
               output[finalLen].addReading(tokenEnd);
-              output[finalLen].setHistoricalAnnotations("MULTIWORD_CHUNKER" 
+              output[finalLen].setHistoricalAnnotations(prevAnot + 
"\nMULTIWORD_CHUNKER" 
                       + ": " + oldReading + " -> " + output[i].toString());
             }
             lenCounter++;
@@ -171,8 +172,9 @@
                       "</" + mFull.get(tokens.toString()) + ">",
                       tokens.toString());
               oldReading = output[i + len - 1].toString();
+              final String prevAnot = output[i + len - 
1].getHistoricalAnnotations();
               output[i + len - 1].addReading(tokenEnd);
-              output[i + len - 1].setHistoricalAnnotations("MULTIWORD_CHUNKER" 
+              output[i + len - 1].setHistoricalAnnotations(prevAnot + 
"\nMULTIWORD_CHUNKER" 
                       + ": " + oldReading + " -> " + output[i].toString());
             }
           }

Modified: 
trunk/JLanguageTool/src/java/org/languagetool/tagging/disambiguation/rules/DisambiguationPatternRule.java
===================================================================
--- 
trunk/JLanguageTool/src/java/org/languagetool/tagging/disambiguation/rules/DisambiguationPatternRule.java
   2012-06-11 17:39:10 UTC (rev 7319)
+++ 
trunk/JLanguageTool/src/java/org/languagetool/tagging/disambiguation/rules/DisambiguationPatternRule.java
   2012-06-12 09:41:31 UTC (rev 7320)
@@ -205,15 +205,7 @@
         correctedStPos += tokenPositions[l];
       }
       correctedStPos--;
-    }
-    int correctedEndPos = 0;
-    if (endPositionCorrection < 0) {
-      int l = 0;
-      while (l > endPositionCorrection) {
-        correctedEndPos -= tokenPositions[matchingTokens + l - 1];
-        l--;
-      }
-    }
+    }    
     final int fromPos = text.getOriginalPosition(firstMatchToken + 
correctedStPos);
     final int numRead = whTokens[fromPos].getReadingsLength();   
     final boolean spaceBefore = whTokens[fromPos].isWhitespaceBefore();
@@ -232,8 +224,9 @@
                     + i);            
             unifiedTokens[i].setStartPos(whTokens[position].getStartPos());
             final String prevValue = whTokens[position].toString(); 
+            final String prevAnot = 
whTokens[position].getHistoricalAnnotations();
             whTokens[position] = unifiedTokens[i];
-            annotateChange(whTokens[position], prevValue);
+            annotateChange(whTokens[position], prevValue, prevAnot);
           }          
         }
       }
@@ -246,8 +239,9 @@
                     final int position = 
text.getOriginalPosition(firstMatchToken + correctedStPos
                             + i);
                     final String prevValue = whTokens[position].toString();
+                    final String prevAnot = 
whTokens[position].getHistoricalAnnotations();
                     whTokens[position].removeReading(newTokenReadings[i]);
-                    annotateChange(whTokens[position], prevValue);            
+                    annotateChange(whTokens[position], prevValue, prevAnot);   
         
                 }
             }
         }
@@ -274,8 +268,9 @@
             final int position = text.getOriginalPosition(firstMatchToken + 
correctedStPos
                     + i);
             final String prevValue = whTokens[position].toString();
+            final String prevAnot = 
whTokens[position].getHistoricalAnnotations();
             whTokens[position].addReading(newTok);
-            annotateChange(whTokens[position], prevValue);
+            annotateChange(whTokens[position], prevValue, prevAnot);
           }
         }
       }
@@ -291,8 +286,9 @@
             false, false, Match.IncludeRange.NONE);
         tmpMatchToken.setToken(whTokens[fromPos]);
         final String prevValue = whTokens[fromPos].toString();
+        final String prevAnot = whTokens[fromPos].getHistoricalAnnotations();
         whTokens[fromPos] = tmpMatchToken.filterReadings();    
-        annotateChange(whTokens[fromPos], prevValue);
+        annotateChange(whTokens[fromPos], prevValue, prevAnot);
         filtered = true;
       }
     case REPLACE:
@@ -326,22 +322,25 @@
             whTokens[fromPos].setParaEnd();
           }
           whTokens[fromPos].setWhitespaceBefore(spaceBefore);
-          annotateChange(whTokens[fromPos], prevValue);
+          final String prevAnot = whTokens[fromPos].getHistoricalAnnotations();
+          annotateChange(whTokens[fromPos], prevValue, prevAnot);
         } else {
           // using the match element
           matchElement.setToken(whTokens[fromPos]);
           final String prevValue = whTokens[fromPos].toString();
+          final String prevAnot = whTokens[fromPos].getHistoricalAnnotations();
           whTokens[fromPos] = matchElement.filterReadings();
           whTokens[fromPos].setWhitespaceBefore(spaceBefore);
-          annotateChange(whTokens[fromPos], prevValue);
+          annotateChange(whTokens[fromPos], prevValue, prevAnot);
         }
       }
     }
     return whTokens;
   }
 
-  private void annotateChange(AnalyzedTokenReadings atr, final String 
prevValue) {      
-      atr.setHistoricalAnnotations(this.getId() + ": " + prevValue + " -> " + 
atr.toString());
+  private void annotateChange(AnalyzedTokenReadings atr, final String 
prevValue, String prevAnot) {      
+      atr.setHistoricalAnnotations(prevAnot + "\n" +
+              this.getId() + ": " + prevValue + " -> " + atr.toString());
   }
   
   /**

Modified: trunk/JLanguageTool/src/test/org/languagetool/JLanguageToolTest.java
===================================================================
--- trunk/JLanguageTool/src/test/org/languagetool/JLanguageToolTest.java        
2012-06-11 17:39:10 UTC (rev 7319)
+++ trunk/JLanguageTool/src/test/org/languagetool/JLanguageToolTest.java        
2012-06-12 09:41:31 UTC (rev 7320)
@@ -163,6 +163,20 @@
     assertEquals(17, matches.get(0).getColumn());
     matches = tool.check("To jest tekst. To jest linia w której nie ma 
przecinka.");
     assertEquals(24, matches.get(0).getColumn());
+    
+    //and let's test other feats
+    AnalyzedSentence sent = tool.getAnalyzedSentence("Z powodu pogody dobre 
buty są wskazane.");
+    assertEquals("Disambiguator log: "
+            + "\nMULTIWORD_CHUNKER: Z[z/prep:gen.inst] -> Z[z/prep:gen.inst,Z 
powodu/<PREP:GEN>]"
+            + "\n\nMULTIWORD_CHUNKER: powodu[powód/subst:sg:gen:m3] -> 
Z[z/prep:gen.inst,Z powodu/<PREP:GEN>]\n",
+            sent.getAnnotations());
+    sent = tool.getAnalyzedSentence("Nie mamy żadnej ryby.");
+    assertEquals("Disambiguator log: "
+            + "\n\nNIE_ADAMP: 
Nie[nie/qub,on/ppron3:pl:acc:f.m2.m3.n.p2.p3:ter:praep,on/ppron3:sg:acc:n:ter:praep]
 -> Nie[nie/qub]"
+            + "\n\nunify_adj_subst: 
żadnej[żaden/adj:sg:dat:f:pos,żaden/adj:sg:gen:f:pos,żaden/adj:sg:loc:f:pos] -> 
żadnej[żaden/adj:sg:gen:f:pos]" +
+            "\n\nunify_adj_subst: 
ryby[ryba/subst:pl:acc:f,ryba/subst:pl:nom:f,ryba/subst:pl:voc:f,ryba/subst:sg:gen:f]
 -> ryby[ryba/subst:sg:gen:f]\n",
+            sent.getAnnotations());
+    
   }
   
   public void testSlovenian() throws IOException {

Modified: trunk/JLanguageTool/src/test/org/languagetool/LanguageTest.java
===================================================================
--- trunk/JLanguageTool/src/test/org/languagetool/LanguageTest.java     
2012-06-11 17:39:10 UTC (rev 7319)
+++ trunk/JLanguageTool/src/test/org/languagetool/LanguageTest.java     
2012-06-12 09:41:31 UTC (rev 7320)
@@ -26,77 +26,77 @@
 
 public class LanguageTest {
 
-       @Test
-       public void testGetLanguageForShortName() {
-               assertEquals(Language.AMERICAN_ENGLISH, 
Language.getLanguageForShortName("en-US"));
-               assertEquals(Language.GERMAN, 
Language.getLanguageForShortName("de"));
-       }
-       
-       @Test
-       public void testGetShortNameWithVariant() {
-               assertEquals("en-US", 
Language.AMERICAN_ENGLISH.getShortNameWithVariant());
-               assertEquals("de", Language.GERMAN.getShortNameWithVariant());
-       }
+    @Test
+    public void testGetLanguageForShortName() {
+        assertEquals(Language.AMERICAN_ENGLISH, 
Language.getLanguageForShortName("en-US"));
+        assertEquals(Language.GERMAN, Language.getLanguageForShortName("de"));
+    }
 
-  @Test(expected=IllegalArgumentException.class)
-       public void testInvalidShortName1() {
-    Language.getLanguageForShortName("de-");
-  }
+    @Test
+    public void testGetShortNameWithVariant() {
+        assertEquals("en-US", 
Language.AMERICAN_ENGLISH.getShortNameWithVariant());
+        assertEquals("de", Language.GERMAN.getShortNameWithVariant());
+    }
 
-  @Test(expected=IllegalArgumentException.class)
-       public void testInvalidShortName2() {
-    Language.getLanguageForShortName("dexx");
-  }
+    @Test(expected=IllegalArgumentException.class)
+    public void testInvalidShortName1() {
+        Language.getLanguageForShortName("de-");
+    }
 
-  @Test(expected=IllegalArgumentException.class)
-       public void testInvalidShortName3() {
-    Language.getLanguageForShortName("xyz-xx");
-  }
+    @Test(expected=IllegalArgumentException.class)
+    public void testInvalidShortName2() {
+        Language.getLanguageForShortName("dexx");
+    }
 
-       @Test
-       public void testGetLanguageForName() {
-               assertEquals(Language.AMERICAN_ENGLISH, 
Language.getLanguageForName("American English"));
-               assertEquals(Language.GERMAN, 
Language.getLanguageForName("German"));
-       }
+    @Test(expected=IllegalArgumentException.class)
+    public void testInvalidShortName3() {
+        Language.getLanguageForShortName("xyz-xx");
+    }
 
-  @Test
-       public void testIsVariant() {
-               
assertTrue(Language.getLanguageForShortName("en-US").isVariant());
-    assertTrue(Language.getLanguageForShortName("de-CH").isVariant());
+    @Test
+    public void testGetLanguageForName() {
+        assertEquals(Language.AMERICAN_ENGLISH, 
Language.getLanguageForName("American English"));
+        assertEquals(Language.GERMAN, Language.getLanguageForName("German"));
+    }
 
-               assertFalse(Language.getLanguageForShortName("en").isVariant());
-    assertFalse(Language.getLanguageForShortName("de").isVariant());
-       }
+    @Test
+    public void testIsVariant() {
+        assertTrue(Language.getLanguageForShortName("en-US").isVariant());
+        assertTrue(Language.getLanguageForShortName("de-CH").isVariant());
 
-  @Test
-       public void testHasVariant() {
-               assertTrue(Language.getLanguageForShortName("en").hasVariant());
-    assertTrue(Language.getLanguageForShortName("de").hasVariant());
+        assertFalse(Language.getLanguageForShortName("en").isVariant());
+        assertFalse(Language.getLanguageForShortName("de").isVariant());
+    }
 
-    assertFalse(Language.getLanguageForShortName("en-US").hasVariant());
-    assertFalse(Language.getLanguageForShortName("de-CH").hasVariant());
-    assertFalse(Language.getLanguageForShortName("ast").hasVariant());
-    assertFalse(Language.getLanguageForShortName("pl").hasVariant());
+    @Test
+    public void testHasVariant() {
+        assertTrue(Language.getLanguageForShortName("en").hasVariant());
+        assertTrue(Language.getLanguageForShortName("de").hasVariant());
 
-    for (Language language : Language.LANGUAGES) {
-      if (language.hasVariant()) {
-        assertNotNull("Language " + language + " needs a default variant", 
language.getDefaultVariant());
-      }
+        assertFalse(Language.getLanguageForShortName("en-US").hasVariant());
+        assertFalse(Language.getLanguageForShortName("de-CH").hasVariant());
+        assertFalse(Language.getLanguageForShortName("ast").hasVariant());
+        assertFalse(Language.getLanguageForShortName("pl").hasVariant());
+
+        for (Language language : Language.LANGUAGES) {
+            if (language.hasVariant()) {
+                assertNotNull("Language " + language + " needs a default 
variant", language.getDefaultVariant());
+            }
+        }
     }
-       }
 
-  @Test
-       public void testGetLanguageForLocale() {
-    assertEquals(Language.GERMANY_GERMAN, Language.getLanguageForLocale(new 
Locale("de", "DE")));
-    assertEquals(Language.AUSTRIAN_GERMAN, Language.getLanguageForLocale(new 
Locale("de", "AT")));
-    assertEquals(Language.AMERICAN_ENGLISH, Language.getLanguageForLocale(new 
Locale("en", "US")));
-    assertEquals(Language.BRITISH_ENGLISH, Language.getLanguageForLocale(new 
Locale("en", "GB")));
-    // fallback to the language's default variant if not specified:
-    assertEquals(Language.AMERICAN_ENGLISH, Language.getLanguageForLocale(new 
Locale("en")));
-    assertEquals(Language.GERMANY_GERMAN, Language.getLanguageForLocale(new 
Locale("de")));
-    // final fallback is everything else fails:
-    assertEquals(Language.AMERICAN_ENGLISH, 
Language.getLanguageForLocale(Locale.JAPANESE));
-    assertEquals(Language.AMERICAN_ENGLISH, Language.getLanguageForLocale(new 
Locale("zz")));
-  }
+    @Test
+    public void testGetLanguageForLocale() {
+        assertEquals(Language.GERMANY_GERMAN, 
Language.getLanguageForLocale(new Locale("de", "DE")));
+        assertEquals(Language.AUSTRIAN_GERMAN, 
Language.getLanguageForLocale(new Locale("de", "AT")));
+        assertEquals(Language.AMERICAN_ENGLISH, 
Language.getLanguageForLocale(new Locale("en", "US")));
+        assertEquals(Language.BRITISH_ENGLISH, 
Language.getLanguageForLocale(new Locale("en", "GB")));
+        // fallback to the language's default variant if not specified:
+        assertEquals(Language.AMERICAN_ENGLISH, 
Language.getLanguageForLocale(new Locale("en")));
+        assertEquals(Language.GERMANY_GERMAN, 
Language.getLanguageForLocale(new Locale("de")));
+        // final fallback is everything else fails:
+        assertEquals(Language.AMERICAN_ENGLISH, 
Language.getLanguageForLocale(Locale.JAPANESE));
+        assertEquals(Language.AMERICAN_ENGLISH, 
Language.getLanguageForLocale(new Locale("zz")));
+    }
 
 }

Modified: 
trunk/JLanguageTool/src/test/org/languagetool/tagging/disambiguation/rules/fr/FrenchRuleDisambiguatorTest.java
===================================================================
--- 
trunk/JLanguageTool/src/test/org/languagetool/tagging/disambiguation/rules/fr/FrenchRuleDisambiguatorTest.java
      2012-06-11 17:39:10 UTC (rev 7319)
+++ 
trunk/JLanguageTool/src/test/org/languagetool/tagging/disambiguation/rules/fr/FrenchRuleDisambiguatorTest.java
      2012-06-12 09:41:31 UTC (rev 7320)
@@ -22,6 +22,10 @@
 import java.io.IOException;
 
 import junit.framework.TestCase;
+
+import org.languagetool.AnalyzedSentence;
+import org.languagetool.Language;
+import org.languagetool.JLanguageTool;
 import org.languagetool.TestTools;
 import org.languagetool.tagging.disambiguation.xx.DemoDisambiguator;
 import org.languagetool.tagging.fr.FrenchTagger;
@@ -34,13 +38,19 @@
   private SentenceTokenizer sentenceTokenizer;
   private FrenchRuleDisambiguator disambiguator;
   private DemoDisambiguator disamb2;
+  private JLanguageTool lt; 
   
   public void setUp() {
     tagger = new FrenchTagger();
     tokenizer = new WordTokenizer();
     sentenceTokenizer = new SentenceTokenizer();
     disambiguator = new FrenchRuleDisambiguator();
-    disamb2 = new DemoDisambiguator(); 
+    disamb2 = new DemoDisambiguator();    
+    try {
+        lt = new JLanguageTool(Language.FRENCH);
+    } catch (IOException e) {
+        fail(e.getMessage());
+    }
   }
 
   public void testChunker() throws IOException {
@@ -75,6 +85,14 @@
         "/[null]SENT_START Je/[je]R pers suj 1 s  /[null]null suis/[suivre]V 
imp pres 2 s|suis/[suivre]V ind pres 1 s|suis/[suivre]V ind pres 2 
s|suis/[être]V etre ind pres 1 s  /[null]null petite/[petit]J f 
s|petite/[petit]N f s ./[null]null", 
         tokenizer, sentenceTokenizer, tagger, disamb2);
   }
+
+  public void testAnnotations() throws IOException {
+     AnalyzedSentence sent = lt.getAnalyzedSentence("Les avions");
+     assertEquals(sent.getAnnotations(), "Disambiguator log: \n\n" +
+               "RP-D_N_AMBIG: Les[le/D e p,les/R pers obj 3 p] -> Les[le/D e 
p]"+
+             "\nRB-LE_LA_LES: Les[le/D e p] -> Les[le/D e p]" +
+               "\n\nRP-D_N_AMBIG: avions[avoir/V avoir ind impa 1 p,avion/N m 
p,avoir/SENT_END] -> avions[avion/N m p,avion/SENT_END]\n");
+  }
   
 }
 

This was sent by the SourceForge.net collaborative development platform, the 
world's largest Open Source development site.


------------------------------------------------------------------------------
Live Security Virtual Conference
Exclusive live event will cover all the ways today's security and 
threat landscape has changed and how IT managers can respond. Discussions 
will include endpoint security, mobile security and the latest in malware 
threats. http://www.accelacomm.com/jaw/sfrnl04242012/114/50122263/
_______________________________________________
Languagetool-cvs mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/languagetool-cvs

Reply via email to