Revision: 7320
http://languagetool.svn.sourceforge.net/languagetool/?rev=7320&view=rev
Author: milek_pl
Date: 2012-06-12 09:41:31 +0000 (Tue, 12 Jun 2012)
Log Message:
-----------
Fix some outstanding bugs with the disambiguator log; add tests.
Modified Paths:
--------------
trunk/JLanguageTool/src/java/org/languagetool/AnalyzedTokenReadings.java
trunk/JLanguageTool/src/java/org/languagetool/tagging/disambiguation/MultiWordChunker.java
trunk/JLanguageTool/src/java/org/languagetool/tagging/disambiguation/rules/DisambiguationPatternRule.java
trunk/JLanguageTool/src/test/org/languagetool/JLanguageToolTest.java
trunk/JLanguageTool/src/test/org/languagetool/LanguageTest.java
trunk/JLanguageTool/src/test/org/languagetool/tagging/disambiguation/rules/fr/FrenchRuleDisambiguatorTest.java
Modified:
trunk/JLanguageTool/src/java/org/languagetool/AnalyzedTokenReadings.java
===================================================================
--- trunk/JLanguageTool/src/java/org/languagetool/AnalyzedTokenReadings.java
2012-06-11 17:39:10 UTC (rev 7319)
+++ trunk/JLanguageTool/src/java/org/languagetool/AnalyzedTokenReadings.java
2012-06-12 09:41:31 UTC (rev 7320)
@@ -401,7 +401,7 @@
* @param historicalAnnotations the historicalAnnotations to set
*/
public void setHistoricalAnnotations(String historicalAnnotations) {
- this.historicalAnnotations = this.historicalAnnotations + "\n" +
historicalAnnotations;
+ this.historicalAnnotations = historicalAnnotations;
}
}
Modified:
trunk/JLanguageTool/src/java/org/languagetool/tagging/disambiguation/MultiWordChunker.java
===================================================================
---
trunk/JLanguageTool/src/java/org/languagetool/tagging/disambiguation/MultiWordChunker.java
2012-06-11 17:39:10 UTC (rev 7319)
+++
trunk/JLanguageTool/src/java/org/languagetool/tagging/disambiguation/MultiWordChunker.java
2012-06-12 09:41:31 UTC (rev 7320)
@@ -139,8 +139,9 @@
anTokens[finalLen].getToken(), "</"
+ mFull.get(tokens.toString()) + ">",
tokens.toString());
oldReading = output[finalLen].toString();
+ final String prevAnot =
output[finalLen].getHistoricalAnnotations();
output[finalLen].addReading(tokenEnd);
- output[finalLen].setHistoricalAnnotations("MULTIWORD_CHUNKER"
+ output[finalLen].setHistoricalAnnotations(prevAnot +
"\nMULTIWORD_CHUNKER"
+ ": " + oldReading + " -> " + output[i].toString());
}
lenCounter++;
@@ -171,8 +172,9 @@
"</" + mFull.get(tokens.toString()) + ">",
tokens.toString());
oldReading = output[i + len - 1].toString();
+ final String prevAnot = output[i + len -
1].getHistoricalAnnotations();
output[i + len - 1].addReading(tokenEnd);
- output[i + len - 1].setHistoricalAnnotations("MULTIWORD_CHUNKER"
+ output[i + len - 1].setHistoricalAnnotations(prevAnot +
"\nMULTIWORD_CHUNKER"
+ ": " + oldReading + " -> " + output[i].toString());
}
}
Modified:
trunk/JLanguageTool/src/java/org/languagetool/tagging/disambiguation/rules/DisambiguationPatternRule.java
===================================================================
---
trunk/JLanguageTool/src/java/org/languagetool/tagging/disambiguation/rules/DisambiguationPatternRule.java
2012-06-11 17:39:10 UTC (rev 7319)
+++
trunk/JLanguageTool/src/java/org/languagetool/tagging/disambiguation/rules/DisambiguationPatternRule.java
2012-06-12 09:41:31 UTC (rev 7320)
@@ -205,15 +205,7 @@
correctedStPos += tokenPositions[l];
}
correctedStPos--;
- }
- int correctedEndPos = 0;
- if (endPositionCorrection < 0) {
- int l = 0;
- while (l > endPositionCorrection) {
- correctedEndPos -= tokenPositions[matchingTokens + l - 1];
- l--;
- }
- }
+ }
final int fromPos = text.getOriginalPosition(firstMatchToken +
correctedStPos);
final int numRead = whTokens[fromPos].getReadingsLength();
final boolean spaceBefore = whTokens[fromPos].isWhitespaceBefore();
@@ -232,8 +224,9 @@
+ i);
unifiedTokens[i].setStartPos(whTokens[position].getStartPos());
final String prevValue = whTokens[position].toString();
+ final String prevAnot =
whTokens[position].getHistoricalAnnotations();
whTokens[position] = unifiedTokens[i];
- annotateChange(whTokens[position], prevValue);
+ annotateChange(whTokens[position], prevValue, prevAnot);
}
}
}
@@ -246,8 +239,9 @@
final int position =
text.getOriginalPosition(firstMatchToken + correctedStPos
+ i);
final String prevValue = whTokens[position].toString();
+ final String prevAnot =
whTokens[position].getHistoricalAnnotations();
whTokens[position].removeReading(newTokenReadings[i]);
- annotateChange(whTokens[position], prevValue);
+ annotateChange(whTokens[position], prevValue, prevAnot);
}
}
}
@@ -274,8 +268,9 @@
final int position = text.getOriginalPosition(firstMatchToken +
correctedStPos
+ i);
final String prevValue = whTokens[position].toString();
+ final String prevAnot =
whTokens[position].getHistoricalAnnotations();
whTokens[position].addReading(newTok);
- annotateChange(whTokens[position], prevValue);
+ annotateChange(whTokens[position], prevValue, prevAnot);
}
}
}
@@ -291,8 +286,9 @@
false, false, Match.IncludeRange.NONE);
tmpMatchToken.setToken(whTokens[fromPos]);
final String prevValue = whTokens[fromPos].toString();
+ final String prevAnot = whTokens[fromPos].getHistoricalAnnotations();
whTokens[fromPos] = tmpMatchToken.filterReadings();
- annotateChange(whTokens[fromPos], prevValue);
+ annotateChange(whTokens[fromPos], prevValue, prevAnot);
filtered = true;
}
case REPLACE:
@@ -326,22 +322,25 @@
whTokens[fromPos].setParaEnd();
}
whTokens[fromPos].setWhitespaceBefore(spaceBefore);
- annotateChange(whTokens[fromPos], prevValue);
+ final String prevAnot = whTokens[fromPos].getHistoricalAnnotations();
+ annotateChange(whTokens[fromPos], prevValue, prevAnot);
} else {
// using the match element
matchElement.setToken(whTokens[fromPos]);
final String prevValue = whTokens[fromPos].toString();
+ final String prevAnot = whTokens[fromPos].getHistoricalAnnotations();
whTokens[fromPos] = matchElement.filterReadings();
whTokens[fromPos].setWhitespaceBefore(spaceBefore);
- annotateChange(whTokens[fromPos], prevValue);
+ annotateChange(whTokens[fromPos], prevValue, prevAnot);
}
}
}
return whTokens;
}
- private void annotateChange(AnalyzedTokenReadings atr, final String
prevValue) {
- atr.setHistoricalAnnotations(this.getId() + ": " + prevValue + " -> " +
atr.toString());
+ private void annotateChange(AnalyzedTokenReadings atr, final String
prevValue, String prevAnot) {
+ atr.setHistoricalAnnotations(prevAnot + "\n" +
+ this.getId() + ": " + prevValue + " -> " + atr.toString());
}
/**
Modified: trunk/JLanguageTool/src/test/org/languagetool/JLanguageToolTest.java
===================================================================
--- trunk/JLanguageTool/src/test/org/languagetool/JLanguageToolTest.java
2012-06-11 17:39:10 UTC (rev 7319)
+++ trunk/JLanguageTool/src/test/org/languagetool/JLanguageToolTest.java
2012-06-12 09:41:31 UTC (rev 7320)
@@ -163,6 +163,20 @@
assertEquals(17, matches.get(0).getColumn());
matches = tool.check("To jest tekst. To jest linia w której nie ma
przecinka.");
assertEquals(24, matches.get(0).getColumn());
+
+ // and let's test other features: the disambiguator log returned by getAnnotations()
+ AnalyzedSentence sent = tool.getAnalyzedSentence("Z powodu pogody dobre
buty są wskazane.");
+ assertEquals("Disambiguator log: "
+ + "\nMULTIWORD_CHUNKER: Z[z/prep:gen.inst] -> Z[z/prep:gen.inst,Z
powodu/<PREP:GEN>]"
+ + "\n\nMULTIWORD_CHUNKER: powodu[powód/subst:sg:gen:m3] ->
Z[z/prep:gen.inst,Z powodu/<PREP:GEN>]\n",
+ sent.getAnnotations());
+ sent = tool.getAnalyzedSentence("Nie mamy żadnej ryby.");
+ assertEquals("Disambiguator log: "
+ + "\n\nNIE_ADAMP:
Nie[nie/qub,on/ppron3:pl:acc:f.m2.m3.n.p2.p3:ter:praep,on/ppron3:sg:acc:n:ter:praep]
-> Nie[nie/qub]"
+ + "\n\nunify_adj_subst:
żadnej[żaden/adj:sg:dat:f:pos,żaden/adj:sg:gen:f:pos,żaden/adj:sg:loc:f:pos] ->
żadnej[żaden/adj:sg:gen:f:pos]" +
+ "\n\nunify_adj_subst:
ryby[ryba/subst:pl:acc:f,ryba/subst:pl:nom:f,ryba/subst:pl:voc:f,ryba/subst:sg:gen:f]
-> ryby[ryba/subst:sg:gen:f]\n",
+ sent.getAnnotations());
+
}
public void testSlovenian() throws IOException {
Modified: trunk/JLanguageTool/src/test/org/languagetool/LanguageTest.java
===================================================================
--- trunk/JLanguageTool/src/test/org/languagetool/LanguageTest.java
2012-06-11 17:39:10 UTC (rev 7319)
+++ trunk/JLanguageTool/src/test/org/languagetool/LanguageTest.java
2012-06-12 09:41:31 UTC (rev 7320)
@@ -26,77 +26,77 @@
public class LanguageTest {
- @Test
- public void testGetLanguageForShortName() {
- assertEquals(Language.AMERICAN_ENGLISH,
Language.getLanguageForShortName("en-US"));
- assertEquals(Language.GERMAN,
Language.getLanguageForShortName("de"));
- }
-
- @Test
- public void testGetShortNameWithVariant() {
- assertEquals("en-US",
Language.AMERICAN_ENGLISH.getShortNameWithVariant());
- assertEquals("de", Language.GERMAN.getShortNameWithVariant());
- }
+ @Test
+ public void testGetLanguageForShortName() {
+ assertEquals(Language.AMERICAN_ENGLISH,
Language.getLanguageForShortName("en-US"));
+ assertEquals(Language.GERMAN, Language.getLanguageForShortName("de"));
+ }
- @Test(expected=IllegalArgumentException.class)
- public void testInvalidShortName1() {
- Language.getLanguageForShortName("de-");
- }
+ @Test
+ public void testGetShortNameWithVariant() {
+ assertEquals("en-US",
Language.AMERICAN_ENGLISH.getShortNameWithVariant());
+ assertEquals("de", Language.GERMAN.getShortNameWithVariant());
+ }
- @Test(expected=IllegalArgumentException.class)
- public void testInvalidShortName2() {
- Language.getLanguageForShortName("dexx");
- }
+ @Test(expected=IllegalArgumentException.class)
+ public void testInvalidShortName1() {
+ Language.getLanguageForShortName("de-");
+ }
- @Test(expected=IllegalArgumentException.class)
- public void testInvalidShortName3() {
- Language.getLanguageForShortName("xyz-xx");
- }
+ @Test(expected=IllegalArgumentException.class)
+ public void testInvalidShortName2() {
+ Language.getLanguageForShortName("dexx");
+ }
- @Test
- public void testGetLanguageForName() {
- assertEquals(Language.AMERICAN_ENGLISH,
Language.getLanguageForName("American English"));
- assertEquals(Language.GERMAN,
Language.getLanguageForName("German"));
- }
+ @Test(expected=IllegalArgumentException.class)
+ public void testInvalidShortName3() {
+ Language.getLanguageForShortName("xyz-xx");
+ }
- @Test
- public void testIsVariant() {
-
assertTrue(Language.getLanguageForShortName("en-US").isVariant());
- assertTrue(Language.getLanguageForShortName("de-CH").isVariant());
+ @Test
+ public void testGetLanguageForName() {
+ assertEquals(Language.AMERICAN_ENGLISH,
Language.getLanguageForName("American English"));
+ assertEquals(Language.GERMAN, Language.getLanguageForName("German"));
+ }
- assertFalse(Language.getLanguageForShortName("en").isVariant());
- assertFalse(Language.getLanguageForShortName("de").isVariant());
- }
+ @Test
+ public void testIsVariant() {
+ assertTrue(Language.getLanguageForShortName("en-US").isVariant());
+ assertTrue(Language.getLanguageForShortName("de-CH").isVariant());
- @Test
- public void testHasVariant() {
- assertTrue(Language.getLanguageForShortName("en").hasVariant());
- assertTrue(Language.getLanguageForShortName("de").hasVariant());
+ assertFalse(Language.getLanguageForShortName("en").isVariant());
+ assertFalse(Language.getLanguageForShortName("de").isVariant());
+ }
- assertFalse(Language.getLanguageForShortName("en-US").hasVariant());
- assertFalse(Language.getLanguageForShortName("de-CH").hasVariant());
- assertFalse(Language.getLanguageForShortName("ast").hasVariant());
- assertFalse(Language.getLanguageForShortName("pl").hasVariant());
+ @Test
+ public void testHasVariant() {
+ assertTrue(Language.getLanguageForShortName("en").hasVariant());
+ assertTrue(Language.getLanguageForShortName("de").hasVariant());
- for (Language language : Language.LANGUAGES) {
- if (language.hasVariant()) {
- assertNotNull("Language " + language + " needs a default variant",
language.getDefaultVariant());
- }
+ assertFalse(Language.getLanguageForShortName("en-US").hasVariant());
+ assertFalse(Language.getLanguageForShortName("de-CH").hasVariant());
+ assertFalse(Language.getLanguageForShortName("ast").hasVariant());
+ assertFalse(Language.getLanguageForShortName("pl").hasVariant());
+
+ for (Language language : Language.LANGUAGES) {
+ if (language.hasVariant()) {
+ assertNotNull("Language " + language + " needs a default
variant", language.getDefaultVariant());
+ }
+ }
}
- }
- @Test
- public void testGetLanguageForLocale() {
- assertEquals(Language.GERMANY_GERMAN, Language.getLanguageForLocale(new
Locale("de", "DE")));
- assertEquals(Language.AUSTRIAN_GERMAN, Language.getLanguageForLocale(new
Locale("de", "AT")));
- assertEquals(Language.AMERICAN_ENGLISH, Language.getLanguageForLocale(new
Locale("en", "US")));
- assertEquals(Language.BRITISH_ENGLISH, Language.getLanguageForLocale(new
Locale("en", "GB")));
- // fallback to the language's default variant if not specified:
- assertEquals(Language.AMERICAN_ENGLISH, Language.getLanguageForLocale(new
Locale("en")));
- assertEquals(Language.GERMANY_GERMAN, Language.getLanguageForLocale(new
Locale("de")));
- // final fallback is everything else fails:
- assertEquals(Language.AMERICAN_ENGLISH,
Language.getLanguageForLocale(Locale.JAPANESE));
- assertEquals(Language.AMERICAN_ENGLISH, Language.getLanguageForLocale(new
Locale("zz")));
- }
+ @Test
+ public void testGetLanguageForLocale() {
+ assertEquals(Language.GERMANY_GERMAN,
Language.getLanguageForLocale(new Locale("de", "DE")));
+ assertEquals(Language.AUSTRIAN_GERMAN,
Language.getLanguageForLocale(new Locale("de", "AT")));
+ assertEquals(Language.AMERICAN_ENGLISH,
Language.getLanguageForLocale(new Locale("en", "US")));
+ assertEquals(Language.BRITISH_ENGLISH,
Language.getLanguageForLocale(new Locale("en", "GB")));
+ // fallback to the language's default variant if not specified:
+ assertEquals(Language.AMERICAN_ENGLISH,
Language.getLanguageForLocale(new Locale("en")));
+ assertEquals(Language.GERMANY_GERMAN,
Language.getLanguageForLocale(new Locale("de")));
+ // final fallback if everything else fails:
+ assertEquals(Language.AMERICAN_ENGLISH,
Language.getLanguageForLocale(Locale.JAPANESE));
+ assertEquals(Language.AMERICAN_ENGLISH,
Language.getLanguageForLocale(new Locale("zz")));
+ }
}
Modified:
trunk/JLanguageTool/src/test/org/languagetool/tagging/disambiguation/rules/fr/FrenchRuleDisambiguatorTest.java
===================================================================
---
trunk/JLanguageTool/src/test/org/languagetool/tagging/disambiguation/rules/fr/FrenchRuleDisambiguatorTest.java
2012-06-11 17:39:10 UTC (rev 7319)
+++
trunk/JLanguageTool/src/test/org/languagetool/tagging/disambiguation/rules/fr/FrenchRuleDisambiguatorTest.java
2012-06-12 09:41:31 UTC (rev 7320)
@@ -22,6 +22,10 @@
import java.io.IOException;
import junit.framework.TestCase;
+
+import org.languagetool.AnalyzedSentence;
+import org.languagetool.Language;
+import org.languagetool.JLanguageTool;
import org.languagetool.TestTools;
import org.languagetool.tagging.disambiguation.xx.DemoDisambiguator;
import org.languagetool.tagging.fr.FrenchTagger;
@@ -34,13 +38,19 @@
private SentenceTokenizer sentenceTokenizer;
private FrenchRuleDisambiguator disambiguator;
private DemoDisambiguator disamb2;
+ private JLanguageTool lt;
public void setUp() {
tagger = new FrenchTagger();
tokenizer = new WordTokenizer();
sentenceTokenizer = new SentenceTokenizer();
disambiguator = new FrenchRuleDisambiguator();
- disamb2 = new DemoDisambiguator();
+ disamb2 = new DemoDisambiguator();
+ try {
+ lt = new JLanguageTool(Language.FRENCH);
+ } catch (IOException e) {
+ fail(e.getMessage());
+ }
}
public void testChunker() throws IOException {
@@ -75,6 +85,14 @@
"/[null]SENT_START Je/[je]R pers suj 1 s /[null]null suis/[suivre]V
imp pres 2 s|suis/[suivre]V ind pres 1 s|suis/[suivre]V ind pres 2
s|suis/[être]V etre ind pres 1 s /[null]null petite/[petit]J f
s|petite/[petit]N f s ./[null]null",
tokenizer, sentenceTokenizer, tagger, disamb2);
}
+
+ public void testAnnotations() throws IOException {
+ AnalyzedSentence sent = lt.getAnalyzedSentence("Les avions");
+ assertEquals(sent.getAnnotations(), "Disambiguator log: \n\n" +
+ "RP-D_N_AMBIG: Les[le/D e p,les/R pers obj 3 p] -> Les[le/D e
p]"+
+ "\nRB-LE_LA_LES: Les[le/D e p] -> Les[le/D e p]" +
+ "\n\nRP-D_N_AMBIG: avions[avoir/V avoir ind impa 1 p,avion/N m
p,avoir/SENT_END] -> avions[avion/N m p,avion/SENT_END]\n");
+ }
}
This was sent by the SourceForge.net collaborative development platform, the
world's largest Open Source development site.
------------------------------------------------------------------------------
Live Security Virtual Conference
Exclusive live event will cover all the ways today's security and
threat landscape has changed and how IT managers can respond. Discussions
will include endpoint security, mobile security and the latest in malware
threats. http://www.accelacomm.com/jaw/sfrnl04242012/114/50122263/
_______________________________________________
Languagetool-cvs mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/languagetool-cvs