languagetool

archeus Mon, 16 Jan 2012 07:19:01 -0800

Revision: 6263
          
http://languagetool.svn.sourceforge.net/languagetool/?rev=6263&view=rev
Author:   archeus
Date:     2012-01-16 15:18:46 +0000 (Mon, 16 Jan 2012)
Log Message:
-----------
MatchTest: refactoring + adding some tests for Match.IncludeRange


Modified Paths:
--------------
    trunk/JLanguageTool/src/test/org/languagetool/rules/patterns/MatchTest.java

Added Paths:
-----------
    
trunk/JLanguageTool/src/test/org/languagetool/synthesis/ManualSynthesizerAdapter.java
    
trunk/JLanguageTool/src/test/org/languagetool/tokenizers/ManualTaggerAdapter.java

Modified: 
trunk/JLanguageTool/src/test/org/languagetool/rules/patterns/MatchTest.java
===================================================================
--- trunk/JLanguageTool/src/test/org/languagetool/rules/patterns/MatchTest.java 
2012-01-16 12:48:59 UTC (rev 6262)
+++ trunk/JLanguageTool/src/test/org/languagetool/rules/patterns/MatchTest.java 
2012-01-16 15:18:46 UTC (rev 6263)
@@ -21,21 +21,22 @@
 import java.io.ByteArrayInputStream;
 import java.io.IOException;
 import java.io.UnsupportedEncodingException;
-import java.util.ArrayList;
 import java.util.Arrays;
-import java.util.List;
 
 import junit.framework.TestCase;
-import morfologik.stemming.IStemmer;
-import morfologik.stemming.WordData;
 
 import org.languagetool.AnalyzedToken;
 import org.languagetool.AnalyzedTokenReadings;
+import org.languagetool.JLanguageTool;
+import org.languagetool.language.Demo;
 import org.languagetool.rules.patterns.Match.CaseConversion;
 import org.languagetool.rules.patterns.Match.IncludeRange;
-import org.languagetool.synthesis.BaseSynthesizer;
 import org.languagetool.synthesis.ManualSynthesizer;
+import org.languagetool.synthesis.ManualSynthesizerAdapter;
 import org.languagetool.synthesis.Synthesizer;
+import org.languagetool.tagging.ManualTagger;
+import org.languagetool.tagging.Tagger;
+import org.languagetool.tokenizers.ManualTaggerAdapter;
 
 /**
  * Test class for {@link Match}.
@@ -43,65 +44,65 @@
  */
 public class MatchTest extends TestCase {
 
-       /**
-        *  Adapter from {@link ManualSynthesizer} to {@link Synthesizer}. 
<br/> 
-        *  Note: This could be extracted as a standalone class.
-        */
-       public static class ManualSynthesizerAdapter extends BaseSynthesizer 
implements Synthesizer  {
-               private ManualSynthesizer manualSynthesizer;
-               public ManualSynthesizerAdapter(ManualSynthesizer 
manualSynthesizer) {
-                       super(null, null); // no file
-                       this.manualSynthesizer = manualSynthesizer;
-               }
-               @Override
-               protected void initSynthesizer() throws IOException {
-                       synthesizer = new IStemmer() { // null synthesiser 
-                               @Override
-                               public List<WordData> lookup(CharSequence word) 
{
-                                       return new ArrayList<WordData>();
-                               }
-                       };
-               }
-               @Override
-               protected void initPossibleTags() throws IOException {
-                       if (possibleTags == null) {
-                               possibleTags = new 
ArrayList<String>(manualSynthesizer.getPossibleTags());
-                       }
-               }
-               @Override
-               protected void lookup(String lemma, String posTag, List<String> 
results) {
-                       super.lookup(lemma, posTag, results);
-                       // add words that are missing from the 
romanian_synth.dict file
-                       final List<String> manualForms = 
manualSynthesizer.lookup(lemma.toLowerCase(), posTag);
-                       if (manualForms != null) {
-                               results.addAll(manualForms); 
-                       }
-               }
-       }
+       final static String TEST_DATA = 
+                       "# some test data\n" +
+                                       "inflectedform11\tlemma1\tPOS1\n" +
+                                       "inflectedform121\tlemma1\tPOS2\n" +
+                                       "inflectedform122\tlemma1\tPOS2\n" +
+                                       "inflectedform123\tlemma1\tPOS3\n" +
+                                       "inflectedform2\tlemma2\tPOS1\n"
+                                       ;
+
+       protected JLanguageTool languageTool;
+       protected Synthesizer synthesizer;
+       protected Tagger tagger;
        
+       
        //-- helper methods
        
-       private Synthesizer getTestSynthesizer() throws 
UnsupportedEncodingException, IOException {
-                final String data = 
-                             "# some test data\n" +
-                             "inflectedform11\tlemma1\tPOS1\n" +
-                             "inflectedform121\tlemma1\tPOS2\n" +
-                             "inflectedform122\tlemma1\tPOS2\n" +
-                             "inflectedform2\tlemma2\tPOS1\n"
-                             ;
-               return new ManualSynthesizerAdapter(new ManualSynthesizer(new 
ByteArrayInputStream(data.getBytes("UTF-8"))));
+       private AnalyzedTokenReadings[] getAnalyzedTokenReadings(final String 
input) throws IOException {
+          return 
languageTool.getAnalyzedSentence(input).getTokensWithoutWhitespace();
        }
-
+       
        private AnalyzedTokenReadings getAnalyzedTokenReadings(String token, 
String posTag, String lemma) {
                return new AnalyzedTokenReadings(new AnalyzedToken(token, 
posTag, lemma), 0);
        }
 
        private Match getMatch(String posTag, String posTagReplace, 
CaseConversion caseConversion) throws UnsupportedEncodingException, IOException 
{
                Match match = new Match(posTag, posTagReplace, true, null, 
null, caseConversion, false, IncludeRange.NONE);
-               match.setSynthesizer(getTestSynthesizer());
+               match.setSynthesizer(synthesizer);
                return match;
        }
 
+       private Match getMatch(String posTag, String posTagReplace, 
IncludeRange includeRange) throws UnsupportedEncodingException, IOException {
+               Match match = new Match(posTag, posTagReplace, true, null, 
null, CaseConversion.NONE, false, includeRange);
+               match.setSynthesizer(synthesizer);
+               return match;
+       }
+
+       //-- setup
+
+       @Override
+       protected void setUp() throws Exception {
+               super.setUp();
+               tagger = new ManualTaggerAdapter(new ManualTagger(new 
ByteArrayInputStream(TEST_DATA.getBytes("UTF-8"))));
+               synthesizer = new ManualSynthesizerAdapter(new 
ManualSynthesizer(new ByteArrayInputStream(TEST_DATA.getBytes("UTF-8"))));
+               languageTool = new JLanguageTool(new Demo() {
+                       @Override
+                       public String getName() {
+                               return "TEST";
+                       }
+                       @Override
+                       public Synthesizer getSynthesizer() {
+                               return MatchTest.this.synthesizer;
+                       }
+                       @Override
+                       public Tagger getTagger() {
+                               return MatchTest.this.tagger;
+                       }
+               });
+       }
+       
        //-- test methods
        
                //-- CASE CONVERSION
@@ -145,7 +146,7 @@
        public void testPreserveAllUpper() throws Exception {
                Match match = getMatch("POS1", "POS2", 
Match.CaseConversion.PRESERVE);
                match.setToken(getAnalyzedTokenReadings("INFLECTEDFORM11", 
"POS1", "Lemma1"));
-               assertEquals("[INFLECTEDFORM121, INFLECTEDFORM122]", 
Arrays.toString( match.toFinalString()));
+               assertEquals("[INFLECTEDFORM121, INFLECTEDFORM122]", 
Arrays.toString(match.toFinalString()));
        }
        
        public void testPreserveMixed() throws Exception {
@@ -173,4 +174,34 @@
                assertEquals("[inflectedform121, inflectedform122]", 
Arrays.toString(match.toFinalString()));
        }
 
+               //-- INCLUDE RANGE 
+       
+       public void testSimpleIncludeFollowing() throws Exception {
+               Match match = getMatch(null, null, 
Match.IncludeRange.FOLLOWING);
+               match.setToken(getAnalyzedTokenReadings("inflectedform11 
inflectedform2 inflectedform122 inflectedform122"), 1, 3);
+               assertEquals("[inflectedform2 inflectedform122]", 
Arrays.toString(match.toFinalString()));
+       }
+
+       public void testPOSIncludeFollowing() throws Exception {
+               // POS is ignored when using IncludeRange.FOLLOWING
+               Match match = getMatch("POS2", "POS33", 
Match.IncludeRange.FOLLOWING); 
+               match.setToken(getAnalyzedTokenReadings("inflectedform11 
inflectedform2 inflectedform122 inflectedform122"), 1, 3);
+               assertEquals("[inflectedform2 inflectedform122]", 
Arrays.toString(match.toFinalString()));
+       }
+       
+       public void testIncludeAll() throws Exception {
+               Match match = getMatch(null, null, Match.IncludeRange.ALL);
+               match.setToken(getAnalyzedTokenReadings("inflectedform11 
inflectedform2 inflectedform122 inflectedform122"), 1, 3);
+               assertEquals("[inflectedform11inflectedform2 
inflectedform122]", Arrays.toString(match.toFinalString()));
+               // the first two tokens are joined together; this is a known issue
+       }
+
+       public void testPOSIncludeAll() throws Exception {
+               Match match = getMatch("POS1", "POS3", Match.IncludeRange.ALL); 
+               match.setToken(getAnalyzedTokenReadings("inflectedform11 
inflectedform2 inflectedform122 inflectedform122"), 1, 3);
+               assertEquals("[inflectedform123inflectedform2 
inflectedform122]", Arrays.toString(match.toFinalString()));
+               // Note that in this case the first token has the requested POS 
(POS3 replaces POS1).
+               // The first two tokens are joined together; this is a known issue.
+       }
+       
 }

Added: 
trunk/JLanguageTool/src/test/org/languagetool/synthesis/ManualSynthesizerAdapter.java
===================================================================
--- 
trunk/JLanguageTool/src/test/org/languagetool/synthesis/ManualSynthesizerAdapter.java
                               (rev 0)
+++ 
trunk/JLanguageTool/src/test/org/languagetool/synthesis/ManualSynthesizerAdapter.java
       2012-01-16 15:18:46 UTC (rev 6263)
@@ -0,0 +1,49 @@
+package org.languagetool.synthesis;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+import morfologik.stemming.IStemmer;
+import morfologik.stemming.WordData;
+
+/**
+ *  Adapter from {@link ManualSynthesizer} to {@link Synthesizer}. <br/> 
+ *  Note: It resides in the "test" package because for now it is only used in unit 
testing.
+ */
+public class ManualSynthesizerAdapter extends BaseSynthesizer implements 
Synthesizer  {
+       
+       private ManualSynthesizer manualSynthesizer;
+       
+       public ManualSynthesizerAdapter(ManualSynthesizer manualSynthesizer) {
+               super(null, null); // no file
+               this.manualSynthesizer = manualSynthesizer;
+       }
+       
+       @Override
+       protected void initSynthesizer() throws IOException {
+               synthesizer = new IStemmer() { // null synthesiser 
+                       @Override
+                       public List<WordData> lookup(CharSequence word) {
+                               return new ArrayList<WordData>();
+                       }
+               };
+       }
+       
+       @Override
+       protected void initPossibleTags() throws IOException {
+               if (possibleTags == null) {
+                       possibleTags = new 
ArrayList<String>(manualSynthesizer.getPossibleTags());
+               }
+       }
+       
+       @Override
+       protected void lookup(String lemma, String posTag, List<String> 
results) {
+               super.lookup(lemma, posTag, results);
+               final List<String> manualForms = 
manualSynthesizer.lookup(lemma.toLowerCase(), posTag);
+               if (manualForms != null) {
+                       results.addAll(manualForms); 
+               }
+       }
+
+}

Added: 
trunk/JLanguageTool/src/test/org/languagetool/tokenizers/ManualTaggerAdapter.java
===================================================================
--- 
trunk/JLanguageTool/src/test/org/languagetool/tokenizers/ManualTaggerAdapter.java
                           (rev 0)
+++ 
trunk/JLanguageTool/src/test/org/languagetool/tokenizers/ManualTaggerAdapter.java
   2012-01-16 15:18:46 UTC (rev 6263)
@@ -0,0 +1,61 @@
+package org.languagetool.tokenizers;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.languagetool.AnalyzedToken;
+import org.languagetool.AnalyzedTokenReadings;
+import org.languagetool.tagging.ManualTagger;
+import org.languagetool.tagging.Tagger;
+
+/**
+ * Adapter from {@link ManualTagger} to {@link Tagger}. <br/>
+ * Note: It resides in the "test" package because for now it is only used in unit
+ * testing.
+ */
+public class ManualTaggerAdapter implements Tagger {
+
+       private ManualTagger manualTagger;
+
+       public ManualTaggerAdapter(ManualTagger manualTagger) {
+               this.manualTagger = manualTagger;
+       }
+
+       @Override
+       public List<AnalyzedTokenReadings> tag(List<String> sentenceTokens)
+                       throws IOException {
+               final List<AnalyzedTokenReadings> tokenReadings = new 
ArrayList<AnalyzedTokenReadings>();
+               int pos = 0;
+               for (final String word : sentenceTokens) {
+                       final List<AnalyzedToken> l = new 
ArrayList<AnalyzedToken>();
+                       final String[] manualTags = 
manualTagger.lookup(word.toLowerCase());
+                       if (manualTags != null) {
+                               for (int i = 0; i < manualTags.length / 2; i = 
i + 2) {
+                                       l.add(new AnalyzedToken(word, 
manualTags[i + 1],
+                                                       manualTags[i]));
+                               }
+                       }
+                       if (l.isEmpty()) {
+                               l.add(new AnalyzedToken(word, null, null));
+                       }
+                       tokenReadings.add(new AnalyzedTokenReadings(l
+                                       .toArray(new AnalyzedToken[l.size()]), 
pos));
+                       pos += word.length();
+               }
+
+               return tokenReadings;
+       }
+
+       @Override
+       public AnalyzedTokenReadings createNullToken(String token, int 
startPos) {
+               return new AnalyzedTokenReadings(new AnalyzedToken(token, null, 
null),
+                               startPos);
+       }
+
+       @Override
+       public AnalyzedToken createToken(String token, String posTag) {
+               return new AnalyzedToken(token, posTag, null);
+       }
+
+}
\ No newline at end of file

This was sent by the SourceForge.net collaborative development platform, the 
world's largest Open Source development site.


------------------------------------------------------------------------------
RSA(R) Conference 2012
Feb 27 - Mar 2
Save $400 by Jan. 27
Register now!
http://p.sf.net/sfu/rsa-sfdev2dev2
_______________________________________________
Languagetool-cvs mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/languagetool-cvs

[LanguageTool] SF.net SVN: languagetool:[6263] trunk/JLanguageTool/src/test/org/languagetool

Reply via email to