Revision: 6263
http://languagetool.svn.sourceforge.net/languagetool/?rev=6263&view=rev
Author: archeus
Date: 2012-01-16 15:18:46 +0000 (Mon, 16 Jan 2012)
Log Message:
-----------
MatchTest: refactoring + adding some tests for Match.IncludeRange
Modified Paths:
--------------
trunk/JLanguageTool/src/test/org/languagetool/rules/patterns/MatchTest.java
Added Paths:
-----------
trunk/JLanguageTool/src/test/org/languagetool/synthesis/ManualSynthesizerAdapter.java
trunk/JLanguageTool/src/test/org/languagetool/tokenizers/ManualTaggerAdapter.java
Modified:
trunk/JLanguageTool/src/test/org/languagetool/rules/patterns/MatchTest.java
===================================================================
--- trunk/JLanguageTool/src/test/org/languagetool/rules/patterns/MatchTest.java
2012-01-16 12:48:59 UTC (rev 6262)
+++ trunk/JLanguageTool/src/test/org/languagetool/rules/patterns/MatchTest.java
2012-01-16 15:18:46 UTC (rev 6263)
@@ -21,21 +21,22 @@
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
-import java.util.ArrayList;
import java.util.Arrays;
-import java.util.List;
import junit.framework.TestCase;
-import morfologik.stemming.IStemmer;
-import morfologik.stemming.WordData;
import org.languagetool.AnalyzedToken;
import org.languagetool.AnalyzedTokenReadings;
+import org.languagetool.JLanguageTool;
+import org.languagetool.language.Demo;
import org.languagetool.rules.patterns.Match.CaseConversion;
import org.languagetool.rules.patterns.Match.IncludeRange;
-import org.languagetool.synthesis.BaseSynthesizer;
import org.languagetool.synthesis.ManualSynthesizer;
+import org.languagetool.synthesis.ManualSynthesizerAdapter;
import org.languagetool.synthesis.Synthesizer;
+import org.languagetool.tagging.ManualTagger;
+import org.languagetool.tagging.Tagger;
+import org.languagetool.tokenizers.ManualTaggerAdapter;
/**
* Test class for {@link Match}.
@@ -43,65 +44,65 @@
*/
public class MatchTest extends TestCase {
- /**
- * Adapter from {@link ManualSynthesizer} to {@link Synthesizer}.
<br/>
- * Note: This could be extracted as a standalone class.
- */
- public static class ManualSynthesizerAdapter extends BaseSynthesizer
implements Synthesizer {
- private ManualSynthesizer manualSynthesizer;
- public ManualSynthesizerAdapter(ManualSynthesizer
manualSynthesizer) {
- super(null, null); // no file
- this.manualSynthesizer = manualSynthesizer;
- }
- @Override
- protected void initSynthesizer() throws IOException {
- synthesizer = new IStemmer() { // null synthesiser
- @Override
- public List<WordData> lookup(CharSequence word)
{
- return new ArrayList<WordData>();
- }
- };
- }
- @Override
- protected void initPossibleTags() throws IOException {
- if (possibleTags == null) {
- possibleTags = new
ArrayList<String>(manualSynthesizer.getPossibleTags());
- }
- }
- @Override
- protected void lookup(String lemma, String posTag, List<String>
results) {
- super.lookup(lemma, posTag, results);
- // add words that are missing from the
romanian_synth.dict file
- final List<String> manualForms =
manualSynthesizer.lookup(lemma.toLowerCase(), posTag);
- if (manualForms != null) {
- results.addAll(manualForms);
- }
- }
- }
+ final static String TEST_DATA =
+ "# some test data\n" +
+ "inflectedform11\tlemma1\tPOS1\n" +
+ "inflectedform121\tlemma1\tPOS2\n" +
+ "inflectedform122\tlemma1\tPOS2\n" +
+ "inflectedform123\tlemma1\tPOS3\n" +
+ "inflectedform2\tlemma2\tPOS1\n"
+ ;
+
+ protected JLanguageTool languageTool;
+ protected Synthesizer synthesizer;
+ protected Tagger tagger;
+
//-- helper methods
- private Synthesizer getTestSynthesizer() throws
UnsupportedEncodingException, IOException {
- final String data =
- "# some test data\n" +
- "inflectedform11\tlemma1\tPOS1\n" +
- "inflectedform121\tlemma1\tPOS2\n" +
- "inflectedform122\tlemma1\tPOS2\n" +
- "inflectedform2\tlemma2\tPOS1\n"
- ;
- return new ManualSynthesizerAdapter(new ManualSynthesizer(new
ByteArrayInputStream(data.getBytes("UTF-8"))));
+ private AnalyzedTokenReadings[] getAnalyzedTokenReadings(final String
input) throws IOException {
+ return
languageTool.getAnalyzedSentence(input).getTokensWithoutWhitespace();
}
-
+
private AnalyzedTokenReadings getAnalyzedTokenReadings(String token,
String posTag, String lemma) {
return new AnalyzedTokenReadings(new AnalyzedToken(token,
posTag, lemma), 0);
}
private Match getMatch(String posTag, String posTagReplace,
CaseConversion caseConversion) throws UnsupportedEncodingException, IOException
{
Match match = new Match(posTag, posTagReplace, true, null,
null, caseConversion, false, IncludeRange.NONE);
- match.setSynthesizer(getTestSynthesizer());
+ match.setSynthesizer(synthesizer);
return match;
}
+ /**
+  * Builds a {@link Match} with the given POS tags and include range, using
+  * {@link CaseConversion#NONE}, and sets the fixture's synthesizer on it.
+  */
+ private Match getMatch(String posTag, String posTagReplace, IncludeRange includeRange)
+     throws UnsupportedEncodingException, IOException {
+   final Match result = new Match(posTag, posTagReplace, true, null, null,
+       CaseConversion.NONE, false, includeRange);
+   result.setSynthesizer(synthesizer);
+   return result;
+ }
+
+ //-- setup
+
+ /**
+  * Builds the fixture: a tagger and a synthesizer that both read {@code TEST_DATA},
+  * and a {@link JLanguageTool} over a {@link Demo} subclass that returns them.
+  */
+ @Override
+ protected void setUp() throws Exception {
+   super.setUp();
+   // Encode once; each consumer gets its own stream over the same bytes.
+   final byte[] testData = TEST_DATA.getBytes("UTF-8");
+   tagger = new ManualTaggerAdapter(new ManualTagger(new ByteArrayInputStream(testData)));
+   synthesizer = new ManualSynthesizerAdapter(
+       new ManualSynthesizer(new ByteArrayInputStream(testData)));
+   final Demo testLanguage = new Demo() {
+     @Override
+     public String getName() {
+       return "TEST";
+     }
+     @Override
+     public Synthesizer getSynthesizer() {
+       return MatchTest.this.synthesizer;
+     }
+     @Override
+     public Tagger getTagger() {
+       return MatchTest.this.tagger;
+     }
+   };
+   languageTool = new JLanguageTool(testLanguage);
+ }
+
//-- test methods
//-- CASE CONVERSION
@@ -145,7 +146,7 @@
public void testPreserveAllUpper() throws Exception {
Match match = getMatch("POS1", "POS2",
Match.CaseConversion.PRESERVE);
match.setToken(getAnalyzedTokenReadings("INFLECTEDFORM11",
"POS1", "Lemma1"));
- assertEquals("[INFLECTEDFORM121, INFLECTEDFORM122]",
Arrays.toString( match.toFinalString()));
+ assertEquals("[INFLECTEDFORM121, INFLECTEDFORM122]",
Arrays.toString(match.toFinalString()));
}
public void testPreserveMixed() throws Exception {
@@ -173,4 +174,34 @@
assertEquals("[inflectedform121, inflectedform122]",
Arrays.toString(match.toFinalString()));
}
+ //-- INCLUDE RANGE
+
+ /**
+  * Checks the final string for {@code IncludeRange.FOLLOWING} without POS tags,
+  * using a four-word sentence and the token range (1, 3).
+  */
+ public void testSimpleIncludeFollowing() throws Exception {
+   final Match match = getMatch(null, null, IncludeRange.FOLLOWING);
+   final AnalyzedTokenReadings[] tokens = getAnalyzedTokenReadings(
+       "inflectedform11 inflectedform2 inflectedform122 inflectedform122");
+   match.setToken(tokens, 1, 3);
+   final String actual = Arrays.toString(match.toFinalString());
+   assertEquals("[inflectedform2 inflectedform122]", actual);
+ }
+
+ /**
+  * Same input and expectation as the POS-less FOLLOWING case, but with POS tags supplied.
+  */
+ public void testPOSIncludeFollowing() throws Exception {
+   // POS is ignored when using IncludeRange.FOLLOWING
+   final Match match = getMatch("POS2", "POS33", IncludeRange.FOLLOWING);
+   final AnalyzedTokenReadings[] tokens = getAnalyzedTokenReadings(
+       "inflectedform11 inflectedform2 inflectedform122 inflectedform122");
+   match.setToken(tokens, 1, 3);
+   final String actual = Arrays.toString(match.toFinalString());
+   assertEquals("[inflectedform2 inflectedform122]", actual);
+ }
+
+ /**
+  * Checks the final string for {@code IncludeRange.ALL} without POS tags,
+  * using a four-word sentence and the token range (1, 3).
+  */
+ public void testIncludeAll() throws Exception {
+   final Match match = getMatch(null, null, IncludeRange.ALL);
+   final AnalyzedTokenReadings[] tokens = getAnalyzedTokenReadings(
+       "inflectedform11 inflectedform2 inflectedform122 inflectedform122");
+   match.setToken(tokens, 1, 3);
+   final String actual = Arrays.toString(match.toFinalString());
+   // The first two tokens are joined without a space; this is a known issue.
+   assertEquals("[inflectedform11inflectedform2 inflectedform122]", actual);
+ }
+
+ /**
+  * Checks the final string for {@code IncludeRange.ALL} with POS1 replaced by POS3,
+  * using a four-word sentence and the token range (1, 3).
+  */
+ public void testPOSIncludeAll() throws Exception {
+   final Match match = getMatch("POS1", "POS3", IncludeRange.ALL);
+   final AnalyzedTokenReadings[] tokens = getAnalyzedTokenReadings(
+       "inflectedform11 inflectedform2 inflectedform122 inflectedform122");
+   match.setToken(tokens, 1, 3);
+   final String actual = Arrays.toString(match.toFinalString());
+   // Note that in this case the first token has the requested POS (POS3 replaces POS1).
+   // The first two tokens are joined without a space; this is a known issue.
+   assertEquals("[inflectedform123inflectedform2 inflectedform122]", actual);
+ }
+
}
Added:
trunk/JLanguageTool/src/test/org/languagetool/synthesis/ManualSynthesizerAdapter.java
===================================================================
---
trunk/JLanguageTool/src/test/org/languagetool/synthesis/ManualSynthesizerAdapter.java
(rev 0)
+++
trunk/JLanguageTool/src/test/org/languagetool/synthesis/ManualSynthesizerAdapter.java
2012-01-16 15:18:46 UTC (rev 6263)
@@ -0,0 +1,49 @@
+package org.languagetool.synthesis;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+import morfologik.stemming.IStemmer;
+import morfologik.stemming.WordData;
+
+/**
+ * Adapter from {@link ManualSynthesizer} to {@link Synthesizer}. <br/>
+ * Note: It resides in "test" package because for now it is only used on unit
testing.
+ */
+public class ManualSynthesizerAdapter extends BaseSynthesizer implements Synthesizer {
+
+  private ManualSynthesizer manualSynthesizer;
+
+  /**
+   * Wraps the given manual synthesizer; the base class is created without any files.
+   */
+  public ManualSynthesizerAdapter(ManualSynthesizer manualSynthesizer) {
+    super(null, null); // no file
+    this.manualSynthesizer = manualSynthesizer;
+  }
+
+  /**
+   * Installs a stemmer whose lookup always returns a new, empty list.
+   */
+  @Override
+  protected void initSynthesizer() throws IOException {
+    final IStemmer emptyStemmer = new IStemmer() { // null synthesiser
+      @Override
+      public List<WordData> lookup(CharSequence word) {
+        return new ArrayList<WordData>();
+      }
+    };
+    synthesizer = emptyStemmer;
+  }
+
+  /**
+   * Copies the possible tags from the manual synthesizer, unless they are already set.
+   */
+  @Override
+  protected void initPossibleTags() throws IOException {
+    if (possibleTags != null) {
+      return;
+    }
+    possibleTags = new ArrayList<String>(manualSynthesizer.getPossibleTags());
+  }
+
+  /**
+   * Runs the base lookup, then appends the forms the manual synthesizer returns
+   * for the lower-cased lemma and the given POS tag.
+   */
+  @Override
+  protected void lookup(String lemma, String posTag, List<String> results) {
+    super.lookup(lemma, posTag, results);
+    final List<String> extraForms = manualSynthesizer.lookup(lemma.toLowerCase(), posTag);
+    if (extraForms == null) {
+      return;
+    }
+    results.addAll(extraForms);
+  }
+
+}
Added:
trunk/JLanguageTool/src/test/org/languagetool/tokenizers/ManualTaggerAdapter.java
===================================================================
---
trunk/JLanguageTool/src/test/org/languagetool/tokenizers/ManualTaggerAdapter.java
(rev 0)
+++
trunk/JLanguageTool/src/test/org/languagetool/tokenizers/ManualTaggerAdapter.java
2012-01-16 15:18:46 UTC (rev 6263)
@@ -0,0 +1,61 @@
+package org.languagetool.tokenizers;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.languagetool.AnalyzedToken;
+import org.languagetool.AnalyzedTokenReadings;
+import org.languagetool.tagging.ManualTagger;
+import org.languagetool.tagging.Tagger;
+
+/**
+ * Adapter from {@link ManualTagger} to {@link Tagger}. <br/>
+ * Note: It resides in "test" package because for now it is only used on unit
+ * testing.
+ */
+public class ManualTaggerAdapter implements Tagger {
+
+  private final ManualTagger manualTagger;
+
+  /**
+   * Wraps the given manual tagger.
+   */
+  public ManualTaggerAdapter(ManualTagger manualTagger) {
+    this.manualTagger = manualTagger;
+  }
+
+  /**
+   * Tags every word by looking up its lower-cased form in the manual tagger.
+   * A word with no readings gets a single token with {@code null} POS tag and lemma.
+   * The start position of each token is the summed length of the preceding words.
+   *
+   * @param sentenceTokens the words to tag
+   * @return one {@link AnalyzedTokenReadings} per input word, in input order
+   */
+  @Override
+  public List<AnalyzedTokenReadings> tag(List<String> sentenceTokens) throws IOException {
+    final List<AnalyzedTokenReadings> tokenReadings = new ArrayList<AnalyzedTokenReadings>();
+    int pos = 0;
+    for (final String word : sentenceTokens) {
+      final List<AnalyzedToken> readings = new ArrayList<AnalyzedToken>();
+      final String[] manualTags = manualTagger.lookup(word.toLowerCase());
+      if (manualTags != null) {
+        // The array is read as consecutive (lemma, POS tag) pairs. Walk every
+        // complete pair; the former bound of length / 2 dropped all pairs
+        // after the first half when a word had several readings.
+        for (int i = 0; i + 1 < manualTags.length; i += 2) {
+          readings.add(new AnalyzedToken(word, manualTags[i + 1], manualTags[i]));
+        }
+      }
+      if (readings.isEmpty()) {
+        readings.add(new AnalyzedToken(word, null, null));
+      }
+      tokenReadings.add(new AnalyzedTokenReadings(
+          readings.toArray(new AnalyzedToken[readings.size()]), pos));
+      pos += word.length();
+    }
+
+    return tokenReadings;
+  }
+
+  /**
+   * Creates readings for {@code token} at {@code startPos} with no POS tag and no lemma.
+   */
+  @Override
+  public AnalyzedTokenReadings createNullToken(String token, int startPos) {
+    return new AnalyzedTokenReadings(new AnalyzedToken(token, null, null), startPos);
+  }
+
+  /**
+   * Creates a token with the given POS tag and no lemma.
+   */
+  @Override
+  public AnalyzedToken createToken(String token, String posTag) {
+    return new AnalyzedToken(token, posTag, null);
+  }
+
+}
\ No newline at end of file
This was sent by the SourceForge.net collaborative development platform, the
world's largest Open Source development site.
------------------------------------------------------------------------------
RSA(R) Conference 2012
Feb 27 - Mar 2
Save $400 by Jan. 27
Register now!
http://p.sf.net/sfu/rsa-sfdev2dev2
_______________________________________________
Languagetool-cvs mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/languagetool-cvs