Revision: 6229
http://languagetool.svn.sourceforge.net/languagetool/?rev=6229&view=rev
Author: archeus
Date: 2012-01-12 12:28:59 +0000 (Thu, 12 Jan 2012)
Log Message:
-----------
Extend manual synthesisers: let the base synthesiser know about the possible
tags from manual synthesisers
Modified Paths:
--------------
trunk/JLanguageTool/src/java/org/languagetool/synthesis/BaseSynthesizer.java
trunk/JLanguageTool/src/java/org/languagetool/synthesis/ManualSynthesizer.java
trunk/JLanguageTool/src/java/org/languagetool/synthesis/ro/RomanianSynthesizer.java
Modified:
trunk/JLanguageTool/src/java/org/languagetool/synthesis/BaseSynthesizer.java
===================================================================
---
trunk/JLanguageTool/src/java/org/languagetool/synthesis/BaseSynthesizer.java
2012-01-12 11:55:43 UTC (rev 6228)
+++
trunk/JLanguageTool/src/java/org/languagetool/synthesis/BaseSynthesizer.java
2012-01-12 12:28:59 UTC (rev 6229)
@@ -19,7 +19,7 @@
protected IStemmer synthesizer;
- private ArrayList<String> possibleTags;
+ protected ArrayList<String> possibleTags;
private final String tagFileName;
private final String resourceFileName;
@@ -63,10 +63,8 @@
public String[] synthesize(final AnalyzedToken token, final String posTag,
final boolean posTagRegExp) throws IOException {
if (posTagRegExp) {
- if (possibleTags == null) {
- possibleTags =
SynthesizerTools.loadWords(Tools.getStream(tagFileName));
- }
initSynthesizer();
+ initPossibleTags();
final Pattern p = Pattern.compile(posTag);
final ArrayList<String> results = new ArrayList<String>();
for (final String tag : possibleTags) {
@@ -80,6 +78,12 @@
return synthesize(token, posTag);
}
+ protected void initPossibleTags() throws IOException { // lazily loads the tag list; protected so subclasses can extend it
+ if (possibleTags == null) { // load only on first use; later calls keep the existing list
+ possibleTags = SynthesizerTools.loadWords(Tools.getStream(tagFileName));
+ }
+ }
+
protected void initSynthesizer() throws IOException {
if (synthesizer == null) {
final URL url = this.getClass().getResource(resourceFileName);
Modified:
trunk/JLanguageTool/src/java/org/languagetool/synthesis/ManualSynthesizer.java
===================================================================
---
trunk/JLanguageTool/src/java/org/languagetool/synthesis/ManualSynthesizer.java
2012-01-12 11:55:43 UTC (rev 6228)
+++
trunk/JLanguageTool/src/java/org/languagetool/synthesis/ManualSynthesizer.java
2012-01-12 12:28:59 UTC (rev 6229)
@@ -23,9 +23,12 @@
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.ArrayList;
+import java.util.Collections;
import java.util.HashMap;
+import java.util.HashSet;
import java.util.List;
import java.util.Map;
+import java.util.Set;
import org.languagetool.tagging.ManualTagger;
import org.languagetool.tools.StringTools;
@@ -43,14 +46,23 @@
*/
public class ManualSynthesizer {
- /** a map with the key composed by the lema and POS (separated by "|"). The
values are lists of inflected forms. */
+ /** a map with the key composed by the lemma and POS (separated by "|"). The
values are lists of inflected forms. */
private final Map<String, List<String>> mapping;
+ private Set<String> possibleTags = new HashSet<String>();
public ManualSynthesizer(final InputStream file) throws IOException {
mapping = loadMapping(file, "utf8");
+ possibleTags = Collections.unmodifiableSet(possibleTags); // lock: after loading, replace the set with a read-only wrapper
}
/**
+ * Retrieve all the possible POS values, as an unmodifiable set.
+ */
+ public Set<String> getPossibleTags() {
+ return possibleTags;
+ }
+
+ /**
* Look up a word's inflected form as specified by the lemma and POS tag.
*
* @param lemma the lemma to inflect.
@@ -84,6 +96,7 @@
map.put(key, new ArrayList<String>());
}
map.get(key).add(parts[0]);
+ possibleTags.add(parts[2]); // POS
}
} finally {
if (br != null) {
Modified:
trunk/JLanguageTool/src/java/org/languagetool/synthesis/ro/RomanianSynthesizer.java
===================================================================
---
trunk/JLanguageTool/src/java/org/languagetool/synthesis/ro/RomanianSynthesizer.java
2012-01-12 11:55:43 UTC (rev 6228)
+++
trunk/JLanguageTool/src/java/org/languagetool/synthesis/ro/RomanianSynthesizer.java
2012-01-12 12:28:59 UTC (rev 6229)
@@ -61,4 +61,14 @@
manualSynthesizer = new
ManualSynthesizer(JLanguageTool.getDataBroker().getFromResourceDirAsStream(USER_DICT_FILENAME));
}
}
+ @Override
+ protected void initPossibleTags() throws IOException {
+ super.initPossibleTags();
+ // add every tag known to the manual synthesiser that the base tag list does not contain yet
+ for (String tag : manualSynthesizer.getPossibleTags()) { // NOTE(review): assumes manualSynthesizer is already set, i.e. initSynthesizer() ran first — confirm
+ if (!possibleTags.contains(tag)) { // skip tags already present so the list gets no duplicates
+ possibleTags.add(tag);
+ }
+ }
+ }
}
This was sent by the SourceForge.net collaborative development platform, the
world's largest Open Source development site.
------------------------------------------------------------------------------
RSA(R) Conference 2012
Feb 27 - Mar 2
Save $400 by Jan. 27
Register now!
http://p.sf.net/sfu/rsa-sfdev2dev2
_______________________________________________
Languagetool-cvs mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/languagetool-cvs