Revision: 7206
http://languagetool.svn.sourceforge.net/languagetool/?rev=7206&view=rev
Author: milek_pl
Date: 2012-06-03 12:21:08 +0000 (Sun, 03 Jun 2012)
Log Message:
-----------
new feature: suppress misspelled suggestions
Modified Paths:
--------------
trunk/JLanguageTool/CHANGES.txt
trunk/JLanguageTool/src/java/org/languagetool/rules/patterns/Match.java
trunk/JLanguageTool/src/java/org/languagetool/rules/patterns/PatternRuleHandler.java
trunk/JLanguageTool/src/java/org/languagetool/rules/patterns/PatternRuleMatcher.java
trunk/JLanguageTool/src/java/org/languagetool/rules/patterns/XMLRuleHandler.java
trunk/JLanguageTool/src/java/org/languagetool/synthesis/Synthesizer.java
trunk/JLanguageTool/src/java/org/languagetool/tagging/disambiguation/rules/DisambiguationPatternRule.java
trunk/JLanguageTool/src/java/org/languagetool/tagging/disambiguation/rules/DisambiguationRuleLoader.java
trunk/JLanguageTool/src/rules/en/grammar.xml
trunk/JLanguageTool/src/rules/pattern.xsd
trunk/JLanguageTool/src/rules/rules.xsd
trunk/JLanguageTool/src/test/org/languagetool/rules/patterns/MatchTest.java
Modified: trunk/JLanguageTool/CHANGES.txt
===================================================================
--- trunk/JLanguageTool/CHANGES.txt 2012-06-03 11:35:42 UTC (rev 7205)
+++ trunk/JLanguageTool/CHANGES.txt 2012-06-03 12:21:08 UTC (rev 7206)
@@ -78,7 +78,14 @@
-The XML format for rules has been changed to use <marker>...</marker> tags
instead
of mark_from and mark_to attributes
-
+
+ -Now it is possible to suppress misspelled suggestions altogether in XML
rules by applying
+ an attribute suppress_misspelled="yes" on the <suggestion> element, AND on
the <match>
+ element. If only the <match> element has this attribute set to "yes", then the
suggestion is
+ displayed, but the content of that <match> is left out of it (this might be a
conditional part
+ of the suggestion). Note: for this to work, the tagger dictionary needs to
be fairly complete;
+ words without lemmas and POS tags are considered to be misspelled.
+
-GUI: made the result of "Tag Text" more readable
-Improved startup speed (Jarek Lipski)
@@ -88,9 +95,10 @@
-In the profiling rules' mode on the command-line, you can now enable
and disable rules.
- -Some internal bug fixing in disambiguation and pattern rules.
+ -Some internal bug fixing in disambiguation and pattern rules.
+
+
-
1.7 (2012-03-25)
-English:
Modified:
trunk/JLanguageTool/src/java/org/languagetool/rules/patterns/Match.java
===================================================================
--- trunk/JLanguageTool/src/java/org/languagetool/rules/patterns/Match.java
2012-06-03 11:35:42 UTC (rev 7205)
+++ trunk/JLanguageTool/src/java/org/languagetool/rules/patterns/Match.java
2012-06-03 12:21:08 UTC (rev 7206)
@@ -28,6 +28,7 @@
import org.languagetool.AnalyzedToken;
import org.languagetool.AnalyzedTokenReadings;
import org.languagetool.JLanguageTool;
+import org.languagetool.Language;
import org.languagetool.synthesis.Synthesizer;
import org.languagetool.tools.StringTools;
@@ -80,6 +81,7 @@
private final String posTag;
private boolean postagRegexp;
+ private final boolean suppressMisspelled;
private final String regexReplace;
private final String posTagReplace;
private final CaseConversion caseConversionType;
@@ -121,6 +123,7 @@
final boolean postagRegexp, final String regexMatch,
final String regexReplace, final CaseConversion caseConversionType,
final boolean setPOS,
+ final boolean suppressMisspelled,
final IncludeRange includeSkipped) {
this.posTag = posTag;
this.postagRegexp = postagRegexp;
@@ -137,6 +140,7 @@
this.posTagReplace = posTagReplace;
this.setPos = setPOS;
this.includeSkipped = includeSkipped;
+ this.suppressMisspelled = suppressMisspelled;
}
/**
@@ -234,14 +238,24 @@
synthesizer = synth;
}
+
/**
+ * Used to tell whether the Match class will spell-check the result.
+ * @return True if this is so.
+ */
+ public final boolean checksSpelling() {
+ return suppressMisspelled;
+ }
+
+ /**
* Gets all strings formatted using the match element.
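+ * @param lang language whose tagger spell-checks the result when misspelled output is suppressed; null skips that check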
*
* @return array of strings
* @throws IOException
* in case of synthesizer-related disk problems.
*/
- public final String[] toFinalString() throws IOException {
+ public final String[] toFinalString(Language lang) throws IOException {
String[] formattedString = new String[1];
if (formattedToken != null) {
final int readingCount = formattedToken.getReadingsLength();
@@ -287,8 +301,12 @@
}
}
}
- if (wordForms.isEmpty()) {
- formattedString[0] = "(" + formattedToken.getToken() + ")";
+ if (wordForms.isEmpty()) {
+ if (this.suppressMisspelled) {
+ formattedString[0] = "";
+ } else {
+ formattedString[0] = "(" + formattedToken.getToken() + ")";
+ }
} else {
formattedString = wordForms.toArray(new String[wordForms.size()]);
}
@@ -324,8 +342,24 @@
}
helper[i] = formattedString[i] + skippedTokens;
}
- formattedString = helper;
+
+ formattedString = helper;
+
}
+ if (this.suppressMisspelled && lang != null) {
+ List<String> formattedStringElements = new
ArrayList<String>(formattedString.length);
+ for (final String str : formattedString) {
+ formattedStringElements.add(str);
+ }
+ //tagger-based speller
+ List<AnalyzedTokenReadings> analyzed =
lang.getTagger().tag(formattedStringElements);
+ for (int i = 0; i < formattedString.length; i++) {
+ if (analyzed.get(i).getAnalyzedToken(0).getLemma() == null
+ && analyzed.get(i).getAnalyzedToken(0).hasNoTag()) {
+ formattedString[i] = "";
+ }
+ }
+ }
return formattedString;
}
@@ -393,7 +427,7 @@
*/
public final String toTokenString() throws IOException {
final StringBuilder output = new StringBuilder();
- final String[] stringToFormat = toFinalString();
+ final String[] stringToFormat = toFinalString(null);
for (int i = 0; i < stringToFormat.length; i++) {
output.append(stringToFormat[i]);
if (i + 1 < stringToFormat.length) {
Modified:
trunk/JLanguageTool/src/java/org/languagetool/rules/patterns/PatternRuleHandler.java
===================================================================
---
trunk/JLanguageTool/src/java/org/languagetool/rules/patterns/PatternRuleHandler.java
2012-06-03 11:35:42 UTC (rev 7205)
+++
trunk/JLanguageTool/src/java/org/languagetool/rules/patterns/PatternRuleHandler.java
2012-06-03 12:21:08 UTC (rev 7206)
@@ -141,7 +141,10 @@
defaultOn = "on".equals(attrs.getValue(DEFAULT));
inRuleGroup = true;
subId = 0;
- } else if ("suggestion".equals(qName) && inMessage) {
+ } else if ("suggestion".equals(qName) && inMessage) {
+ if (YES.equals(attrs.getValue("suppress_misspelled"))) {
+ message.append("<pleasespellme/>");
+ }
message.append("<suggestion>");
inSuggestion = true;
} else if (MATCH.equals(qName)) {
@@ -250,7 +253,7 @@
} else if (RULEGROUP.equals(qName)) {
inRuleGroup = false;
} else if ("suggestion".equals(qName) && inMessage) {
- message.append("</suggestion>");
+ message.append("</suggestion>");
inSuggestion = false;
} else if (MARKER.equals(qName) && inCorrectExample) {
correctExample.append("</marker>");
Modified:
trunk/JLanguageTool/src/java/org/languagetool/rules/patterns/PatternRuleMatcher.java
===================================================================
---
trunk/JLanguageTool/src/java/org/languagetool/rules/patterns/PatternRuleMatcher.java
2012-06-03 11:35:42 UTC (rev 7205)
+++
trunk/JLanguageTool/src/java/org/languagetool/rules/patterns/PatternRuleMatcher.java
2012-06-03 12:21:08 UTC (rev 7206)
@@ -155,8 +155,16 @@
+ tokens[lastMatchToken + correctedEndPos].getToken().length();
if (fromPos < toPos) { // this can happen with some skip="-1" when the
last
// token is not matched
+
+ //now do some spell-checking:
+ if (!(errMessage.contains("<pleasespellme/>") &&
+ errMessage.contains("<mistake/>"))) {
+ //remove stupid markers
+ errMessage.replace("<pleasespellme/>", "");
+ errMessage.replace("<mistake/>","");
return new RuleMatch(rule, fromPos, toPos,
errMessage, rule.getShortMessage(), startsWithUppercase);
+ }
} // failed to create any rule match...
return null;
}
@@ -360,7 +368,14 @@
final int skippedTokens = nextTokenPos - tokenIndex;
suggestionMatches.get(start).setToken(tokens, tokenIndex - 1,
skippedTokens);
suggestionMatches.get(start).setSynthesizer(language.getSynthesizer());
- finalMatch = suggestionMatches.get(start).toFinalString();
+ finalMatch = suggestionMatches.get(start).toFinalString(language);
+ if (suggestionMatches.get(start).checksSpelling()
+ && finalMatch.length == 1
+ && "".equals(finalMatch[0])) {
+ finalMatch = new String[1];
+ finalMatch[0] = "<mistake/>";
+ }
+
} else {
final List<String[]> matchList = new ArrayList<String[]>();
for (int i = 0; i < len; i++) {
@@ -368,7 +383,7 @@
suggestionMatches.get(start).setToken(tokens, tokenIndex - 1 + i,
skippedTokens);
suggestionMatches.get(start)
.setSynthesizer(language.getSynthesizer());
- matchList.add(suggestionMatches.get(start).toFinalString());
+ matchList.add(suggestionMatches.get(start).toFinalString(language));
}
return combineLists(matchList.toArray(new String[matchList.size()][]),
new String[matchList.size()], 0, language);
@@ -407,25 +422,26 @@
* @return Combined array of @String.
*/
private static String[] combineLists(final String[][] input,
- final String[] output, final int r, final Language lang) {
- final List<String> outputList = new ArrayList<String>();
- if (r == input.length) {
- final StringBuilder sb = new StringBuilder();
- for (int k = 0; k < output.length; k++) {
- sb.append(output[k]);
- if (k < output.length - 1) {
- sb.append(StringTools.addSpace(output[k + 1], lang));
- }
+ final String[] output, final int r, final Language lang) {
+ final List<String> outputList = new ArrayList<String>();
+ if (r == input.length) {
+ final StringBuilder sb = new StringBuilder();
+ for (int k = 0; k < output.length; k++) {
+ sb.append(output[k]);
+ if (k < output.length - 1) {
+ sb.append(StringTools.addSpace(output[k + 1], lang));
+ }
+
+ }
+ outputList.add(sb.toString());
+ } else {
+ for (int c = 0; c < input[r].length; c++) {
+ output[r] = input[r][c];
+ final String[] sList = combineLists(input, output, r + 1, lang);
+ outputList.addAll(Arrays.asList(sList));
+ }
}
- outputList.add(sb.toString());
- } else {
- for (int c = 0; c < input[r].length; c++) {
- output[r] = input[r][c];
- final String[] sList = combineLists(input, output, r + 1, lang);
- outputList.addAll(Arrays.asList(sList));
- }
- }
- return outputList.toArray(new String[outputList.size()]);
+ return outputList.toArray(new String[outputList.size()]);
}
}
Modified:
trunk/JLanguageTool/src/java/org/languagetool/rules/patterns/XMLRuleHandler.java
===================================================================
---
trunk/JLanguageTool/src/java/org/languagetool/rules/patterns/XMLRuleHandler.java
2012-06-03 11:35:42 UTC (rev 7205)
+++
trunk/JLanguageTool/src/java/org/languagetool/rules/patterns/XMLRuleHandler.java
2012-06-03 12:21:08 UTC (rev 7206)
@@ -325,6 +325,7 @@
.equals(attrs.getValue(POSTAG_REGEXP)), attrs
.getValue("regexp_match"), attrs.getValue("regexp_replace"),
caseConversion, YES.equals(attrs.getValue("setpos")),
+ YES.equals(attrs.getValue("suppress_misspelled")),
includeRange);
mWorker.setInMessageOnly(!inSuggestion);
if (inMessage) {
@@ -460,7 +461,7 @@
if (Character.isDigit(messageStr.charAt(pos + 1))) {
if (pos == 1 || messageStr.charAt(pos - 1) != '\u0001') {
final Match mWorker = new Match(null, null, false, null,
- null, Match.CaseConversion.NONE, false,
Match.IncludeRange.NONE);
+ null, Match.CaseConversion.NONE, false, false,
Match.IncludeRange.NONE);
mWorker.setInMessageOnly(true);
sugMatch.add(mWorker);
} else if (messageStr.charAt(pos - 1) == '\u0001') { // real
suggestion marker
Modified:
trunk/JLanguageTool/src/java/org/languagetool/synthesis/Synthesizer.java
===================================================================
--- trunk/JLanguageTool/src/java/org/languagetool/synthesis/Synthesizer.java
2012-06-03 11:35:42 UTC (rev 7205)
+++ trunk/JLanguageTool/src/java/org/languagetool/synthesis/Synthesizer.java
2012-06-03 12:21:08 UTC (rev 7206)
@@ -40,8 +40,8 @@
/** Generates a form of the word with a given POS tag for a given lemma.
* POS tag can be specified using regular expressions.
* @param token the token to be used for synthesis
- * @param posTag POS tag of the form to be generated.
- * @param posTagRegExp Specifies whether the posTag string is a
+ * @param posTag POS tag of the form to be generated.
+ * @param posTagRegExp Specifies whether the posTag string is a
* regular expression.
**/
public String[] synthesize(AnalyzedToken token, String posTag, boolean
posTagRegExp) throws IOException;
Modified:
trunk/JLanguageTool/src/java/org/languagetool/tagging/disambiguation/rules/DisambiguationPatternRule.java
===================================================================
---
trunk/JLanguageTool/src/java/org/languagetool/tagging/disambiguation/rules/DisambiguationPatternRule.java
2012-06-03 11:35:42 UTC (rev 7205)
+++
trunk/JLanguageTool/src/java/org/languagetool/tagging/disambiguation/rules/DisambiguationPatternRule.java
2012-06-03 12:21:08 UTC (rev 7206)
@@ -280,7 +280,7 @@
if (matchElement == null) { // same as REPLACE if using <match>
final Match tmpMatchToken = new Match(disambiguatedPOS, null, true,
disambiguatedPOS, null, Match.CaseConversion.NONE,
- false, Match.IncludeRange.NONE);
+ false, false, Match.IncludeRange.NONE);
tmpMatchToken.setToken(whTokens[fromPos]);
whTokens[fromPos] = tmpMatchToken.filterReadings();
filtered = true;
Modified:
trunk/JLanguageTool/src/java/org/languagetool/tagging/disambiguation/rules/DisambiguationRuleLoader.java
===================================================================
---
trunk/JLanguageTool/src/java/org/languagetool/tagging/disambiguation/rules/DisambiguationRuleLoader.java
2012-06-03 11:35:42 UTC (rev 7205)
+++
trunk/JLanguageTool/src/java/org/languagetool/tagging/disambiguation/rules/DisambiguationRuleLoader.java
2012-06-03 12:21:08 UTC (rev 7206)
@@ -173,6 +173,7 @@
.equals(attrs.getValue(POSTAG_REGEXP)), attrs
.getValue("regexp_match"), attrs.getValue("regexp_replace"),
caseConversion, YES.equals(attrs.getValue("setpos")),
+ YES.equals(attrs.getValue("supress_mispelled")),
includeRange);
if (inDisambiguation) {
if (attrs.getValue(NO) != null) {
Modified: trunk/JLanguageTool/src/rules/en/grammar.xml
===================================================================
--- trunk/JLanguageTool/src/rules/en/grammar.xml 2012-06-03 11:35:42 UTC
(rev 7205)
+++ trunk/JLanguageTool/src/rules/en/grammar.xml 2012-06-03 12:21:08 UTC
(rev 7206)
@@ -7798,7 +7798,7 @@
</marker>
<token><exception postag="NN.*"
postag_regexp="yes"></exception><exception>'</exception></token>
</pattern>
- <message>Possible agreement error. The noun <match no="2"></match>
seems to be countable, so probably you should use: <suggestion><match
no="1"></match> <match no="2" postag="NNS|NNPS"
postag_regexp="yes"></match></suggestion>.</message>
+ <message>Possible agreement error. The noun <match no="2"></match>
seems to be countable, so probably you should use: <suggestion
suppress_misspelled="yes" ><match no="1"></match> <match no="2"
suppress_misspelled="yes" postag="NNS|NNPS"
postag_regexp="yes"></match></suggestion>.</message>
<short>Grammatical problem</short>
<example correction="five books" type="incorrect">I have
<marker>five book</marker>.</example>
<example type="correct">I have <marker>ten
books</marker>.</example>
@@ -7813,6 +7813,7 @@
<example type="correct">$800 billion economy was envisioned for
the 1970s</example>
<example type="correct">Middle East peace after Israel's 1956
invasion of Egypt</example>
<example type="correct">The Apollo 8 mission was well covered in
the British documentary.</example>
+ <example type="correct">This is 3 H2O.</example>
</rule>
<rule id="MANY_NN" name="Possible agreement error: 'many/several/few'
+ singular countable noun">
<pattern>
Modified: trunk/JLanguageTool/src/rules/pattern.xsd
===================================================================
--- trunk/JLanguageTool/src/rules/pattern.xsd 2012-06-03 11:35:42 UTC (rev
7205)
+++ trunk/JLanguageTool/src/rules/pattern.xsd 2012-06-03 12:21:08 UTC (rev
7206)
@@ -105,34 +105,36 @@
<xs:documentation xml:lang="en">Reference to the pattern
element.
Might be used in a suggestion or in the
pattern.</xs:documentation>
</xs:annotation>
- <xs:element name="match">
+ <xs:element name="match">
<xs:complexType mixed="true">
- <xs:attribute name="regexp_match" type="xs:string"
use="optional" />
- <xs:attribute name="postag_regexp" type="binaryYesNo"
- use="optional" default="no" />
- <xs:attribute name="setpos" type="binaryYesNo"
use="optional"
- default="no" />
- <xs:attribute name="case_conversion" use="optional">
- <xs:simpleType>
- <xs:restriction base="xs:NMTOKEN">
- <xs:enumeration
value="startlower" />
- <xs:enumeration
value="startupper" />
- <xs:enumeration
value="allupper" />
- <xs:enumeration
value="alllower" />
- <xs:enumeration
value="preserve" />
- </xs:restriction>
- </xs:simpleType>
- </xs:attribute>
- <xs:attribute name="regexp_replace" type="xs:string"
- use="optional" />
- <xs:attribute name="postag_replace" type="xs:string"
- use="optional" />
- <xs:attribute name="postag" type="xs:string"
use="optional" />
- <xs:attribute name="no" type="xs:nonNegativeInteger"
- use="required" />
- <xs:attribute name="include_skipped"
type="includeSelector" use="optional"
- default="none"/>
- </xs:complexType>
+ <xs:attribute name="regexp_match" type="xs:string"
+ use="optional" />
+ <xs:attribute name="postag_regexp" type="binaryYesNo"
+ use="optional" default="no" />
+ <xs:attribute name="setpos" type="binaryYesNo" use="optional"
+ default="no" />
+ <xs:attribute name="case_conversion" use="optional">
+ <xs:simpleType>
+ <xs:restriction base="xs:NMTOKEN">
+ <xs:enumeration value="startlower" />
+ <xs:enumeration value="startupper" />
+ <xs:enumeration value="allupper" />
+ <xs:enumeration value="alllower" />
+ <xs:enumeration value="preserve" />
+ </xs:restriction>
+ </xs:simpleType>
+ </xs:attribute>
+ <xs:attribute name="suppress_misspelled" type="binaryYesNo"
use="optional"/>
+ <xs:attribute name="regexp_replace" type="xs:string"
+ use="optional" />
+ <xs:attribute name="postag_replace" type="xs:string"
+ use="optional" />
+ <xs:attribute name="postag" type="xs:string" use="optional" />
+ <xs:attribute name="no" type="xs:nonNegativeInteger"
+ use="required" />
+ <xs:attribute name="include_skipped" type="includeSelector"
+ use="optional" default="none" />
+ </xs:complexType>
</xs:element>
<xs:element name="token">
Modified: trunk/JLanguageTool/src/rules/rules.xsd
===================================================================
--- trunk/JLanguageTool/src/rules/rules.xsd 2012-06-03 11:35:42 UTC (rev
7205)
+++ trunk/JLanguageTool/src/rules/rules.xsd 2012-06-03 12:21:08 UTC (rev
7206)
@@ -169,11 +169,12 @@
<xs:documentation xml:lang="en"> Suggestion displayed to
the user. </xs:documentation>
</xs:annotation>
- <xs:element name="suggestion">
- <xs:complexType mixed="true">
+ <xs:element name="suggestion">
+ <xs:complexType mixed="true">
<xs:sequence minOccurs='0' maxOccurs='unbounded'>
<xs:element ref="match" />
</xs:sequence>
+ <xs:attribute name="suppress_misspelled"
type="binaryYesNo" use="optional"/>
</xs:complexType>
</xs:element>
Modified:
trunk/JLanguageTool/src/test/org/languagetool/rules/patterns/MatchTest.java
===================================================================
--- trunk/JLanguageTool/src/test/org/languagetool/rules/patterns/MatchTest.java
2012-06-03 11:35:42 UTC (rev 7205)
+++ trunk/JLanguageTool/src/test/org/languagetool/rules/patterns/MatchTest.java
2012-06-03 12:21:08 UTC (rev 7206)
@@ -28,6 +28,7 @@
import org.languagetool.AnalyzedToken;
import org.languagetool.AnalyzedTokenReadings;
import org.languagetool.JLanguageTool;
+import org.languagetool.Language;
import org.languagetool.language.Demo;
import org.languagetool.rules.patterns.Match.CaseConversion;
import org.languagetool.rules.patterns.Match.IncludeRange;
@@ -69,13 +70,23 @@
}
private Match getMatch(String posTag, String posTagReplace,
CaseConversion caseConversion) throws UnsupportedEncodingException, IOException
{
- Match match = new Match(posTag, posTagReplace, true, null,
null, caseConversion, false, IncludeRange.NONE);
+ Match match = new Match(posTag, posTagReplace, true, null,
null, caseConversion, false, false, IncludeRange.NONE);
match.setSynthesizer(synthesizer);
return match;
}
+
+ private Match getMatch(String posTag, String posTagReplace, boolean
spell) throws UnsupportedEncodingException, IOException {
+ Match match = new Match(posTag, posTagReplace, true, null, null,
CaseConversion.NONE, false, spell, IncludeRange.NONE);
+ return match;
+ }
+
+ private Match getTextMatch(String regexMatch, String regexpReplace,
boolean spell) throws UnsupportedEncodingException, IOException {
+ Match match = new Match(null, null, false, regexMatch, regexpReplace,
CaseConversion.NONE, false, spell, IncludeRange.NONE);
+ return match;
+ }
private Match getMatch(String posTag, String posTagReplace,
IncludeRange includeRange) throws UnsupportedEncodingException, IOException {
- Match match = new Match(posTag, posTagReplace, true, null,
null, CaseConversion.NONE, false, includeRange);
+ Match match = new Match(posTag, posTagReplace, true, null,
null, CaseConversion.NONE, false, false, includeRange);
match.setSynthesizer(synthesizer);
return match;
}
@@ -110,93 +121,93 @@
public void testStartUpper() throws Exception {
Match match = getMatch("POS1", "POS2",
Match.CaseConversion.STARTUPPER);
match.setToken(getAnalyzedTokenReadings("inflectedform11",
"POS1", "Lemma1"));
- assertEquals("[Inflectedform121, Inflectedform122]",
Arrays.toString( match.toFinalString()));
+ assertEquals("[Inflectedform121, Inflectedform122]",
Arrays.toString( match.toFinalString(null)));
}
public void testStartLower() throws Exception {
Match match = getMatch("POS1", "POS2",
Match.CaseConversion.STARTLOWER);
match.setToken(getAnalyzedTokenReadings("InflectedForm11",
"POS1", "Lemma1"));
- assertEquals("[inflectedform121, inflectedform122]",
Arrays.toString(match.toFinalString()));
+ assertEquals("[inflectedform121, inflectedform122]",
Arrays.toString(match.toFinalString(null)));
}
public void testAllUpper() throws Exception {
Match match = getMatch("POS1", "POS2",
Match.CaseConversion.ALLUPPER);
match.setToken(getAnalyzedTokenReadings("InflectedForm11",
"POS1", "Lemma1"));
- assertEquals("[INFLECTEDFORM121, INFLECTEDFORM122]",
Arrays.toString(match.toFinalString()));
+ assertEquals("[INFLECTEDFORM121, INFLECTEDFORM122]",
Arrays.toString(match.toFinalString(null)));
}
public void testAllLower() throws Exception {
Match match = getMatch("POS1", "POS2",
Match.CaseConversion.ALLLOWER);
match.setToken(getAnalyzedTokenReadings("InflectedForm11",
"POS1", "Lemma1"));
- assertEquals("[inflectedform121, inflectedform122]",
Arrays.toString(match.toFinalString()));
+ assertEquals("[inflectedform121, inflectedform122]",
Arrays.toString(match.toFinalString(null)));
}
public void testPreserveStartUpper() throws Exception {
Match match = getMatch("POS1", "POS2",
Match.CaseConversion.PRESERVE);
match.setToken(getAnalyzedTokenReadings("InflectedForm11",
"POS1", "Lemma1"));
- assertEquals("[Inflectedform121, Inflectedform122]",
Arrays.toString(match.toFinalString()));
+ assertEquals("[Inflectedform121, Inflectedform122]",
Arrays.toString(match.toFinalString(null)));
}
public void testStaticLemmaPreserveStartLower() throws Exception {
Match match = getMatch("POS2", "POS1",
Match.CaseConversion.PRESERVE);
match.setLemmaString("lemma2");
match.setToken(getAnalyzedTokenReadings("inflectedform121",
"POS2", "Lemma1"));
- assertEquals("[inflectedform2]",
Arrays.toString(match.toFinalString()));
+ assertEquals("[inflectedform2]",
Arrays.toString(match.toFinalString(null)));
}
public void testStaticLemmaPreserveStartUpper() throws Exception {
Match match = getMatch("POS2", "POS1",
Match.CaseConversion.PRESERVE);
match.setLemmaString("lemma2");
match.setToken(getAnalyzedTokenReadings("InflectedForm121",
"POS2", "Lemma1"));
- assertEquals("[Inflectedform2]",
Arrays.toString(match.toFinalString()));
+ assertEquals("[Inflectedform2]",
Arrays.toString(match.toFinalString(null)));
}
public void testStaticLemmaPreserveAllUpper() throws Exception {
Match match = getMatch("POS2", "POS1",
Match.CaseConversion.PRESERVE);
match.setLemmaString("lemma2");
match.setToken(getAnalyzedTokenReadings("INFLECTEDFORM121",
"POS2", "Lemma1"));
- assertEquals("[INFLECTEDFORM2]",
Arrays.toString(match.toFinalString()));
+ assertEquals("[INFLECTEDFORM2]",
Arrays.toString(match.toFinalString(null)));
}
public void testStaticLemmaPreserveMixed() throws Exception {
Match match = getMatch("POS2", "POS1",
Match.CaseConversion.PRESERVE);
match.setLemmaString("lemma2");
match.setToken(getAnalyzedTokenReadings("infleCtedForm121",
"POS2", "Lemma1"));
- assertEquals("[inflectedform2]",
Arrays.toString(match.toFinalString()));
+ assertEquals("[inflectedform2]",
Arrays.toString(match.toFinalString(null)));
}
public void testPreserveStartLower() throws Exception {
Match match = getMatch("POS1", "POS2",
Match.CaseConversion.PRESERVE);
match.setToken(getAnalyzedTokenReadings("inflectedForm11",
"POS1", "Lemma1"));
- assertEquals("[inflectedform121, inflectedform122]",
Arrays.toString(match.toFinalString()));
+ assertEquals("[inflectedform121, inflectedform122]",
Arrays.toString(match.toFinalString(null)));
}
public void testPreserveAllUpper() throws Exception {
Match match = getMatch("POS1", "POS2",
Match.CaseConversion.PRESERVE);
match.setToken(getAnalyzedTokenReadings("INFLECTEDFORM11",
"POS1", "Lemma1"));
- assertEquals("[INFLECTEDFORM121, INFLECTEDFORM122]",
Arrays.toString(match.toFinalString()));
+ assertEquals("[INFLECTEDFORM121, INFLECTEDFORM122]",
Arrays.toString(match.toFinalString(null)));
}
public void testPreserveMixed() throws Exception {
Match match = getMatch("POS1", "POS2",
Match.CaseConversion.PRESERVE);
match.setToken(getAnalyzedTokenReadings("inflecTedForm11",
"POS1", "Lemma1"));
- assertEquals("[inflectedform121, inflectedform122]",
Arrays.toString(match.toFinalString()));
+ assertEquals("[inflectedform121, inflectedform122]",
Arrays.toString(match.toFinalString(null)));
}
public void testPreserveNoneUpper() throws Exception {
Match match = getMatch("POS1", "POS2",
Match.CaseConversion.NONE);
match.setToken(getAnalyzedTokenReadings("INFLECTEDFORM11",
"POS1", "Lemma1"));
- assertEquals("[inflectedform121, inflectedform122]",
Arrays.toString(match.toFinalString()));
+ assertEquals("[inflectedform121, inflectedform122]",
Arrays.toString(match.toFinalString(null)));
}
public void testPreserveNoneLower() throws Exception {
Match match = getMatch("POS1", "POS2",
Match.CaseConversion.NONE);
match.setToken(getAnalyzedTokenReadings("inflectedform11",
"POS1", "Lemma1"));
- assertEquals("[inflectedform121, inflectedform122]",
Arrays.toString(match.toFinalString()));
+ assertEquals("[inflectedform121, inflectedform122]",
Arrays.toString(match.toFinalString(null)));
}
public void testPreserveNoneMixed() throws Exception {
Match match = getMatch("POS1", "POS2",
Match.CaseConversion.NONE);
match.setToken(getAnalyzedTokenReadings("inFLectedFOrm11",
"POS1", "Lemma1"));
- assertEquals("[inflectedform121, inflectedform122]",
Arrays.toString(match.toFinalString()));
+ assertEquals("[inflectedform121, inflectedform122]",
Arrays.toString(match.toFinalString(null)));
}
//-- INCLUDE RANGE
@@ -204,31 +215,63 @@
public void testSimpleIncludeFollowing() throws Exception {
Match match = getMatch(null, null,
Match.IncludeRange.FOLLOWING);
match.setToken(getAnalyzedTokenReadings("inflectedform11
inflectedform2 inflectedform122 inflectedform122"), 1, 3);
- assertEquals("[inflectedform2 inflectedform122]",
Arrays.toString(match.toFinalString()));
+ assertEquals("[inflectedform2 inflectedform122]",
Arrays.toString(match.toFinalString(null)));
}
public void testPOSIncludeFollowing() throws Exception {
// POS is ignored when using IncludeRange.Following
Match match = getMatch("POS2", "POS33",
Match.IncludeRange.FOLLOWING);
match.setToken(getAnalyzedTokenReadings("inflectedform11
inflectedform2 inflectedform122 inflectedform122"), 1, 3);
- assertEquals("[inflectedform2 inflectedform122]",
Arrays.toString(match.toFinalString()));
+ assertEquals("[inflectedform2 inflectedform122]",
Arrays.toString(match.toFinalString(null)));
}
public void testIncludeAll() throws Exception {
Match match = getMatch(null, null, Match.IncludeRange.ALL);
match.setToken(getAnalyzedTokenReadings("inflectedform11
inflectedform2 inflectedform122 inflectedform122"), 1, 3);
- assertEquals("[inflectedform11inflectedform2
inflectedform122]", Arrays.toString(match.toFinalString()));
- // the first two tokens come together, it a known issue
+ assertEquals("[inflectedform11inflectedform2
inflectedform122]", Arrays.toString(match.toFinalString(null)));
+ // the first two tokens come together, it is a known issue
}
public void testPOSIncludeAll() throws Exception {
Match match = getMatch("POS1", "POS3", Match.IncludeRange.ALL);
match.setToken(getAnalyzedTokenReadings("inflectedform11
inflectedform2 inflectedform122 inflectedform122"), 1, 3);
- assertEquals("[inflectedform123inflectedform2
inflectedform122]", Arrays.toString(match.toFinalString()));
+ assertEquals("[inflectedform123inflectedform2
inflectedform122]", Arrays.toString(match.toFinalString(null)));
// Note that in this case the first token has the requested POS
(POS3 replaces POS1)
- // the first two tokens come together, it a known issue.
+ // the first two tokens come together, it is a known issue.
}
// TODO ad tests for using Match.IncludeRange with {@link
Match#staticLemma}
+ public void testSpeller() throws Exception {
+ //tests with synthesizer
+ Match match = getMatch("POS1", "POS2", true);
+ match.setSynthesizer(Language.POLISH.getSynthesizer());
+ match.setToken(getAnalyzedTokenReadings("inflectedform11", "POS1",
"Lemma1"));
+ //getting empty strings, which is what we want
+ assertEquals("[]", Arrays.toString(
match.toFinalString(Language.POLISH)));
+
+ // contrast with a speller = false!
+ match = getMatch("POS1", "POS2", false);
+ match.setSynthesizer(Language.POLISH.getSynthesizer());
+ match.setToken(getAnalyzedTokenReadings("inflectedform11", "POS1",
"Lemma1"));
+ assertEquals("[(inflectedform11)]", Arrays.toString(
match.toFinalString(Language.POLISH)));
+
+ //and now a real word - we should get something
+ match = getMatch("subst:sg:acc.nom:m3", "subst:sg:gen:m3", true);
+ match.setSynthesizer(Language.POLISH.getSynthesizer());
+ match.setToken(getAnalyzedTokenReadings("AON", "subst:sg:acc.nom:m3",
"AON"));
+ assertEquals("[AON-u]", Arrays.toString(
match.toFinalString(Language.POLISH)));
+
+ //and now pure text changes
+ match = getTextMatch("^(.*)$", "$0-u", true);
+ match.setSynthesizer(Language.POLISH.getSynthesizer());
+ match.setLemmaString("AON");
+ assertEquals("[AON-u]", Arrays.toString(
match.toFinalString(Language.POLISH)));
+ match.setLemmaString("batalion");
+ //should be empty
+ assertEquals("[]", Arrays.toString(
match.toFinalString(Language.POLISH)));
+ match.setLemmaString("ASEAN");
+ //and this one not
+ assertEquals("[ASEAN-u]", Arrays.toString(
match.toFinalString(Language.POLISH)));
+ }
}
This was sent by the SourceForge.net collaborative development platform, the
world's largest Open Source development site.
------------------------------------------------------------------------------
Live Security Virtual Conference
Exclusive live event will cover all the ways today's security and
threat landscape has changed and how IT managers can respond. Discussions
will include endpoint security, mobile security and the latest in malware
threats. http://www.accelacomm.com/jaw/sfrnl04242012/114/50122263/
_______________________________________________
Languagetool-cvs mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/languagetool-cvs