Revision: 7206 http://languagetool.svn.sourceforge.net/languagetool/?rev=7206&view=rev Author: milek_pl Date: 2012-06-03 12:21:08 +0000 (Sun, 03 Jun 2012) Log Message: ----------- new feature: suppress misspelled suggestions
Modified Paths: -------------- trunk/JLanguageTool/CHANGES.txt trunk/JLanguageTool/src/java/org/languagetool/rules/patterns/Match.java trunk/JLanguageTool/src/java/org/languagetool/rules/patterns/PatternRuleHandler.java trunk/JLanguageTool/src/java/org/languagetool/rules/patterns/PatternRuleMatcher.java trunk/JLanguageTool/src/java/org/languagetool/rules/patterns/XMLRuleHandler.java trunk/JLanguageTool/src/java/org/languagetool/synthesis/Synthesizer.java trunk/JLanguageTool/src/java/org/languagetool/tagging/disambiguation/rules/DisambiguationPatternRule.java trunk/JLanguageTool/src/java/org/languagetool/tagging/disambiguation/rules/DisambiguationRuleLoader.java trunk/JLanguageTool/src/rules/en/grammar.xml trunk/JLanguageTool/src/rules/pattern.xsd trunk/JLanguageTool/src/rules/rules.xsd trunk/JLanguageTool/src/test/org/languagetool/rules/patterns/MatchTest.java Modified: trunk/JLanguageTool/CHANGES.txt =================================================================== --- trunk/JLanguageTool/CHANGES.txt 2012-06-03 11:35:42 UTC (rev 7205) +++ trunk/JLanguageTool/CHANGES.txt 2012-06-03 12:21:08 UTC (rev 7206) @@ -78,7 +78,14 @@ -The XML format for rules has been changed to use <marker>...</marker> tags instead of mark_from and mark_to attributes - + + -Now it is possible to suppress misspelled suggestions altogether in XML rules by applying + an attribute suppress_misspelled="yes" on the <suggestion> element, AND on the <match> + element. If only <match> element has this attribute set to "yes", then the suggestion is + displayed, but no content of <match> is contained within (this might be a conditional part + of the suggestion). Note: for this to work, the tagger dictionary needs to be fairly complete; + words without lemmas and POS tags are considered to be misspelled. + -GUI: made the result of "Tag Text" more readable -Improved startup speed (Jarek Lipski) @@ -88,9 +95,10 @@ -In the profiling rules' mode on the command-line, you can now enable and disable rules. - -Some internal bug fixing in disambiguation and pattern rules. + -Some internal bug fixing in disambiguation and pattern rules. + + - 1.7 (2012-03-25) -English: Modified: trunk/JLanguageTool/src/java/org/languagetool/rules/patterns/Match.java =================================================================== --- trunk/JLanguageTool/src/java/org/languagetool/rules/patterns/Match.java 2012-06-03 11:35:42 UTC (rev 7205) +++ trunk/JLanguageTool/src/java/org/languagetool/rules/patterns/Match.java 2012-06-03 12:21:08 UTC (rev 7206) @@ -28,6 +28,7 @@ import org.languagetool.AnalyzedToken; import org.languagetool.AnalyzedTokenReadings; import org.languagetool.JLanguageTool; +import org.languagetool.Language; import org.languagetool.synthesis.Synthesizer; import org.languagetool.tools.StringTools; @@ -80,6 +81,7 @@ private final String posTag; private boolean postagRegexp; + private final boolean suppressMisspelled; private final String regexReplace; private final String posTagReplace; private final CaseConversion caseConversionType; @@ -121,6 +123,7 @@ final boolean postagRegexp, final String regexMatch, final String regexReplace, final CaseConversion caseConversionType, final boolean setPOS, + final boolean suppressMisspelled, final IncludeRange includeSkipped) { this.posTag = posTag; this.postagRegexp = postagRegexp; @@ -137,6 +140,7 @@ this.posTagReplace = posTagReplace; this.setPos = setPOS; this.includeSkipped = includeSkipped; + this.suppressMisspelled = suppressMisspelled; } /** @@ -234,14 +238,24 @@ synthesizer = synth; } + /** + * Used to tell whether the Match class will spell-check the result. + * @return True if this is so. + */ + public final boolean checksSpelling() { + return suppressMisspelled; + } + + /** * Gets all strings formatted using the match element. + * @param lang TODO * * @return array of strings * @throws IOException * in case of synthesizer-related disk problems. */ - public final String[] toFinalString() throws IOException { + public final String[] toFinalString(Language lang) throws IOException { String[] formattedString = new String[1]; if (formattedToken != null) { final int readingCount = formattedToken.getReadingsLength(); @@ -287,8 +301,12 @@ } } } - if (wordForms.isEmpty()) { - formattedString[0] = "(" + formattedToken.getToken() + ")"; + if (wordForms.isEmpty()) { + if (this.suppressMisspelled) { + formattedString[0] = ""; + } else { + formattedString[0] = "(" + formattedToken.getToken() + ")"; + } } else { formattedString = wordForms.toArray(new String[wordForms.size()]); } @@ -324,8 +342,24 @@ } helper[i] = formattedString[i] + skippedTokens; } - formattedString = helper; + + formattedString = helper; + } + if (this.suppressMisspelled && lang != null) { + List<String> formattedStringElements = new ArrayList<String>(formattedString.length); + for (final String str : formattedString) { + formattedStringElements.add(str); + } + //tagger-based speller + List<AnalyzedTokenReadings> analyzed = lang.getTagger().tag(formattedStringElements); + for (int i = 0; i < formattedString.length; i++) { + if (analyzed.get(i).getAnalyzedToken(0).getLemma() == null + && analyzed.get(i).getAnalyzedToken(0).hasNoTag()) { + formattedString[i] = ""; + } + } + } return formattedString; } @@ -393,7 +427,7 @@ */ public final String toTokenString() throws IOException { final StringBuilder output = new StringBuilder(); - final String[] stringToFormat = toFinalString(); + final String[] stringToFormat = toFinalString(null); for (int i = 0; i < stringToFormat.length; i++) { output.append(stringToFormat[i]); if (i + 1 < stringToFormat.length) { Modified: trunk/JLanguageTool/src/java/org/languagetool/rules/patterns/PatternRuleHandler.java =================================================================== --- trunk/JLanguageTool/src/java/org/languagetool/rules/patterns/PatternRuleHandler.java 2012-06-03 11:35:42 UTC (rev 7205) +++ trunk/JLanguageTool/src/java/org/languagetool/rules/patterns/PatternRuleHandler.java 2012-06-03 12:21:08 UTC (rev 7206) @@ -141,7 +141,10 @@ defaultOn = "on".equals(attrs.getValue(DEFAULT)); inRuleGroup = true; subId = 0; - } else if ("suggestion".equals(qName) && inMessage) { + } else if ("suggestion".equals(qName) && inMessage) { + if (YES.equals(attrs.getValue("suppress_misspelled"))) { + message.append("<pleasespellme/>"); + } message.append("<suggestion>"); inSuggestion = true; } else if (MATCH.equals(qName)) { @@ -250,7 +253,7 @@ } else if (RULEGROUP.equals(qName)) { inRuleGroup = false; } else if ("suggestion".equals(qName) && inMessage) { - message.append("</suggestion>"); + message.append("</suggestion>"); inSuggestion = false; } else if (MARKER.equals(qName) && inCorrectExample) { correctExample.append("</marker>"); Modified: trunk/JLanguageTool/src/java/org/languagetool/rules/patterns/PatternRuleMatcher.java =================================================================== --- trunk/JLanguageTool/src/java/org/languagetool/rules/patterns/PatternRuleMatcher.java 2012-06-03 11:35:42 UTC (rev 7205) +++ trunk/JLanguageTool/src/java/org/languagetool/rules/patterns/PatternRuleMatcher.java 2012-06-03 12:21:08 UTC (rev 7206) @@ -155,8 +155,16 @@ + tokens[lastMatchToken + correctedEndPos].getToken().length(); if (fromPos < toPos) { // this can happen with some skip="-1" when the last // token is not matched + + //now do some spell-checking: + if (!(errMessage.contains("<pleasespellme/>") && + errMessage.contains("<mistake/>"))) { + //remove stupid markers + errMessage.replace("<pleasespellme/>", ""); + errMessage.replace("<mistake/>",""); return new RuleMatch(rule, fromPos, toPos, errMessage, rule.getShortMessage(), startsWithUppercase); + } } // failed to create any rule match... return null; } @@ -360,7 +368,14 @@ final int skippedTokens = nextTokenPos - tokenIndex; suggestionMatches.get(start).setToken(tokens, tokenIndex - 1, skippedTokens); suggestionMatches.get(start).setSynthesizer(language.getSynthesizer()); - finalMatch = suggestionMatches.get(start).toFinalString(); + finalMatch = suggestionMatches.get(start).toFinalString(language); + if (suggestionMatches.get(start).checksSpelling() + && finalMatch.length == 1 + && "".equals(finalMatch[0])) { + finalMatch = new String[1]; + finalMatch[0] = "<mistake/>"; + } + } else { final List<String[]> matchList = new ArrayList<String[]>(); for (int i = 0; i < len; i++) { @@ -368,7 +383,7 @@ suggestionMatches.get(start).setToken(tokens, tokenIndex - 1 + i, skippedTokens); suggestionMatches.get(start) .setSynthesizer(language.getSynthesizer()); - matchList.add(suggestionMatches.get(start).toFinalString()); + matchList.add(suggestionMatches.get(start).toFinalString(language)); } return combineLists(matchList.toArray(new String[matchList.size()][]), new String[matchList.size()], 0, language); @@ -407,25 +422,26 @@ * @return Combined array of @String. */ private static String[] combineLists(final String[][] input, - final String[] output, final int r, final Language lang) { - final List<String> outputList = new ArrayList<String>(); - if (r == input.length) { - final StringBuilder sb = new StringBuilder(); - for (int k = 0; k < output.length; k++) { - sb.append(output[k]); - if (k < output.length - 1) { - sb.append(StringTools.addSpace(output[k + 1], lang)); - } + final String[] output, final int r, final Language lang) { + final List<String> outputList = new ArrayList<String>(); + if (r == input.length) { + final StringBuilder sb = new StringBuilder(); + for (int k = 0; k < output.length; k++) { + sb.append(output[k]); + if (k < output.length - 1) { + sb.append(StringTools.addSpace(output[k + 1], lang)); + } + + } + outputList.add(sb.toString()); + } else { + for (int c = 0; c < input[r].length; c++) { + output[r] = input[r][c]; + final String[] sList = combineLists(input, output, r + 1, lang); + outputList.addAll(Arrays.asList(sList)); + } } - outputList.add(sb.toString()); - } else { - for (int c = 0; c < input[r].length; c++) { - output[r] = input[r][c]; - final String[] sList = combineLists(input, output, r + 1, lang); - outputList.addAll(Arrays.asList(sList)); - } - } - return outputList.toArray(new String[outputList.size()]); + return outputList.toArray(new String[outputList.size()]); } } Modified: trunk/JLanguageTool/src/java/org/languagetool/rules/patterns/XMLRuleHandler.java =================================================================== --- trunk/JLanguageTool/src/java/org/languagetool/rules/patterns/XMLRuleHandler.java 2012-06-03 11:35:42 UTC (rev 7205) +++ trunk/JLanguageTool/src/java/org/languagetool/rules/patterns/XMLRuleHandler.java 2012-06-03 12:21:08 UTC (rev 7206) @@ -325,6 +325,7 @@ .equals(attrs.getValue(POSTAG_REGEXP)), attrs .getValue("regexp_match"), attrs.getValue("regexp_replace"), caseConversion, YES.equals(attrs.getValue("setpos")), + YES.equals(attrs.getValue("suppress_misspelled")), includeRange); mWorker.setInMessageOnly(!inSuggestion); if (inMessage) { @@ -460,7 +461,7 @@ if (Character.isDigit(messageStr.charAt(pos + 1))) { if (pos == 1 || messageStr.charAt(pos - 1) != '\u0001') { final Match mWorker = new Match(null, null, false, null, - null, Match.CaseConversion.NONE, false, Match.IncludeRange.NONE); + null, Match.CaseConversion.NONE, false, false, Match.IncludeRange.NONE); mWorker.setInMessageOnly(true); sugMatch.add(mWorker); } else if (messageStr.charAt(pos - 1) == '\u0001') { // real suggestion marker Modified: trunk/JLanguageTool/src/java/org/languagetool/synthesis/Synthesizer.java =================================================================== --- trunk/JLanguageTool/src/java/org/languagetool/synthesis/Synthesizer.java 2012-06-03 11:35:42 UTC (rev 7205) +++ trunk/JLanguageTool/src/java/org/languagetool/synthesis/Synthesizer.java 2012-06-03 12:21:08 UTC (rev 7206) @@ -40,8 +40,8 @@ /** Generates a form of the word with a given POS tag for a given lemma. * POS tag can be specified using regular expressions. * @param token the token to be used for synthesis - * @param posTag POS tag of the form to be generated. - * @param posTagRegExp Specifies whether the posTag string is a + * @param posTag POS tag of the form to be generated. + * @param posTagRegExp Specifies whether the posTag string is a * regular expression. **/ public String[] synthesize(AnalyzedToken token, String posTag, boolean posTagRegExp) throws IOException; Modified: trunk/JLanguageTool/src/java/org/languagetool/tagging/disambiguation/rules/DisambiguationPatternRule.java =================================================================== --- trunk/JLanguageTool/src/java/org/languagetool/tagging/disambiguation/rules/DisambiguationPatternRule.java 2012-06-03 11:35:42 UTC (rev 7205) +++ trunk/JLanguageTool/src/java/org/languagetool/tagging/disambiguation/rules/DisambiguationPatternRule.java 2012-06-03 12:21:08 UTC (rev 7206) @@ -280,7 +280,7 @@ if (matchElement == null) { // same as REPLACE if using <match> final Match tmpMatchToken = new Match(disambiguatedPOS, null, true, disambiguatedPOS, null, Match.CaseConversion.NONE, - false, Match.IncludeRange.NONE); + false, false, Match.IncludeRange.NONE); tmpMatchToken.setToken(whTokens[fromPos]); whTokens[fromPos] = tmpMatchToken.filterReadings(); filtered = true; Modified: trunk/JLanguageTool/src/java/org/languagetool/tagging/disambiguation/rules/DisambiguationRuleLoader.java =================================================================== --- trunk/JLanguageTool/src/java/org/languagetool/tagging/disambiguation/rules/DisambiguationRuleLoader.java 2012-06-03 11:35:42 UTC (rev 7205) +++ trunk/JLanguageTool/src/java/org/languagetool/tagging/disambiguation/rules/DisambiguationRuleLoader.java 2012-06-03 12:21:08 UTC (rev 7206) @@ -173,6 +173,7 @@ .equals(attrs.getValue(POSTAG_REGEXP)), attrs .getValue("regexp_match"), attrs.getValue("regexp_replace"), caseConversion, YES.equals(attrs.getValue("setpos")), + YES.equals(attrs.getValue("supress_mispelled")), includeRange); if (inDisambiguation) { if (attrs.getValue(NO) != null) { Modified: trunk/JLanguageTool/src/rules/en/grammar.xml =================================================================== --- trunk/JLanguageTool/src/rules/en/grammar.xml 2012-06-03 11:35:42 UTC (rev 7205) +++ trunk/JLanguageTool/src/rules/en/grammar.xml 2012-06-03 12:21:08 UTC (rev 7206) @@ -7798,7 +7798,7 @@ </marker> <token><exception postag="NN.*" postag_regexp="yes"></exception><exception>'</exception></token> </pattern> - <message>Possible agreement error. The noun <match no="2"></match> seems to be countable, so probably you should use: <suggestion><match no="1"></match> <match no="2" postag="NNS|NNPS" postag_regexp="yes"></match></suggestion>.</message> + <message>Possible agreement error. The noun <match no="2"></match> seems to be countable, so probably you should use: <suggestion suppress_misspelled="yes" ><match no="1"></match> <match no="2" suppress_misspelled="yes" postag="NNS|NNPS" postag_regexp="yes"></match></suggestion>.</message> <short>Grammatical problem</short> <example correction="five books" type="incorrect">I have <marker>five book</marker>.</example> <example type="correct">I have <marker>ten books</marker>.</example> @@ -7813,6 +7813,7 @@ <example type="correct">$800 billion economy was envisioned for the 1970s</example> <example type="correct">Middle East peace after Israel's 1956 invasion of Egypt</example> <example type="correct">The Apollo 8 mission was well covered in the British documentary.</example> + <example type="correct">This is 3 H2O.</example> </rule> <rule id="MANY_NN" name="Possible agreement error: 'many/several/few' + singular countable noun"> <pattern> Modified: trunk/JLanguageTool/src/rules/pattern.xsd =================================================================== --- trunk/JLanguageTool/src/rules/pattern.xsd 2012-06-03 11:35:42 UTC (rev 7205) +++ trunk/JLanguageTool/src/rules/pattern.xsd 2012-06-03 12:21:08 UTC (rev 7206) @@ -105,34 +105,36 @@ <xs:documentation xml:lang="en">Reference to the pattern element. Might be used in a suggestion or in the pattern.</xs:documentation> </xs:annotation> - <xs:element name="match"> + <xs:element name="match"> <xs:complexType mixed="true"> - <xs:attribute name="regexp_match" type="xs:string" use="optional" /> - <xs:attribute name="postag_regexp" type="binaryYesNo" - use="optional" default="no" /> - <xs:attribute name="setpos" type="binaryYesNo" use="optional" - default="no" /> - <xs:attribute name="case_conversion" use="optional"> - <xs:simpleType> - <xs:restriction base="xs:NMTOKEN"> - <xs:enumeration value="startlower" /> - <xs:enumeration value="startupper" /> - <xs:enumeration value="allupper" /> - <xs:enumeration value="alllower" /> - <xs:enumeration value="preserve" /> - </xs:restriction> - </xs:simpleType> - </xs:attribute> - <xs:attribute name="regexp_replace" type="xs:string" - use="optional" /> - <xs:attribute name="postag_replace" type="xs:string" - use="optional" /> - <xs:attribute name="postag" type="xs:string" use="optional" /> - <xs:attribute name="no" type="xs:nonNegativeInteger" - use="required" /> - <xs:attribute name="include_skipped" type="includeSelector" use="optional" - default="none"/> - </xs:complexType> + <xs:attribute name="regexp_match" type="xs:string" + use="optional" /> + <xs:attribute name="postag_regexp" type="binaryYesNo" + use="optional" default="no" /> + <xs:attribute name="setpos" type="binaryYesNo" use="optional" + default="no" /> + <xs:attribute name="case_conversion" use="optional"> + <xs:simpleType> + <xs:restriction base="xs:NMTOKEN"> + <xs:enumeration value="startlower" /> + <xs:enumeration value="startupper" /> + <xs:enumeration value="allupper" /> + <xs:enumeration value="alllower" /> + <xs:enumeration value="preserve" /> + </xs:restriction> + </xs:simpleType> + </xs:attribute> + <xs:attribute name="suppress_misspelled" type="binaryYesNo" use="optional"/> + <xs:attribute name="regexp_replace" type="xs:string" + use="optional" /> + <xs:attribute name="postag_replace" type="xs:string" + use="optional" /> + <xs:attribute name="postag" type="xs:string" use="optional" /> + <xs:attribute name="no" type="xs:nonNegativeInteger" + use="required" /> + <xs:attribute name="include_skipped" type="includeSelector" + use="optional" default="none" /> + </xs:complexType> </xs:element> <xs:element name="token"> Modified: trunk/JLanguageTool/src/rules/rules.xsd =================================================================== --- trunk/JLanguageTool/src/rules/rules.xsd 2012-06-03 11:35:42 UTC (rev 7205) +++ trunk/JLanguageTool/src/rules/rules.xsd 2012-06-03 12:21:08 UTC (rev 7206) @@ -169,11 +169,12 @@ <xs:documentation xml:lang="en"> Suggestion displayed to the user. </xs:documentation> </xs:annotation> - <xs:element name="suggestion"> - <xs:complexType mixed="true"> + <xs:element name="suggestion"> + <xs:complexType mixed="true"> <xs:sequence minOccurs='0' maxOccurs='unbounded'> <xs:element ref="match" /> </xs:sequence> + <xs:attribute name="suppress_misspelled" type="binaryYesNo" use="optional"/> </xs:complexType> </xs:element> Modified: trunk/JLanguageTool/src/test/org/languagetool/rules/patterns/MatchTest.java =================================================================== --- trunk/JLanguageTool/src/test/org/languagetool/rules/patterns/MatchTest.java 2012-06-03 11:35:42 UTC (rev 7205) +++ trunk/JLanguageTool/src/test/org/languagetool/rules/patterns/MatchTest.java 2012-06-03 12:21:08 UTC (rev 7206) @@ -28,6 +28,7 @@ import org.languagetool.AnalyzedToken; import org.languagetool.AnalyzedTokenReadings; import org.languagetool.JLanguageTool; +import org.languagetool.Language; import org.languagetool.language.Demo; import org.languagetool.rules.patterns.Match.CaseConversion; import org.languagetool.rules.patterns.Match.IncludeRange; @@ -69,13 +70,23 @@ } private Match getMatch(String posTag, String posTagReplace, CaseConversion caseConversion) throws UnsupportedEncodingException, IOException { - Match match = new Match(posTag, posTagReplace, true, null, null, caseConversion, false, IncludeRange.NONE); + Match match = new Match(posTag, posTagReplace, true, null, null, caseConversion, false, false, IncludeRange.NONE); match.setSynthesizer(synthesizer); return match; } + + private Match getMatch(String posTag, String posTagReplace, boolean spell) throws UnsupportedEncodingException, IOException { + Match match = new Match(posTag, posTagReplace, true, null, null, CaseConversion.NONE, false, spell, IncludeRange.NONE); + return match; + } + + private Match getTextMatch(String regexMatch, String regexpReplace, boolean spell) throws UnsupportedEncodingException, IOException { + Match match = new Match(null, null, false, regexMatch, regexpReplace, CaseConversion.NONE, false, spell, IncludeRange.NONE); + return match; + } private Match getMatch(String posTag, String posTagReplace, IncludeRange includeRange) throws UnsupportedEncodingException, IOException { - Match match = new Match(posTag, posTagReplace, true, null, null, CaseConversion.NONE, false, includeRange); + Match match = new Match(posTag, posTagReplace, true, null, null, CaseConversion.NONE, false, false, includeRange); match.setSynthesizer(synthesizer); return match; } @@ -110,93 +121,93 @@ public void testStartUpper() throws Exception { Match match = getMatch("POS1", "POS2", Match.CaseConversion.STARTUPPER); match.setToken(getAnalyzedTokenReadings("inflectedform11", "POS1", "Lemma1")); - assertEquals("[Inflectedform121, Inflectedform122]", Arrays.toString( match.toFinalString())); + assertEquals("[Inflectedform121, Inflectedform122]", Arrays.toString( match.toFinalString(null))); } public void testStartLower() throws Exception { Match match = getMatch("POS1", "POS2", Match.CaseConversion.STARTLOWER); match.setToken(getAnalyzedTokenReadings("InflectedForm11", "POS1", "Lemma1")); - assertEquals("[inflectedform121, inflectedform122]", Arrays.toString(match.toFinalString())); + assertEquals("[inflectedform121, inflectedform122]", Arrays.toString(match.toFinalString(null))); } public void testAllUpper() throws Exception { Match match = getMatch("POS1", "POS2", Match.CaseConversion.ALLUPPER); match.setToken(getAnalyzedTokenReadings("InflectedForm11", "POS1", "Lemma1")); - assertEquals("[INFLECTEDFORM121, INFLECTEDFORM122]", Arrays.toString(match.toFinalString())); + assertEquals("[INFLECTEDFORM121, INFLECTEDFORM122]", Arrays.toString(match.toFinalString(null))); } public void testAllLower() throws Exception { Match match = getMatch("POS1", "POS2", Match.CaseConversion.ALLLOWER); match.setToken(getAnalyzedTokenReadings("InflectedForm11", "POS1", "Lemma1")); - assertEquals("[inflectedform121, inflectedform122]", Arrays.toString(match.toFinalString())); + assertEquals("[inflectedform121, inflectedform122]", Arrays.toString(match.toFinalString(null))); } public void testPreserveStartUpper() throws Exception { Match match = getMatch("POS1", "POS2", Match.CaseConversion.PRESERVE); match.setToken(getAnalyzedTokenReadings("InflectedForm11", "POS1", "Lemma1")); - assertEquals("[Inflectedform121, Inflectedform122]", Arrays.toString(match.toFinalString())); + assertEquals("[Inflectedform121, Inflectedform122]", Arrays.toString(match.toFinalString(null))); } public void testStaticLemmaPreserveStartLower() throws Exception { Match match = getMatch("POS2", "POS1", Match.CaseConversion.PRESERVE); match.setLemmaString("lemma2"); match.setToken(getAnalyzedTokenReadings("inflectedform121", "POS2", "Lemma1")); - assertEquals("[inflectedform2]", Arrays.toString(match.toFinalString())); + assertEquals("[inflectedform2]", Arrays.toString(match.toFinalString(null))); } public void testStaticLemmaPreserveStartUpper() throws Exception { Match match = getMatch("POS2", "POS1", Match.CaseConversion.PRESERVE); match.setLemmaString("lemma2"); match.setToken(getAnalyzedTokenReadings("InflectedForm121", "POS2", "Lemma1")); - assertEquals("[Inflectedform2]", Arrays.toString(match.toFinalString())); + assertEquals("[Inflectedform2]", Arrays.toString(match.toFinalString(null))); } public void testStaticLemmaPreserveAllUpper() throws Exception { Match match = getMatch("POS2", "POS1", Match.CaseConversion.PRESERVE); match.setLemmaString("lemma2"); match.setToken(getAnalyzedTokenReadings("INFLECTEDFORM121", "POS2", "Lemma1")); - assertEquals("[INFLECTEDFORM2]", Arrays.toString(match.toFinalString())); + assertEquals("[INFLECTEDFORM2]", Arrays.toString(match.toFinalString(null))); } public void testStaticLemmaPreserveMixed() throws Exception { Match match = getMatch("POS2", "POS1", Match.CaseConversion.PRESERVE); match.setLemmaString("lemma2"); match.setToken(getAnalyzedTokenReadings("infleCtedForm121", "POS2", "Lemma1")); - assertEquals("[inflectedform2]", Arrays.toString(match.toFinalString())); + assertEquals("[inflectedform2]", Arrays.toString(match.toFinalString(null))); } public void testPreserveStartLower() throws Exception { Match match = getMatch("POS1", "POS2", Match.CaseConversion.PRESERVE); match.setToken(getAnalyzedTokenReadings("inflectedForm11", "POS1", "Lemma1")); - assertEquals("[inflectedform121, inflectedform122]", Arrays.toString(match.toFinalString())); + assertEquals("[inflectedform121, inflectedform122]", Arrays.toString(match.toFinalString(null))); } public void testPreserveAllUpper() throws Exception { Match match = getMatch("POS1", "POS2", Match.CaseConversion.PRESERVE); match.setToken(getAnalyzedTokenReadings("INFLECTEDFORM11", "POS1", "Lemma1")); - assertEquals("[INFLECTEDFORM121, INFLECTEDFORM122]", Arrays.toString(match.toFinalString())); + assertEquals("[INFLECTEDFORM121, INFLECTEDFORM122]", Arrays.toString(match.toFinalString(null))); } public void testPreserveMixed() throws Exception { Match match = getMatch("POS1", "POS2", Match.CaseConversion.PRESERVE); match.setToken(getAnalyzedTokenReadings("inflecTedForm11", "POS1", "Lemma1")); - assertEquals("[inflectedform121, inflectedform122]", Arrays.toString(match.toFinalString())); + assertEquals("[inflectedform121, inflectedform122]", Arrays.toString(match.toFinalString(null))); } public void testPreserveNoneUpper() throws Exception { Match match = getMatch("POS1", "POS2", Match.CaseConversion.NONE); match.setToken(getAnalyzedTokenReadings("INFLECTEDFORM11", "POS1", "Lemma1")); - assertEquals("[inflectedform121, inflectedform122]", Arrays.toString(match.toFinalString())); + assertEquals("[inflectedform121, inflectedform122]", Arrays.toString(match.toFinalString(null))); } public void testPreserveNoneLower() throws Exception { Match match = getMatch("POS1", "POS2", Match.CaseConversion.NONE); match.setToken(getAnalyzedTokenReadings("inflectedform11", "POS1", "Lemma1")); - assertEquals("[inflectedform121, inflectedform122]", Arrays.toString(match.toFinalString())); + assertEquals("[inflectedform121, inflectedform122]", Arrays.toString(match.toFinalString(null))); } public void testPreserveNoneMixed() throws Exception { Match match = getMatch("POS1", "POS2", Match.CaseConversion.NONE); match.setToken(getAnalyzedTokenReadings("inFLectedFOrm11", "POS1", "Lemma1")); - assertEquals("[inflectedform121, inflectedform122]", Arrays.toString(match.toFinalString())); + assertEquals("[inflectedform121, inflectedform122]", Arrays.toString(match.toFinalString(null))); } //-- INCLUDE RANGE @@ -204,31 +215,63 @@ public void testSimpleIncludeFollowing() throws Exception { Match match = getMatch(null, null, Match.IncludeRange.FOLLOWING); match.setToken(getAnalyzedTokenReadings("inflectedform11 inflectedform2 inflectedform122 inflectedform122"), 1, 3); - assertEquals("[inflectedform2 inflectedform122]", Arrays.toString(match.toFinalString())); + assertEquals("[inflectedform2 inflectedform122]", Arrays.toString(match.toFinalString(null))); } public void testPOSIncludeFollowing() throws Exception { // POS is ignored when using IncludeRange.Following Match match = getMatch("POS2", "POS33", Match.IncludeRange.FOLLOWING); match.setToken(getAnalyzedTokenReadings("inflectedform11 inflectedform2 inflectedform122 inflectedform122"), 1, 3); - assertEquals("[inflectedform2 inflectedform122]", Arrays.toString(match.toFinalString())); + assertEquals("[inflectedform2 inflectedform122]", Arrays.toString(match.toFinalString(null))); } public void testIncludeAll() throws Exception { Match match = getMatch(null, null, Match.IncludeRange.ALL); match.setToken(getAnalyzedTokenReadings("inflectedform11 inflectedform2 inflectedform122 inflectedform122"), 1, 3); - assertEquals("[inflectedform11inflectedform2 inflectedform122]", Arrays.toString(match.toFinalString())); - // the first two tokens come together, it a known issue + assertEquals("[inflectedform11inflectedform2 inflectedform122]", Arrays.toString(match.toFinalString(null))); + // the first two tokens come together, it is a known issue } public void testPOSIncludeAll() throws Exception { Match match = getMatch("POS1", "POS3", Match.IncludeRange.ALL); match.setToken(getAnalyzedTokenReadings("inflectedform11 inflectedform2 inflectedform122 inflectedform122"), 1, 3); - assertEquals("[inflectedform123inflectedform2 inflectedform122]", Arrays.toString(match.toFinalString())); + assertEquals("[inflectedform123inflectedform2 inflectedform122]", Arrays.toString(match.toFinalString(null))); // Note that in this case the first token has the requested POS (POS3 replaces POS1) - // the first two tokens come together, it a known issue. + // the first two tokens come together, it is a known issue. } // TODO ad tests for using Match.IncludeRange with {@link Match#staticLemma} + public void testSpeller() throws Exception { + //tests with synthesizer + Match match = getMatch("POS1", "POS2", true); + match.setSynthesizer(Language.POLISH.getSynthesizer()); + match.setToken(getAnalyzedTokenReadings("inflectedform11", "POS1", "Lemma1")); + //getting empty strings, which is what we want + assertEquals("[]", Arrays.toString( match.toFinalString(Language.POLISH))); + + // contrast with a speller = false! + match = getMatch("POS1", "POS2", false); + match.setSynthesizer(Language.POLISH.getSynthesizer()); + match.setToken(getAnalyzedTokenReadings("inflectedform11", "POS1", "Lemma1")); + assertEquals("[(inflectedform11)]", Arrays.toString( match.toFinalString(Language.POLISH))); + + //and now a real word - we should get something + match = getMatch("subst:sg:acc.nom:m3", "subst:sg:gen:m3", true); + match.setSynthesizer(Language.POLISH.getSynthesizer()); + match.setToken(getAnalyzedTokenReadings("AON", "subst:sg:acc.nom:m3", "AON")); + assertEquals("[AON-u]", Arrays.toString( match.toFinalString(Language.POLISH))); + + //and now pure text changes + match = getTextMatch("^(.*)$", "$0-u", true); + match.setSynthesizer(Language.POLISH.getSynthesizer()); + match.setLemmaString("AON"); + assertEquals("[AON-u]", Arrays.toString( match.toFinalString(Language.POLISH))); + match.setLemmaString("batalion"); + //should be empty + assertEquals("[]", Arrays.toString( match.toFinalString(Language.POLISH))); + match.setLemmaString("ASEAN"); + //and this one not + assertEquals("[ASEAN-u]", Arrays.toString( match.toFinalString(Language.POLISH))); + } } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. ------------------------------------------------------------------------------ Live Security Virtual Conference Exclusive live event will cover all the ways today's security and threat landscape has changed and how IT managers can respond. Discussions will include endpoint security, mobile security and the latest in malware threats. http://www.accelacomm.com/jaw/sfrnl04242012/114/50122263/ _______________________________________________ Languagetool-cvs mailing list Languagetool-cvs@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/languagetool-cvs