Revision: 8039 http://languagetool.svn.sourceforge.net/languagetool/?rev=8039&view=rev Author: dnaber Date: 2012-09-14 19:40:30 +0000 (Fri, 14 Sep 2012) Log Message: ----------- [en] don't consider this an error: "Ellipsis . . . as suggested by The Chicago Manual of Style"
Modified Paths: -------------- trunk/JLanguageTool/CHANGES.txt trunk/JLanguageTool/src/main/java/org/languagetool/language/English.java trunk/JLanguageTool/src/main/java/org/languagetool/rules/CommaWhitespaceRule.java trunk/JLanguageTool/src/test/java/org/languagetool/rules/CommaWhitespaceRuleTest.java Added Paths: ----------- trunk/JLanguageTool/src/main/java/org/languagetool/rules/en/EnglishCommaWhitespaceRule.java trunk/JLanguageTool/src/test/java/org/languagetool/rules/en/EnglishCommaWhitespaceRuleTest.java Modified: trunk/JLanguageTool/CHANGES.txt =================================================================== --- trunk/JLanguageTool/CHANGES.txt 2012-09-14 19:20:24 UTC (rev 8038) +++ trunk/JLanguageTool/CHANGES.txt 2012-09-14 19:40:30 UTC (rev 8039) @@ -31,6 +31,8 @@ -English: -fixed false alarm (sf bug #3543914) + -don't consider these ellipsis an error: "foo . . . bar" (source: + Chicago Manual of Style) -German: -added simple verb/subject agreement checker Modified: trunk/JLanguageTool/src/main/java/org/languagetool/language/English.java =================================================================== --- trunk/JLanguageTool/src/main/java/org/languagetool/language/English.java 2012-09-14 19:20:24 UTC (rev 8038) +++ trunk/JLanguageTool/src/main/java/org/languagetool/language/English.java 2012-09-14 19:40:30 UTC (rev 8039) @@ -18,22 +18,9 @@ */ package org.languagetool.language; -import java.util.Arrays; -import java.util.List; -import java.util.Locale; - import org.languagetool.Language; -import org.languagetool.rules.CommaWhitespaceRule; -import org.languagetool.rules.DoublePunctuationRule; -import org.languagetool.rules.LongSentenceRule; -import org.languagetool.rules.Rule; -import org.languagetool.rules.UppercaseSentenceStartRule; -import org.languagetool.rules.WhitespaceRule; -import org.languagetool.rules.en.AvsAnRule; -import org.languagetool.rules.en.CompoundRule; -import org.languagetool.rules.en.EnglishUnpairedBracketsRule; -import 
org.languagetool.rules.en.EnglishWordRepeatBeginningRule; -import org.languagetool.rules.en.EnglishWordRepeatRule; +import org.languagetool.rules.*; +import org.languagetool.rules.en.*; import org.languagetool.synthesis.Synthesizer; import org.languagetool.synthesis.en.EnglishSynthesizer; import org.languagetool.tagging.Tagger; @@ -45,6 +32,10 @@ import org.languagetool.tokenizers.Tokenizer; import org.languagetool.tokenizers.en.EnglishWordTokenizer; +import java.util.Arrays; +import java.util.List; +import java.util.Locale; + public class English extends Language { private Tagger tagger; @@ -127,7 +118,7 @@ @Override public List<Class<? extends Rule>> getRelevantRules() { return Arrays.asList( - CommaWhitespaceRule.class, + EnglishCommaWhitespaceRule.class, DoublePunctuationRule.class, EnglishUnpairedBracketsRule.class, UppercaseSentenceStartRule.class, Modified: trunk/JLanguageTool/src/main/java/org/languagetool/rules/CommaWhitespaceRule.java =================================================================== --- trunk/JLanguageTool/src/main/java/org/languagetool/rules/CommaWhitespaceRule.java 2012-09-14 19:20:24 UTC (rev 8038) +++ trunk/JLanguageTool/src/main/java/org/languagetool/rules/CommaWhitespaceRule.java 2012-09-14 19:40:30 UTC (rev 8039) @@ -40,7 +40,7 @@ } @Override - public final String getId() { + public String getId() { return "COMMA_PARENTHESIS_WHITESPACE"; } @@ -59,12 +59,15 @@ int prevLen = 0; for (int i = 0; i < tokens.length; i++) { final String token = tokens[i].getToken(); - final boolean isWhite = tokens[i].isWhitespace() - || tokens[i].isFieldCode(); + final boolean isWhite = tokens[i].isWhitespace() || tokens[i].isFieldCode(); String msg = null; int fixLen = 0; String suggestionText = null; - if (isWhite && isLeftBracket(prevToken)) { + final int skip = getExceptionSkip(tokens, i); + if (skip > 0) { + // ignore + i += skip; + } else if (isWhite && isLeftBracket(prevToken)) { msg = messages.getString("no_space_after"); suggestionText = 
prevToken; fixLen = 1; @@ -85,9 +88,8 @@ suggestionText = ","; fixLen = 1; //exception for duplicated comma (we already have another rule for that) - if (i + 1 < tokens.length - && ",".equals(tokens[i + 1].getToken())) { - msg = null; + if (i + 1 < tokens.length && ",".equals(tokens[i + 1].getToken())) { + msg = null; } } else if (token.equals(".")) { msg = messages.getString("no_space_before_dot"); @@ -117,6 +119,14 @@ return toRuleMatchArray(ruleMatches); } + /** + * @return return 0 if there is no exception here, return the amount of tokens to be skipped + * if there's an exception here, i.e. if you want to skip over tokens that would otherwise be an error + */ + protected int getExceptionSkip(AnalyzedTokenReadings[] tokens, int pos) { + return 0; + } + static boolean isNotQuoteOrHyphen(final String str) { if (str.length() == 1) { final char c = str.charAt(0); Added: trunk/JLanguageTool/src/main/java/org/languagetool/rules/en/EnglishCommaWhitespaceRule.java =================================================================== --- trunk/JLanguageTool/src/main/java/org/languagetool/rules/en/EnglishCommaWhitespaceRule.java (rev 0) +++ trunk/JLanguageTool/src/main/java/org/languagetool/rules/en/EnglishCommaWhitespaceRule.java 2012-09-14 19:40:30 UTC (rev 8039) @@ -0,0 +1,66 @@ +/* LanguageTool, a natural language style checker + * Copyright (C) 2012 Daniel Naber (http://www.danielnaber.de) + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 + * USA + */ +package org.languagetool.rules.en; + +import org.languagetool.AnalyzedTokenReadings; +import org.languagetool.rules.CommaWhitespaceRule; + +import java.util.ResourceBundle; + +/** + * Exception for English, according to The Chicago Manual of Style as quoted by + * http://en.wikipedia.org/wiki/Ellipsis. + */ +public class EnglishCommaWhitespaceRule extends CommaWhitespaceRule { + + public EnglishCommaWhitespaceRule(final ResourceBundle messages) { + super(messages); + } + + @Override + public final String getId() { + return "ENGLISH_COMMA_PARENTHESIS_WHITESPACE"; + } + + @Override + protected int getExceptionSkip(AnalyzedTokenReadings[] tokens, int pos) { + // allow spaced end ellipsis, i.e. " . . . .": + if (pos + 8 < tokens.length + && isDotAt(tokens, pos + 2) + && isDotAt(tokens, pos + 4) + && isDotAt(tokens, pos + 6) + && isDotAt(tokens, pos + 8)) { + return 7; + } + // allow spaced ellipsis, i.e. " . . . 
": + if (pos + 6 < tokens.length + && isDotAt(tokens, pos + 2) + && isDotAt(tokens, pos + 4) + && isDotAt(tokens, pos + 6)) { + return 5; + } + return 0; + } + + private boolean isDotAt(AnalyzedTokenReadings[] tokens, int pos) { + final String str = tokens[pos].getToken(); + return str.length() > 0 && str.charAt(0) == '.'; + } + +} Modified: trunk/JLanguageTool/src/test/java/org/languagetool/rules/CommaWhitespaceRuleTest.java =================================================================== --- trunk/JLanguageTool/src/test/java/org/languagetool/rules/CommaWhitespaceRuleTest.java 2012-09-14 19:20:24 UTC (rev 8038) +++ trunk/JLanguageTool/src/test/java/org/languagetool/rules/CommaWhitespaceRuleTest.java 2012-09-14 19:40:30 UTC (rev 8039) @@ -19,6 +19,7 @@ package org.languagetool.rules; import java.io.IOException; +import java.util.Arrays; import junit.framework.TestCase; @@ -28,7 +29,8 @@ public class CommaWhitespaceRuleTest extends TestCase { - private CommaWhitespaceRule rule; + protected CommaWhitespaceRule rule; + private JLanguageTool langTool; @Override @@ -73,12 +75,14 @@ assertEquals(6, matches[0].getToPos()); assertEquals(11, matches[1].getFromPos()); assertEquals(13, matches[1].getToPos()); + } + public void testSpecialCaseForEnglish() throws IOException { assertMatches("Ellipsis . . . as suggested by The Chicago Manual of Style", 3); assertMatches("Ellipsis . . . . 
as suggested by The Chicago Manual of Style", 4); } - private void assertMatches(String text, int expectedMatches) throws IOException { + protected void assertMatches(String text, int expectedMatches) throws IOException { assertEquals(expectedMatches, rule.match(langTool.getAnalyzedSentence(text)).length); } Added: trunk/JLanguageTool/src/test/java/org/languagetool/rules/en/EnglishCommaWhitespaceRuleTest.java =================================================================== --- trunk/JLanguageTool/src/test/java/org/languagetool/rules/en/EnglishCommaWhitespaceRuleTest.java (rev 0) +++ trunk/JLanguageTool/src/test/java/org/languagetool/rules/en/EnglishCommaWhitespaceRuleTest.java 2012-09-14 19:40:30 UTC (rev 8039) @@ -0,0 +1,46 @@ +/* LanguageTool, a natural language style checker + * Copyright (C) 2012 Daniel Naber (http://www.danielnaber.de) + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 + * USA + */ +package org.languagetool.rules.en; + +import org.languagetool.TestTools; +import org.languagetool.rules.CommaWhitespaceRuleTest; + +import java.io.IOException; + +public class EnglishCommaWhitespaceRuleTest extends CommaWhitespaceRuleTest { + + @Override + public void setUp() throws IOException { + super.setUp(); + rule = new EnglishCommaWhitespaceRule(TestTools.getEnglishMessages()); + } + + @Override + public void testSpecialCaseForEnglish() throws IOException { + assertMatches("Ellipsis . . . as suggested by The Chicago Manual of Style", 0); + assertMatches("Ellipsis . . . as suggested . But this is wrong.", 1); + assertMatches("Ellipsis . . . . as suggested by The Chicago Manual of Style", 0); + assertMatches("Ellipsis . . . . as suggested . But this is wrong.", 1); + assertMatches("Ellipsis . . . ", 0); + assertMatches("Ellipsis . . . . ", 0); + assertMatches("Ellipsis . . .", 0); + assertMatches("Ellipsis . . . .", 0); + } + +} This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. ------------------------------------------------------------------------------ Got visibility? Most devs have no idea what their production app looks like. Find out how fast your code is with AppDynamics Lite. http://ad.doubleclick.net/clk;262219671;13503038;y? http://info.appdynamics.com/FreeJavaPerformanceDownload.html _______________________________________________ Languagetool-cvs mailing list Languagetool-cvs@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/languagetool-cvs