Revision: 6030
http://languagetool.svn.sourceforge.net/languagetool/?rev=6030&view=rev
Author: gulp21-1
Date: 2011-12-11 17:33:41 +0000 (Sun, 11 Dec 2011)
Log Message:
-----------
added WORD_REPEAT_BEGINNING_RULE which detects repetition at the beginning of
successive sentences
enabled for [en] and [de]
Modified Paths:
--------------
trunk/JLanguageTool/src/java/org/languagetool/MessagesBundle.properties
trunk/JLanguageTool/src/java/org/languagetool/MessagesBundle_de.properties
trunk/JLanguageTool/src/java/org/languagetool/language/English.java
trunk/JLanguageTool/src/java/org/languagetool/language/German.java
Added Paths:
-----------
trunk/JLanguageTool/src/java/org/languagetool/rules/WordRepeatBeginningRule.java
trunk/JLanguageTool/src/java/org/languagetool/rules/de/GermanWordRepeatBeginningRule.java
trunk/JLanguageTool/src/java/org/languagetool/rules/en/EnglishWordRepeatBeginningRule.java
trunk/JLanguageTool/src/test/org/languagetool/rules/de/GermanWordRepeatBeginningRuleTest.java
Modified:
trunk/JLanguageTool/src/java/org/languagetool/MessagesBundle.properties
===================================================================
--- trunk/JLanguageTool/src/java/org/languagetool/MessagesBundle.properties
2011-12-11 15:22:38 UTC (rev 6029)
+++ trunk/JLanguageTool/src/java/org/languagetool/MessagesBundle.properties
2011-12-11 17:33:41 UTC (rev 6030)
@@ -41,6 +41,14 @@
desc_repetition_short = Word repetition
+desc_repetition_beginning = Successive sentences beginning with the same word
+
+desc_repetition_beginning_word = Three successive sentences begin with the
same word.
+
+desc_repetition_beginning_adv = Two successive sentences begin with the same
adverb.
+
+desc_repetition_beginning_thesaurus = Reword the sentence or use the thesaurus
(menu Tools > Language) to find a synonym.
+
desc_unpaired_brackets = Unpaired braces, brackets, quotation marks and
similar symbols
desc_uppercase_sentence = Checks that a sentence starts with an uppercase
letter
Modified:
trunk/JLanguageTool/src/java/org/languagetool/MessagesBundle_de.properties
===================================================================
--- trunk/JLanguageTool/src/java/org/languagetool/MessagesBundle_de.properties
2011-12-11 15:22:38 UTC (rev 6029)
+++ trunk/JLanguageTool/src/java/org/languagetool/MessagesBundle_de.properties
2011-12-11 17:33:41 UTC (rev 6030)
@@ -41,6 +41,14 @@
desc_repetition_short = Wortwiederholung
+desc_repetition_beginning = Satzanfangwiederholung bei aufeinanderfolgenden
S\u00e4tzen
+
+desc_repetition_beginning_word = Drei aufeinanderfolgende S\u00e4tze beginnen
mit demselben Wort.
+
+desc_repetition_beginning_adv = Zwei aufeinanderfolgende S\u00e4tze beginnen
mit demselben Konjunktionaladverb.
+
+desc_repetition_beginning_thesaurus = Formulieren Sie den Satz um oder
verwenden Sie den Thesaurus (Men\u00fc Extras > Sprache), um ein Synonym zu
finden.
+
desc_unpaired_brackets = Unpaarige Anf\u00fchrungszeichen und Klammern
desc_uppercase_sentence = Gro\u00dfschreibung am Satzanfang
Modified: trunk/JLanguageTool/src/java/org/languagetool/language/English.java
===================================================================
--- trunk/JLanguageTool/src/java/org/languagetool/language/English.java
2011-12-11 15:22:38 UTC (rev 6029)
+++ trunk/JLanguageTool/src/java/org/languagetool/language/English.java
2011-12-11 17:33:41 UTC (rev 6030)
@@ -25,6 +25,7 @@
import org.languagetool.rules.en.AvsAnRule;
import org.languagetool.rules.en.CompoundRule;
import org.languagetool.rules.en.EnglishUnpairedBracketsRule;
+import org.languagetool.rules.en.EnglishWordRepeatBeginningRule;
import org.languagetool.synthesis.Synthesizer;
import org.languagetool.synthesis.en.EnglishSynthesizer;
import org.languagetool.tagging.Tagger;
@@ -122,6 +123,7 @@
LongSentenceRule.class,
// specific to English:
AvsAnRule.class,
+ EnglishWordRepeatBeginningRule.class,
CompoundRule.class
);
}
Modified: trunk/JLanguageTool/src/java/org/languagetool/language/German.java
===================================================================
--- trunk/JLanguageTool/src/java/org/languagetool/language/German.java
2011-12-11 15:22:38 UTC (rev 6029)
+++ trunk/JLanguageTool/src/java/org/languagetool/language/German.java
2011-12-11 17:33:41 UTC (rev 6030)
@@ -98,6 +98,7 @@
WhitespaceRule.class,
// specific to German:
GermanWordRepeatRule.class,
+ GermanWordRepeatBeginningRule.class,
AgreementRule.class,
CaseRule.class,
CompoundRule.class,
Added:
trunk/JLanguageTool/src/java/org/languagetool/rules/WordRepeatBeginningRule.java
===================================================================
---
trunk/JLanguageTool/src/java/org/languagetool/rules/WordRepeatBeginningRule.java
(rev 0)
+++
trunk/JLanguageTool/src/java/org/languagetool/rules/WordRepeatBeginningRule.java
2011-12-11 17:33:41 UTC (rev 6030)
@@ -0,0 +1,113 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package org.languagetool.rules;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.ResourceBundle;
+
+import org.languagetool.AnalyzedSentence;
+import org.languagetool.AnalyzedTokenReadings;
+import org.languagetool.Language;
+
+/**
+ * Checks whether three successive sentences begin with the same word, e.g.
+ * "I am Max. I am living in Germany. I like ice cream.", and whether two
+ * successive sentences begin with the same adverb, e.g.
+ * "Furthermore, he is ill. Furthermore, he likes her."
+ *
+ * <p>The rule is stateful: it remembers the first word of the two sentences
+ * most recently passed to {@link #match(AnalyzedSentence)}. Call
+ * {@link #reset()} to discard that memory.
+ *
+ * <p>Language-specific subclasses provide the adverb list by overriding
+ * {@link #isAdverb(String)}.
+ *
+ * @author Markus Brenneis
+ */
+public class WordRepeatBeginningRule extends Rule {
+
+  /** First word of the most recently recorded sentence ("" if none). */
+  private String lastToken = "";
+  /** First word of the sentence recorded before {@link #lastToken} ("" if none). */
+  private String beforeLastToken = "";
+
+  /**
+   * Creates the rule and files it under the "category_misc" category.
+   *
+   * @param messages bundle providing the category name and the rule messages
+   * @param language not used by this base class
+   */
+  public WordRepeatBeginningRule(final ResourceBundle messages, final Language language) {
+    super(messages);
+    super.setCategory(new Category(messages.getString("category_misc")));
+  }
+
+  @Override
+  public String getId() {
+    return "WORD_REPEAT_BEGINNING_RULE";
+  }
+
+  @Override
+  public String getDescription() {
+    return messages.getString("desc_repetition_beginning");
+  }
+
+  /**
+   * Tells whether the given sentence-initial token is an adverb for which
+   * already two successive occurrences are reported. This base
+   * implementation knows no adverbs.
+   *
+   * @param token the first word of a sentence
+   * @return always {@code false} here; subclasses override this
+   */
+  public boolean isAdverb(String token) {
+    return false;
+  }
+
+  /**
+   * Tells whether the token marks a list-like sentence beginning that must
+   * not be reported.
+   *
+   * @param token the token text to test
+   * @return {@code true} for ":", "–" and "-"
+   */
+  public boolean isException(String token) {
+    // avoid warning when having lists like "2007: ..." or the like
+    return token.equals(":") || token.equals("–") || token.equals("-");
+  }
+
+  /**
+   * Compares the first word of the sentence with the first words of the
+   * previously seen sentences and reports a repetition.
+   *
+   * <p>Sentences with three or fewer non-whitespace tokens are ignored
+   * entirely: they produce no match and do not update the remembered words.
+   *
+   * @param text the analyzed sentence to check
+   * @return at most one match, covering the first word of the sentence
+   */
+  @Override
+  public RuleMatch[] match(final AnalyzedSentence text) {
+    final List<RuleMatch> ruleMatches = new ArrayList<RuleMatch>();
+    final AnalyzedTokenReadings[] tokens = text.getTokensWithoutWhitespace();
+
+    if (tokens.length > 3) {
+      // tokens[0] is skipped - presumably the sentence start marker (TODO confirm)
+      final String token = tokens[1].getToken();
+      // avoid "..." etc. to be matched: a single non-letter character is no word
+      final boolean isWord = token.length() != 1 || Character.isLetter(token.charAt(0));
+
+      if (isWord && lastToken.equals(token)
+          && !isException(token)
+          && !isException(tokens[2].getToken())
+          && !isException(tokens[3].getToken())) {
+        final String shortMsg;
+        if (isAdverb(token)) {
+          // adverbs are reported as soon as two sentences in a row start with them
+          shortMsg = messages.getString("desc_repetition_beginning_adv");
+        } else if (beforeLastToken.equals(token)) {
+          // other words need three sentences in a row
+          shortMsg = messages.getString("desc_repetition_beginning_word");
+        } else {
+          shortMsg = "";
+        }
+
+        if (!shortMsg.equals("")) {
+          final String msg = shortMsg + " " + messages.getString("desc_repetition_beginning_thesaurus");
+          final int startPos = tokens[1].getStartPos();
+          final int endPos = startPos + token.length();
+          final RuleMatch ruleMatch = new RuleMatch(this, startPos, endPos, msg, shortMsg);
+          ruleMatches.add(ruleMatch);
+        }
+      }
+      beforeLastToken = lastToken;
+      lastToken = token;
+    }
+
+    //TODO should we ignore repetitions involving multiple paragraphs?
+    //if (tokens[tokens.length - 1].isParaEnd()) beforeLastToken = "";
+
+    return toRuleMatchArray(ruleMatches);
+  }
+
+  /**
+   * Forgets the remembered sentence beginnings, so that the sentences of a
+   * following text are not compared against those of an earlier one.
+   */
+  @Override
+  public void reset() {
+    lastToken = "";
+    beforeLastToken = "";
+  }
+
+}
Added:
trunk/JLanguageTool/src/java/org/languagetool/rules/de/GermanWordRepeatBeginningRule.java
===================================================================
---
trunk/JLanguageTool/src/java/org/languagetool/rules/de/GermanWordRepeatBeginningRule.java
(rev 0)
+++
trunk/JLanguageTool/src/java/org/languagetool/rules/de/GermanWordRepeatBeginningRule.java
2011-12-11 17:33:41 UTC (rev 6030)
@@ -0,0 +1,70 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package org.languagetool.rules.de;
+
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.List;
+import java.util.ResourceBundle;
+import java.util.Set;
+
+import org.languagetool.AnalyzedSentence;
+import org.languagetool.AnalyzedTokenReadings;
+import org.languagetool.Language;
+import org.languagetool.rules.WordRepeatBeginningRule;
+
+/**
+ * German variant of {@link WordRepeatBeginningRule}; it contributes the list
+ * of German adverbs ("Konjunktionaladverben") used by that rule.
+ *
+ * @author Markus Brenneis
+ */
+public class GermanWordRepeatBeginningRule extends WordRepeatBeginningRule {
+
+  /** Adverbs in their capitalized, sentence-initial spelling. */
+  private static final Set<String> ADVERBS = new HashSet<String>();
+  static {
+    final String[] adverbs = {
+      "Auch",
+      "Anschließend",
+      "Außerdem",
+      "Danach",
+      "Darüberhinaus",
+      "Ferner",
+      "Nebenher",
+      "Nebenbei",
+      "Überdies",
+      "Weiterführend",
+      "Zudem",
+      "Zusätzlich",
+    };
+    for (final String adverb : adverbs) {
+      ADVERBS.add(adverb);
+    }
+  }
+
+  /**
+   * @param messages bundle handed on to the base rule
+   * @param language handed on to the base rule
+   */
+  public GermanWordRepeatBeginningRule(final ResourceBundle messages, final Language language) {
+    super(messages, language);
+  }
+
+  @Override
+  public String getId() {
+    return "GERMAN_WORD_REPEAT_BEGINNING_RULE";
+  }
+
+  /**
+   * @param token the first word of a sentence
+   * @return {@code true} if the token is exactly one of the listed adverbs
+   *         (the comparison is case-sensitive)
+   */
+  @Override
+  public boolean isAdverb(final String token) {
+    return ADVERBS.contains(token);
+  }
+
+}
Added:
trunk/JLanguageTool/src/java/org/languagetool/rules/en/EnglishWordRepeatBeginningRule.java
===================================================================
---
trunk/JLanguageTool/src/java/org/languagetool/rules/en/EnglishWordRepeatBeginningRule.java
(rev 0)
+++
trunk/JLanguageTool/src/java/org/languagetool/rules/en/EnglishWordRepeatBeginningRule.java
2011-12-11 17:33:41 UTC (rev 6030)
@@ -0,0 +1,62 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package org.languagetool.rules.en;
+
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.List;
+import java.util.ResourceBundle;
+import java.util.Set;
+
+import org.languagetool.AnalyzedSentence;
+import org.languagetool.AnalyzedTokenReadings;
+import org.languagetool.Language;
+import org.languagetool.rules.WordRepeatBeginningRule;
+
+/**
+ * English variant of {@link WordRepeatBeginningRule}; it contributes the list
+ * of English adverbs used by that rule.
+ *
+ * @author Markus Brenneis
+ */
+public class EnglishWordRepeatBeginningRule extends WordRepeatBeginningRule {
+
+  /** Adverbs in their capitalized, sentence-initial spelling. */
+  private static final Set<String> ADVERBS = new HashSet<String>();
+  static {
+    final String[] adverbs = {
+      "Additionally",
+      "Besides",
+      "Furthermore",
+      "Moreover",
+    };
+    for (final String adverb : adverbs) {
+      ADVERBS.add(adverb);
+    }
+  }
+
+  /**
+   * @param messages bundle handed on to the base rule
+   * @param language handed on to the base rule
+   */
+  public EnglishWordRepeatBeginningRule(final ResourceBundle messages, final Language language) {
+    super(messages, language);
+  }
+
+  @Override
+  public String getId() {
+    return "ENGLISH_WORD_REPEAT_BEGINNING_RULE";
+  }
+
+  /**
+   * @param token the first word of a sentence
+   * @return {@code true} if the token is exactly one of the listed adverbs
+   *         (the comparison is case-sensitive)
+   */
+  @Override
+  public boolean isAdverb(final String token) {
+    return ADVERBS.contains(token);
+  }
+
+}
Added:
trunk/JLanguageTool/src/test/org/languagetool/rules/de/GermanWordRepeatBeginningRuleTest.java
===================================================================
---
trunk/JLanguageTool/src/test/org/languagetool/rules/de/GermanWordRepeatBeginningRuleTest.java
(rev 0)
+++
trunk/JLanguageTool/src/test/org/languagetool/rules/de/GermanWordRepeatBeginningRuleTest.java
2011-12-11 17:33:41 UTC (rev 6030)
@@ -0,0 +1,45 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package org.languagetool.rules.de;
+
+import java.io.IOException;
+
+import junit.framework.TestCase;
+import org.languagetool.JLanguageTool;
+import org.languagetool.Language;
+import org.languagetool.TestTools;
+
+/**
+ * @author Markus Brenneis
+ */
+public class GermanWordRepeatBeginningRuleTest extends TestCase {
+
+ public void testRule() throws IOException {
+ GermanWordRepeatBeginningRule rule = new
GermanWordRepeatBeginningRule(TestTools.getMessages("de"), Language.GERMAN);
+ JLanguageTool langTool = new JLanguageTool(Language.GERMAN);
+ // correct sentences:
+ assertEquals(0, langTool.check("Er ist nett. Er heißt Max.").size());
+ assertEquals(0, langTool.check("Außerdem kommt er. Ferner kommt sie.
Außerdem kommt es.").size());
+ assertEquals(0, langTool.check("2011: Dieses passiert. 2011: Jenes
passiert. 2011: Nicht passiert").size());
+ // errors:
+ assertEquals(1, langTool.check("Er ist nett. Er heißt Max. Er ist
11.").size());
+ assertEquals(1, langTool.check("Außerdem kommt er. Außerdem kommt
sie.").size());
+ }
+
+}
This was sent by the SourceForge.net collaborative development platform, the
world's largest Open Source development site.
------------------------------------------------------------------------------
Learn Windows Azure Live! Tuesday, Dec 13, 2011
Microsoft is holding a special Learn Windows Azure training event for
developers. It will provide a great way to learn Windows Azure and what it
provides. You can attend the event by watching it streamed LIVE online.
Learn more at http://p.sf.net/sfu/ms-windowsazure
_______________________________________________
Languagetool-cvs mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/languagetool-cvs