Revision: 6030
          
http://languagetool.svn.sourceforge.net/languagetool/?rev=6030&view=rev
Author:   gulp21-1
Date:     2011-12-11 17:33:41 +0000 (Sun, 11 Dec 2011)
Log Message:
-----------
added WORD_REPEAT_BEGINNING_RULE which detects repetition at the beginning of 
successive sentences
enabled for [en] and [de]

Modified Paths:
--------------
    trunk/JLanguageTool/src/java/org/languagetool/MessagesBundle.properties
    trunk/JLanguageTool/src/java/org/languagetool/MessagesBundle_de.properties
    trunk/JLanguageTool/src/java/org/languagetool/language/English.java
    trunk/JLanguageTool/src/java/org/languagetool/language/German.java

Added Paths:
-----------
    
trunk/JLanguageTool/src/java/org/languagetool/rules/WordRepeatBeginningRule.java
    
trunk/JLanguageTool/src/java/org/languagetool/rules/de/GermanWordRepeatBeginningRule.java
    
trunk/JLanguageTool/src/java/org/languagetool/rules/en/EnglishWordRepeatBeginningRule.java
    
trunk/JLanguageTool/src/test/org/languagetool/rules/de/GermanWordRepeatBeginningRuleTest.java

Modified: 
trunk/JLanguageTool/src/java/org/languagetool/MessagesBundle.properties
===================================================================
--- trunk/JLanguageTool/src/java/org/languagetool/MessagesBundle.properties     
2011-12-11 15:22:38 UTC (rev 6029)
+++ trunk/JLanguageTool/src/java/org/languagetool/MessagesBundle.properties     
2011-12-11 17:33:41 UTC (rev 6030)
@@ -41,6 +41,14 @@
 
 desc_repetition_short = Word repetition
 
+desc_repetition_beginning = Successive sentences beginning with the same word
+
+desc_repetition_beginning_word = Three successive sentences begin with the same word.
+
+desc_repetition_beginning_adv = Two successive sentences begin with the same adverb.
+
+desc_repetition_beginning_thesaurus = Reword the sentence or use the thesaurus (menu Tools > Language) to find a synonym.
+
 desc_unpaired_brackets = Unpaired braces, brackets, quotation marks and similar symbols
 
 desc_uppercase_sentence = Checks that a sentence starts with an uppercase letter

Modified: 
trunk/JLanguageTool/src/java/org/languagetool/MessagesBundle_de.properties
===================================================================
--- trunk/JLanguageTool/src/java/org/languagetool/MessagesBundle_de.properties  
2011-12-11 15:22:38 UTC (rev 6029)
+++ trunk/JLanguageTool/src/java/org/languagetool/MessagesBundle_de.properties  
2011-12-11 17:33:41 UTC (rev 6030)
@@ -41,6 +41,14 @@
 
 desc_repetition_short = Wortwiederholung
 
+desc_repetition_beginning = Satzanfangwiederholung bei aufeinanderfolgenden S\u00e4tzen
+
+desc_repetition_beginning_word = Drei aufeinanderfolgende S\u00e4tze beginnen mit demselben Wort.
+
+desc_repetition_beginning_adv = Zwei aufeinanderfolgende S\u00e4tze beginnen mit demselben Konjunktionaladverb.
+
+desc_repetition_beginning_thesaurus = Formulieren Sie den Satz um oder verwenden Sie den Thesaurus (Men\u00fc Extras > Sprache), um ein Synonym zu finden.
+
 desc_unpaired_brackets = Unpaarige Anf\u00fchrungszeichen und Klammern
 
 desc_uppercase_sentence = Gro\u00dfschreibung am Satzanfang

Modified: trunk/JLanguageTool/src/java/org/languagetool/language/English.java
===================================================================
--- trunk/JLanguageTool/src/java/org/languagetool/language/English.java 
2011-12-11 15:22:38 UTC (rev 6029)
+++ trunk/JLanguageTool/src/java/org/languagetool/language/English.java 
2011-12-11 17:33:41 UTC (rev 6030)
@@ -25,6 +25,7 @@
 import org.languagetool.rules.en.AvsAnRule;
 import org.languagetool.rules.en.CompoundRule;
 import org.languagetool.rules.en.EnglishUnpairedBracketsRule;
+import org.languagetool.rules.en.EnglishWordRepeatBeginningRule;
 import org.languagetool.synthesis.Synthesizer;
 import org.languagetool.synthesis.en.EnglishSynthesizer;
 import org.languagetool.tagging.Tagger;
@@ -122,6 +123,7 @@
             LongSentenceRule.class,
             // specific to English:
             AvsAnRule.class,
+            EnglishWordRepeatBeginningRule.class,
             CompoundRule.class
     );
   }

Modified: trunk/JLanguageTool/src/java/org/languagetool/language/German.java
===================================================================
--- trunk/JLanguageTool/src/java/org/languagetool/language/German.java  
2011-12-11 15:22:38 UTC (rev 6029)
+++ trunk/JLanguageTool/src/java/org/languagetool/language/German.java  
2011-12-11 17:33:41 UTC (rev 6030)
@@ -98,6 +98,7 @@
             WhitespaceRule.class,
             // specific to German:
             GermanWordRepeatRule.class,
+            GermanWordRepeatBeginningRule.class,
             AgreementRule.class,
             CaseRule.class,
             CompoundRule.class,

Added: 
trunk/JLanguageTool/src/java/org/languagetool/rules/WordRepeatBeginningRule.java
===================================================================
--- 
trunk/JLanguageTool/src/java/org/languagetool/rules/WordRepeatBeginningRule.java
                            (rev 0)
+++ 
trunk/JLanguageTool/src/java/org/languagetool/rules/WordRepeatBeginningRule.java
    2011-12-11 17:33:41 UTC (rev 6030)
@@ -0,0 +1,153 @@
+/* LanguageTool, a natural language style checker 
+ * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de)
+ * 
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301
+ * USA
+ */
+package org.languagetool.rules;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.ResourceBundle;
+
+import org.languagetool.AnalyzedSentence;
+import org.languagetool.AnalyzedTokenReadings;
+import org.languagetool.Language;
+
+/**
+ * Checks whether three successive sentences begin with the same word, e.g.
+ * "I am Max. I am living in Germany. I like ice cream.", and whether two
+ * successive sentences begin with the same adverb, e.g.
+ * "Furthermore, he is ill. Furthermore, he likes her."
+ * <p>
+ * The rule is stateful: it remembers the first words of the two sentences it
+ * examined last. Sentences therefore have to be passed to
+ * {@link #match(AnalyzedSentence)} in text order, and {@link #reset()} clears
+ * the remembered words before an unrelated text is checked.
+ * 
+ * @author Markus Brenneis
+ */
+public class WordRepeatBeginningRule extends Rule {
+
+  /** First word of the sentence examined last. */
+  private String lastToken = "";
+
+  /** First word of the sentence examined before the last one. */
+  private String beforeLastToken = "";
+
+  /**
+   * @param messages bundle that provides the category name and the rule's messages
+   * @param language not used by this class
+   */
+  public WordRepeatBeginningRule(final ResourceBundle messages, final Language language) {
+    super(messages);
+    super.setCategory(new Category(messages.getString("category_misc")));
+  }
+
+  @Override
+  public String getId() {
+    return "WORD_REPEAT_BEGINNING_RULE";
+  }
+
+  @Override
+  public String getDescription() {
+    return messages.getString("desc_repetition_beginning");
+  }
+
+  /**
+   * Tells whether a sentence beginning is reported as soon as it occurs twice in
+   * a row rather than three times.
+   *
+   * @param token first word of a sentence
+   * @return always <code>false</code> here; language-specific subclasses override this
+   */
+  public boolean isAdverb(String token) {
+    return false;
+  }
+
+  /**
+   * Tells whether a token suppresses the warning, which avoids matches for lists
+   * like "2007: ..." or the like.
+   *
+   * @param token one of the first three tokens of a sentence
+   * @return <code>true</code> if the token is ":", "–" or "-"
+   */
+  public boolean isException(String token) {
+    return token.equals(":") || token.equals("–") || token.equals("-");
+  }
+
+  /**
+   * Compares the first word of the sentence with the first words remembered from
+   * the preceding sentences. A sentence with fewer than four tokens is skipped
+   * and leaves the remembered words untouched.
+   *
+   * @param text the sentence to check
+   * @return at most one match, which covers the first word of the sentence
+   */
+  @Override
+  public RuleMatch[] match(final AnalyzedSentence text) {
+    final List<RuleMatch> ruleMatches = new ArrayList<RuleMatch>();
+    final AnalyzedTokenReadings[] tokens = text.getTokensWithoutWhitespace();
+
+    // tokens[1] is treated as the first word; tokens[0] is never inspected
+    // (presumably it is the sentence start marker - confirm against the tokenizer)
+    if (tokens.length > 3) {
+      final String token = tokens[1].getToken();
+      // avoid "..." etc. to be matched: a single non-letter character is no word
+      final boolean isWord = token.length() != 1 || Character.isLetter(token.charAt(0));
+
+      if (isWord && lastToken.equals(token)
+          && !isException(token)
+          && !isException(tokens[2].getToken())
+          && !isException(tokens[3].getToken())) {
+        final String shortMsg;
+        if (isAdverb(token)) {
+          shortMsg = messages.getString("desc_repetition_beginning_adv");
+        } else if (beforeLastToken.equals(token)) {
+          shortMsg = messages.getString("desc_repetition_beginning_word");
+        } else {
+          shortMsg = "";
+        }
+
+        // an empty short message means that there is nothing to report
+        if (!shortMsg.equals("")) {
+          final String msg = shortMsg + " " + messages.getString("desc_repetition_beginning_thesaurus");
+          final int startPos = tokens[1].getStartPos();
+          final int endPos = startPos + token.length();
+          ruleMatches.add(new RuleMatch(this, startPos, endPos, msg, shortMsg));
+        }
+      }
+      // remember this beginning even if an exception suppressed the warning
+      beforeLastToken = lastToken;
+      lastToken = token;
+    }
+
+    //TODO should we ignore repetitions involving multiple paragraphs?
+    //if (tokens[tokens.length - 1].isParaEnd()) beforeLastToken = "";
+
+    return toRuleMatchArray(ruleMatches);
+  }
+
+  /**
+   * Forgets the remembered sentence beginnings, so that the first sentences of a
+   * text are not compared with the last sentences of the text checked before.
+   */
+  @Override
+  public void reset() {
+    lastToken = "";
+    beforeLastToken = "";
+  }
+
+}

Added: 
trunk/JLanguageTool/src/java/org/languagetool/rules/de/GermanWordRepeatBeginningRule.java
===================================================================
--- 
trunk/JLanguageTool/src/java/org/languagetool/rules/de/GermanWordRepeatBeginningRule.java
                           (rev 0)
+++ 
trunk/JLanguageTool/src/java/org/languagetool/rules/de/GermanWordRepeatBeginningRule.java
   2011-12-11 17:33:41 UTC (rev 6030)
@@ -0,0 +1,74 @@
+/* LanguageTool, a natural language style checker 
+ * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de)
+ * 
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301
+ * USA
+ */
+package org.languagetool.rules.de;
+
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.List;
+import java.util.ResourceBundle;
+import java.util.Set;
+
+import org.languagetool.AnalyzedSentence;
+import org.languagetool.AnalyzedTokenReadings;
+import org.languagetool.Language;
+import org.languagetool.rules.WordRepeatBeginningRule;
+
+/**
+ * {@link WordRepeatBeginningRule} for German; it supplies the list of German
+ * adverbs ("Konjunktionaladverben") that the rule looks up.
+ * 
+ * @author Markus Brenneis
+ */
+public class GermanWordRepeatBeginningRule extends WordRepeatBeginningRule {
+
+  /** Adverbs in capitalized form; the lookup is case-sensitive. */
+  private static final Set<String> ADVERBS = new HashSet<String>();
+  static {
+    final String[] adverbs = {
+      "Auch", "Anschließend", "Außerdem", "Danach", "Darüberhinaus", "Ferner",
+      "Nebenher", "Nebenbei", "Überdies", "Weiterführend", "Zudem", "Zusätzlich"
+    };
+    for (final String adverb : adverbs) {
+      ADVERBS.add(adverb);
+    }
+  }
+
+  /**
+   * @param messages passed on to the superclass constructor
+   * @param language passed on to the superclass constructor
+   */
+  public GermanWordRepeatBeginningRule(final ResourceBundle messages, final Language language) {
+    super(messages, language);
+  }
+
+  @Override
+  public String getId() {
+    return "GERMAN_WORD_REPEAT_BEGINNING_RULE";
+  }
+
+  /**
+   * @param token the word to look up
+   * @return <code>true</code> if the token is one of the listed adverbs
+   */
+  @Override
+  public boolean isAdverb(final String token) {
+    return ADVERBS.contains(token);
+  }
+
+}

Added: 
trunk/JLanguageTool/src/java/org/languagetool/rules/en/EnglishWordRepeatBeginningRule.java
===================================================================
--- 
trunk/JLanguageTool/src/java/org/languagetool/rules/en/EnglishWordRepeatBeginningRule.java
                          (rev 0)
+++ 
trunk/JLanguageTool/src/java/org/languagetool/rules/en/EnglishWordRepeatBeginningRule.java
  2011-12-11 17:33:41 UTC (rev 6030)
@@ -0,0 +1,71 @@
+/* LanguageTool, a natural language style checker 
+ * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de)
+ * 
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301
+ * USA
+ */
+package org.languagetool.rules.en;
+
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.List;
+import java.util.ResourceBundle;
+import java.util.Set;
+
+import org.languagetool.AnalyzedSentence;
+import org.languagetool.AnalyzedTokenReadings;
+import org.languagetool.Language;
+import org.languagetool.rules.WordRepeatBeginningRule;
+
+/**
+ * {@link WordRepeatBeginningRule} for English; it supplies the list of English
+ * adverbs that the rule looks up.
+ * 
+ * @author Markus Brenneis
+ */
+public class EnglishWordRepeatBeginningRule extends WordRepeatBeginningRule {
+
+  /** Adverbs in capitalized form; the lookup is case-sensitive. */
+  private static final Set<String> ADVERBS = new HashSet<String>();
+  static {
+    final String[] adverbs = {"Additionally", "Besides", "Furthermore", "Moreover"};
+    for (final String adverb : adverbs) {
+      ADVERBS.add(adverb);
+    }
+  }
+
+  /**
+   * @param messages passed on to the superclass constructor
+   * @param language passed on to the superclass constructor
+   */
+  public EnglishWordRepeatBeginningRule(final ResourceBundle messages, final Language language) {
+    super(messages, language);
+  }
+
+  @Override
+  public String getId() {
+    return "ENGLISH_WORD_REPEAT_BEGINNING_RULE";
+  }
+
+  /**
+   * @param token the word to look up
+   * @return <code>true</code> if the token is one of the listed adverbs
+   */
+  @Override
+  public boolean isAdverb(final String token) {
+    return ADVERBS.contains(token);
+  }
+
+}

Added: 
trunk/JLanguageTool/src/test/org/languagetool/rules/de/GermanWordRepeatBeginningRuleTest.java
===================================================================
--- 
trunk/JLanguageTool/src/test/org/languagetool/rules/de/GermanWordRepeatBeginningRuleTest.java
                               (rev 0)
+++ 
trunk/JLanguageTool/src/test/org/languagetool/rules/de/GermanWordRepeatBeginningRuleTest.java
       2011-12-11 17:33:41 UTC (rev 6030)
@@ -0,0 +1,58 @@
+/* LanguageTool, a natural language style checker 
+ * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de)
+ * 
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301
+ * USA
+ */
+package org.languagetool.rules.de;
+
+import java.io.IOException;
+
+import junit.framework.TestCase;
+import org.languagetool.JLanguageTool;
+import org.languagetool.Language;
+import org.languagetool.TestTools;
+
+/**
+ * Checks German texts with a {@link JLanguageTool} instance and compares the
+ * number of reported matches with the expected one.
+ * 
+ * @author Markus Brenneis
+ */
+public class GermanWordRepeatBeginningRuleTest extends TestCase {
+
+  public void testRule() throws IOException {
+    // only constructed, not queried: the checks below go through langTool
+    GermanWordRepeatBeginningRule rule =
+        new GermanWordRepeatBeginningRule(TestTools.getMessages("de"), Language.GERMAN);
+    JLanguageTool langTool = new JLanguageTool(Language.GERMAN);
+    // NOTE(review): langTool.check presumably counts the matches of every active
+    // German rule, not only of the rule under test - confirm
+    // correct sentences:
+    assertMatchCount(0, langTool, "Er ist nett. Er heißt Max.");
+    assertMatchCount(0, langTool, "Außerdem kommt er. Ferner kommt sie. Außerdem kommt es.");
+    assertMatchCount(0, langTool, "2011: Dieses passiert. 2011: Jenes passiert. 2011: Nicht passiert");
+    // errors:
+    assertMatchCount(1, langTool, "Er ist nett. Er heißt Max. Er ist 11.");
+    assertMatchCount(1, langTool, "Außerdem kommt er. Außerdem kommt sie.");
+  }
+
+  /** Asserts that checking the text yields the expected number of matches. */
+  private static void assertMatchCount(final int expected, final JLanguageTool langTool,
+      final String text) throws IOException {
+    assertEquals(expected, langTool.check(text).size());
+  }
+
+}

This was sent by the SourceForge.net collaborative development platform, the 
world's largest Open Source development site.


------------------------------------------------------------------------------
Learn Windows Azure Live!  Tuesday, Dec 13, 2011
Microsoft is holding a special Learn Windows Azure training event for 
developers. It will provide a great way to learn Windows Azure and what it 
provides. You can attend the event by watching it streamed LIVE online.  
Learn more at http://p.sf.net/sfu/ms-windowsazure
_______________________________________________
Languagetool-cvs mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/languagetool-cvs

Reply via email to