Revision: 8011 http://languagetool.svn.sourceforge.net/languagetool/?rev=8011&view=rev Author: jaumeortola Date: 2012-09-09 17:43:06 +0000 (Sun, 09 Sep 2012) Log Message: ----------- [ca] New Java rule: CATALAN_WRONG_WORD_IN_CONTEXT
Modified Paths: -------------- trunk/JLanguageTool/src/main/java/org/languagetool/language/Catalan.java trunk/JLanguageTool/src/main/resources/org/languagetool/rules/ca/grammar.xml Added Paths: ----------- trunk/JLanguageTool/src/main/java/org/languagetool/rules/ca/CatalanWrongWordInContextRule.java trunk/JLanguageTool/src/main/resources/org/languagetool/rules/ca/wrongWordInContext.txt trunk/JLanguageTool/src/test/java/org/languagetool/rules/ca/CatalanWrongWordInContextRuleTest.java Modified: trunk/JLanguageTool/src/main/java/org/languagetool/language/Catalan.java =================================================================== --- trunk/JLanguageTool/src/main/java/org/languagetool/language/Catalan.java 2012-09-09 13:57:14 UTC (rev 8010) +++ trunk/JLanguageTool/src/main/java/org/languagetool/language/Catalan.java 2012-09-09 17:43:06 UTC (rev 8011) @@ -36,6 +36,7 @@ import org.languagetool.rules.ca.CatalanUnpairedQuestionMarksRule; import org.languagetool.rules.ca.ComplexAdjectiveConcordanceRule; import org.languagetool.rules.ca.MorfologikCatalanSpellerRule; +import org.languagetool.rules.ca.CatalanWrongWordInContextRule; import org.languagetool.rules.patterns.Unifier; import org.languagetool.synthesis.Synthesizer; import org.languagetool.synthesis.ca.CatalanSynthesizer; @@ -44,14 +45,14 @@ import org.languagetool.tagging.disambiguation.Disambiguator; import org.languagetool.tagging.disambiguation.ca.CatalanHybridDisambiguator; import org.languagetool.tokenizers.SRXSentenceTokenizer; -import org.languagetool.tokenizers.SentenceTokenizer; +//import org.languagetool.tokenizers.SentenceTokenizer; import org.languagetool.tokenizers.Tokenizer; import org.languagetool.tokenizers.ca.CatalanWordTokenizer; public class Catalan extends Language { private Tagger tagger; - private SentenceTokenizer sentenceTokenizer; + //private SentenceTokenizer sentenceTokenizer; private Tokenizer wordTokenizer; private Synthesizer synthesizer; private Disambiguator disambiguator; @@ -107,7 +108,8 @@ CatalanUnpairedQuestionMarksRule.class, CatalanUnpairedExclamationMarksRule.class, AccentuationCheckRule.class, - ComplexAdjectiveConcordanceRule.class + ComplexAdjectiveConcordanceRule.class, + CatalanWrongWordInContextRule.class //CastellanismesReplaceRule.class, //AccentuacioReplaceRule.class ); @@ -129,13 +131,13 @@ return synthesizer; } - @Override - public final SentenceTokenizer getSentenceTokenizer() { - if (sentenceTokenizer == null) { - sentenceTokenizer = new SRXSentenceTokenizer(this); - } - return sentenceTokenizer; - } +// @Override +// public final SentenceTokenizer getSentenceTokenizer() { +// if (sentenceTokenizer == null) { +// sentenceTokenizer = new SRXSentenceTokenizer(this); +// } +// return sentenceTokenizer; +// } @Override public Unifier getUnifier() { Added: trunk/JLanguageTool/src/main/java/org/languagetool/rules/ca/CatalanWrongWordInContextRule.java =================================================================== --- trunk/JLanguageTool/src/main/java/org/languagetool/rules/ca/CatalanWrongWordInContextRule.java (rev 0) +++ trunk/JLanguageTool/src/main/java/org/languagetool/rules/ca/CatalanWrongWordInContextRule.java 2012-09-09 17:43:06 UTC (rev 8011) @@ -0,0 +1,67 @@ +/* LanguageTool, a natural language style checker + * Copyright (C) 2012 Markus Brenneis + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 + * USA + */ +package org.languagetool.rules.ca; + +import java.io.IOException; +import java.util.ResourceBundle; + +import org.languagetool.rules.WrongWordInContextRule; + +public class CatalanWrongWordInContextRule extends WrongWordInContextRule { + + public CatalanWrongWordInContextRule(final ResourceBundle messages) throws IOException { + super(messages); + } + + @Override + protected String getCategoryString() { + return "Paraules fàcils de confondre"; + } + + @Override + public String getId() { + return "CATALAN_WRONG_WORD_IN_CONTEXT"; + } + + @Override + public String getDescription() { + return "Confusió de paraules (rendible/rentable, etc.)"; + } + + @Override + protected String getFilename() { + return "/ca/wrongWordInContext.txt"; + } + + @Override + protected String getMessageString() { + return "Possible confusió: ¿Volíeu dir <suggestion>$SUGGESTION</suggestion> en lloc de '$WRONGWORD'?"; + } + + @Override + protected String getShortMessageString() { + return "Possible confusió"; + } + + @Override + protected String getLongMessageString() { + return "Possible confusió: ¿Volíeu dir <suggestion>$SUGGESTION</suggestion> (= $EXPLANATION_SUGGESTION) en lloc de '$WRONGWORD' (= $EXPLANATION_WRONGWORD)?"; + } + +} Modified: trunk/JLanguageTool/src/main/resources/org/languagetool/rules/ca/grammar.xml =================================================================== --- trunk/JLanguageTool/src/main/resources/org/languagetool/rules/ca/grammar.xml 2012-09-09 13:57:14 UTC (rev 8010) +++ trunk/JLanguageTool/src/main/resources/org/languagetool/rules/ca/grammar.xml 2012-09-09 17:43:06 UTC (rev 8011) @@ -6253,7 +6253,8 @@ <token>ben</token> <token>segur</token> </pattern> - <message>Cal dir <suggestion>ben segur</suggestion> o <suggestion>de segur</suggestion>.</message> + <message>Cal dir <suggestion>ben segur</suggestion>, <suggestion>de segur</suggestion>, <suggestion>segurament</suggestion>, + <suggestion>amb seguretat</suggestion> o <suggestion>sens dubte</suggestion>.</message> <short>Expressió incorrecta</short> <example type="incorrect"><marker>a ben segur</marker> que ho faràs</example> <example type="correct"><marker>ben segur</marker> que ho faràs</example> @@ -7733,6 +7734,17 @@ </rule> </rulegroup> </category> + <category name="Paraules incorrectes"> + <rule id="ANDAMI_BASTIDA" name="*andami/bastida"> + <pattern> + <token regexp="yes">andamis?</token> + </pattern> + <message>¿Volíeu dir <suggestion>bastida</suggestion> o <suggestion>bastides</suggestion> (= construcció) en comptes de 'andamis' (= manera de caminar)?</message> + <short>Paraula dubtosa</short> + <example type="incorrect">Els <marker>andamis</marker></example> + <example type="correct">Les bastides</example> + </rule> + </category> <category name="Sintaxi"> <rulegroup id="FALTA_ELEMENT" name="Falta un element entre verbs: hi ha una cosa *(que) és clara"> <rule> Added: trunk/JLanguageTool/src/main/resources/org/languagetool/rules/ca/wrongWordInContext.txt =================================================================== --- trunk/JLanguageTool/src/main/resources/org/languagetool/rules/ca/wrongWordInContext.txt (rev 0) +++ trunk/JLanguageTool/src/main/resources/org/languagetool/rules/ca/wrongWordInContext.txt 2012-09-09 17:43:06 UTC (rev 8011) @@ -0,0 +1,8 @@ +#word1 word2 match1 match2 context1 context2 [explanation1 explanation2] +#do not forget to add tests to CatalanWrongWordInContextRuleTest.java +# rendible/rentable +rendibles? rentables? ndib ntab cars?|cara|cares|barat.*|terminis?|interès|interess.*|result.*|produ.*|pag.*|cobr.*|negoci.*|fira|empres.*|ven.*|compr.*|companyi.*|econòm.*|econom.*|treball.*|project.*|inver.*|conre.*|culti.* roba|robes|teixits?|pells?|cuir.*|empaperats? que rendeix econòmicament que es pot rentar +# escortar/escoltar (cal ampliar-ho...) +escort.* escolt.* ort olt polici.*|guàrdi.*|vigil.*|enxamp.*|atrap.*|deté.*|deten.*|detin.*|presó|presoner.*|calabós atent.*|atenció|parl.*|veus?|ràdios?|concerts?|peça|peces|simfoni.*|cant.* acompanyar aplicar l'orella +# escorta (jugador de bàsquet)/escolta +#venda/bena \ No newline at end of file Added: trunk/JLanguageTool/src/test/java/org/languagetool/rules/ca/CatalanWrongWordInContextRuleTest.java =================================================================== --- trunk/JLanguageTool/src/test/java/org/languagetool/rules/ca/CatalanWrongWordInContextRuleTest.java (rev 0) +++ trunk/JLanguageTool/src/test/java/org/languagetool/rules/ca/CatalanWrongWordInContextRuleTest.java 2012-09-09 17:43:06 UTC (rev 8011) @@ -0,0 +1,46 @@ +/* LanguageTool, a natural language style checker + * Copyright (C) 2012 Markus Brenneis + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 + * USA + */ +package org.languagetool.rules.ca; + +import java.io.IOException; + +import junit.framework.TestCase; + +import org.languagetool.JLanguageTool; +import org.languagetool.Language; + +/** + * @author Jaume Ortolà + */ +public class CatalanWrongWordInContextRuleTest extends TestCase { + + public void testRule() throws IOException { + CatalanWrongWordInContextRule rule = new CatalanWrongWordInContextRule(null); + JLanguageTool langTool = new JLanguageTool(Language.CATALAN); + + // rendible/rentable + assertEquals(0, rule.match(langTool.getAnalyzedSentence("Una empresa molt rendible.")).length); + assertEquals(1, rule.match(langTool.getAnalyzedSentence("Una empresa molt rentable.")).length); + assertEquals(1, rule.match(langTool.getAnalyzedSentence("Uns cultius rentables.")).length); + assertEquals(1, rule.match(langTool.getAnalyzedSentence("Es venen bé i són rentables.")).length); + assertEquals("rendibles", rule.match(langTool.getAnalyzedSentence("Uns projectes molt rentables."))[0].getSuggestedReplacements().get(0)); + assertEquals("rentable", rule.match(langTool.getAnalyzedSentence("Un teixit rendible."))[0].getSuggestedReplacements().get(0)); + } + +} This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. ------------------------------------------------------------------------------ Live Security Virtual Conference Exclusive live event will cover all the ways today's security and threat landscape has changed and how IT managers can respond. Discussions will include endpoint security, mobile security and the latest in malware threats. http://www.accelacomm.com/jaw/sfrnl04242012/114/50122263/ _______________________________________________ Languagetool-cvs mailing list Languagetool-cvs@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/languagetool-cvs