Revision: 9029
http://languagetool.svn.sourceforge.net/languagetool/?rev=9029&view=rev
Author: jaumeortola
Date: 2013-01-15 13:26:49 +0000 (Tue, 15 Jan 2013)
Log Message:
-----------
[ca] New rule for Catalan: CatalanUnpairedBracketsRule.java.
Added some still-unsolved problems to the tests (commented out).
Modified Paths:
--------------
trunk/JLanguageTool/src/main/java/org/languagetool/language/Catalan.java
trunk/JLanguageTool/src/test/java/org/languagetool/rules/ca/MorfologikCatalanSpellerRuleTest.java
trunk/JLanguageTool/src/test/java/org/languagetool/tokenizers/ca/CatalanSentenceTokenizerTest.java
Added Paths:
-----------
trunk/JLanguageTool/src/main/java/org/languagetool/rules/ca/CatalanUnpairedBracketsRule.java
Modified:
trunk/JLanguageTool/src/main/java/org/languagetool/language/Catalan.java
===================================================================
--- trunk/JLanguageTool/src/main/java/org/languagetool/language/Catalan.java
2013-01-15 13:23:02 UTC (rev 9028)
+++ trunk/JLanguageTool/src/main/java/org/languagetool/language/Catalan.java
2013-01-15 13:26:49 UTC (rev 9029)
@@ -37,6 +37,7 @@
import org.languagetool.rules.ca.MorfologikCatalanSpellerRule;
import org.languagetool.rules.ca.CatalanWrongWordInContextRule;
import org.languagetool.rules.ca.ReflexiveVerbsRule;
+import org.languagetool.rules.ca.CatalanUnpairedBracketsRule;
import org.languagetool.rules.patterns.Unifier;
import org.languagetool.synthesis.Synthesizer;
import org.languagetool.synthesis.ca.CatalanSynthesizer;
@@ -69,16 +70,6 @@
}
@Override
- public String[] getUnpairedRuleStartSymbols() {
- return new String[]{ "[", "(", "{", "“", "«"};
- }
-
- @Override
- public String[] getUnpairedRuleEndSymbols() {
- return new String[]{ "]", ")", "}", "”", "»"};
- }
-
- @Override
public String getShortName() {
return "ca";
}
@@ -93,7 +84,7 @@
return Arrays.asList(
CommaWhitespaceRule.class,
DoublePunctuationRule.class,
- GenericUnpairedBracketsRule.class,
+ CatalanUnpairedBracketsRule.class,
UppercaseSentenceStartRule.class,
WhitespaceRule.class,
WordRepeatRule.class,
Added:
trunk/JLanguageTool/src/main/java/org/languagetool/rules/ca/CatalanUnpairedBracketsRule.java
===================================================================
---
trunk/JLanguageTool/src/main/java/org/languagetool/rules/ca/CatalanUnpairedBracketsRule.java
(rev 0)
+++
trunk/JLanguageTool/src/main/java/org/languagetool/rules/ca/CatalanUnpairedBracketsRule.java
2013-01-15 13:26:49 UTC (rev 9029)
@@ -0,0 +1,79 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2010 Marcin Miłkowski (http://www.languagetool.org)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+
+package org.languagetool.rules.ca;
+
+import java.util.ResourceBundle;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.languagetool.AnalyzedTokenReadings;
+import org.languagetool.Language;
+import org.languagetool.rules.GenericUnpairedBracketsRule;
+
+public class CatalanUnpairedBracketsRule extends GenericUnpairedBracketsRule {
+ // Unpaired-bracket rule for Catalan: installs the symbol arrays below and adds its own exceptions in isNoException().
+ private static final String[] CA_START_SYMBOLS = { "[", "(", "{", "“",
"«" }; // opening symbols; presumably paired by index with CA_END_SYMBOLS - confirm in GenericUnpairedBracketsRule
+ private static final String[] CA_END_SYMBOLS = { "]", ")", "}", "”",
"»" }; // closing symbols, listed in the same order as the opening ones
+ // Matches a token that consists entirely of digits or of exactly one ASCII letter.
+ private static final Pattern VALID_BEFORE_CLOSING_PARENTHESIS = Pattern
+ .compile("\\d+|[a-zA-Z]", Pattern.UNICODE_CASE); // NOTE(review): UNICODE_CASE only takes effect together with CASE_INSENSITIVE, so it looks like a no-op here
+
+ public CatalanUnpairedBracketsRule(final ResourceBundle messages,
+ final Language language) {
+ super(messages, language);
+ startSymbols = CA_START_SYMBOLS; // overwrite the inherited symbol arrays after super() has run
+ endSymbols = CA_END_SYMBOLS;
+ uniqueMapInit(); // not defined in this class; presumably rebuilds state derived from the symbol arrays - confirm in the base class
+ }
+ // Returns the fixed identifier string of this rule.
+ @Override
+ public String getId() {
+ return "CA_UNPAIRED_BRACKETS";
+ }
+ // Catalan-specific exceptions for the symbol tokenStr (presumably the token at tokens[i]); j, precSpace and follSpace are not used here.
+ @Override
+ protected boolean isNoException(final String tokenStr,
+ final AnalyzedTokenReadings[] tokens, final int i,
final int j,
+ final boolean precSpace, final boolean follSpace) {
+ // Indices below 1 are never treated as exceptions by this override.
+ if (i < 1) {
+ return true;
+ }
+ // A "»" at token index 1 yields false (presumably: do not report it - confirm against the base class contract).
+ if (i == 1 && tokenStr.equals("»"))
+ return false;
+ // For ")" past index 1: count the "(" tokens in tokens[1..i-1]; if the count is even and the previous token is digits or one letter (e.g. "1)" or "a)"), yield false.
+ if (i > 1 && tokenStr.equals(")")) {
+ int nOpeningParenthesis = 0;
+ for (int k = 1; k < i; k++) // starts at 1, so tokens[0] is never inspected
+ if (tokens[k].getToken().equals("("))
+ nOpeningParenthesis++;
+ if (nOpeningParenthesis % 2 == 0) { // NOTE(review): parity test, not a balance check - two unclosed "(" also count as even
+ final Matcher mValidBeforeClosingParenthesis =
VALID_BEFORE_CLOSING_PARENTHESIS
+ .matcher(tokens[i -
1].getToken());
+ if (mValidBeforeClosingParenthesis.matches()) // matches() requires the whole previous token to fit the pattern
+ return false;
+ }
+ }
+ // Every remaining case yields true.
+ return true;
+ }
+
+}
Modified:
trunk/JLanguageTool/src/test/java/org/languagetool/rules/ca/MorfologikCatalanSpellerRuleTest.java
===================================================================
---
trunk/JLanguageTool/src/test/java/org/languagetool/rules/ca/MorfologikCatalanSpellerRuleTest.java
2013-01-15 13:23:02 UTC (rev 9028)
+++
trunk/JLanguageTool/src/test/java/org/languagetool/rules/ca/MorfologikCatalanSpellerRuleTest.java
2013-01-15 13:26:49 UTC (rev 9029)
@@ -56,6 +56,9 @@
assertEquals(0, rule.match(langTool.getAnalyzedSentence("Viu al núm.
23 del carrer Nou.")).length);
assertEquals(0, rule.match(langTool.getAnalyzedSentence("N'hi ha de
color vermell, blau, verd, etc.")).length);
+ // Test for Multiwords.
+ //assertEquals(0, rule.match(langTool.getAnalyzedSentence("Era vox
populi.")).length);
+
//test for "LanguageTool":
assertEquals(0,
rule.match(langTool.getAnalyzedSentence("LanguageTool!")).length);
assertEquals(0, rule.match(langTool.getAnalyzedSentence(",")).length);
Modified:
trunk/JLanguageTool/src/test/java/org/languagetool/tokenizers/ca/CatalanSentenceTokenizerTest.java
===================================================================
---
trunk/JLanguageTool/src/test/java/org/languagetool/tokenizers/ca/CatalanSentenceTokenizerTest.java
2013-01-15 13:23:02 UTC (rev 9028)
+++
trunk/JLanguageTool/src/test/java/org/languagetool/tokenizers/ca/CatalanSentenceTokenizerTest.java
2013-01-15 13:26:49 UTC (rev 9029)
@@ -54,6 +54,10 @@
testSplit(new String[] { "El pou d'Avall. ", "És bonic." });
testSplit(new String[] { "El pou d’Avall. ", "És bonic." });
testSplit(new String[] { "Ell viu a l'u. ", "Jo al dos." });
+
+ //Unsolved problem
+ //testSplit(new String[] { "–La vols més fina, l'euga? ",
"Mira-te-la, fill meu, l'euga." });
+
}
private void testSplit(final String[] sentences) {
This was sent by the SourceForge.net collaborative development platform, the
world's largest Open Source development site.
------------------------------------------------------------------------------
Master SQL Server Development, Administration, T-SQL, SSAS, SSIS, SSRS
and more. Get SQL Server skills now (including 2012) with LearnDevNow -
200+ hours of step-by-step video tutorials by Microsoft MVPs and experts.
SALE $99.99 this month only - learn more at:
http://p.sf.net/sfu/learnmore_122512
_______________________________________________
Languagetool-commits mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/languagetool-commits