Revision: 9723
http://languagetool.svn.sourceforge.net/languagetool/?rev=9723&view=rev
Author: jaumeortola
Date: 2013-03-18 16:50:45 +0000 (Mon, 18 Mar 2013)
Log Message:
-----------
[ca] Detect more typographical errors in "ela geminada".
Modified Paths:
--------------
trunk/languagetool/languagetool-language-modules/ca/src/main/java/org/languagetool/tokenizers/ca/CatalanWordTokenizer.java
trunk/languagetool/languagetool-language-modules/ca/src/main/resources/org/languagetool/rules/ca/grammar.xml
Modified:
trunk/languagetool/languagetool-language-modules/ca/src/main/java/org/languagetool/tokenizers/ca/CatalanWordTokenizer.java
===================================================================
--- trunk/languagetool/languagetool-language-modules/ca/src/main/java/org/languagetool/tokenizers/ca/CatalanWordTokenizer.java	2013-03-18 09:02:08 UTC (rev 9722)
+++ trunk/languagetool/languagetool-language-modules/ca/src/main/java/org/languagetool/tokenizers/ca/CatalanWordTokenizer.java	2013-03-18 16:50:45 UTC (rev 9723)
@@ -93,7 +93,10 @@
public List<String> tokenize(final String text) {
final List<String> l = new ArrayList<String>();
final StringTokenizer st = new StringTokenizer(
- text.replaceAll("([\\p{L}])['’]([\\p{L}])", "$1##CA_APOS##$2")
+ text
+ // allows correcting typographical errors in "ela geminada"
+ .replaceAll("([aeiouàéèíóòúïü])l[.\u2022-]l([aeiouàéèíóòúïü])", "$1##ELA_GEMINADA##$2")
+ .replaceAll("([\\p{L}])['’]([\\p{L}])", "$1##CA_APOS##$2")
// Cases: d'1 km, és l'1 de gener, és d'1.4 kg
.replaceAll("([dlDL])['’](1[\\s\\.,])", "$1##CA_APOS##$2")
//it's necessary for words like "vint-i-quatre"
@@ -101,9 +104,7 @@
.replaceAll("([\\p{L}])-([\\p{L}\\d])", "$1##CA_HYPHEN##$2")
.replaceAll("([\\d])\\.([\\d])", "$1##CA_DECIMALPOINT##$2")
.replaceAll("([\\d]),([\\d])","$1##CA_DECIMALCOMMA##$2")
- .replaceAll("([\\d])
([\\d])","$1##CA_SPACE##$2")
- // allows correcting typographical errors in "ela geminada"
- .replaceAll("l\\.l", "##ELA_GEMINADA##"),
+ .replaceAll("([\\d])
([\\d])","$1##CA_SPACE##$2"),
"\u0020\u00A0\u115f\u1160\u1680"
+
"\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007"
+
"\u2008\u2009\u200A\u200B\u200c\u200d\u200e\u200f"
Modified:
trunk/languagetool/languagetool-language-modules/ca/src/main/resources/org/languagetool/rules/ca/grammar.xml
===================================================================
--- trunk/languagetool/languagetool-language-modules/ca/src/main/resources/org/languagetool/rules/ca/grammar.xml	2013-03-18 09:02:08 UTC (rev 9722)
+++ trunk/languagetool/languagetool-language-modules/ca/src/main/resources/org/languagetool/rules/ca/grammar.xml	2013-03-18 16:50:45 UTC (rev 9723)
@@ -4068,6 +4068,17 @@
</rulegroup>
</category>
<category name="Z) Confusions">
+ <rule id="CERO" name="cero/zero">
+ <pattern>
+ <marker>
+ <token regexp="yes">ceros?</token>
+ </marker>
+ </pattern>
+ <message>¿Volíeu dir <suggestion><match no="1"
regexp_match="cero(s?)" regexp_replace="zero$1" /></suggestion> (nombre) en
lloc de "<match no="1"/>" (variant de "cérvol").</message>
+ <short>Possible confusió</short>
+ <example type="incorrect" correction="zero">Té un valor
de <marker>cero</marker>.</example>
+ <example type="correct">Té un valor de zero.</example>
+ </rule>
<rule id="SERIEM" name="seriem/seríem">
<pattern>
<token regexp="yes">serie[mu]</token>
@@ -9514,16 +9525,18 @@
</rule>
</category>
<category name="A1) Ortotipografia">
- <!-- <rule id="ELA_GEMINADA" name="Errors en la ela geminada.">
+ <rule id="ELA_GEMINADA" name="Errors en la ela geminada.">
<pattern case_sensitive="no">
<marker>
- <token regexp="yes">.+l\.l.+</token>
+ <token
regexp="yes">.*[aeiouàéèíóòúïü]l[•.\-]l[aeiouàéèíóòúïü].*</token>
</marker>
</pattern>
- <message>¿Volíeu dir <suggestion><match no="1"
regexp_match="(.+l)\.(l.+)" regexp_replace="$1·$2"/></suggestion></message>
+ <message>¿Volíeu dir <suggestion><match no="1"
regexp_match="(.+l)[•.-](l.+)" regexp_replace="$1·$2"/></suggestion></message>
+ <example type="incorrect" correction="sol·licitud">La
<marker>sol•licitud</marker>.</example>
+ <example type="incorrect" correction="sol·licitud">La
<marker>sol-licitud</marker>.</example>
<example type="incorrect" correction="sol·licitud">La
<marker>sol.licitud</marker>.</example>
<example type="correct">la col·laboració</example>
- </rule> -->
+ </rule>
<rule id="ESPAI_DESPRES_DE_PUNT" name="Comprova que hi ha espai deprés
de punt.">
<pattern>
<marker>
This was sent by the SourceForge.net collaborative development platform, the
world's largest Open Source development site.
------------------------------------------------------------------------------
Everyone hates slow websites. So do we.
Make your web apps faster with AppDynamics
Download AppDynamics Lite for free today:
http://p.sf.net/sfu/appdyn_d2d_mar
_______________________________________________
Languagetool-commits mailing list
languagetool-commits@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/languagetool-commits