Revision: 8624
          
http://languagetool.svn.sourceforge.net/languagetool/?rev=8624&view=rev
Author:   jaumeortola
Date:     2012-12-22 09:08:50 +0000 (Sat, 22 Dec 2012)
Log Message:
-----------
[ca] Don't apply apostrophation rules to non-Catalan words and unknown proper 
nouns. More proper nouns are needed in the tagger dictionary. 
Don't separate expressions like A-7, N-340 (road names).

Modified Paths:
--------------
    
trunk/JLanguageTool/src/main/java/org/languagetool/tokenizers/ca/CatalanWordTokenizer.java
    
trunk/JLanguageTool/src/main/resources/org/languagetool/resource/ca/disambiguation.xml
    trunk/JLanguageTool/src/main/resources/org/languagetool/rules/ca/grammar.xml

Modified: 
trunk/JLanguageTool/src/main/java/org/languagetool/tokenizers/ca/CatalanWordTokenizer.java
===================================================================
--- 
trunk/JLanguageTool/src/main/java/org/languagetool/tokenizers/ca/CatalanWordTokenizer.java
  2012-12-22 07:10:02 UTC (rev 8623)
+++ 
trunk/JLanguageTool/src/main/java/org/languagetool/tokenizers/ca/CatalanWordTokenizer.java
  2012-12-22 09:08:50 UTC (rev 8624)
@@ -88,7 +88,7 @@
                final List<String> l = new ArrayList<String>();
                final StringTokenizer st = new 
StringTokenizer(text.replaceAll("([\\p{L}])['’]([\\p{L}])", "$1##CA_APOS##$2")
                                .replaceAll("([\\p{L}])-([\\p{L}])-([\\p{L}])", 
"$1##CA_HYPHEN##$2##CA_HYPHEN##$3")  //it's necessary for words like 
"vint-i-quatre" 
-                               .replaceAll("([\\p{Ll}])-([\\p{Ll}])", 
"$1##CA_HYPHEN##$2"),
+                               .replaceAll("([\\p{L}])-([\\p{Ll}\\d])", 
"$1##CA_HYPHEN##$2"),
                                "\u0020\u00A0\u115f\u1160\u1680" 
                                                + 
"\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007"
                                                + 
"\u2008\u2009\u200A\u200B\u200c\u200d\u200e\u200f"

Modified: 
trunk/JLanguageTool/src/main/resources/org/languagetool/resource/ca/disambiguation.xml
===================================================================
--- 
trunk/JLanguageTool/src/main/resources/org/languagetool/resource/ca/disambiguation.xml
      2012-12-22 07:10:02 UTC (rev 8623)
+++ 
trunk/JLanguageTool/src/main/resources/org/languagetool/resource/ca/disambiguation.xml
      2012-12-22 09:08:50 UTC (rev 8624)
@@ -90,13 +90,7 @@
             <token postag="RG" 
regexp="yes">.+ment|molt|poc|tant|tan|aleshores|alhora|almanco|almenys|aviat|bastant|ben|encara|força|gaire|gairebé|gens|inclús|ja|llavors|mai|massa|menys|mig|millor|més|no|només|pas|pitjor|potser|prompte|prou|quasi|quelcom|sempre|sobretot|sols|suara|també|tampoc|tanmateix|tostemps|tot|tothora|àdhuc</token>
         </pattern>
         <disambig action="add"><wd pos="RG_anteposat"></wd></disambig>
-    </rule>
-    <rule id="MOT_FORASTER" name="paraula no catalana">
-        <pattern>
-            <token postag="UNKNOWN" 
regexp="yes">.*ñ.*|.*á.*|.+ón|.+ía|.+íes|.+ch.+</token>
-        </pattern>
-        <disambig action="add"><wd pos="mot_foraster"></wd></disambig>
-    </rule>  
+    </rule> 
     <rulegroup id="CONTRACCIONS" name="contraccions">
         <rule>
             <pattern>
@@ -572,7 +566,7 @@
             </pattern>
             <disambig action="add"><wd pos="NPCS000"></wd></disambig>
         </rule>
-        <rule>
+        <!-- <rule>
             <pattern>
                 <token postag="D[^R].*" postag_regexp="yes" negate_pos="yes"/>
                 <marker>
@@ -580,7 +574,7 @@
                 </marker>
             </pattern>
             <disambig action="add"><wd pos="NPCN000"></wd></disambig>
-        </rule>    
+        </rule>   -->  
         <rule id="nom_persona_o_geogràfic" name="nom de persona o geogràfic: a 
l'Àfrica?">
             <pattern>
                 <token skip="1">a</token>
@@ -740,14 +734,13 @@
     </rule>
     <rule id="noNom2" name="FA LA no nom">
         <pattern>
+            <token><exception postag="D[DAI]0MS0" postag_regexp="yes" 
regexp="yes">el|un|aquest|algun</exception></token>
             <marker>
                 <token regexp="yes">fa|la</token>                
             </marker>
             <token><exception 
regexp="yes">menor|major|sostingut|bemoll|diesi|becaire</exception></token>
         </pattern>
-        <disambig>
-            <match no="1" postag_regexp="yes" postag="[^N].*"></match>
-        </disambig>
+        <disambig action="filter" postag="[^N].*"/>
     </rule>
     <rule id="nAdj" name="no adjectiu">
         <!--  paraules comunes que no solen ser adjectiu gairebé mai -->
@@ -7467,6 +7460,20 @@
             <disambig action="add"><wd pos="hac_aspirada"></wd></disambig>
         </rule>
     </rulegroup>
+    <rulegroup id="MOT_FORASTER" name="paraula no catalana">
+        <rule>
+            <pattern>
+                <token postag="UNKNOWN" 
regexp="yes">.*ñ.*|.*á.*|.+ón|.+ía|.+íes|.+ch.+</token>
+            </pattern>
+            <disambig action="add"><wd pos="mot_foraster"></wd></disambig>
+        </rule> 
+        <rule>
+            <pattern>
+                <token>agua</token>
+            </pattern>
+            <disambig action="add"><wd pos="mot_foraster"></wd></disambig>
+        </rule>
+    </rulegroup>    
     <rulegroup id="verb_reflexiu" name="verb acomopanyat de pronom reflexiu">
         <rule>
             <pattern>

Modified: 
trunk/JLanguageTool/src/main/resources/org/languagetool/rules/ca/grammar.xml
===================================================================
--- 
trunk/JLanguageTool/src/main/resources/org/languagetool/rules/ca/grammar.xml    
    2012-12-22 07:10:02 UTC (rev 8623)
+++ 
trunk/JLanguageTool/src/main/resources/org/languagetool/rules/ca/grammar.xml    
    2012-12-22 09:08:50 UTC (rev 8624)
@@ -5702,7 +5702,7 @@
             <rule>
                 <pattern>
                     <token>de</token>
-                    <token regexp="yes">h?[aeiouàèéíòóú].*<exception 
regexp="yes">h?[ui][aeioàèéóòu].*|el|els|a|e|i|o|u|efa|ef|efe|hac|ela|ele|ema|eme|em|ena|ene|en|erra|erre|er|essa|esse|es|ics|iure</exception><exception
 inflected="yes">ordinal</exception><exception 
postag="hac_aspirada|mot_foraster|UNKNOWN" postag_regexp="yes"/></token>
+                    <token regexp="yes">h?[aeiouàèéíòóú].*<exception 
regexp="yes">h?[ui][aeioàèéóòu].*|hosts?|el|els|a|e|i|o|u|efa|ef|efe|hac|ela|ele|ema|eme|em|ena|ene|erra|erre|essa|esse|ics|iure</exception><exception
 inflected="yes">ordinal</exception><exception 
postag="hac_aspirada|mot_foraster|UNKNOWN|V.[MSI].*|NP..000" 
postag_regexp="yes"/></token>
                 </pattern>
                 <message>Cal escriure: <suggestion>d'<match 
no="2"></match></suggestion>.</message>
                 <short>Error ortogràfic</short>
@@ -5712,12 +5712,12 @@
                 <example type="correct">de Hubble</example>
                 <example type="correct">de atletismo</example>
                 <example type="correct">de Iugoslàvia</example>
-                
+                <example type="correct">el 23 de agosto de 1984</example>
             </rule>
             <rule>
                 <pattern>
                     <token>el</token>
-                    <token regexp="yes">h?[aeiouàèéíòóú].*<exception 
regexp="yes">h?[ui][aeioàèéóòu].+|o|i</exception><exception 
inflected="yes">ordinal</exception><exception 
postag="hac_aspirada|mot_foraster|UNKNOWN" postag_regexp="yes"/></token>
+                    <token regexp="yes">h?[aeiouàèéíòóú].*<exception 
regexp="yes">h?[ui][aeioàèéóòu].+|o|i|host</exception><exception 
inflected="yes">ordinal</exception><exception 
postag="hac_aspirada|mot_foraster|UNKNOWN|NP..000" postag_regexp="yes"/></token>
                 </pattern>
                 <message>Cal escriure: <suggestion>l'<match 
no="2"></match></suggestion>.</message>
                 <short>Error ortogràfic</short>
@@ -5732,7 +5732,7 @@
                 <pattern>
                     <marker>
                         <token postag="DA0MS0">en</token>
-                        <token postag="NPMSSP0" 
regexp="yes">h?[aeiouàèéíòóú].*<exception 
regexp="yes">h?[ui][aeioàèéóòu].+</exception><exception 
postag="hac_aspirada|mot_foraster|UNKNOWN" postag_regexp="yes"/></token>
+                        <token postag="NPMSSP0" 
regexp="yes">h?[aeiouàèéíòóú].*<exception 
regexp="yes">h?[ui][aeioàèéóòu].+</exception><exception 
postag="hac_aspirada|mot_foraster|UNKNOWN|NP..000" postag_regexp="yes"/></token>
                     </marker>
                     <token postag="V...[12].." postag_regexp="yes" 
negate_pos="yes" />
                 </pattern>
@@ -5747,7 +5747,7 @@
                 <pattern>
                     <token>de</token>
                     <token spacebefore="no">l</token>
-                    <token regexp="yes">h?[aeiouàèéíòóú].*<exception 
regexp="yes">h?[ui][aeioàèéóòu].+|o|i</exception><exception 
inflected="yes">ordinal</exception><exception 
postag="hac_aspirada|mot_foraster|UNKNOWN" postag_regexp="yes"/></token>
+                    <token regexp="yes">h?[aeiouàèéíòóú].*<exception 
regexp="yes">h?[ui][aeioàèéóòu].+|o|i|host</exception><exception 
inflected="yes">ordinal</exception><exception 
postag="hac_aspirada|mot_foraster|UNKNOWN|NP..000" postag_regexp="yes"/></token>
                 </pattern>
                 <message>Cal escriure: <suggestion>de l'<match 
no="3"></match></suggestion>.</message>
                 <short>Error ortogràfic</short>
@@ -5761,7 +5761,7 @@
                 <pattern>
                     <token>a</token>
                     <token spacebefore="no">l</token>
-                    <token regexp="yes">h?[aeiouàèéíòóú].*<exception 
regexp="yes">h?[ui][aeioàèéóòu].+|o|i</exception><exception 
inflected="yes">ordinal</exception><exception 
postag="hac_aspirada|mot_foraster|UNKNOWN" postag_regexp="yes"/></token>
+                    <token regexp="yes">h?[aeiouàèéíòóú].*<exception 
regexp="yes">h?[ui][aeioàèéóòu].+|o|i|host</exception><exception 
inflected="yes">ordinal</exception><exception 
postag="hac_aspirada|mot_foraster|UNKNOWN|NP..000" postag_regexp="yes"/></token>
                 </pattern>
                 <message>Cal escriure: <suggestion>a l'<match 
no="3"></match></suggestion>.</message>
                 <short>Error ortogràfic</short>
@@ -5775,7 +5775,7 @@
                 <pattern>
                     <token>pe</token>
                     <token spacebefore="no">l</token>
-                    <token regexp="yes">h?[aeiouàèéíòóú].*<exception 
regexp="yes">h?[ui][aeioàèéóòu].+|o|i</exception><exception 
inflected="yes">ordinal</exception><exception 
postag="hac_aspirada|mot_foraster|UNKNOWN" postag_regexp="yes"/></token>
+                    <token regexp="yes">h?[aeiouàèéíòóú].*<exception 
regexp="yes">h?[ui][aeioàèéóòu].+|o|i|host</exception><exception 
inflected="yes">ordinal</exception><exception 
postag="hac_aspirada|mot_foraster|UNKNOWN|NP..000" postag_regexp="yes"/></token>
                 </pattern>
                 <message>Cal escriure: <suggestion>per l'<match 
no="3"></match></suggestion>.</message>
                 <short>Error ortogràfic</short>
@@ -5845,47 +5845,49 @@
                 <example type="correct">d'ionització</example>
                 <example type="correct">ve d'Iowa.</example>
             </rule> 
+            <rule>
+                <pattern>
+                    <token>l'</token>
+                    <token postag="N.M.*|A..M.*" postag_regexp="yes" 
regexp="yes">h?[ui][aeoiàèéóòu].*<exception 
regexp="yes">ió|uix.+</exception></token>
+                </pattern>
+                <message>Cal escriure: <suggestion>el <match 
no="2"></match></suggestion>.</message>
+                <short>Error ortogràfic</short>
+                <example type="incorrect" correction="el 
iogurt"><marker>l'iogurt</marker></example>
+                <example type="incorrect" correction="el 
iugoslau"><marker>l'iugoslau</marker></example>
+                <example type="correct">l'uixer</example>
+                <example type="correct">l'Uixó</example>
+            </rule>
         </rulegroup>
         <rulegroup id="L_D_NOM_LLETRA" name="apostrofació: no davant de nom de 
lletra">                        
             <rule>
                 <pattern>
                     <token>d'</token>
-                    <token 
regexp="yes">a|e|i|o|efa|ef|efe|hac|ela|el|ele|ema|eme|em|ena|ene|en|erra|erre|er|essa|esse|es|ics</token>
+                    <token 
regexp="yes">a|e|i|o|efa|ef|efe|hac|ela|ele|ema|eme|em|ena|ene|erra|erre|er|essa|esse|ics<exception
 regexp="yes">(?-i)I</exception></token>
                 </pattern>
                 <message>Els noms de lletra no s'apostrofen. Cal escriure: 
<suggestion>de <match no="2"></match></suggestion>.</message>
                 <short>Error ortogràfic</short>
                 <example type="incorrect">Darrere 
<marker>d'essa</marker></example>
                 <example type="correct">Darrere de essa</example>
                 <example type="correct">d'u a cinc</example>
+                <example type="correct">La casa d'en Joan</example>
             </rule>
             <rule>
                 <pattern>
                     <token>l'</token>
-                    <token 
regexp="yes">a|e|i|o|efa|ef|efe|hac|ela|el|ele|ema|eme|em|ena|ene|en|erra|erre|er|essa|esse|es|ics</token>
+                    <token 
regexp="yes">a|e|i|o|efa|ef|efe|hac|ela|ele|ema|eme|em|ena|ene|er|essa|esse|ics<exception
 regexp="yes">(?-i)I</exception></token>
                 </pattern>
                 <message>Els noms de lletra no s'apostrofen. Cal escriure: 
<suggestion>la <match no="2"></match></suggestion>.</message>
                 <short>Error ortogràfic</short>
                 <example type="incorrect">Darrere de 
<marker>l'essa</marker></example>
                 <example type="correct">Darrere la essa</example>
                 <example type="correct">de l'u al cinc</example>
+                <example type="correct">Enllaça amb l'A-7</example>
             </rule>
         </rulegroup>
-        <rulegroup id="LA_NA_NOM_FEMENI" name="apostrofació: la/na + femení"> 
+        <rulegroup id="LA_NA_NOM_FEMENI" name="apostrofació: la/na + femení">
             <rule>
                 <pattern>
                     <token>l'</token>
-                    <token postag="N.M.*|A..M.*" postag_regexp="yes" 
regexp="yes">h?[ui][aeoiàèéóòu].*<exception 
regexp="yes">ió|uix.+</exception></token>
-                </pattern>
-                <message>Cal escriure: <suggestion>el <match 
no="2"></match></suggestion>.</message>
-                <short>Error ortogràfic</short>
-                <example type="incorrect" correction="el 
iogurt"><marker>l'iogurt</marker></example>
-                <example type="incorrect" correction="el 
iugoslau"><marker>l'iugoslau</marker></example>
-                <example type="correct">l'uixer</example>
-                <example type="correct">l'Uixó</example>
-            </rule>
-            <rule>
-                <pattern>
-                    <token>l'</token>
                     <token postag="N.F.*|A..F.*|_GN_F." postag_regexp="yes" 
regexp="yes">h?[ui].+|host<exception 
regexp="yes">hidra|hifa|hulla|ulna|umbra|una|ungla|unça|upa|urbs|urna|urpa|idus|illa|inca|iva</exception><exception
 postag="_GN_M.*" postag_regexp="yes" /></token>
                 </pattern>
                 <message>Cal escriure: <suggestion>la <match 
no="2"></match></suggestion>.</message>
@@ -5928,7 +5930,7 @@
             <rule>
                 <pattern>
                     <token>la</token>
-                    <token 
regexp="yes">h?[aeoàèéíòóú].*|h?[ui][^aeiouàèéíòóúüï]+[aeiou][ns]?|urbs<exception
 
regexp="yes">host|ira|inxa|[aeiou]|efa|hac|ela|ema|ena|erra|essa|ics|una</exception><exception
 postag="hac_aspirada|mot_foraster|UNKNOWN" postag_regexp="yes"/><exception 
regexp="yes">(?-i)Haia</exception></token>
+                    <token 
regexp="yes">h?[aeoàèéíòóú].*|h?[ui][^aeiouàèéíòóúüï]+[aeiou][ns]?|urbs<exception
 
regexp="yes">host|ira|inxa|[aeiou]|efa|hac|ela|ema|en|ena|ene|er|erra|erre|essa|ics|una</exception><exception
 postag="hac_aspirada|mot_foraster|UNKNOWN|NP..000" 
postag_regexp="yes"/><exception regexp="yes">(?-i)Haia</exception></token>
                 </pattern>
                 <message>Cal escriure: <suggestion>l'<match 
no="2"></match></suggestion>.</message>
                 <short>Error ortogràfic</short>
@@ -5955,7 +5957,7 @@
             <rule>
                 <pattern>
                     <token>na</token>
-                    <token postag="N.FS.*" postag_regexp="yes" 
regexp="yes">h?[aeoàèéíòóú].*|h?[ui][^aeiouàèéíòóúüï]+[aeiou][ns]?|urbs<exception
 regexp="yes">host|ira|inxa</exception><exception 
postag="hac_aspirada|mot_foraster|UNKNOWN" postag_regexp="yes"/></token>
+                    <token postag="N.FS.*" postag_regexp="yes" 
regexp="yes">h?[aeoàèéíòóú].*|h?[ui][^aeiouàèéíòóúüï]+[aeiou][ns]?|urbs<exception
 regexp="yes">host|ira|inxa</exception><exception 
postag="hac_aspirada|mot_foraster|UNKNOWN|NP..000" postag_regexp="yes"/></token>
                 </pattern>
                 <message>Cal escriure: <suggestion>n'<match 
no="2"></match></suggestion>.</message>
                 <short>Error ortogràfic</short>
@@ -5968,7 +5970,7 @@
             <rule>
                 <pattern>
                     <token>cal</token>
-                    <token postag="NPMSSP0" 
regexp="yes">h?[aeiouàèéíòóú].*<exception 
regexp="yes">h?[ui][aeioàèéóò].+</exception><exception 
postag="hac_aspirada|mot_foraster|UNKNOWN" postag_regexp="yes"/></token>
+                    <token postag="NPMSSP0" 
regexp="yes">h?[aeiouàèéíòóú].*<exception 
regexp="yes">h?[ui][aeioàèéóò].+</exception><exception 
postag="hac_aspirada|mot_foraster|UNKNOWN|NP..000" postag_regexp="yes"/></token>
                 </pattern>
                 <message>Cal escriure: <suggestion>ca l'<match 
no="2"></match></suggestion>.</message>
                 <short>Error d'apostrofació</short>
@@ -5978,7 +5980,7 @@
             <rule>
                 <pattern>
                     <token postag="contraccio">cal</token>
-                    <token postag="N.[MC][SN].*|AQ0[MC][SN]0" 
postag_regexp="yes" regexp="yes">h?[aeiouàèéíòóú].*<exception 
regexp="yes">h?[ui][aeioàèéóò].+</exception><exception 
postag="hac_aspirada|mot_foraster|UNKNOWN" postag_regexp="yes"/></token>
+                    <token postag="N.[MC][SN].*|AQ0[MC][SN]0" 
postag_regexp="yes" regexp="yes">h?[aeiouàèéíòóú].*<exception 
regexp="yes">h?[ui][aeioàèéóò].+</exception><exception 
postag="hac_aspirada|mot_foraster|UNKNOWN|NP..000" postag_regexp="yes"/></token>
                 </pattern>
                 <message>Cal escriure: <suggestion>ca l'<match 
no="2"></match></suggestion>.</message>
                 <short>Error d'apostrofació</short>
@@ -12706,7 +12708,7 @@
             <url>http://esadir.cat/lexic/entrades/assentarse</url>
             <example type="incorrect" correction="asseure">Es van 
<marker>assentar</marker> a taula.</example>
             <example type="incorrect" correction="assegué">Joan 
s'<marker>assentà</marker> a la cadira.</example>
-            <example type="correct">Els israelians s'assentaren a 
Cisjordània.</example>
+            <!-- <example type="correct">Els israelians s'assentaren a 
Cisjordània.</example> -->
             <example type="correct">S'assentaren en aquell territori.</example>
             <example type="correct">Volien que assentéssim les bases del 
futur.</example>
             <example type="correct">S'assentaren a Argentina.</example>

This was sent by the SourceForge.net collaborative development platform, the 
world's largest Open Source development site.


------------------------------------------------------------------------------
LogMeIn Rescue: Anywhere, Anytime Remote support for IT. Free Trial
Remotely access PCs and mobile devices and provide instant support
Improve your efficiency, and focus on delivering more value-add services
Discover what IT Professionals Know. Rescue delivers
http://p.sf.net/sfu/logmein_12329d2d
_______________________________________________
Languagetool-commits mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/languagetool-commits

Reply via email to