Revision: 7760
          
http://languagetool.svn.sourceforge.net/languagetool/?rev=7760&view=rev
Author:   jaumeortola
Date:     2012-07-27 16:59:12 +0000 (Fri, 27 Jul 2012)
Log Message:
-----------
[ca] Java rule improved: AccentuationCheckRule 

Modified Paths:
--------------
    trunk/JLanguageTool/src/java/org/languagetool/rules/ca/AccentuationCheckRule.java
    trunk/JLanguageTool/src/resource/ca/catalan.dict
    trunk/JLanguageTool/src/resource/ca/catalan_synth.dict
    trunk/JLanguageTool/src/resource/ca/disambiguation.xml
    trunk/JLanguageTool/src/rules/ca/verb_senseaccent_nom_ambaccent.txt
    trunk/JLanguageTool/src/test/org/languagetool/rules/ca/AccentuationCheckRuleTest.java

Modified: trunk/JLanguageTool/src/java/org/languagetool/rules/ca/AccentuationCheckRule.java
===================================================================
--- trunk/JLanguageTool/src/java/org/languagetool/rules/ca/AccentuationCheckRule.java   2012-07-27 11:43:33 UTC (rev 7759)
+++ trunk/JLanguageTool/src/java/org/languagetool/rules/ca/AccentuationCheckRule.java   2012-07-27 16:59:12 UTC (rev 7760)
@@ -58,7 +58,7 @@
   private static final Pattern ARTICLE_EL_FS = Pattern.compile("la|l'|La|L'");
   private static final Pattern ARTICLE_EL_MP = Pattern.compile("els|Els");
   private static final Pattern ARTICLE_EL_FP = Pattern.compile("les|Les");
-//  private static final Pattern DETERMINANT = Pattern.compile("D[^R].*");
+  private static final Pattern DETERMINANT = Pattern.compile("D[^R].*");
   private static final Pattern DETERMINANT_MS = 
Pattern.compile("D[^R].[MC][SN].*");
   private static final Pattern DETERMINANT_FS = 
Pattern.compile("D[^R].[FC][SN].*");
   private static final Pattern DETERMINANT_MP = 
Pattern.compile("D[^R].[MC][PN].*");
@@ -72,7 +72,7 @@
   private static final Pattern ADJECTIU_MP = 
Pattern.compile("AQ.[MC][PN].*|V.P..PM|PX.MP.*");
   private static final Pattern ADJECTIU_FP = 
Pattern.compile("AQ.[FC][PN].*|V.P..PF|PX.FP.*");
   private static final Pattern INFINITIU = Pattern.compile("V.N.*");
-  private static final Pattern VERB_CONJUGAT = Pattern.compile("V.[^NGP].*");
+  private static final Pattern VERB_CONJUGAT = 
Pattern.compile("V.[^NGP].*|_GV_");
   private static final Pattern NOT_IN_PREV_TOKEN = 
Pattern.compile("VA.*|PP.*|P0.*|VSP.*");
   private static final Pattern BEFORE_ADJECTIVE_MS = 
Pattern.compile("SPS00|D[^R].[MC][SN].*|V.[^NGP].*|PX.*");
   private static final Pattern BEFORE_ADJECTIVE_FS = 
Pattern.compile("SPS00|D[^R].[FC][SN].*|V.[^NGP].*|PX.*");
@@ -146,8 +146,8 @@
       // verb without accent -> noun with accent   
       if (isRelevantWord)
       {
-       //amb renuncies
-        if (tokens[i-1].hasPosTag("SPS00") && 
!matchPostagRegexp(tokens[i],INFINITIU) )
+       //amb renuncies DETERMINANT
+        if (tokens[i-1].hasPosTag("SPS00") && 
!matchPostagRegexp(tokens[i-1],DETERMINANT) && 
!matchPostagRegexp(tokens[i],INFINITIU) )
        {
                replacement = relevantWords.get(token).getToken();
        }
@@ -157,13 +157,23 @@
                   && !token.equals("cantar") )
                ||(matchPostagRegexp(tokens[i-1],DETERMINANT_MP) && 
matchPostagRegexp(relevantWords.get(token),NOM_MP))
                ||(matchPostagRegexp(tokens[i-1],DETERMINANT_FS) && 
matchPostagRegexp(relevantWords.get(token),NOM_FS)
-                  && !token.equals("venia") && !token.equals("tenia") && 
!token.equals("continua") && !token.equals("genera") )
+                  && !token.equals("venia") && !token.equals("tenia") && 
!token.equals("continua") && !token.equals("genera") && !token.equals("faria"))
                ||(matchPostagRegexp(tokens[i-1],DETERMINANT_FP) && 
matchPostagRegexp(relevantWords.get(token),NOM_FP)) ) )
        {
                replacement = relevantWords.get(token).getToken();
        }
+       //fumaré una faria (correct: fària)
+       else if ( i>2 && matchPostagRegexp(tokens[i-2],VERB_CONJUGAT) &&
+                ((matchPostagRegexp(tokens[i-1],DETERMINANT_MS) && 
matchPostagRegexp(relevantWords.get(token),NOM_MS))
+               ||(matchPostagRegexp(tokens[i-1],DETERMINANT_MP) && 
matchPostagRegexp(relevantWords.get(token),NOM_MP))
+               ||(matchPostagRegexp(tokens[i-1],DETERMINANT_FS) && 
matchPostagRegexp(relevantWords.get(token),NOM_FS))
+               ||(matchPostagRegexp(tokens[i-1],DETERMINANT_FP) && 
matchPostagRegexp(relevantWords.get(token),NOM_FP)) ) )
+       {
+               replacement = relevantWords.get(token).getToken();
+       }    
        //circumstancies d'un altre caire
        else if  ( !token.equals("venia") && !token.equals("venies") && 
!token.equals("tenia") && !token.equals("tenies")
+                          && !token.equals("faria") && !token.equals("faries")
                   && !token.equals("continua") && !token.equals("continues") 
&& !token.equals("cantar")
                   && mPreposicioDE.matches() && 
!matchPostagRegexp(tokens[i-1],NOT_IN_PREV_TOKEN)
                   && (i<tokens.length-2) && 
!matchPostagRegexp(tokens[i+2],INFINITIU)
@@ -173,6 +183,7 @@
        }
        //la renuncia del president.
        else if ( !token.equals("venia") && !token.equals("venies") && 
!token.equals("tenia") && !token.equals("tenies")
+                          && !token.equals("faria") && !token.equals("faries")
                   && !token.equals("continua") && !token.equals("continues") 
&& !token.equals("cantar")
                   && !token.equals("diferencia") && 
!token.equals("diferencies") && !token.equals("distancia") && 
!token.equals("distancies")
                 &&(  ( mArticleELMS.matches() && 
matchPostagRegexp(relevantWords.get(token),NOM_MS) )
@@ -188,6 +199,7 @@
        //circumstancies extraordinàries
        else if ( !token.equals("pronuncia") && !token.equals("pronuncies") && 
!token.equals("venia") && !token.equals("venies")
                  && !token.equals("tenia") && !token.equals("tenies") && 
!token.equals("continua") && !token.equals("continues")
+                 && !token.equals("faria") && !token.equals("faries")
                  && !token.equals("genera")
                  && (i<tokens.length-1) &&
                  (
@@ -204,7 +216,7 @@
        else if (
                    (matchPostagRegexp(relevantWords.get(token),NOM_MS) && 
matchPostagRegexp(tokens[i-1],ADJECTIU_MS))
                    || (matchPostagRegexp(relevantWords.get(token),NOM_FS) && 
matchPostagRegexp(tokens[i-1],ADJECTIU_FS) 
-                               && !token.equals("venia") && 
!token.equals("tenia") && !token.equals("continua") && !token.equals("genera") )
+                               && !token.equals("venia") && 
!token.equals("tenia") && !token.equals("continua") && !token.equals("genera") 
&& !token.equals("faria"))
                    || (matchPostagRegexp(relevantWords.get(token),NOM_MP) && 
matchPostagRegexp(tokens[i-1],ADJECTIU_MP))
                    || (matchPostagRegexp(relevantWords.get(token),NOM_FP) && 
matchPostagRegexp(tokens[i-1],ADJECTIU_FP))
                  )

Modified: trunk/JLanguageTool/src/resource/ca/catalan.dict
===================================================================
(Binary files differ)

Modified: trunk/JLanguageTool/src/resource/ca/catalan_synth.dict
===================================================================
(Binary files differ)

Modified: trunk/JLanguageTool/src/resource/ca/disambiguation.xml
===================================================================
--- trunk/JLanguageTool/src/resource/ca/disambiguation.xml      2012-07-27 11:43:33 UTC (rev 7759)
+++ trunk/JLanguageTool/src/resource/ca/disambiguation.xml      2012-07-27 16:59:12 UTC (rev 7760)
@@ -4421,7 +4421,7 @@
         <pattern>
             <token postag="V.*" postag_regexp="yes"><exception postag="N.*" 
postag_regexp="yes"/></token>
             <marker>
-                <token postag="SPS00" postag_regexp="yes"><exception 
postag="_GN_.*" postag_regexp="yes"/></token>
+                <token postag="SPS00" postag_regexp="yes"><exception 
postag="_GN_.*|PI.*|DI.*" postag_regexp="yes"/></token>
             </marker>
         </pattern>
         <disambig action="filter" postag="SP.*"></disambig>

Modified: trunk/JLanguageTool/src/rules/ca/verb_senseaccent_nom_ambaccent.txt
===================================================================
--- trunk/JLanguageTool/src/rules/ca/verb_senseaccent_nom_ambaccent.txt 2012-07-27 11:43:33 UTC (rev 7759)
+++ trunk/JLanguageTool/src/rules/ca/verb_senseaccent_nom_ambaccent.txt 2012-07-27 16:59:12 UTC (rev 7760)
@@ -329,6 +329,8 @@
 falco;falcó;NCMS000
 falsaria;falsària;NCFS000
 falsaries;falsàries;NCFP000
+faria;fària;NCFS000
+faries;fàries;NCFP000
 farratjo;farratjó;NCMS000
 feixo;feixó;NCMS000
 fenia;fenià;NCMS000

Modified: trunk/JLanguageTool/src/test/org/languagetool/rules/ca/AccentuationCheckRuleTest.java
===================================================================
--- trunk/JLanguageTool/src/test/org/languagetool/rules/ca/AccentuationCheckRuleTest.java       2012-07-27 11:43:33 UTC (rev 7759)
+++ trunk/JLanguageTool/src/test/org/languagetool/rules/ca/AccentuationCheckRuleTest.java       2012-07-27 16:59:12 UTC (rev 7760)
@@ -43,6 +43,7 @@
   public void testRule() throws IOException {
 
     // correct sentences:
+       assertCorrect("Cap faria una cosa així.");
        assertCorrect("El cos genera suficient pressió interna.");
        assertCorrect("Les seues contràries.");
     assertCorrect("Això és una frase de prova.");
@@ -69,6 +70,8 @@
     assertCorrect("La magnífica conservació del palau.");
 
     // errors:
+    assertIncorrect("No em fumaré cap faria com feia abans.");
+    assertIncorrect("M'he fumat una faria.");
     assertIncorrect("Les seues contraries.");
     assertIncorrect("Amb renuncies i esforç.");
     assertIncorrect("La renuncia del president.");

This was sent by the SourceForge.net collaborative development platform, the 
world's largest Open Source development site.


------------------------------------------------------------------------------
Live Security Virtual Conference
Exclusive live event will cover all the ways today's security and 
threat landscape has changed and how IT managers can respond. Discussions 
will include endpoint security, mobile security and the latest in malware 
threats. http://www.accelacomm.com/jaw/sfrnl04242012/114/50122263/
_______________________________________________
Languagetool-cvs mailing list
Languagetool-cvs@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/languagetool-cvs

Reply via email to