Revision: 8568
          
http://languagetool.svn.sourceforge.net/languagetool/?rev=8568&view=rev
Author:   jaumeortola
Date:     2012-12-17 02:21:03 +0000 (Mon, 17 Dec 2012)
Log Message:
-----------
[ca] Fixed false alarms in ReflexiveVerbsRule.java

Modified Paths:
--------------
    trunk/JLanguageTool/src/main/java/org/languagetool/rules/ca/ReflexiveVerbsRule.java
    trunk/JLanguageTool/src/main/resources/org/languagetool/resource/ca/disambiguation.xml
    trunk/JLanguageTool/src/main/resources/org/languagetool/rules/ca/grammar.xml
    trunk/JLanguageTool/src/test/java/org/languagetool/rules/ca/ReflexiveVerbsRuleTest.java

Modified: trunk/JLanguageTool/src/main/java/org/languagetool/rules/ca/ReflexiveVerbsRule.java
===================================================================
--- trunk/JLanguageTool/src/main/java/org/languagetool/rules/ca/ReflexiveVerbsRule.java 2012-12-17 01:46:44 UTC (rev 8567)
+++ trunk/JLanguageTool/src/main/java/org/languagetool/rules/ca/ReflexiveVerbsRule.java 2012-12-17 02:21:03 UTC (rev 8568)
@@ -41,13 +41,14 @@
    * Patterns
    */
   
-//List of only pronominal verbs from GDLC (eliminats: assolar, enfundar, 
burlar)
-  private static final Pattern VERBS_PRONOMINALS = 
Pattern.compile("abacallanar|abalançar|ablenar|aborrallonar|abotifarrar|abrinar|abromar|abstenir|acagallonar|acanyar|acarcanyar|acarnissar|acatarrar|aciutadanar|aclocar|acopar|acorriolar|adir|adonar|adormissar|afal·lerar|afarrossar|afeccionar|aferrallar|aferrissar|aferrussar|agallinar|agarbir|agarrofar|agemolir|agenollar|agotzonar|aiguabarrejar|allocar|alçurar|amatinar|amelar|amigar|amoixir|amoltonar|amotar|amullerar|amunionar|antullar|aparroquianar|aparroquiar|aperduar|apergaminar|apiadar|aponentar|apropinquar|apugonar|arguellar|arrapinyar|arrasir|arravatar|arraïmar|arrepapar|arrepetellar|arrigolar|arrodir|arrogar|arrossar|arruar|assemblar|assocarrar|atendar|atenir|atorrentar|atrafegar|atrevir|avencar|avidolar|avinençar|balbar|balcar|balir|balmar|bescomptar|boirar|boixar|botinflar|bromar|cagaferrar|candir|capbaixar|capmassar|captenir|cariar|carnificar|carpir|coalitzar|colltrencar|collvinclar|compenetrar|condoldre|condolir|congraciar|contorçar|contrapuntar|contòrcer|corcorcar|coresforçar|cornuar|corruixar|crisalidar|desafeccionar|desalenar|desamorar|desaparroquiar|desapassionar|desaplegar|desavenir|desbocar|descantar|descarar|descontrolar|descovar|desdubtar|desempallegar|desenrojolar|desentossudir|desfeinar|desmemoriar|desnodrir|despondre|despreocupar|dessolidaritzar|desteixinar|desvagar|desvergonyir|desviure|dignar|embarbussar|embascar|embessonar|embordeir|embordir|emborrascar|emborrossar|embotifarrar|embotzegar|embromallar|embromar|embroquerar|emmainadar|emmalurar|emmalurir|emmarar|emmarranar|emmatar|emmigranyar|emmorronar|emmurriar|empassar|empassolar|empegueir|empenyalar|empescar|empillocar|empinyar|empiocar|empitarrar|emplomissar|emplujar|emportar|encabotar|encabritar|encalmar|encalostrar|encelar|encinglar|encirar|encistar|enclaperar|encolerir|encordar|encruar|endoblir|endur|enfarfollar|enfaristolar|enfavar|enfereir|enferotgir|enferritjar|enfugir|enfurrunyar|enfutimar|enfutismar|engelabrir|engolfar|engorgar|engripar|enguerxinar|enllagrimar|enlleganyar|enlleir|e
nllustrar|ennavegar|enneguitar|enquistar|enrinxar|enseriosir|ensobecar|entonyinar|entossudir|entotsolar|entreabaltir|entrebadar|entrebatre|entrebesar|entrecavalcar|entredevorar|entreferir|entreforcar|entrematar|entremetre|entremirar|entrenyorar|entresaludar|entreseguir|entresoldar|entretocar|entretzenar|entrigar|envidreir|envidriar|envolar|enxautar|esbafar|esbafegar|esbatussar|esblamar|esbojarrar|esborneiar|esbromar|escabridar|escamotar|escanyellar|escanyolir|escanyussar|escapolar|escapolir|escarcanyar|escarramicar|escarrassar|escarxofar|escatifenyar|esconillar|escorporar|escullar|escunçar|esfarinar|esfetgegar|esforçar|esgargamellar|esgatinyar|esgolar|esguimbar|esllanguir|esllavissar|esperitar|espitellar|espitxar|espollinar|espoltrar|esporcellar|espotonar|esprimatxar|esquifir|esquitllar|estilar|estritllar|esvedellar|esventegar|esvomegar|etiolar|extralimitar|extravasar|extravenar|gamar|gaspar|gatinyar|gaubar|gloriar|grifar|immiscir|indigestar|industriar|innivar|insolentar|insurgir|intersecar|inveterar|irèixer|jactar|juramentar|lateritzar|llufar|malfiar|malfixar|migrolar|mofar|mullerar|neulir|obstinar|octubrar|olivar|pellobrir|pellpartir|pelltrencar|penedir|penjolar|pollar|prosternar|queixar|querar|querellar|quillar|ramificar|rancurar|realegrar|rebel·lar|rebordeir|refiar|repanxolar|repapar|repetellar|reressagar|resclosir|ressagar|ressentir|revenjar|salinar|suïcidar|tinyar|tolir|transvestir|traslluir|traspostar|trufar|vanagloriar|vanagloriejar|vanar|vantar|vergonyar|xautar");
+//List of only pronominal verbs from GDLC (eliminats: assolar, enfundar, 
burlar, traslluir)
+  private static final Pattern VERBS_PRONOMINALS = 
Pattern.compile("abacallanar|abalançar|ablenar|aborrallonar|abotifarrar|abrinar|abromar|abstenir|acagallonar|acanyar|acarcanyar|acarnissar|acatarrar|aciutadanar|aclocar|acopar|acorriolar|adir|adonar|adormissar|afal·lerar|afarrossar|afeccionar|aferrallar|aferrissar|aferrussar|agallinar|agarbir|agarrofar|agemolir|agenollar|agotzonar|aiguabarrejar|allocar|alçurar|amatinar|amelar|amigar|amoixir|amoltonar|amotar|amullerar|amunionar|antullar|aparroquianar|aparroquiar|aperduar|apergaminar|apiadar|aponentar|apropinquar|apugonar|arguellar|arrapinyar|arrasir|arravatar|arraïmar|arrepapar|arrepetellar|arrigolar|arrodir|arrogar|arrossar|arruar|assemblar|assocarrar|atendar|atenir|atorrentar|atrafegar|atrevir|avencar|avidolar|avinençar|balbar|balcar|balir|balmar|bescomptar|boirar|boixar|botinflar|bromar|cagaferrar|candir|capbaixar|capmassar|captenir|cariar|carnificar|carpir|coalitzar|colltrencar|collvinclar|compenetrar|condoldre|condolir|congraciar|contorçar|contrapuntar|contòrcer|corcorcar|coresforçar|cornuar|corruixar|crisalidar|desafeccionar|desalenar|desamorar|desaparroquiar|desapassionar|desaplegar|desavenir|desbocar|descantar|descarar|descontrolar|descovar|desdubtar|desempallegar|desenrojolar|desentossudir|desfeinar|desmemoriar|desnodrir|despondre|despreocupar|dessolidaritzar|desteixinar|desvagar|desvergonyir|desviure|dignar|embarbussar|embascar|embessonar|embordeir|embordir|emborrascar|emborrossar|embotifarrar|embotzegar|embromallar|embromar|embroquerar|emmainadar|emmalurar|emmalurir|emmarar|emmarranar|emmatar|emmigranyar|emmorronar|emmurriar|empassar|empassolar|empegueir|empenyalar|empescar|empillocar|empinyar|empiocar|empitarrar|emplomissar|emplujar|emportar|encabotar|encabritar|encalmar|encalostrar|encelar|encinglar|encirar|encistar|enclaperar|encolerir|encordar|encruar|endoblir|endur|enfarfollar|enfaristolar|enfavar|enfereir|enferotgir|enferritjar|enfugir|enfurrunyar|enfutimar|enfutismar|engelabrir|engolfar|engorgar|engripar|enguerxinar|enllagrimar|enlleganyar|enlleir|e
nnavegar|enneguitar|enquistar|enrinxar|enseriosir|ensobecar|entonyinar|entossudir|entotsolar|entreabaltir|entrebadar|entrebatre|entrebesar|entrecavalcar|entredevorar|entreferir|entreforcar|entrematar|entremetre|entremirar|entrenyorar|entresaludar|entreseguir|entresoldar|entretocar|entretzenar|entrigar|envidreir|envidriar|envolar|enxautar|esbafar|esbafegar|esbatussar|esblamar|esbojarrar|esborneiar|esbromar|escabridar|escamotar|escanyellar|escanyolir|escanyussar|escapolar|escapolir|escarcanyar|escarramicar|escarrassar|escarxofar|escatifenyar|esconillar|escorporar|escullar|escunçar|esfarinar|esfetgegar|esforçar|esgargamellar|esgatinyar|esgolar|esguimbar|esllanguir|esllavissar|esperitar|espitellar|espitxar|espollinar|espoltrar|esporcellar|espotonar|esprimatxar|esquifir|esquitllar|estilar|estritllar|esvedellar|esventegar|esvomegar|etiolar|extralimitar|extravasar|extravenar|gamar|gaspar|gatinyar|gaubar|gloriar|grifar|immiscir|indigestar|industriar|innivar|insolentar|insurgir|intersecar|inveterar|irèixer|jactar|juramentar|lateritzar|llufar|malfiar|malfixar|migrolar|mofar|mullerar|neulir|obstinar|octubrar|olivar|pellobrir|pellpartir|pelltrencar|penedir|penjolar|pollar|prosternar|queixar|querar|querellar|quillar|ramificar|rancurar|realegrar|rebel·lar|rebordeir|refiar|repanxolar|repapar|repetellar|reressagar|resclosir|ressagar|ressentir|revenjar|salinar|suïcidar|tinyar|tolir|transvestir|traspostar|trufar|vanagloriar|vanagloriejar|vanar|vantar|vergonyar|xautar");
   private static final Pattern NO_VERBS_PRONOMINALS = 
Pattern.compile("atendre|escollir|assolir");
   private static final Pattern VERBS_NO_PRONOMINALS = 
Pattern.compile("caure|callar|témer|marxar|albergar|olorar|seure");
-  private static final Pattern VERBS_NO_PRONOMINALS_IMPERSONALS = 
Pattern.compile("caure|callar|témer|marxar");
+  private static final Pattern VERBS_NO_PRONOMINALS_IMPERSONALS = 
Pattern.compile("caure|callar|marxar");
+  private static final Pattern VERBS_NO_PRONOMINALS_IMPERSONALS2 = 
Pattern.compile("témer|albergar");
   private static final Pattern NO_VERBS_NO_PRONOMINALS = 
Pattern.compile("segar");
-  private static final Pattern VERBS_MOVIMENT = 
Pattern.compile("anar|pujar|baixar");
+  private static final Pattern VERBS_MOVIMENT = 
Pattern.compile("anar|pujar|baixar|venir|entrar");
   private static final Pattern VERBS_SOVINT_PRONOMINALS = 
Pattern.compile("deixar|fer|veure");
   private static final Pattern VERBS_DEIXAR_FER = 
Pattern.compile("deixar|fer");
   private static final Pattern VERBS_PORTAR_DUR = 
Pattern.compile("portar|dur");
@@ -69,9 +70,10 @@
   private static final Pattern VERB_INFGER = Pattern.compile("V.[NG].*");
   private static final Pattern VERB_GERUNDI = Pattern.compile("V.G.*");
   private static final Pattern VERB_PARTICIPI = Pattern.compile("V.P.*");
-//  private static final Pattern PREPOSICIO = Pattern.compile("SPS00");
+  private static final Pattern PREPOSICIO = Pattern.compile("SPS00");
   private static final Pattern VERB_AUXILIAR = Pattern.compile("VA.*");
-  private static final Pattern PREP_VERB_PRONOM = 
Pattern.compile("SPS00|V.*|P0.{6}|PP3CN000|PP3NN000|PP3..A00|PP3CP000|PP3CSD00");
+  private static final Pattern PREP_VERB_PRONOM = 
Pattern.compile("_PUNCT_CONT|SPS00|V.*|P0.{6}|PP3CN000|PP3NN000|PP3..A00|PP3CP000|PP3CSD00");
+  private static final Pattern VERB_PRONOM = 
Pattern.compile("V.*|P0.{6}|PP3CN000|PP3NN000|PP3..A00|PP3CP000|PP3CSD00");
   //cal restringir les preposicions  
   
   private static final Pattern VERB_1S = Pattern.compile("V...1S.");
@@ -103,6 +105,9 @@
   private static final Pattern POSTAG_PRONOM_CI = 
Pattern.compile("P0.*|PP3CP000|PP3CSD00");
   private static final Pattern LEMMA_PRONOM_CD = Pattern.compile("jo|tu|ell");
   private static final Pattern POSTAG_PRONOM_CD = 
Pattern.compile("P0.*|PP3CP000|PP3..A00");
+  private static final Pattern POSTAG_CD = 
Pattern.compile("_GN_.*|N.*|DI.*|P[DI].*");
+  private static final Pattern LEMMA_DE = Pattern.compile("de");
+  private static final Pattern POSTAG_DE = Pattern.compile("SPS00");
   
   private static final Pattern POSTAG_ADVERBI = 
Pattern.compile("RG.*|.*LOC_ADV.*");
   private static final Pattern ANYMESDIA = Pattern.compile("any|mes|dia");
@@ -160,28 +165,36 @@
                        // Comprova: portar-se/emportar-se
                        if (i+2<tokens.length
                                        && matchLemmaRegexp(tokens[i], 
VERBS_PORTAR_DUR)
+                                       && !hasVerbMultipleReadings(tokens[i]) 
//em duràs un mocador
                                        && isThereReflexivePronoun(tokens, i) 
// ens portem, ens hem de portar
+                                       && 
isThereAfterWithoutPreposition(tokens, i, POSTAG_CD)
                                        && 
!isThereVerbBefore(tokens,i,VERBS_DEIXAR_FER) // es deixen portar
+                                       && 
!(isThereVerbBefore(tokens,i,VERBS_POTENCIALMENT_PRONOMINALS)&&!isThereVerbBefore(tokens,i,NO_VERBS_POTENCIALMENT_PRONOMINALS))
                                        && !matchPostagRegexp(tokens[i+1], 
POSTAG_ADVERBI) // es porten bé
                                        && !matchPostagRegexp(tokens[i+2], 
POSTAG_ADVERBI) // hem de portar-nos bé
                                        && !matchLemmaRegexp(tokens[i+2], 
ANYMESDIA) // ens portem tres anys
+                                       && !isPhraseImpersonalVerbSP(tokens, i) 
// Es va portar l'any passat
                                        ) {
                                // the rule matches
-                               String msg;
-                               if (matchLemmaRegexp(tokens[i], VERB_PORTAR)) 
{msg= "Cal escriure: <suggestion>em"+token+"</suggestion>."; }
-                                       else {msg= "Cal escriure: 
<suggestion>en"+token+"</suggestion>.";}
+                               String suggestion;
+                               if (matchLemmaRegexp(tokens[i], VERB_PORTAR)) 
{suggestion= "em"+token; }
+                                       else if (token.equalsIgnoreCase("du")) 
{suggestion="endú"; }
+                                       else {suggestion= "en"+token; }
+                               final String msg="¿Volíeu dir 
<suggestion>"+suggestion+"</suggestion>?";
                                final RuleMatch ruleMatch = new RuleMatch(this,
                                                tokens[i].getStartPos(), 
tokens[i].getStartPos()
-                                                               + 
token.length(), msg, "Ús incorrecte");
-                               ruleMatches.add(ruleMatch);                     
        
+                                                               + 
token.length(), msg, "Possible error");
+                               ruleMatches.add(ruleMatch);             
+                               continue loop;
                        }
                        
                        //PERÍFRASI AMB VERB PRONOMINAL: el fan 
*agenollar-se/agenollar
                        if (i+1<tokens.length 
                                        && matchPostagRegexp(tokens[i], 
VERB_INFGER)
+                                       && !matchPostagRegexp(tokens[i - 1], 
PREPOSICIO) 
                                        && 
isThereVerbBefore(tokens,i,VERBS_DEIXAR_FER)
-                                       && isTherePronounBefore(tokens, i, 
LEMMA_PRONOM_CD, POSTAG_PRONOM_CD)  
-                                       && matchPostagRegexp(tokens[i+1], 
PRONOM_REFLEXIU) ) {
+                                       && isThereBefore(tokens, i, 
LEMMA_PRONOM_CD, POSTAG_PRONOM_CD)  
+                                       && matchRegexp(tokens[i + 
1].getToken(), REFLEXIU_POSPOSAT) ) {
                                        // the rule matches
                                        final String msg = "En aquesta 
perífrasi verbal el pronom reflexiu posterior és 
redundant.<suggestion></suggestion>";
                                        final RuleMatch ruleMatch = new 
RuleMatch(this,
@@ -199,7 +212,7 @@
                                if (matchPostagRegexp(tokens[i], 
VERB_PARTICIPI) && !matchLemmaRegexp(tokens[i - 1], VERB_HAVER)) 
                                        continue loop;
                                if 
(isThereVerbBefore(tokens,i,VERBS_DEIXAR_FER)  // el fa agenollar
-                                               && isTherePronounBefore(tokens, 
i, LEMMA_PRONOM_CD, POSTAG_PRONOM_CD) )
+                                               && isThereBefore(tokens, i, 
LEMMA_PRONOM_CD, POSTAG_PRONOM_CD) )
                                        continue loop;
                                if (isThereReflexivePronoun(tokens, i)) 
                                        continue loop;
@@ -215,7 +228,7 @@
                        
 //                     //FRASE IMPERSONAL
 //                     // És frase impersonal si hi ha el pronom 'es', llevat 
que es pugui identificar un subjecte "personal"
-//                     if (isTherePronounBefore(tokens, i, LEMMA_ES, POSTAG_ES)
+//                     if (isThereBefore(tokens, i, LEMMA_ES, POSTAG_ES)
 //                                     && 
!isThereBefore(tokens,i,SUBJECTE_PERSONAL_TOKEN,SUBJECTE_PERSONAL_POSTAG,SUBJECTE_PERSONAL_NO_POSTAG)
 //                                     && 
isVerbNumberPerson(tokens,i,VERB_3S))  
 //                             continue loop;
@@ -234,11 +247,12 @@
                                //FRASE IMPERSONAL
                                // És frase impersonal si hi ha el pronom 'es', 
llevat que es pugui identificar un subjecte "personal"
                                if 
(matchLemmaRegexp(tokens[i],VERBS_NO_PRONOMINALS_IMPERSONALS)
-                                               && isTherePronounBefore(tokens, 
i, LEMMA_ES, POSTAG_ES)
-                                               && 
!isTherePronounBefore(tokens, i, LEMMA_PRONOM_CI, POSTAG_PRONOM_CI)
-                                               && 
!isTherePersonalSubjectBefore(tokens,i,TRENCA_COMPTE2)
-                                               && 
isVerbNumberPerson(tokens,i,VERB_3S))  
+                                               && 
isPhraseImpersonalVerbS(tokens, i) )  
                                        continue loop;
+                               if 
(matchLemmaRegexp(tokens[i],VERBS_NO_PRONOMINALS_IMPERSONALS2)
+                                               && 
isPhraseImpersonalVerbSP(tokens, i) )  
+                                       continue loop;
+                               
                                // the rule matches
                                final String msg = "Aquest verb no és 
pronominal. Sobra un pronom.";
                                final RuleMatch ruleMatch = new RuleMatch(this,
@@ -247,8 +261,14 @@
                                                "Verb no pronominal: sobra un 
pronom");
                                ruleMatches.add(ruleMatch);
                        }
+                       
                        //VERBS DE MOVIMENT: si hi ha pronom reflexiu cal el 
pronom 'en'.
                        if (matchLemmaRegexp(tokens[i], VERBS_MOVIMENT) && 
!matchPostagRegexp(tokens[i], VERB_AUXILIAR)) {
+                               //impersonal obligació: s'ha de baixar
+                               if (isThereBefore(tokens, i, LEMMA_ES, 
POSTAG_ES)
+                                               && isThereBefore(tokens, i, 
LEMMA_DE, POSTAG_DE)
+                                               && 
isThereVerbBefore(tokens,i,VERB_HAVER) )
+                                               continue loop;
                                if 
(isThereVerbBefore(tokens,i,VERBS_SOVINT_PRONOMINALS) 
                                                || 
(isThereVerbBefore(tokens,i,VERBS_POTENCIALMENT_PRONOMINALS)&&!isThereVerbBefore(tokens,i,NO_VERBS_POTENCIALMENT_PRONOMINALS))
                                                || 
isThereVerbBefore(tokens,i,VERBS_PRONOMINALS)) //et deixes anar/pujar
@@ -261,16 +281,16 @@
                                                        
||isThereVerbAfter(tokens,i,VERBS_PRONOMINALS))
                                                continue loop;
                                        //FRASE IMPERSONAL
-                                       if (isTherePronounBefore(tokens, i, 
LEMMA_ES, POSTAG_ES)
-                                                       && 
!isTherePronounBefore(tokens, i, LEMMA_PRONOM_CI, POSTAG_PRONOM_CI)
-                                                       && 
(!isTherePersonalSubjectBefore(tokens,i,TRENCA_COMPTE) || 
isTherePronounBefore(tokens, i, LEMMA_HI, POSTAG_HI)) 
+                                       if (isThereBefore(tokens, i, LEMMA_ES, 
POSTAG_ES)
+                                                       && 
!isThereBefore(tokens, i, LEMMA_PRONOM_CI, POSTAG_PRONOM_CI)
+                                                       && 
(!isTherePersonalSubjectBefore(tokens,i,TRENCA_COMPTE) || isThereBefore(tokens, 
i, LEMMA_HI, POSTAG_HI)) 
                                                        && 
isVerbNumberPerson(tokens,i,VERB_3S))  
                                                continue loop;
                                }
                                else {
                                        // FRASE IMPERSONAL
-                                       if (isTherePronounBefore(tokens, i, 
LEMMA_ES, POSTAG_ES)
-                                                       && 
!isTherePronounBefore(tokens, i, LEMMA_PRONOM_CI, POSTAG_PRONOM_CI)
+                                       if (isThereBefore(tokens, i, LEMMA_ES, 
POSTAG_ES)
+                                                       && 
!isThereBefore(tokens, i, LEMMA_PRONOM_CI, POSTAG_PRONOM_CI)
                                                        && 
!isTherePersonalSubjectBefore(tokens, i,     TRENCA_COMPTE))
                                                continue loop;
                                }
@@ -282,9 +302,8 @@
                                                                        + 
token.length(), msg,
                                                        "Falta el pronom 'en'");
                                        ruleMatches.add(ruleMatch);
-                               
+                               }
                        }
-                       }
                }
                return toRuleMatchArray(ruleMatches);
        }
@@ -314,6 +333,14 @@
   }
   
   /**
+   * El verb té múltiples lectures
+   */ 
+  private boolean hasVerbMultipleReadings (AnalyzedTokenReadings aToken) {
+       return (matchPostagRegexp(aToken,VERB_1S) && 
matchPostagRegexp(aToken,VERB_3S))
+                       || (matchPostagRegexp(aToken,VERB_2S) && 
matchPostagRegexp(aToken,VERB_3S));
+  }
+  
+  /**
    * Match POS tag with regular expression
    */
   private boolean matchPostagRegexp(AnalyzedTokenReadings aToken, Pattern 
pattern) {
@@ -491,41 +518,41 @@
         * @param i
         * @return
         */
-       private boolean isTherePronoun(
-                       final AnalyzedTokenReadings[] tokens, int i, Pattern 
lemma, Pattern postag) {
+       private boolean isTherePronoun(final AnalyzedTokenReadings[] tokens, 
int i,
+                       Pattern lemma, Pattern postag) {
                int j = 1;
                boolean keepCounting = true;
-               while (i-j>0 && keepCounting) {
-                       if (matchPostagRegexp(tokens[i-j], postag) && 
matchLemmaRegexp(tokens[i-j], lemma))
+               while (i - j > 0 && keepCounting) {
+                       if (matchPostagRegexp(tokens[i - j], postag)
+                                       && matchLemmaRegexp(tokens[i - j], 
lemma))
                                return true;
-                       keepCounting = matchPostagRegexp(tokens[i - j],
-                                       PREP_VERB_PRONOM);
+                       keepCounting = matchPostagRegexp(tokens[i - j], 
PREP_VERB_PRONOM);
                        j++;
                }
                j = 1;
                keepCounting = true;
-               while (i+j<tokens.length && keepCounting) {
-                       if (matchPostagRegexp(tokens[i+j], postag) && 
matchLemmaRegexp(tokens[i+j], lemma))
+               while (i + j < tokens.length && keepCounting) {
+                       if (matchPostagRegexp(tokens[i + j], postag)
+                                       && matchLemmaRegexp(tokens[i + j], 
lemma))
                                return true;
-                       keepCounting = matchPostagRegexp(tokens[i+j],
-                                       PREP_VERB_PRONOM);
+                       keepCounting = matchPostagRegexp(tokens[i + j], 
PREP_VERB_PRONOM);
                        j++;
-               }               
-       return false;
+               }
+               return false;
        }
-       
-       private boolean isTherePronounBefore(
-                       final AnalyzedTokenReadings[] tokens, int i, Pattern 
lemma, Pattern postag) {
+
+       private boolean isThereBefore(final AnalyzedTokenReadings[] tokens,
+                       int i, Pattern lemma, Pattern postag) {
                int j = 1;
                boolean keepCounting = true;
-               while (i-j>0 && keepCounting) {
-                       if (matchPostagRegexp(tokens[i-j], postag) && 
matchLemmaRegexp(tokens[i-j], lemma))
+               while (i - j > 0 && keepCounting) {
+                       if (matchPostagRegexp(tokens[i - j], postag)
+                                       && matchLemmaRegexp(tokens[i - j], 
lemma))
                                return true;
-                       keepCounting = matchPostagRegexp(tokens[i - j],
-                                       PREP_VERB_PRONOM);
+                       keepCounting = matchPostagRegexp(tokens[i - j], 
PREP_VERB_PRONOM);
                        j++;
-               }               
-       return false;
+               }
+               return false;
        }
 
        private boolean isThereAfter(final AnalyzedTokenReadings[] tokens, int 
i, Pattern postag) {
@@ -541,6 +568,19 @@
                return false;
        }
        
+       private boolean isThereAfterWithoutPreposition(final 
AnalyzedTokenReadings[] tokens, int i, Pattern postag) {
+               int j = 1;
+               boolean keepCounting = true;
+               while (i+j<tokens.length && keepCounting) {
+                       if (matchPostagRegexp(tokens[i+j], postag))
+                               return true;
+                       keepCounting = matchPostagRegexp(tokens[i+j],
+                                       VERB_PRONOM);
+                       j++;
+               }
+               return false;
+       }
+       
        private boolean isThereVerbBefore(final AnalyzedTokenReadings[] tokens, 
int i, Pattern lemma) {
                int j = 1;
                boolean keepCounting = true;
@@ -604,6 +644,24 @@
                return false;
        }
        
+       
+       private boolean isPhraseImpersonalVerbS (final AnalyzedTokenReadings[] 
tokens, int i) {
+               //FRASE IMPERSONAL
+               // És frase impersonal si hi ha el pronom 'es', llevat que es 
pugui identificar un subjecte "personal".
+               return isThereBefore(tokens, i, LEMMA_ES, POSTAG_ES)
+               && !isThereBefore(tokens, i, LEMMA_PRONOM_CI, POSTAG_PRONOM_CI)
+               && (!isTherePersonalSubjectBefore(tokens,i,TRENCA_COMPTE2) || 
isThereBefore(tokens, i, LEMMA_HI, POSTAG_HI))
+               && isVerbNumberPerson(tokens,i,VERB_3S);                
+       }
+       private boolean isPhraseImpersonalVerbSP (final AnalyzedTokenReadings[] 
tokens, int i) {
+               //FRASE IMPERSONAL
+               // És frase impersonal si hi ha el pronom 'es', llevat que es 
pugui identificar un subjecte "personal".
+               return isThereBefore(tokens, i, LEMMA_ES, POSTAG_ES)
+               && !isThereBefore(tokens, i, LEMMA_PRONOM_CI, POSTAG_PRONOM_CI)
+               && (!isTherePersonalSubjectBefore(tokens,i,TRENCA_COMPTE) || 
isThereBefore(tokens, i, LEMMA_HI, POSTAG_HI))
+               && 
(isVerbNumberPerson(tokens,i,VERB_3S)||isVerbNumberPerson(tokens,i,VERB_3P));   
             
+       }
+       
        @Override
        public void reset() {
                // nothing

Modified: trunk/JLanguageTool/src/main/resources/org/languagetool/resource/ca/disambiguation.xml
===================================================================
--- trunk/JLanguageTool/src/main/resources/org/languagetool/resource/ca/disambiguation.xml 2012-12-17 01:46:44 UTC (rev 8567)
+++ trunk/JLanguageTool/src/main/resources/org/languagetool/resource/ca/disambiguation.xml 2012-12-17 02:21:03 UTC (rev 8568)
@@ -6995,6 +6995,21 @@
         </pattern>
         <disambig action="filter" postag="[^P].*"></disambig>
     </rule>
+    <rule id="el_no_article" name="el no article"> <!-- el feu cantar -->
+        <pattern>
+            <marker>
+            <and>
+                <token postag="DA.*" postag_regexp="yes"/>
+                <token postag="_GV_"/>
+            </and>
+            </marker>
+            <and>
+                <token postag="V.[SI].*" postag_regexp="yes"/>
+                <token postag="_GV_"><exception postag="_GN_.*|N.*" 
postag_regexp="yes"/></token>
+            </and>
+        </pattern>
+        <disambig action="filter" postag="[^D].*"></disambig>
+    </rule>
     <rule id="GRANS" name="grans de cereals...">
         <pattern>
             <marker>

Modified: trunk/JLanguageTool/src/main/resources/org/languagetool/rules/ca/grammar.xml
===================================================================
--- trunk/JLanguageTool/src/main/resources/org/languagetool/rules/ca/grammar.xml 2012-12-17 01:46:44 UTC (rev 8567)
+++ trunk/JLanguageTool/src/main/resources/org/languagetool/rules/ca/grammar.xml 2012-12-17 02:21:03 UTC (rev 8568)
@@ -6071,6 +6071,7 @@
     </category>
     <category name="Z) Accents diacrítics">
         <rulegroup id="FEU" name="feu/féu">
+            <!-- Cal estudiar "que el feu". Mirar que no hi hagi cap verb ni 
pronom de 2P abans. "que el feu famós"... -->
             <rule>
                 <pattern>
                     <token postag="P0300000|PP3.S000|_GN_.S|N..S.*" 
postag_regexp="yes"><exception postag="D.0MS0|PX3MS0C0|DP3CSP" 
postag_regexp="yes"/></token>
@@ -12778,19 +12779,34 @@
             <example type="correct">Una vegada aconseguit l'objectiu</example>
             <example type="correct">Aconseguit l'objectiu</example>
         </rule> 
-        <rule id="VISTIPLAU" name="vistiplau">
-            <pattern>
-                <token postag="D.0MS0" postag_regexp="yes"/>
-                <marker>
-                    <token>vist</token>
-                    <token>i</token>
-                    <token>plau</token>
-                </marker>
-            </pattern>
-            <message>¿Volíeu dir <suggestion>vistiplau</suggestion>?</message>
-            <example type="incorrect">El <marker>vist i plau</marker></example>
-            <example type="correct">el vistiplau</example>
-        </rule>
+        <rulegroup id="VISTIPLAU" name="vistiplau">
+            <rule>
+                <pattern>
+                    <token postag="D.0MS0" postag_regexp="yes"/>
+                    <marker>
+                        <token>vist</token>
+                        <token>i</token>
+                        <token>plau</token>
+                    </marker>
+                </pattern>
+                <message>¿Volíeu dir 
<suggestion>vistiplau</suggestion>?</message>
+                <example type="incorrect">un <marker>vist i 
plau</marker></example>
+                <example type="correct">un vistiplau</example>
+            </rule>
+            <rule>
+                <pattern>
+                    <token regexp="yes">el|l</token>
+                    <marker>
+                        <token>vist</token>
+                        <token>i</token>
+                        <token>plau</token>
+                    </marker>
+                </pattern>
+                <message>¿Volíeu dir 
<suggestion>vistiplau</suggestion>?</message>
+                <example type="incorrect">el <marker>vist i 
plau</marker></example>
+                <example type="correct">el vistiplau</example>
+            </rule>
+        </rulegroup>    
         <rulegroup id="A_FALTA_DE" name="* a falta de cinc mintus">
             <rule>
                 <pattern>

Modified: trunk/JLanguageTool/src/test/java/org/languagetool/rules/ca/ReflexiveVerbsRuleTest.java
===================================================================
--- trunk/JLanguageTool/src/test/java/org/languagetool/rules/ca/ReflexiveVerbsRuleTest.java 2012-12-17 01:46:44 UTC (rev 8567)
+++ trunk/JLanguageTool/src/test/java/org/languagetool/rules/ca/ReflexiveVerbsRuleTest.java 2012-12-17 02:21:03 UTC (rev 8568)
@@ -42,7 +42,27 @@
 
        public void testRule() throws IOException { 
 
+               //TODO: se'n vola / s'envola
                // correct sentences:
+               //assertCorrect("el dia de Rams es commemora anant a l'església 
a beneir el palmó");
+               //assertCorrect("La Nit de sant Joan es baixaven falles de la 
muntanya."); solucions: marcar "la nit..." com a CC o comprovar la concordança 
subj/verb
+               assertCorrect("que s'havien anat instal·lant");
+               assertCorrect("gràcies a la presència del Riu Set s'hi alberga 
una gran arboreda amb taules");
+               assertCorrect("no fa gaires anys també s'hi portaven alguns 
animals");
+               assertCorrect("el sòlid es va \"descomponent\".");
+               assertCorrect("la divisió s'ha d'anar amb cura per evitar 
ambigüitats");
+               assertCorrect("la senyera s'ha de baixar");
+               assertCorrect("Es van témer assalts a altres edificis de la CNT 
");
+               assertCorrect("que Joan em dugués el mocador");
+               assertCorrect("que Joan es dugués el mocador"); // dubtós
+               assertCorrect("em duràs un mocador de seda del teu color");
+               assertCorrect("El va deixar per a dedicar-se a la música");
+               assertCorrect("Hermes s'encarregava de dur les ànimes que 
acabaven de morir a l'Inframón");
+               assertCorrect("aquest nom és poc adequat ja que es poden portar 
les propostes de l'escalada clàssica");
+               //assertCorrect("totes les comissions dels països vencedors en 
les guerres napoleòniques es van portar els seus propis cuiners");
+               assertCorrect("en fer-lo girar se'n podia observar el 
moviment");
+               assertCorrect("el segon dia es duien a terme les carreres 
individuals");
+               assertCorrect("Normalment no es duu un registre oficial 
extern");
                assertCorrect("Ens portem força bé");
                assertCorrect("Hem de portar-nos bé");
                assertCorrect("Ells es porten tres anys");
@@ -148,6 +168,8 @@
                assertCorrect("Joan no es va a jugar la feina.");       
                        
                // errors:
+               assertIncorrect("Ells es volen dur les ànimes a l'Inframón");
+               assertIncorrect("Joan es va portar el carretó");
                assertIncorrect("en aquesta vida ens portem moltes sorpreses");
                assertIncorrect("Ens hem portat massa material al campament");
                assertIncorrect("Hem de dur-nos tot això.");

This was sent by the SourceForge.net collaborative development platform, the 
world's largest Open Source development site.


------------------------------------------------------------------------------
LogMeIn Rescue: Anywhere, Anytime Remote support for IT. Free Trial
Remotely access PCs and mobile devices and provide instant support
Improve your efficiency, and focus on delivering more value-add services
Discover what IT Professionals Know. Rescue delivers
http://p.sf.net/sfu/logmein_12329d2d
_______________________________________________
Languagetool-commits mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/languagetool-commits

Reply via email to