Revision: 8806
          
http://languagetool.svn.sourceforge.net/languagetool/?rev=8806&view=rev
Author:   jaumeortola
Date:     2013-01-03 00:36:30 +0000 (Thu, 03 Jan 2013)
Log Message:
-----------
Change in "unification". Readings that don't match their pattern element 
are no longer taken into account in "unification". 

Small changes in French and Galician disambiguation files to avoid errors in 
the tests. 

Modified Paths:
--------------
    
trunk/JLanguageTool/src/main/java/org/languagetool/rules/patterns/AbstractPatternRule.java
    
trunk/JLanguageTool/src/main/java/org/languagetool/rules/patterns/Unifier.java
    
trunk/JLanguageTool/src/main/resources/org/languagetool/resource/ca/disambiguation.xml
    
trunk/JLanguageTool/src/main/resources/org/languagetool/resource/fr/disambiguation.xml
    
trunk/JLanguageTool/src/main/resources/org/languagetool/resource/gl/disambiguation.xml
    
trunk/JLanguageTool/src/test/java/org/languagetool/rules/ca/ComplexAdjectiveConcordanceRuleTest.java
    
trunk/JLanguageTool/src/test/java/org/languagetool/tagging/disambiguation/rules/fr/FrenchRuleDisambiguatorTest.java

Modified: 
trunk/JLanguageTool/src/main/java/org/languagetool/rules/patterns/AbstractPatternRule.java
===================================================================
--- 
trunk/JLanguageTool/src/main/java/org/languagetool/rules/patterns/AbstractPatternRule.java
  2013-01-02 23:04:39 UTC (rev 8805)
+++ 
trunk/JLanguageTool/src/main/java/org/languagetool/rules/patterns/AbstractPatternRule.java
  2013-01-03 00:36:30 UTC (rev 8806)
@@ -195,18 +195,19 @@
           final boolean lastReading, final AnalyzedToken matchToken,
           final Element elem) {
       boolean thisMatched = matched;
+      boolean elemIsMatched= elem.isMatched(matchToken);
       if (testUnification) {
           if (matched && elem.isUnified()) {
               if (elem.isUniNegated()) {
                   thisMatched = !(thisMatched && unifier.isUnified(matchToken, 
elem.getUniFeatures(), 
-                          lastReading));
+                          lastReading,elemIsMatched));
               } else {
                   if (elem.isLastInUnification()) {
                       thisMatched = thisMatched && 
unifier.isUnified(matchToken, elem.getUniFeatures(), 
-                              lastReading);
+                              lastReading,elemIsMatched);
                   } else { //we don't care about the truth value, let it run   
                
                       unifier.isUnified(matchToken, elem.getUniFeatures(), 
-                              lastReading);
+                              lastReading, elemIsMatched);
                   }
 
               }

Modified: 
trunk/JLanguageTool/src/main/java/org/languagetool/rules/patterns/Unifier.java
===================================================================
--- 
trunk/JLanguageTool/src/main/java/org/languagetool/rules/patterns/Unifier.java  
    2013-01-02 23:04:39 UTC (rev 8805)
+++ 
trunk/JLanguageTool/src/main/java/org/languagetool/rules/patterns/Unifier.java  
    2013-01-03 00:36:30 UTC (rev 8806)
@@ -331,15 +331,20 @@
    * 
    * @param matchToken
    *          {@link AnalyzedToken} token to unify
- * @param lastReading
+   * @param lastReading
    *          true when the matchToken is the last reading in the
    *          {@link AnalyzedTokenReadings}
+   * @param isMatched     
+   *          true if the reading matches the element in the pattern rule,
+   *          otherwise the reading is not considered in the unification    
    * @return True if the tokens in the sequence are unified.
    */
   public final boolean isUnified(final AnalyzedToken matchToken,
-      final Map<String, List<String>> uFeatures, final boolean lastReading) {
-    if (inUnification) {      
-      uniMatched |= isSatisfied(matchToken, uFeatures);
+      final Map<String, List<String>> uFeatures, final boolean lastReading, 
final boolean isMatched) {
+    if (inUnification) {
+       if (isMatched) {        
+          uniMatched |= isSatisfied(matchToken, uFeatures); 
+       }
       uniAllMatched = uniMatched;
       if (lastReading) {
         startNextToken();
@@ -348,7 +353,9 @@
       }
       return uniAllMatched;
     }
-    isSatisfied(matchToken, uFeatures);
+    if (isMatched) {   
+       isSatisfied(matchToken, uFeatures);
+    }
     if (lastReading) {
       inUnification = true;
       uniMatched = false;
@@ -356,6 +363,11 @@
     }
     return true;
   }
+  
+  public final boolean isUnified(final AnalyzedToken matchToken,
+             final Map<String, List<String>> uFeatures, final boolean 
lastReading) {
+         return this.isUnified(matchToken, uFeatures, lastReading, true); 
+  }
 
   /**
    * Used for getting a unified sequence in case when simple test method

Modified: 
trunk/JLanguageTool/src/main/resources/org/languagetool/resource/ca/disambiguation.xml
===================================================================
--- 
trunk/JLanguageTool/src/main/resources/org/languagetool/resource/ca/disambiguation.xml
      2013-01-02 23:04:39 UTC (rev 8805)
+++ 
trunk/JLanguageTool/src/main/resources/org/languagetool/resource/ca/disambiguation.xml
      2013-01-03 00:36:30 UTC (rev 8806)
@@ -34,6 +34,7 @@
         </equivalence>
     </unification>
     <rulegroup id="PUNTUACIO" name="signes de puntuació">
+       
         <rule>
             <pattern>
                 <token regexp="yes">[.;:!?—–…()\[\]\-]</token>
@@ -3104,86 +3105,47 @@
             <disambig action="filter" postag="DI.*"></disambig>
         </rule>
     </rulegroup>
-    <rulegroup id="DAAN" name="Det + AdjNom + Adj + Nom"> 
     <!-- La característica mitja canya
         problema: la part superior esquerra -->
-       <!--  <rule>
-            <pattern>
-            <unify>
-                <feature id="nombre"/>
-                <feature id="genere"/>
-                <marker>
-                       <token postag="D[^R].*" postag_regexp="yes"/>    
-                    <token postag="A.*" postag_regexp="yes"/>
-                       <token postag="A.*" postag_regexp="yes"/>
-                       <token postag="N.*" postag_regexp="yes"/>
-                </marker>      
-            </unify>
-            </pattern>
-            <disambig action="filterall" />
-        </rule> -->
-        <rule>
-            <pattern>
-            <unify>
-                <feature id="nombre"/>
-                <token postag="D[^R].[FC].*" postag_regexp="yes"/>
-                <marker>
-                    <and>
-                    <token postag="A..[FC].*" postag_regexp="yes"/>
-                    <token postag="N.*" postag_regexp="yes"/>
-                    </and>
-                </marker>
-                <token postag="A..[FC].*" postag_regexp="yes"/>
-                <token postag="N.[FC].*" postag_regexp="yes"/>
-            </unify>
-            </pattern>
-            <disambig action="filter" postag="A..[FC].*"></disambig>
-        </rule>
-        <rule>
-            <pattern>
-            <unify>
-                <feature id="nombre"/>
-                <token postag="D[^R].[MC].*" postag_regexp="yes"/>
-                <marker>
-                    <and>
-                    <token postag="A..[MC].*" postag_regexp="yes"/>
-                    <token postag="N.*" postag_regexp="yes"/>
-                    </and>
-                </marker>
-                <token postag="A..[MC].*" postag_regexp="yes"/>
-                <token postag="N.[MC].*" postag_regexp="yes"/>
-            </unify>
-            </pattern>
-            <disambig action="filter" postag="A..[MC].*"></disambig>
-        </rule>
-        <rule>
-            <pattern>
-            <unify>
-                <feature id="nombre"/>
-                <feature id="genere"/>
-                <token postag="D[^R].*" postag_regexp="yes"/>
+    <rule id="DAAN" name="det + adj + adj + nom">
+       <pattern>
+          <unify>
+              <feature id="nombre"/>
+              <feature id="genere"/>
+              <marker>
+                       <token postag="D[^R].*" postag_regexp="yes"/>    
                 <token postag="A.*" postag_regexp="yes"/>
-                <marker>
-                    <and>
-                    <token postag="N.*" postag_regexp="yes"/>
-                    <token postag="D.*" postag_regexp="yes" 
inflected="yes">numeral</token>
-                    </and>
-                </marker>
+                       <token postag="A.*" postag_regexp="yes"/>
+                       <token postag="N.*" postag_regexp="yes"/>
+              </marker>        
+          </unify>
+          </pattern>
+          <disambig action="filterall" />
+    </rule>    
+    <rule id="DANumN" name="det + adj + numeral + nom">
+        <pattern>
+        <unify>
+        <feature id="nombre"/>
+        <feature id="genere"/>
+            <marker>
+               <token postag="D[^R].*" postag_regexp="yes"/>
+               <token postag="A.*" postag_regexp="yes"/>
+                <token postag="DN.*" postag_regexp="yes"/>
                 <token postag="N.*" postag_regexp="yes"/>
-            </unify>
-            </pattern>
-            <disambig action="filter" postag="D.*"></disambig>
-        </rule>
-    </rulegroup>
+            </marker>
+        </unify>
+        </pattern>
+        <disambig action="filterall"/>
+    </rule>
     <rulegroup id="DPAN" name="Det + Possessiu  + AdjNom + Nom"> 
     <!-- La seva mala intenció -->
        <rule>
             <pattern>
                 <marker>
-                <token postag="D[^R].[MC][SN].*" postag_regexp="yes"/>
-                <token postag="PX.[MC][SN].*" postag_regexp="yes"/>
-                <token postag="A..[MC][SN].*|V.P.*SM" postag_regexp="yes"/>
-                <token postag="N.[MC][SN].*" postag_regexp="yes"/>
+                       <token postag="D[^R].[MC][SN].*" postag_regexp="yes"/>
+                       <token postag="PX.[MC][SN].*" postag_regexp="yes"/>
+                       <token postag="A..[MC][SN].*|V.P.*SM" 
postag_regexp="yes"/>
+                       <token postag="N.[MC][SN].*" postag_regexp="yes"/>
                 </marker>
             </pattern>
             <disambig action="filterall"/>
@@ -3191,10 +3153,10 @@
         <rule>
             <pattern>
                 <marker>
-                <token postag="D[^R].[FC][SN].*" postag_regexp="yes"/>
-                <token postag="PX.[FC][SN].*" postag_regexp="yes"/>
-                <token postag="A..[FC][SN].*|V.P.*SF" postag_regexp="yes"/>
-                <token postag="N.[FC][SN].*" postag_regexp="yes"><exception 
regexp="yes">era|mira|queda</exception></token>
+                       <token postag="D[^R].[FC][SN].*" postag_regexp="yes"/>
+                       <token postag="PX.[FC][SN].*" postag_regexp="yes"/>
+                       <token postag="A..[FC][SN].*|V.P.*SF" 
postag_regexp="yes"/>
+                       <token postag="N.[FC][SN].*" 
postag_regexp="yes"><exception regexp="yes">era|mira|queda</exception></token>
                 </marker>
             </pattern>
             <disambig action="filterall"/>
@@ -3202,10 +3164,10 @@
         <rule>
             <pattern>
                 <marker>
-                <token postag="D[^R].[MC][PN].*" postag_regexp="yes"/>
-                <token postag="PX.[MC][PN].*" postag_regexp="yes"/>
-                <token postag="A..[MC][PN].*|V.P.*PM" postag_regexp="yes"/>
-                <token postag="N.[MC][PN].*" postag_regexp="yes"/>
+                       <token postag="D[^R].[MC][PN].*" postag_regexp="yes"/>
+                       <token postag="PX.[MC][PN].*" postag_regexp="yes"/>
+                       <token postag="A..[MC][PN].*|V.P.*PM" 
postag_regexp="yes"/>
+                       <token postag="N.[MC][PN].*" postag_regexp="yes"/>
                 </marker>
             </pattern>
             <disambig action="filterall"/>
@@ -3213,10 +3175,10 @@
         <rule>
             <pattern>
                 <marker>
-                <token postag="D[^R].[FC][PN].*" postag_regexp="yes"/>
-                <token postag="PX.[FC][PN].*" postag_regexp="yes"/>
-                <token postag="A..[FC][PN].*|V.P.*PF" postag_regexp="yes"/>
-                <token postag="N.[FC][PN].*" postag_regexp="yes"/>
+                       <token postag="D[^R].[FC][PN].*" postag_regexp="yes"/>
+                       <token postag="PX.[FC][PN].*" postag_regexp="yes"/>
+                       <token postag="A..[FC][PN].*|V.P.*PF" 
postag_regexp="yes"/>
+                       <token postag="N.[FC][PN].*" postag_regexp="yes"/>
                 </marker>
             </pattern>
             <disambig action="filterall"/>
@@ -5905,13 +5867,13 @@
             <token postag="N.*|A.*" postag_regexp="yes"></token>
             <marker>
                 <and>
-                    <token postag="N.*" postag_regexp="yes"></token>
-                    <token postag="SP.*" postag_regexp="yes"></token>
+                    <token postag="N.*" postag_regexp="yes"/>
+                    <token postag="SPS00"/>
                 </and>
             </marker>
         </pattern>
         <disambig action="filter" postag="[^N].*"></disambig>
-    </rule>
+    </rule> 
     <rule id="haver_de" name="haver de + infinitiu">
         <pattern>
             <marker>

Modified: 
trunk/JLanguageTool/src/main/resources/org/languagetool/resource/fr/disambiguation.xml
===================================================================
--- 
trunk/JLanguageTool/src/main/resources/org/languagetool/resource/fr/disambiguation.xml
      2013-01-02 23:04:39 UTC (rev 8805)
+++ 
trunk/JLanguageTool/src/main/resources/org/languagetool/resource/fr/disambiguation.xml
      2013-01-03 00:36:30 UTC (rev 8806)
@@ -650,25 +650,34 @@
       <disambig postag="V avoir inf"/>
     </rule>
   </rulegroup>
-  <rule name="RP : Det + Adj ambigu + Nom" id="RP-D_J_AMBIG_N">
+  <rule name="RP : Det + Nom + Adj" id="RP-D_N_AMBIG_J">
     <pattern>
       <unify>  <feature id="number"/> <feature id="gender"/>
         <token postag="D.*" postag_regexp="yes"/>
+        <token postag="N.*" postag_regexp="yes"/>
+        <token postag="J.*|V ppa .*" postag_regexp="yes"/>
+      </unify>
+    </pattern>
+    <disambig action="filterall"/>
+  </rule>
+  <rule name="RP : Det + Adj + Nom" id="RP-D_J_AMBIG_N">
+    <pattern>
+      <unify>  <feature id="number"/> <feature id="gender"/>
+        <token postag="D.*" postag_regexp="yes"/>
         <token postag="J.*" postag_regexp="yes"/>
         <token postag="N.*" postag_regexp="yes"><exception 
regexp="yes">dans|envers|mais</exception><exception inflected="yes" 
regexp="yes">avoir|être</exception></token>
       </unify>
     </pattern>
-    <disambig action="unify"/>
+    <disambig action="filterall"/>
   </rule>
-  
   <rule name="RP : Det + Nom ambigu" id="RP-D_N_AMBIG">
     <pattern>
       <unify> <feature id="number"/> <feature id="gender"/>
         <token postag="D.*" postag_regexp="yes"/>
-        <token postag="N.*" postag_regexp="yes"><exception 
regexp="yes">a|dans|envers</exception></token>
+        <token postag="N.*|J.*" postag_regexp="yes"><exception 
regexp="yes">a|dans|envers</exception></token>
       </unify>
     </pattern>
-    <disambig action="unify"/>
+    <disambig action="filterall"/>
     <example type="untouched">Ils sont parti.</example>
     <example type="ambiguous" inputform="livre[livre/N e s,livrer/V imp pres 2 
s,livrer/V ind pres 1 s,livrer/V ind pres 3 s,livrer/V sub pres 1 s,livrer/V 
sub pres 3 s]" outputform="livre[livre/N e s]">Il a enfin publié son 
<marker>livre</marker>.</example>
   </rule>

Modified: 
trunk/JLanguageTool/src/main/resources/org/languagetool/resource/gl/disambiguation.xml
===================================================================
--- 
trunk/JLanguageTool/src/main/resources/org/languagetool/resource/gl/disambiguation.xml
      2013-01-02 23:04:39 UTC (rev 8805)
+++ 
trunk/JLanguageTool/src/main/resources/org/languagetool/resource/gl/disambiguation.xml
      2013-01-03 00:36:30 UTC (rev 8806)
@@ -46,12 +46,11 @@
                 <feature id="number"></feature>
 
                 <token postag="D.*" postag_regexp="yes"></token>
-                <token postag="N.*" postag_regexp="yes"></token>
+                <token postag="N.*" postag_regexp="yes"><exception 
regexp="yes">gran|grande</exception></token>
                 <token postag="A.*" postag_regexp="yes"></token>
             </unify>
         </pattern>
-
-        <disambig action="unify"></disambig>
+        <disambig action="filterall"></disambig>
     </rule>
 
     <rule id="DET_SUBST" name="det|pron + subst|adx (det + subst|adx)">

Modified: 
trunk/JLanguageTool/src/test/java/org/languagetool/rules/ca/ComplexAdjectiveConcordanceRuleTest.java
===================================================================
--- 
trunk/JLanguageTool/src/test/java/org/languagetool/rules/ca/ComplexAdjectiveConcordanceRuleTest.java
        2013-01-02 23:04:39 UTC (rev 8805)
+++ 
trunk/JLanguageTool/src/test/java/org/languagetool/rules/ca/ComplexAdjectiveConcordanceRuleTest.java
        2013-01-03 00:36:30 UTC (rev 8806)
@@ -43,6 +43,7 @@
        public void testRule() throws IOException { 
 
                // correct sentences:
+               assertCorrect("La part superior esquerra");
                assertCorrect("I sí, la crisi serà llarga, molt llarga, potser 
eterna.");
                assertCorrect("El rei ha trobat l'excusa i l'explicació 
adequada.");
                assertCorrect("té una manera de jugar aquestes gires tan 
femenina");

Modified: 
trunk/JLanguageTool/src/test/java/org/languagetool/tagging/disambiguation/rules/fr/FrenchRuleDisambiguatorTest.java
===================================================================
--- 
trunk/JLanguageTool/src/test/java/org/languagetool/tagging/disambiguation/rules/fr/FrenchRuleDisambiguatorTest.java
 2013-01-02 23:04:39 UTC (rev 8805)
+++ 
trunk/JLanguageTool/src/test/java/org/languagetool/tagging/disambiguation/rules/fr/FrenchRuleDisambiguatorTest.java
 2013-01-03 00:36:30 UTC (rev 8806)
@@ -91,7 +91,7 @@
      assertEquals(sent.getAnnotations(), "Disambiguator log: \n\n" +
                "RP-D_N_AMBIG: Les[le/D e p,les/R pers obj 3 p] -> Les[le/D e 
p]"+
              "\nRB-LE_LA_LES: Les[le/D e p] -> Les[le/D e p]" +
-               "\n\nRP-D_N_AMBIG: avions[avoir/V avoir ind impa 1 p,avion/N m 
p,avoir/SENT_END] -> avions[avion/N m p,avion/SENT_END]\n");
+               "\n\nRP-D_N_AMBIG: avions[avoir/V avoir ind impa 1 p,avion/N m 
p,avoir/SENT_END] -> avions[avion/N m p,avoir/SENT_END]\n");
   }
   
 }

This was sent by the SourceForge.net collaborative development platform, the 
world's largest Open Source development site.


------------------------------------------------------------------------------
Master Visual Studio, SharePoint, SQL, ASP.NET, C# 2012, HTML5, CSS,
MVC, Windows 8 Apps, JavaScript and much more. Keep your skills current
with LearnDevNow - 3,200 step-by-step video tutorials by Microsoft
MVPs and experts. ON SALE this month only -- learn more at:
http://p.sf.net/sfu/learnmore_122712
_______________________________________________
Languagetool-commits mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/languagetool-commits

Reply via email to