Revision: 8806
http://languagetool.svn.sourceforge.net/languagetool/?rev=8806&view=rev
Author: jaumeortola
Date: 2013-01-03 00:36:30 +0000 (Thu, 03 Jan 2013)
Log Message:
-----------
Change in "unification": readings that do not match their pattern element
are no longer taken into account during "unification".
Small changes in the French and Galician disambiguation files to avoid errors in
the tests.
Modified Paths:
--------------
trunk/JLanguageTool/src/main/java/org/languagetool/rules/patterns/AbstractPatternRule.java
trunk/JLanguageTool/src/main/java/org/languagetool/rules/patterns/Unifier.java
trunk/JLanguageTool/src/main/resources/org/languagetool/resource/ca/disambiguation.xml
trunk/JLanguageTool/src/main/resources/org/languagetool/resource/fr/disambiguation.xml
trunk/JLanguageTool/src/main/resources/org/languagetool/resource/gl/disambiguation.xml
trunk/JLanguageTool/src/test/java/org/languagetool/rules/ca/ComplexAdjectiveConcordanceRuleTest.java
trunk/JLanguageTool/src/test/java/org/languagetool/tagging/disambiguation/rules/fr/FrenchRuleDisambiguatorTest.java
Modified:
trunk/JLanguageTool/src/main/java/org/languagetool/rules/patterns/AbstractPatternRule.java
===================================================================
---
trunk/JLanguageTool/src/main/java/org/languagetool/rules/patterns/AbstractPatternRule.java
2013-01-02 23:04:39 UTC (rev 8805)
+++
trunk/JLanguageTool/src/main/java/org/languagetool/rules/patterns/AbstractPatternRule.java
2013-01-03 00:36:30 UTC (rev 8806)
@@ -195,18 +195,19 @@
final boolean lastReading, final AnalyzedToken matchToken,
final Element elem) {
boolean thisMatched = matched;
+ boolean elemIsMatched= elem.isMatched(matchToken);
if (testUnification) {
if (matched && elem.isUnified()) {
if (elem.isUniNegated()) {
thisMatched = !(thisMatched && unifier.isUnified(matchToken,
elem.getUniFeatures(),
- lastReading));
+ lastReading,elemIsMatched));
} else {
if (elem.isLastInUnification()) {
thisMatched = thisMatched &&
unifier.isUnified(matchToken, elem.getUniFeatures(),
- lastReading);
+ lastReading,elemIsMatched);
} else { //we don't care about the truth value, let it run
unifier.isUnified(matchToken, elem.getUniFeatures(),
- lastReading);
+ lastReading, elemIsMatched);
}
}
Modified:
trunk/JLanguageTool/src/main/java/org/languagetool/rules/patterns/Unifier.java
===================================================================
---
trunk/JLanguageTool/src/main/java/org/languagetool/rules/patterns/Unifier.java
2013-01-02 23:04:39 UTC (rev 8805)
+++
trunk/JLanguageTool/src/main/java/org/languagetool/rules/patterns/Unifier.java
2013-01-03 00:36:30 UTC (rev 8806)
@@ -331,15 +331,20 @@
*
* @param matchToken
* {@link AnalyzedToken} token to unify
- * @param lastReading
+ * @param lastReading
* true when the matchToken is the last reading in the
* {@link AnalyzedTokenReadings}
+ * @param isMatched
+ * true if the reading matches the element in the pattern rule,
+ * otherwise the reading is not considered in the unification
* @return True if the tokens in the sequence are unified.
*/
public final boolean isUnified(final AnalyzedToken matchToken,
- final Map<String, List<String>> uFeatures, final boolean lastReading) {
- if (inUnification) {
- uniMatched |= isSatisfied(matchToken, uFeatures);
+ final Map<String, List<String>> uFeatures, final boolean lastReading,
final boolean isMatched) {
+ if (inUnification) {
+ if (isMatched) {
+ uniMatched |= isSatisfied(matchToken, uFeatures);
+ }
uniAllMatched = uniMatched;
if (lastReading) {
startNextToken();
@@ -348,7 +353,9 @@
}
return uniAllMatched;
}
- isSatisfied(matchToken, uFeatures);
+ if (isMatched) {
+ isSatisfied(matchToken, uFeatures);
+ }
if (lastReading) {
inUnification = true;
uniMatched = false;
@@ -356,6 +363,11 @@
}
return true;
}
+
+ public final boolean isUnified(final AnalyzedToken matchToken,
+ final Map<String, List<String>> uFeatures, final boolean
lastReading) {
+ return this.isUnified(matchToken, uFeatures, lastReading, true);
+ }
/**
* Used for getting a unified sequence in case when simple test method
Modified:
trunk/JLanguageTool/src/main/resources/org/languagetool/resource/ca/disambiguation.xml
===================================================================
---
trunk/JLanguageTool/src/main/resources/org/languagetool/resource/ca/disambiguation.xml
2013-01-02 23:04:39 UTC (rev 8805)
+++
trunk/JLanguageTool/src/main/resources/org/languagetool/resource/ca/disambiguation.xml
2013-01-03 00:36:30 UTC (rev 8806)
@@ -34,6 +34,7 @@
</equivalence>
</unification>
<rulegroup id="PUNTUACIO" name="signes de puntuació">
+
<rule>
<pattern>
<token regexp="yes">[.;:!?—–…()\[\]\-]</token>
@@ -3104,86 +3105,47 @@
<disambig action="filter" postag="DI.*"></disambig>
</rule>
</rulegroup>
- <rulegroup id="DAAN" name="Det + AdjNom + Adj + Nom">
<!-- La característica mitja canya
problema: la part superior esquerra -->
- <!-- <rule>
- <pattern>
- <unify>
- <feature id="nombre"/>
- <feature id="genere"/>
- <marker>
- <token postag="D[^R].*" postag_regexp="yes"/>
- <token postag="A.*" postag_regexp="yes"/>
- <token postag="A.*" postag_regexp="yes"/>
- <token postag="N.*" postag_regexp="yes"/>
- </marker>
- </unify>
- </pattern>
- <disambig action="filterall" />
- </rule> -->
- <rule>
- <pattern>
- <unify>
- <feature id="nombre"/>
- <token postag="D[^R].[FC].*" postag_regexp="yes"/>
- <marker>
- <and>
- <token postag="A..[FC].*" postag_regexp="yes"/>
- <token postag="N.*" postag_regexp="yes"/>
- </and>
- </marker>
- <token postag="A..[FC].*" postag_regexp="yes"/>
- <token postag="N.[FC].*" postag_regexp="yes"/>
- </unify>
- </pattern>
- <disambig action="filter" postag="A..[FC].*"></disambig>
- </rule>
- <rule>
- <pattern>
- <unify>
- <feature id="nombre"/>
- <token postag="D[^R].[MC].*" postag_regexp="yes"/>
- <marker>
- <and>
- <token postag="A..[MC].*" postag_regexp="yes"/>
- <token postag="N.*" postag_regexp="yes"/>
- </and>
- </marker>
- <token postag="A..[MC].*" postag_regexp="yes"/>
- <token postag="N.[MC].*" postag_regexp="yes"/>
- </unify>
- </pattern>
- <disambig action="filter" postag="A..[MC].*"></disambig>
- </rule>
- <rule>
- <pattern>
- <unify>
- <feature id="nombre"/>
- <feature id="genere"/>
- <token postag="D[^R].*" postag_regexp="yes"/>
+ <rule id="DAAN" name="det + adj + adj + nom">
+ <pattern>
+ <unify>
+ <feature id="nombre"/>
+ <feature id="genere"/>
+ <marker>
+ <token postag="D[^R].*" postag_regexp="yes"/>
<token postag="A.*" postag_regexp="yes"/>
- <marker>
- <and>
- <token postag="N.*" postag_regexp="yes"/>
- <token postag="D.*" postag_regexp="yes"
inflected="yes">numeral</token>
- </and>
- </marker>
+ <token postag="A.*" postag_regexp="yes"/>
+ <token postag="N.*" postag_regexp="yes"/>
+ </marker>
+ </unify>
+ </pattern>
+ <disambig action="filterall" />
+ </rule>
+ <rule id="DANumN" name="det + adj + numeral + nom">
+ <pattern>
+ <unify>
+ <feature id="nombre"/>
+ <feature id="genere"/>
+ <marker>
+ <token postag="D[^R].*" postag_regexp="yes"/>
+ <token postag="A.*" postag_regexp="yes"/>
+ <token postag="DN.*" postag_regexp="yes"/>
<token postag="N.*" postag_regexp="yes"/>
- </unify>
- </pattern>
- <disambig action="filter" postag="D.*"></disambig>
- </rule>
- </rulegroup>
+ </marker>
+ </unify>
+ </pattern>
+ <disambig action="filterall"/>
+ </rule>
<rulegroup id="DPAN" name="Det + Possessiu + AdjNom + Nom">
<!-- La seva mala intenció -->
<rule>
<pattern>
<marker>
- <token postag="D[^R].[MC][SN].*" postag_regexp="yes"/>
- <token postag="PX.[MC][SN].*" postag_regexp="yes"/>
- <token postag="A..[MC][SN].*|V.P.*SM" postag_regexp="yes"/>
- <token postag="N.[MC][SN].*" postag_regexp="yes"/>
+ <token postag="D[^R].[MC][SN].*" postag_regexp="yes"/>
+ <token postag="PX.[MC][SN].*" postag_regexp="yes"/>
+ <token postag="A..[MC][SN].*|V.P.*SM"
postag_regexp="yes"/>
+ <token postag="N.[MC][SN].*" postag_regexp="yes"/>
</marker>
</pattern>
<disambig action="filterall"/>
@@ -3191,10 +3153,10 @@
<rule>
<pattern>
<marker>
- <token postag="D[^R].[FC][SN].*" postag_regexp="yes"/>
- <token postag="PX.[FC][SN].*" postag_regexp="yes"/>
- <token postag="A..[FC][SN].*|V.P.*SF" postag_regexp="yes"/>
- <token postag="N.[FC][SN].*" postag_regexp="yes"><exception
regexp="yes">era|mira|queda</exception></token>
+ <token postag="D[^R].[FC][SN].*" postag_regexp="yes"/>
+ <token postag="PX.[FC][SN].*" postag_regexp="yes"/>
+ <token postag="A..[FC][SN].*|V.P.*SF"
postag_regexp="yes"/>
+ <token postag="N.[FC][SN].*"
postag_regexp="yes"><exception regexp="yes">era|mira|queda</exception></token>
</marker>
</pattern>
<disambig action="filterall"/>
@@ -3202,10 +3164,10 @@
<rule>
<pattern>
<marker>
- <token postag="D[^R].[MC][PN].*" postag_regexp="yes"/>
- <token postag="PX.[MC][PN].*" postag_regexp="yes"/>
- <token postag="A..[MC][PN].*|V.P.*PM" postag_regexp="yes"/>
- <token postag="N.[MC][PN].*" postag_regexp="yes"/>
+ <token postag="D[^R].[MC][PN].*" postag_regexp="yes"/>
+ <token postag="PX.[MC][PN].*" postag_regexp="yes"/>
+ <token postag="A..[MC][PN].*|V.P.*PM"
postag_regexp="yes"/>
+ <token postag="N.[MC][PN].*" postag_regexp="yes"/>
</marker>
</pattern>
<disambig action="filterall"/>
@@ -3213,10 +3175,10 @@
<rule>
<pattern>
<marker>
- <token postag="D[^R].[FC][PN].*" postag_regexp="yes"/>
- <token postag="PX.[FC][PN].*" postag_regexp="yes"/>
- <token postag="A..[FC][PN].*|V.P.*PF" postag_regexp="yes"/>
- <token postag="N.[FC][PN].*" postag_regexp="yes"/>
+ <token postag="D[^R].[FC][PN].*" postag_regexp="yes"/>
+ <token postag="PX.[FC][PN].*" postag_regexp="yes"/>
+ <token postag="A..[FC][PN].*|V.P.*PF"
postag_regexp="yes"/>
+ <token postag="N.[FC][PN].*" postag_regexp="yes"/>
</marker>
</pattern>
<disambig action="filterall"/>
@@ -5905,13 +5867,13 @@
<token postag="N.*|A.*" postag_regexp="yes"></token>
<marker>
<and>
- <token postag="N.*" postag_regexp="yes"></token>
- <token postag="SP.*" postag_regexp="yes"></token>
+ <token postag="N.*" postag_regexp="yes"/>
+ <token postag="SPS00"/>
</and>
</marker>
</pattern>
<disambig action="filter" postag="[^N].*"></disambig>
- </rule>
+ </rule>
<rule id="haver_de" name="haver de + infinitiu">
<pattern>
<marker>
Modified:
trunk/JLanguageTool/src/main/resources/org/languagetool/resource/fr/disambiguation.xml
===================================================================
---
trunk/JLanguageTool/src/main/resources/org/languagetool/resource/fr/disambiguation.xml
2013-01-02 23:04:39 UTC (rev 8805)
+++
trunk/JLanguageTool/src/main/resources/org/languagetool/resource/fr/disambiguation.xml
2013-01-03 00:36:30 UTC (rev 8806)
@@ -650,25 +650,34 @@
<disambig postag="V avoir inf"/>
</rule>
</rulegroup>
- <rule name="RP : Det + Adj ambigu + Nom" id="RP-D_J_AMBIG_N">
+ <rule name="RP : Det + Nom + Adj" id="RP-D_N_AMBIG_J">
<pattern>
<unify> <feature id="number"/> <feature id="gender"/>
<token postag="D.*" postag_regexp="yes"/>
+ <token postag="N.*" postag_regexp="yes"/>
+ <token postag="J.*|V ppa .*" postag_regexp="yes"/>
+ </unify>
+ </pattern>
+ <disambig action="filterall"/>
+ </rule>
+ <rule name="RP : Det + Adj + Nom" id="RP-D_J_AMBIG_N">
+ <pattern>
+ <unify> <feature id="number"/> <feature id="gender"/>
+ <token postag="D.*" postag_regexp="yes"/>
<token postag="J.*" postag_regexp="yes"/>
<token postag="N.*" postag_regexp="yes"><exception
regexp="yes">dans|envers|mais</exception><exception inflected="yes"
regexp="yes">avoir|être</exception></token>
</unify>
</pattern>
- <disambig action="unify"/>
+ <disambig action="filterall"/>
</rule>
-
<rule name="RP : Det + Nom ambigu" id="RP-D_N_AMBIG">
<pattern>
<unify> <feature id="number"/> <feature id="gender"/>
<token postag="D.*" postag_regexp="yes"/>
- <token postag="N.*" postag_regexp="yes"><exception
regexp="yes">a|dans|envers</exception></token>
+ <token postag="N.*|J.*" postag_regexp="yes"><exception
regexp="yes">a|dans|envers</exception></token>
</unify>
</pattern>
- <disambig action="unify"/>
+ <disambig action="filterall"/>
<example type="untouched">Ils sont parti.</example>
<example type="ambiguous" inputform="livre[livre/N e s,livrer/V imp pres 2
s,livrer/V ind pres 1 s,livrer/V ind pres 3 s,livrer/V sub pres 1 s,livrer/V
sub pres 3 s]" outputform="livre[livre/N e s]">Il a enfin publié son
<marker>livre</marker>.</example>
</rule>
Modified:
trunk/JLanguageTool/src/main/resources/org/languagetool/resource/gl/disambiguation.xml
===================================================================
---
trunk/JLanguageTool/src/main/resources/org/languagetool/resource/gl/disambiguation.xml
2013-01-02 23:04:39 UTC (rev 8805)
+++
trunk/JLanguageTool/src/main/resources/org/languagetool/resource/gl/disambiguation.xml
2013-01-03 00:36:30 UTC (rev 8806)
@@ -46,12 +46,11 @@
<feature id="number"></feature>
<token postag="D.*" postag_regexp="yes"></token>
- <token postag="N.*" postag_regexp="yes"></token>
+ <token postag="N.*" postag_regexp="yes"><exception
regexp="yes">gran|grande</exception></token>
<token postag="A.*" postag_regexp="yes"></token>
</unify>
</pattern>
-
- <disambig action="unify"></disambig>
+ <disambig action="filterall"></disambig>
</rule>
<rule id="DET_SUBST" name="det|pron + subst|adx (det + subst|adx)">
Modified:
trunk/JLanguageTool/src/test/java/org/languagetool/rules/ca/ComplexAdjectiveConcordanceRuleTest.java
===================================================================
---
trunk/JLanguageTool/src/test/java/org/languagetool/rules/ca/ComplexAdjectiveConcordanceRuleTest.java
2013-01-02 23:04:39 UTC (rev 8805)
+++
trunk/JLanguageTool/src/test/java/org/languagetool/rules/ca/ComplexAdjectiveConcordanceRuleTest.java
2013-01-03 00:36:30 UTC (rev 8806)
@@ -43,6 +43,7 @@
public void testRule() throws IOException {
// correct sentences:
+ assertCorrect("La part superior esquerra");
assertCorrect("I sí, la crisi serà llarga, molt llarga, potser
eterna.");
assertCorrect("El rei ha trobat l'excusa i l'explicació
adequada.");
assertCorrect("té una manera de jugar aquestes gires tan
femenina");
Modified:
trunk/JLanguageTool/src/test/java/org/languagetool/tagging/disambiguation/rules/fr/FrenchRuleDisambiguatorTest.java
===================================================================
---
trunk/JLanguageTool/src/test/java/org/languagetool/tagging/disambiguation/rules/fr/FrenchRuleDisambiguatorTest.java
2013-01-02 23:04:39 UTC (rev 8805)
+++
trunk/JLanguageTool/src/test/java/org/languagetool/tagging/disambiguation/rules/fr/FrenchRuleDisambiguatorTest.java
2013-01-03 00:36:30 UTC (rev 8806)
@@ -91,7 +91,7 @@
assertEquals(sent.getAnnotations(), "Disambiguator log: \n\n" +
"RP-D_N_AMBIG: Les[le/D e p,les/R pers obj 3 p] -> Les[le/D e
p]"+
"\nRB-LE_LA_LES: Les[le/D e p] -> Les[le/D e p]" +
- "\n\nRP-D_N_AMBIG: avions[avoir/V avoir ind impa 1 p,avion/N m
p,avoir/SENT_END] -> avions[avion/N m p,avion/SENT_END]\n");
+ "\n\nRP-D_N_AMBIG: avions[avoir/V avoir ind impa 1 p,avion/N m
p,avoir/SENT_END] -> avions[avion/N m p,avoir/SENT_END]\n");
}
}
This was sent by the SourceForge.net collaborative development platform, the
world's largest Open Source development site.
------------------------------------------------------------------------------
Master Visual Studio, SharePoint, SQL, ASP.NET, C# 2012, HTML5, CSS,
MVC, Windows 8 Apps, JavaScript and much more. Keep your skills current
with LearnDevNow - 3,200 step-by-step video tutorials by Microsoft
MVPs and experts. ON SALE this month only -- learn more at:
http://p.sf.net/sfu/learnmore_122712
_______________________________________________
Languagetool-commits mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/languagetool-commits