Revision: 7268 http://languagetool.svn.sourceforge.net/languagetool/?rev=7268&view=rev Author: milek_pl Date: 2012-06-06 19:19:36 +0000 (Wed, 06 Jun 2012) Log Message: ----------- another change in the unifier; it should now be consistent
Modified Paths: -------------- trunk/JLanguageTool/src/java/org/languagetool/rules/patterns/AbstractPatternRule.java trunk/JLanguageTool/src/java/org/languagetool/rules/patterns/Element.java trunk/JLanguageTool/src/java/org/languagetool/rules/patterns/PatternRuleHandler.java trunk/JLanguageTool/src/java/org/languagetool/rules/patterns/Unifier.java trunk/JLanguageTool/src/java/org/languagetool/tagging/disambiguation/rules/DisambiguationRuleLoader.java trunk/JLanguageTool/src/rules/xx/grammar.xml trunk/JLanguageTool/src/test/org/languagetool/rules/patterns/UnifierTest.java Modified: trunk/JLanguageTool/src/java/org/languagetool/rules/patterns/AbstractPatternRule.java =================================================================== --- trunk/JLanguageTool/src/java/org/languagetool/rules/patterns/AbstractPatternRule.java 2012-06-06 19:08:18 UTC (rev 7267) +++ trunk/JLanguageTool/src/java/org/languagetool/rules/patterns/AbstractPatternRule.java 2012-06-06 19:19:36 UTC (rev 7268) @@ -192,31 +192,37 @@ } protected boolean testUnificationAndGroups(final boolean matched, - final boolean lastReading, final AnalyzedToken matchToken, - final Element elem) { - boolean thisMatched = matched; - if (testUnification) { - if (matched && elem.isUnified()) { - if (elem.isUniNegated()) { - thisMatched = !(thisMatched && unifier.isUnified(matchToken, elem.getUniFeatures(), - lastReading)); - } else { - thisMatched = thisMatched && unifier.isUnified(matchToken, elem.getUniFeatures(), - lastReading); - } - } - if (thisMatched && getUnified) { - unifiedTokens = unifier.getFinalUnified(); - } - if (!elem.isUnified()) { - unifier.reset(); - } - } - elem.addMemberAndGroup(matchToken); - if (lastReading) { - thisMatched &= elem.checkAndGroup(thisMatched); - } - return thisMatched; + final boolean lastReading, final AnalyzedToken matchToken, + final Element elem) { + boolean thisMatched = matched; + if (testUnification) { + if (matched && elem.isUnified()) { + if (elem.isUniNegated()) { + 
thisMatched = !(thisMatched && unifier.isUnified(matchToken, elem.getUniFeatures(), + lastReading)); + } else { + if (elem.isLastInUnification()) { + thisMatched = thisMatched && unifier.isUnified(matchToken, elem.getUniFeatures(), + lastReading); + } else { //we don't care about the truth value, let it run + unifier.isUnified(matchToken, elem.getUniFeatures(), + lastReading); + } + + } + } + if (thisMatched && getUnified) { + unifiedTokens = unifier.getFinalUnified(); + } + if (!elem.isUnified()) { + unifier.reset(); + } + } + elem.addMemberAndGroup(matchToken); + if (lastReading) { + thisMatched &= elem.checkAndGroup(thisMatched); + } + return thisMatched; } Modified: trunk/JLanguageTool/src/java/org/languagetool/rules/patterns/Element.java =================================================================== --- trunk/JLanguageTool/src/java/org/languagetool/rules/patterns/Element.java 2012-06-06 19:08:18 UTC (rev 7267) +++ trunk/JLanguageTool/src/java/org/languagetool/rules/patterns/Element.java 2012-06-06 19:19:36 UTC (rev 7268) @@ -140,7 +140,13 @@ private boolean posUnknown; + /** + * Set to true on tokens that close the unification block. + */ + private boolean isLastUnified; + + /** * Creates Element that is used to match tokens in the text. 
* * @param token @@ -784,6 +790,14 @@ return uniNegation; } + public final boolean isLastInUnification() { + return isLastUnified; + } + + public final void setLastInUnification() { + isLastUnified = true; + } + public final void setWhitespaceBefore(final boolean isWhite) { whitespaceBefore = isWhite; testWhitespace = true; Modified: trunk/JLanguageTool/src/java/org/languagetool/rules/patterns/PatternRuleHandler.java =================================================================== --- trunk/JLanguageTool/src/java/org/languagetool/rules/patterns/PatternRuleHandler.java 2012-06-06 19:08:18 UTC (rev 7267) +++ trunk/JLanguageTool/src/java/org/languagetool/rules/patterns/PatternRuleHandler.java 2012-06-06 19:19:36 UTC (rev 7268) @@ -277,8 +277,10 @@ //clear the features... equivalenceFeatures = new HashMap<String, List<String>>(); //set negation on the last token only! - if (uniNegation) { - elementList.get(elementList.size() - 1).setUniNegation(); + final int lastElement = elementList.size() - 1; + elementList.get(lastElement).setLastInUnification(); + if (uniNegation) { + elementList.get(lastElement).setUniNegation(); } } } Modified: trunk/JLanguageTool/src/java/org/languagetool/rules/patterns/Unifier.java =================================================================== --- trunk/JLanguageTool/src/java/org/languagetool/rules/patterns/Unifier.java 2012-06-06 19:08:18 UTC (rev 7267) +++ trunk/JLanguageTool/src/java/org/languagetool/rules/patterns/Unifier.java 2012-06-06 19:19:36 UTC (rev 7268) @@ -36,12 +36,6 @@ */ public class Unifier { - //TODO: add a possibility to negate some features but not all - /** - * Negates the meaning of unification just like negation in Element tokens. 
- */ - private boolean negation; - private boolean allFeatsIn; private int tokCnt; @@ -261,14 +255,6 @@ } tmpFeaturesFound = new ArrayList<Boolean>(featuresFound); } - - public final void setNegation(final boolean neg) { - negation = neg; - } - - public final boolean getNegation() { - return negation; - } /** * Resets after use of unification. Required. @@ -276,7 +262,6 @@ public final void reset() { equivalencesMatched.clear(); allFeatsIn = false; - negation = false; tokCnt = -1; featuresFound.clear(); tmpFeaturesFound.clear(); Modified: trunk/JLanguageTool/src/java/org/languagetool/tagging/disambiguation/rules/DisambiguationRuleLoader.java =================================================================== --- trunk/JLanguageTool/src/java/org/languagetool/tagging/disambiguation/rules/DisambiguationRuleLoader.java 2012-06-06 19:08:18 UTC (rev 7267) +++ trunk/JLanguageTool/src/java/org/languagetool/tagging/disambiguation/rules/DisambiguationRuleLoader.java 2012-06-06 19:19:36 UTC (rev 7268) @@ -375,8 +375,10 @@ inUnification = false; equivalenceFeatures = new HashMap<String, List<String>>(); //set negation on the last token only! 
- if (uniNegation) { - elementList.get(elementList.size() - 1).setUniNegation(); + final int lastElement = elementList.size() - 1; + elementList.get(lastElement).setLastInUnification(); + if (uniNegation) { + elementList.get(lastElement).setUniNegation(); } } else if (qName.equals(WD)) { addNewWord(wd.toString(), wdLemma, wdPos); Modified: trunk/JLanguageTool/src/rules/xx/grammar.xml =================================================================== --- trunk/JLanguageTool/src/rules/xx/grammar.xml 2012-06-06 19:08:18 UTC (rev 7267) +++ trunk/JLanguageTool/src/rules/xx/grammar.xml 2012-06-06 19:19:36 UTC (rev 7268) @@ -16,6 +16,22 @@ <token regexp="yes">\p{Ll}+</token> </equivalence> </unification> + <unification feature="gender"> + <equivalence type="masc"> + <token regexp="yes">.*_masc</token> + </equivalence> + <equivalence type="fem"> + <token regexp="yes">.*_fem</token> + </equivalence> + </unification> + <unification feature="number"> + <equivalence type="plur"> + <token regexp="yes">.*_plur_.*</token> + </equivalence> + <equivalence type="sing"> + <token regexp="yes">.*_sing_.*</token> + </equivalence> + </unification> <phrases> <phrase id="UNIFICATION_PHRASE"> <unify> @@ -340,5 +356,24 @@ <example type="incorrect">Dogs <marker>blah blah</marker></example> </rule> </rulegroup> + <rulegroup id="test_unification_with_negation" name="Test unification with negation"> + <rule> + <pattern> + <unify negate="yes"> + <feature id="gender"/> + <feature id="number"/> + <token><exception postag="SENT_START"/></token> + <token/> + <token/> + </unify> + </pattern> + <message>blabla</message> + <example type="correct">det_sing_masc</example> + <example type="incorrect"><marker>det_sing_masc adj_sing_masc subst_plur_fem</marker></example> + <example type="incorrect"><marker>det_sing_fem adj_sing_masc subst_plur_fem</marker></example> + <example type="incorrect"><marker>det_sing_fem adj_sing_fem subst_plur_fem</marker></example> + <example 
type="incorrect"><marker>det_plur_fem adj_sing_fem subst_plur_fem</marker></example> + </rule> + </rulegroup> </category> </rules> \ No newline at end of file Modified: trunk/JLanguageTool/src/test/org/languagetool/rules/patterns/UnifierTest.java =================================================================== --- trunk/JLanguageTool/src/test/org/languagetool/rules/patterns/UnifierTest.java 2012-06-06 19:08:18 UTC (rev 7267) +++ trunk/JLanguageTool/src/test/org/languagetool/rules/patterns/UnifierTest.java 2012-06-06 19:19:36 UTC (rev 7268) @@ -342,7 +342,6 @@ //now let's negate this //traditional way - uni.setNegation(true); satisfied = uni.isSatisfied(det_sing_masc, equiv); uni.startUnify(); satisfied &= uni.isSatisfied(sing_masc, equiv); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. ------------------------------------------------------------------------------ Live Security Virtual Conference Exclusive live event will cover all the ways today's security and threat landscape has changed and how IT managers can respond. Discussions will include endpoint security, mobile security and the latest in malware threats. http://www.accelacomm.com/jaw/sfrnl04242012/114/50122263/ _______________________________________________ Languagetool-cvs mailing list Languagetool-cvs@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/languagetool-cvs