Revision: 7259 http://languagetool.svn.sourceforge.net/languagetool/?rev=7259&view=rev Author: milek_pl Date: 2012-06-06 08:20:28 +0000 (Wed, 06 Jun 2012) Log Message: ----------- some progress on the unifier
Modified Paths: -------------- trunk/JLanguageTool/.classpath trunk/JLanguageTool/src/java/org/languagetool/rules/patterns/Unifier.java trunk/JLanguageTool/src/test/org/languagetool/rules/patterns/UnifierTest.java Modified: trunk/JLanguageTool/.classpath =================================================================== --- trunk/JLanguageTool/.classpath 2012-06-06 07:18:42 UTC (rev 7258) +++ trunk/JLanguageTool/.classpath 2012-06-06 08:20:28 UTC (rev 7259) @@ -9,7 +9,7 @@ </attributes> </classpathentry> <classpathentry kind="src" path="libs/native-lib"/> - <classpathentry kind="lib" path="libs/build/junit.jar"> + <classpathentry kind="lib" path="libs/build/junit.jar" sourcepath="C:/Users/user/.m2/repository/junit/junit/4.8.2/junit-4.8.2-sources.jar"> <attributes> <attribute name="org.eclipse.jdt.launching.CLASSPATH_ATTR_LIBRARY_PATH_ENTRY" value="JLanguageTool/libs/native-lib"/> </attributes> Modified: trunk/JLanguageTool/src/java/org/languagetool/rules/patterns/Unifier.java =================================================================== --- trunk/JLanguageTool/src/java/org/languagetool/rules/patterns/Unifier.java 2012-06-06 07:18:42 UTC (rev 7258) +++ trunk/JLanguageTool/src/java/org/languagetool/rules/patterns/Unifier.java 2012-06-06 08:20:28 UTC (rev 7259) @@ -232,7 +232,10 @@ } else { if (readingsCounter<tokSequence.size()) { tokSequence.get(readingsCounter).addReading(aToken); - } /* else? */ + } else { + unifiedNext = false; + } + /* else? */ } tmpFeaturesFound = tokenFeaturesFound; } Modified: trunk/JLanguageTool/src/test/org/languagetool/rules/patterns/UnifierTest.java =================================================================== --- trunk/JLanguageTool/src/test/org/languagetool/rules/patterns/UnifierTest.java 2012-06-06 07:18:42 UTC (rev 7258) +++ trunk/JLanguageTool/src/test/org/languagetool/rules/patterns/UnifierTest.java 2012-06-06 08:20:28 UTC (rev 7259) @@ -26,6 +26,8 @@ import java.util.ArrayList; import junit.framework.TestCase; + +import org.junit.Test; import org.languagetool.AnalyzedToken; public class UnifierTest extends TestCase { @@ -183,8 +185,8 @@ uni.setEquivalence("gender", "masculine", mascElement); final AnalyzedToken sing1 = new AnalyzedToken("mały", "adj:sg:blahblah:m", "mały"); - final AnalyzedToken sing1a = new AnalyzedToken("mały", "adj:sg:blahblah:f", "mały"); - final AnalyzedToken sing1b = new AnalyzedToken("mały", "adj:pl:blahblah:m", "mały"); + final AnalyzedToken sing1a = new AnalyzedToken("mała", "adj:sg:blahblah:f", "mały"); + final AnalyzedToken sing1b = new AnalyzedToken("małe", "adj:pl:blahblah:m", "mały"); final AnalyzedToken sing2 = new AnalyzedToken("człowiek", "subst:sg:blahblah:m", "człowiek"); final Map<String, List<String>> equiv = new HashMap<String, List<String>>(); @@ -203,6 +205,7 @@ } // checks if all tokens share the same set of features to be unified + @Test public void testMultipleFeats() { final Unifier uni = new Unifier(); final Element sgElement = new Element("", false, false, false); @@ -281,4 +284,109 @@ uni.reset(); } + @Test + public void testNegation() { + final Unifier uni = new Unifier(); + final Element sgElement = new Element("", false, false, false); + sgElement.setPosElement(".*[\\.:]sg:.*", true, false); + uni.setEquivalence("number", "singular", sgElement); + final Element plElement = new Element("", false, false, false); + plElement.setPosElement(".*[\\.:]pl:.*", true, false); + uni.setEquivalence("number", "plural", plElement); + final Element femElement = new Element("", false, false, false); + femElement.setPosElement(".*:f", true, false); + uni.setEquivalence("gender", "feminine", femElement); + final Element mascElement = new Element("", false, false, false); + mascElement.setPosElement(".*:m", true, false); + uni.setEquivalence("gender", "masculine", mascElement); + + //Latin adjectives + final AnalyzedToken sing_masc = new AnalyzedToken("parvus", "adj:sg:blahblah:m", "parvus"); + final AnalyzedToken plur_masc = new AnalyzedToken("parvi", "adj:sg:blahblah:m", "parvus"); + final AnalyzedToken plur_fem = new AnalyzedToken("parvae", "adj:pl:blahblah:f", "parvus"); + final AnalyzedToken sing_fem = new AnalyzedToken("parva", "adj:sg:blahblah:f", "parvus"); + + //Let's pretend Latin has determiners + final AnalyzedToken det_sing_fem = new AnalyzedToken("una", "det:sg:blahblah:f", "unus"); + final AnalyzedToken det_plur_fem = new AnalyzedToken("unae", "det:sg:blahblah:f", "unus"); + final AnalyzedToken det_sing_masc = new AnalyzedToken("unus", "det:sg:blahblah:m", "unus"); + final AnalyzedToken det_plur_masc = new AnalyzedToken("uni", "det:sg:blahblah:m", "unus"); + + //and nouns + final AnalyzedToken subst_sing_fem = new AnalyzedToken("discrepatio", "subst:sg:blahblah:f", "discrepatio"); + final AnalyzedToken subst_plur_fem = new AnalyzedToken("discrepationes", "subst:sg:blahblah:f", "discrepatio"); + final AnalyzedToken subst_sing_masc = new AnalyzedToken("homo", "sg:sg:blahblah:m", "homo"); + final AnalyzedToken subst_plur_masc = new AnalyzedToken("homines", "sg:sg:blahblah:m", "homo"); + + //now we should have 4x4x4 combinations... + + final Map<String, List<String>> equiv = new HashMap<String, List<String>>(); + equiv.put("number", null); + equiv.put("gender", null); + + boolean satisfied = uni.isSatisfied(det_sing_masc, equiv); + uni.startUnify(); + satisfied &= uni.isSatisfied(sing_masc, equiv); + uni.startNextToken(); + satisfied &= uni.isSatisfied(subst_sing_masc, equiv); + uni.startNextToken(); + assertEquals(true, satisfied); + uni.reset(); + + //now test the simplified interface + satisfied = true; //this must be true to start with... + satisfied &= uni.isUnified(det_sing_masc, equiv, false, true); + satisfied &= uni.isUnified(sing_masc, equiv, false, true); + satisfied &= uni.isUnified(subst_sing_masc, equiv, false, true); + assertEquals(true, satisfied); + uni.reset(); + + //now let's negate this + + //traditional way + uni.setNegation(true); + satisfied = uni.isSatisfied(det_sing_masc, equiv); + uni.startUnify(); + satisfied &= uni.isSatisfied(sing_masc, equiv); + uni.startNextToken(); + satisfied &= uni.isSatisfied(subst_sing_masc, equiv); + uni.startNextToken(); + assertEquals(false, satisfied); + uni.reset(); + + //now test the simplified interface + satisfied = true; //this must be true to start with... + satisfied &= uni.isUnified(det_sing_masc, equiv, true, true); + satisfied &= uni.isUnified(sing_masc, equiv, true, true); + satisfied &= uni.isUnified(subst_sing_masc, equiv, true, true); + assertEquals(false, satisfied); + uni.reset(); + + //OK, so let's test it with something that is not correct + satisfied = true; //this must be true to start with... + satisfied &= uni.isUnified(det_sing_fem, equiv, true, true); + satisfied &= uni.isUnified(sing_masc, equiv, true, true); + satisfied &= uni.isUnified(subst_sing_masc, equiv, true, true); + assertEquals(true, satisfied); + uni.reset(); + + //OK, so let's test it with something that is not correct + satisfied = true; //this must be true to start with... + satisfied &= uni.isUnified(det_sing_masc, equiv, true, true); + satisfied &= uni.isUnified(sing_fem, equiv, true, true); + satisfied &= uni.isUnified(subst_sing_masc, equiv, true, true); + assertEquals(true, satisfied); + uni.reset(); + + //OK, so let's test it with something that is not correct + /* I think there's an error here -- the negation is applied too soon... + satisfied = true; //this must be true to start with... + satisfied &= uni.isUnified(det_sing_masc, equiv, true, true); + satisfied &= uni.isUnified(sing_masc, equiv, true, true); + satisfied &= uni.isUnified(subst_sing_fem, equiv, true, true); + assertEquals(true, satisfied); + uni.reset(); + */ + } + } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. ------------------------------------------------------------------------------ Live Security Virtual Conference Exclusive live event will cover all the ways today's security and threat landscape has changed and how IT managers can respond. Discussions will include endpoint security, mobile security and the latest in malware threats. http://www.accelacomm.com/jaw/sfrnl04242012/114/50122263/ _______________________________________________ Languagetool-cvs mailing list Languagetool-cvs@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/languagetool-cvs