Revision: 7760
          http://languagetool.svn.sourceforge.net/languagetool/?rev=7760&view=rev
Author:   jaumeortola
Date:     2012-07-27 16:59:12 +0000 (Fri, 27 Jul 2012)
Log Message:
-----------
[ca] Java rule improved: AccentuationCheckRule
Modified Paths: -------------- trunk/JLanguageTool/src/java/org/languagetool/rules/ca/AccentuationCheckRule.java trunk/JLanguageTool/src/resource/ca/catalan.dict trunk/JLanguageTool/src/resource/ca/catalan_synth.dict trunk/JLanguageTool/src/resource/ca/disambiguation.xml trunk/JLanguageTool/src/rules/ca/verb_senseaccent_nom_ambaccent.txt trunk/JLanguageTool/src/test/org/languagetool/rules/ca/AccentuationCheckRuleTest.java Modified: trunk/JLanguageTool/src/java/org/languagetool/rules/ca/AccentuationCheckRule.java =================================================================== --- trunk/JLanguageTool/src/java/org/languagetool/rules/ca/AccentuationCheckRule.java 2012-07-27 11:43:33 UTC (rev 7759) +++ trunk/JLanguageTool/src/java/org/languagetool/rules/ca/AccentuationCheckRule.java 2012-07-27 16:59:12 UTC (rev 7760) @@ -58,7 +58,7 @@ private static final Pattern ARTICLE_EL_FS = Pattern.compile("la|l'|La|L'"); private static final Pattern ARTICLE_EL_MP = Pattern.compile("els|Els"); private static final Pattern ARTICLE_EL_FP = Pattern.compile("les|Les"); -// private static final Pattern DETERMINANT = Pattern.compile("D[^R].*"); + private static final Pattern DETERMINANT = Pattern.compile("D[^R].*"); private static final Pattern DETERMINANT_MS = Pattern.compile("D[^R].[MC][SN].*"); private static final Pattern DETERMINANT_FS = Pattern.compile("D[^R].[FC][SN].*"); private static final Pattern DETERMINANT_MP = Pattern.compile("D[^R].[MC][PN].*"); @@ -72,7 +72,7 @@ private static final Pattern ADJECTIU_MP = Pattern.compile("AQ.[MC][PN].*|V.P..PM|PX.MP.*"); private static final Pattern ADJECTIU_FP = Pattern.compile("AQ.[FC][PN].*|V.P..PF|PX.FP.*"); private static final Pattern INFINITIU = Pattern.compile("V.N.*"); - private static final Pattern VERB_CONJUGAT = Pattern.compile("V.[^NGP].*"); + private static final Pattern VERB_CONJUGAT = Pattern.compile("V.[^NGP].*|_GV_"); private static final Pattern NOT_IN_PREV_TOKEN = Pattern.compile("VA.*|PP.*|P0.*|VSP.*"); private 
static final Pattern BEFORE_ADJECTIVE_MS = Pattern.compile("SPS00|D[^R].[MC][SN].*|V.[^NGP].*|PX.*"); private static final Pattern BEFORE_ADJECTIVE_FS = Pattern.compile("SPS00|D[^R].[FC][SN].*|V.[^NGP].*|PX.*"); @@ -146,8 +146,8 @@ // verb without accent -> noun with accent if (isRelevantWord) { - //amb renuncies - if (tokens[i-1].hasPosTag("SPS00") && !matchPostagRegexp(tokens[i],INFINITIU) ) + //amb renuncies DETERMINANT + if (tokens[i-1].hasPosTag("SPS00") && !matchPostagRegexp(tokens[i-1],DETERMINANT) && !matchPostagRegexp(tokens[i],INFINITIU) ) { replacement = relevantWords.get(token).getToken(); } @@ -157,13 +157,23 @@ && !token.equals("cantar") ) ||(matchPostagRegexp(tokens[i-1],DETERMINANT_MP) && matchPostagRegexp(relevantWords.get(token),NOM_MP)) ||(matchPostagRegexp(tokens[i-1],DETERMINANT_FS) && matchPostagRegexp(relevantWords.get(token),NOM_FS) - && !token.equals("venia") && !token.equals("tenia") && !token.equals("continua") && !token.equals("genera") ) + && !token.equals("venia") && !token.equals("tenia") && !token.equals("continua") && !token.equals("genera") && !token.equals("faria")) ||(matchPostagRegexp(tokens[i-1],DETERMINANT_FP) && matchPostagRegexp(relevantWords.get(token),NOM_FP)) ) ) { replacement = relevantWords.get(token).getToken(); } + //fumaré una faria (correct: fària) + else if ( i>2 && matchPostagRegexp(tokens[i-2],VERB_CONJUGAT) && + ((matchPostagRegexp(tokens[i-1],DETERMINANT_MS) && matchPostagRegexp(relevantWords.get(token),NOM_MS)) + ||(matchPostagRegexp(tokens[i-1],DETERMINANT_MP) && matchPostagRegexp(relevantWords.get(token),NOM_MP)) + ||(matchPostagRegexp(tokens[i-1],DETERMINANT_FS) && matchPostagRegexp(relevantWords.get(token),NOM_FS)) + ||(matchPostagRegexp(tokens[i-1],DETERMINANT_FP) && matchPostagRegexp(relevantWords.get(token),NOM_FP)) ) ) + { + replacement = relevantWords.get(token).getToken(); + } //circumstancies d'un altre caire else if ( !token.equals("venia") && !token.equals("venies") && !token.equals("tenia") && 
!token.equals("tenies") + && !token.equals("faria") && !token.equals("faries") && !token.equals("continua") && !token.equals("continues") && !token.equals("cantar") && mPreposicioDE.matches() && !matchPostagRegexp(tokens[i-1],NOT_IN_PREV_TOKEN) && (i<tokens.length-2) && !matchPostagRegexp(tokens[i+2],INFINITIU) @@ -173,6 +183,7 @@ } //la renuncia del president. else if ( !token.equals("venia") && !token.equals("venies") && !token.equals("tenia") && !token.equals("tenies") + && !token.equals("faria") && !token.equals("faries") && !token.equals("continua") && !token.equals("continues") && !token.equals("cantar") && !token.equals("diferencia") && !token.equals("diferencies") && !token.equals("distancia") && !token.equals("distancies") &&( ( mArticleELMS.matches() && matchPostagRegexp(relevantWords.get(token),NOM_MS) ) @@ -188,6 +199,7 @@ //circumstancies extraordinàries else if ( !token.equals("pronuncia") && !token.equals("pronuncies") && !token.equals("venia") && !token.equals("venies") && !token.equals("tenia") && !token.equals("tenies") && !token.equals("continua") && !token.equals("continues") + && !token.equals("faria") && !token.equals("faries") && !token.equals("genera") && (i<tokens.length-1) && ( @@ -204,7 +216,7 @@ else if ( (matchPostagRegexp(relevantWords.get(token),NOM_MS) && matchPostagRegexp(tokens[i-1],ADJECTIU_MS)) || (matchPostagRegexp(relevantWords.get(token),NOM_FS) && matchPostagRegexp(tokens[i-1],ADJECTIU_FS) - && !token.equals("venia") && !token.equals("tenia") && !token.equals("continua") && !token.equals("genera") ) + && !token.equals("venia") && !token.equals("tenia") && !token.equals("continua") && !token.equals("genera") && !token.equals("faria")) || (matchPostagRegexp(relevantWords.get(token),NOM_MP) && matchPostagRegexp(tokens[i-1],ADJECTIU_MP)) || (matchPostagRegexp(relevantWords.get(token),NOM_FP) && matchPostagRegexp(tokens[i-1],ADJECTIU_FP)) ) Modified: trunk/JLanguageTool/src/resource/ca/catalan.dict 
=================================================================== (Binary files differ) Modified: trunk/JLanguageTool/src/resource/ca/catalan_synth.dict =================================================================== (Binary files differ) Modified: trunk/JLanguageTool/src/resource/ca/disambiguation.xml =================================================================== --- trunk/JLanguageTool/src/resource/ca/disambiguation.xml 2012-07-27 11:43:33 UTC (rev 7759) +++ trunk/JLanguageTool/src/resource/ca/disambiguation.xml 2012-07-27 16:59:12 UTC (rev 7760) @@ -4421,7 +4421,7 @@ <pattern> <token postag="V.*" postag_regexp="yes"><exception postag="N.*" postag_regexp="yes"/></token> <marker> - <token postag="SPS00" postag_regexp="yes"><exception postag="_GN_.*" postag_regexp="yes"/></token> + <token postag="SPS00" postag_regexp="yes"><exception postag="_GN_.*|PI.*|DI.*" postag_regexp="yes"/></token> </marker> </pattern> <disambig action="filter" postag="SP.*"></disambig> Modified: trunk/JLanguageTool/src/rules/ca/verb_senseaccent_nom_ambaccent.txt =================================================================== --- trunk/JLanguageTool/src/rules/ca/verb_senseaccent_nom_ambaccent.txt 2012-07-27 11:43:33 UTC (rev 7759) +++ trunk/JLanguageTool/src/rules/ca/verb_senseaccent_nom_ambaccent.txt 2012-07-27 16:59:12 UTC (rev 7760) @@ -329,6 +329,8 @@ falco;falcó;NCMS000 falsaria;falsària;NCFS000 falsaries;falsàries;NCFP000 +faria;fària;NCFS000 +faries;fàries;NCFP000 farratjo;farratjó;NCMS000 feixo;feixó;NCMS000 fenia;fenià;NCMS000 Modified: trunk/JLanguageTool/src/test/org/languagetool/rules/ca/AccentuationCheckRuleTest.java =================================================================== --- trunk/JLanguageTool/src/test/org/languagetool/rules/ca/AccentuationCheckRuleTest.java 2012-07-27 11:43:33 UTC (rev 7759) +++ trunk/JLanguageTool/src/test/org/languagetool/rules/ca/AccentuationCheckRuleTest.java 2012-07-27 16:59:12 UTC (rev 7760) @@ -43,6 +43,7 @@ public void 
testRule() throws IOException { // correct sentences: + assertCorrect("Cap faria una cosa així."); assertCorrect("El cos genera suficient pressió interna."); assertCorrect("Les seues contràries."); assertCorrect("Això és una frase de prova."); @@ -69,6 +70,8 @@ assertCorrect("La magnífica conservació del palau."); // errors: + assertIncorrect("No em fumaré cap faria com feia abans."); + assertIncorrect("M'he fumat una faria."); assertIncorrect("Les seues contraries."); assertIncorrect("Amb renuncies i esforç."); assertIncorrect("La renuncia del president."); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. ------------------------------------------------------------------------------ Live Security Virtual Conference Exclusive live event will cover all the ways today's security and threat landscape has changed and how IT managers can respond. Discussions will include endpoint security, mobile security and the latest in malware threats. http://www.accelacomm.com/jaw/sfrnl04242012/114/50122263/ _______________________________________________ Languagetool-cvs mailing list Languagetool-cvs@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/languagetool-cvs