dnaber 2004/08/18 07:30:48 Modified: contributions/analyzers/src/java/org/apache/lucene/analysis/br BrazilianStemFilter.java BrazilianStemmer.java Log: convert to utf-8 Revision Changes Path 1.7 +1 -1 jakarta-lucene-sandbox/contributions/analyzers/src/java/org/apache/lucene/analysis/br/BrazilianStemFilter.java Index: BrazilianStemFilter.java =================================================================== RCS file: /home/cvs/jakarta-lucene-sandbox/contributions/analyzers/src/java/org/apache/lucene/analysis/br/BrazilianStemFilter.java,v retrieving revision 1.6 retrieving revision 1.7 diff -u -r1.6 -r1.7 --- BrazilianStemFilter.java 12 Mar 2004 15:52:58 -0000 1.6 +++ BrazilianStemFilter.java 18 Aug 2004 14:30:47 -0000 1.7 @@ -66,7 +66,7 @@ /** * Based on (copied) the GermanStemFilter * - * @author João Kramer + * @author João Kramer * <p/> * <p/> * A filter that stemms german words. It supports a table of words that should 1.4 +20 -20 jakarta-lucene-sandbox/contributions/analyzers/src/java/org/apache/lucene/analysis/br/BrazilianStemmer.java Index: BrazilianStemmer.java =================================================================== RCS file: /home/cvs/jakarta-lucene-sandbox/contributions/analyzers/src/java/org/apache/lucene/analysis/br/BrazilianStemmer.java,v retrieving revision 1.3 retrieving revision 1.4 diff -u -r1.3 -r1.4 --- BrazilianStemmer.java 22 Jan 2004 20:54:46 -0000 1.3 +++ BrazilianStemmer.java 18 Aug 2004 14:30:48 -0000 1.4 @@ -56,7 +56,7 @@ /** * A stemmer for brazilian words. The algorithm is based on the report - * "A Fast and Simple Stemming Algorithm for German Words" by Jörg + * "A Fast and Simple Stemming Algorithm for German Words" by Jörg * Caumanns ([EMAIL PROTECTED]). * * @author Gerhard Schwarz @@ -282,8 +282,8 @@ /** * 1) Turn to lowercase * 2) Remove accents - * 3) ã -> a ; õ -> o - * 4) ç -> c + * 3) ã -> a ; õ -> o + * 4) ç -> c * * @return null or a string transformed */ @@ -299,31 +299,31 @@ value = value.toLowerCase() ; for (j=0 ; j < value.length() ; j++) { - if ((value.charAt(j) == 'á') || - (value.charAt(j) == 'â') || - (value.charAt(j) == 'ã')) { + if ((value.charAt(j) == 'á') || + (value.charAt(j) == 'â') || + (value.charAt(j) == 'ã')) { r= r + "a" ; continue ; } - if ((value.charAt(j) == 'é') || - (value.charAt(j) == 'ê')) { + if ((value.charAt(j) == 'é') || + (value.charAt(j) == 'ê')) { r= r + "e" ; continue ; } - if (value.charAt(j) == 'í') { + if (value.charAt(j) == 'Ã') { r= r + "i" ; continue ; } - if ((value.charAt(j) == 'ó') || - (value.charAt(j) == 'ô') || - (value.charAt(j) == 'õ')) { + if ((value.charAt(j) == 'ó') || + (value.charAt(j) == 'ô') || + (value.charAt(j) == 'õ')) { r= r + "o" ; continue ; } - if ((value.charAt(j) == 'ú') || - (value.charAt(j) == 'ü')) { + if ((value.charAt(j) == 'ú') || + (value.charAt(j) == 'ü')) { r= r + "u" ; continue ; } - if (value.charAt(j) == 'ç') { + if (value.charAt(j) == 'ç') { r= r + "c" ; continue ; } - if (value.charAt(j) == 'ñ') { + if (value.charAt(j) == 'ñ') { r= r + "n" ; continue ; } @@ -410,7 +410,7 @@ } /** - * Creates CT (changed term) , substituting * 'ã' and 'õ' for 'a~' and 'o~'. + * Creates CT (changed term) , substituting * 'ã' and 'õ' for 'a~' and 'o~'. */ private void createCT( String term ) { CT = changeTerm(term) ; @@ -1008,7 +1008,7 @@ /** * Residual suffix * - * If the word ends with one of the suffixes (os a i o á í ó) + * If the word ends with one of the suffixes (os a i o á à ó) * in RV, delete it * */ @@ -1031,11 +1031,11 @@ } /** - * If the word ends with one of ( e é ê) in RV,delete it, + * If the word ends with one of ( e é ê) in RV,delete it, * and if preceded by 'gu' (or 'ci') with the 'u' (or 'i') in RV, * delete the 'u' (or 'i') * - * Or if the word ends ç remove the cedilha + * Or if the word ends ç remove the cedilha * */ private void step5() {
--------------------------------------------------------------------- To unsubscribe, e-mail: [EMAIL PROTECTED] For additional commands, e-mail: [EMAIL PROTECTED]