otis 2004/05/30 13:24:20 Modified: . CHANGES.txt build.xml src/java/org/apache/lucene/analysis/de GermanStemmer.java GermanAnalyzer.java src/test/org/apache/lucene/queryParser TestQueryParser.java Log: - Switched to UTF-8 file encoding Revision Changes Path 1.91 +5 -1 jakarta-lucene/CHANGES.txt Index: CHANGES.txt =================================================================== RCS file: /home/cvs/jakarta-lucene/CHANGES.txt,v retrieving revision 1.90 retrieving revision 1.91 diff -u -r1.90 -r1.91 --- CHANGES.txt 24 May 2004 19:05:21 -0000 1.90 +++ CHANGES.txt 30 May 2004 20:24:20 -0000 1.91 @@ -17,6 +17,10 @@ methods to replace a PhraseQuery with a SpanNearQuery instead, keeping the proper slop factor. (Erik Hatcher) + 4. Changed the encoding of GermanAnalyzer.java and GermanStemmer.java to + UTF-8 and changed the build encoding to UTF-8, to make changed files + compile. (Otis Gospodnetic) + 1.4 RC3 1.64 +1 -1 jakarta-lucene/build.xml Index: build.xml =================================================================== RCS file: /home/cvs/jakarta-lucene/build.xml,v retrieving revision 1.63 retrieving revision 1.64 diff -u -r1.63 -r1.64 --- build.xml 11 May 2004 20:20:04 -0000 1.63 +++ build.xml 30 May 2004 20:24:20 -0000 1.64 @@ -23,7 +23,7 @@ <property name="javac.debug" value="on"/> <property name="project.name" value="site"/> <!-- todo: is this used by anakia or something else? 
--> <property name="javadoc.link" value="http://java.sun.com/j2se/1.4/docs/api/"/> - <property name="build.encoding" value="ISO-8859-1"/> + <property name="build.encoding" value="utf-8"/> <property name="build.dir" location="build"/> <property name="dist.dir" location="dist"/> 1.11 +12 -12 jakarta-lucene/src/java/org/apache/lucene/analysis/de/GermanStemmer.java Index: GermanStemmer.java =================================================================== RCS file: /home/cvs/jakarta-lucene/src/java/org/apache/lucene/analysis/de/GermanStemmer.java,v retrieving revision 1.10 retrieving revision 1.11 diff -u -r1.10 -r1.11 --- GermanStemmer.java 30 Mar 2004 15:54:48 -0000 1.10 +++ GermanStemmer.java 30 May 2004 20:24:20 -0000 1.11 @@ -18,7 +18,7 @@ /** * A stemmer for German words. The algorithm is based on the report - * "A Fast and Simple Stemming Algorithm for German Words" by Jörg + * "A Fast and Simple Stemming Algorithm for German Words" by Jörg * Caumanns ([EMAIL PROTECTED]). * * @author Gerhard Schwarz @@ -153,12 +153,12 @@ /** * Do some substitutions for the term to reduce overstemming: * - * - Substitute Umlauts with their corresponding vowel: äöü -> aou, - * "ß" is substituted by "ss" + * - Substitute Umlauts with their corresponding vowel: äöü -> aou, + * "ß" is substituted by "ss" * - Substitute a second char of a pair of equal characters with * an asterisk: ?? -> ?* * - Substitute some common character combinations with a token: - * sch/ch/ei/ie/ig/st -> $/§/%/&/#/! + * sch/ch/ei/ie/ig/st -> $/§/%/&/#/! */ private void substitute( StringBuffer buffer ) { @@ -169,18 +169,18 @@ buffer.setCharAt( c, '*' ); } // Substitute Umlauts. 
- else if ( buffer.charAt( c ) == 'ä' ) { + else if ( buffer.charAt( c ) == 'ä' ) { buffer.setCharAt( c, 'a' ); } - else if ( buffer.charAt( c ) == 'ö' ) { + else if ( buffer.charAt( c ) == 'ö' ) { buffer.setCharAt( c, 'o' ); } - else if ( buffer.charAt( c ) == 'ü' ) { + else if ( buffer.charAt( c ) == 'ü' ) { buffer.setCharAt( c, 'u' ); } // Take care that at least one character is left left side from the current one if ( c < buffer.length() - 1 ) { - if ( buffer.charAt( c ) == 'ß' ) { + if ( buffer.charAt( c ) == 'ß' ) { buffer.setCharAt( c, 's' ); buffer.insert( c + 1, 's' ); substCount++; @@ -194,7 +194,7 @@ substCount =+ 2; } else if ( buffer.charAt( c ) == 'c' && buffer.charAt( c + 1 ) == 'h' ) { - buffer.setCharAt( c, '§' ); + buffer.setCharAt( c, '§' ); buffer.deleteCharAt( c + 1 ); substCount++; } @@ -225,7 +225,7 @@ /** * Undoes the changes made by substitute(). That are character pairs and * character combinations. Umlauts will remain as their corresponding vowel, - * as "ß" remains as "ss". + * as "ß" remains as "ss". 
*/ private void resubstitute( StringBuffer buffer ) { @@ -238,7 +238,7 @@ buffer.setCharAt( c, 's' ); buffer.insert( c + 1, new char[]{'c', 'h'}, 0, 2 ); } - else if ( buffer.charAt( c ) == '§' ) { + else if ( buffer.charAt( c ) == '§' ) { buffer.setCharAt( c, 'c' ); buffer.insert( c + 1, 'h' ); } 1.16 +3 -3 jakarta-lucene/src/java/org/apache/lucene/analysis/de/GermanAnalyzer.java Index: GermanAnalyzer.java =================================================================== RCS file: /home/cvs/jakarta-lucene/src/java/org/apache/lucene/analysis/de/GermanAnalyzer.java,v retrieving revision 1.15 retrieving revision 1.16 diff -u -r1.15 -r1.16 --- GermanAnalyzer.java 30 Mar 2004 15:44:58 -0000 1.15 +++ GermanAnalyzer.java 30 May 2004 20:24:20 -0000 1.16 @@ -47,14 +47,14 @@ */ private String[] GERMAN_STOP_WORDS = { "einer", "eine", "eines", "einem", "einen", - "der", "die", "das", "dass", "daß", + "der", "die", "das", "dass", "daß", "du", "er", "sie", "es", "was", "wer", "wie", "wir", "und", "oder", "ohne", "mit", "am", "im", "in", "aus", "auf", "ist", "sein", "war", "wird", "ihr", "ihre", "ihres", - "als", "für", "von", "mit", + "als", "für", "von", "mit", "dich", "dir", "mich", "mir", "mein", "sein", "kein", "durch", "wegen", "wird" 1.26 +2 -2 jakarta-lucene/src/test/org/apache/lucene/queryParser/TestQueryParser.java Index: TestQueryParser.java =================================================================== RCS file: /home/cvs/jakarta-lucene/src/test/org/apache/lucene/queryParser/TestQueryParser.java,v retrieving revision 1.25 retrieving revision 1.26 diff -u -r1.25 -r1.26 --- TestQueryParser.java 3 Mar 2004 12:07:13 -0000 1.25 +++ TestQueryParser.java 30 May 2004 20:24:20 -0000 1.26 @@ -159,8 +159,8 @@ public void testSimple() throws Exception { assertQueryEquals("term term term", null, "term term term"); - assertQueryEquals("türm term term", null, "türm term term"); - assertQueryEquals("ümlaut", null, "ümlaut"); + assertQueryEquals("türm term term", null, "türm 
term term"); + assertQueryEquals("ümlaut", null, "ümlaut"); assertQueryEquals("a AND b", null, "+a +b"); assertQueryEquals("(a AND b)", null, "+a +b");
---------------------------------------------------------------------
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]