Hi,
Here is the patch for KeywordAnalyzer, KeywordTokenizer, TestKeywordAnalyzer.
Thanks,
Hideaki,
On Dec 17, 2007 6:49 PM, Michael McCandless <[EMAIL PROTECTED]> wrote:
>
> Yes please do! Thanks.
>
> Mike
>
>
> TAKAHASHI hideaki wrote:
>
> > Hi, all
> >
> > I found KeywordAnalyzer/KeywordTokenizer on trunk has a problem.
> >
> > These have a condition(tokenStreams in Analyzer and done in
> > KeywordTokenizer),
> > but these don't reset the condition. So KeywordAnalyzer can't analyze
> > a field more than twice.
> >
> > I already created a patch for this problem.
> > Can I send this patch?
> >
> > Thanks,
> > Hideaki
> >
> > ---------------------------------------------------------------------
> > To unsubscribe, e-mail: [EMAIL PROTECTED]
> > For additional commands, e-mail: [EMAIL PROTECTED]
> >
>
>
> ---------------------------------------------------------------------
> To unsubscribe, e-mail: [EMAIL PROTECTED]
> For additional commands, e-mail: [EMAIL PROTECTED]
>
>
--
高橋 秀明
Index: src/test/org/apache/lucene/analysis/TestKeywordAnalyzer.java
===================================================================
--- src/test/org/apache/lucene/analysis/TestKeywordAnalyzer.java (revision 605078)
+++ src/test/org/apache/lucene/analysis/TestKeywordAnalyzer.java (working copy)
@@ -18,7 +18,10 @@
*/
import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.index.TermDocs;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
@@ -61,4 +64,22 @@
"+partnum:Q36 +space", query.toString("description"));
assertEquals("doc found!", 1, hits.length());
}
+
+ public void testMutipleDocument() throws Exception {
+ RAMDirectory dir = new RAMDirectory();
+ IndexWriter writer = new IndexWriter(dir,new KeywordAnalyzer(), true);
+ Document doc = new Document();
+ doc.add(new Field("partnum", "Q36", Field.Store.YES, Field.Index.TOKENIZED));
+ writer.addDocument(doc);
+ doc = new Document();
+ doc.add(new Field("partnum", "Q37", Field.Store.YES, Field.Index.TOKENIZED));
+ writer.addDocument(doc);
+ writer.close();
+
+ IndexReader reader = IndexReader.open(dir);
+ TermDocs td = reader.termDocs(new Term("partnum", "Q36"));
+ assertTrue(td.next());
+ td = reader.termDocs(new Term("partnum", "Q37"));
+ assertTrue(td.next());
+ }
}
Index: src/java/org/apache/lucene/analysis/KeywordTokenizer.java
===================================================================
--- src/java/org/apache/lucene/analysis/KeywordTokenizer.java (revision 605078)
+++ src/java/org/apache/lucene/analysis/KeywordTokenizer.java (working copy)
@@ -55,4 +55,9 @@
}
return null;
}
+
+ public void reset(Reader input) throws IOException {
+ super.reset(input);
+ this.done = false;
+ }
}
Index: src/java/org/apache/lucene/analysis/KeywordAnalyzer.java
===================================================================
--- src/java/org/apache/lucene/analysis/KeywordAnalyzer.java (revision 605078)
+++ src/java/org/apache/lucene/analysis/KeywordAnalyzer.java (working copy)
@@ -17,6 +17,7 @@
* limitations under the License.
*/
+import java.io.IOException;
import java.io.Reader;
/**
@@ -29,12 +30,13 @@
return new KeywordTokenizer(reader);
}
public TokenStream reusableTokenStream(String fieldName,
- final Reader reader) {
+ final Reader reader) throws IOException {
Tokenizer tokenizer = (Tokenizer) getPreviousTokenStream();
if (tokenizer == null) {
tokenizer = new KeywordTokenizer(reader);
setPreviousTokenStream(tokenizer);
- }
+ } else
+ tokenizer.reset(reader);
return tokenizer;
}
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]