[ https://issues.apache.org/jira/browse/LUCENE-1001?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=12648259#action_12648259 ]
Jonathan Mamou commented on LUCENE-1001: ---------------------------------------- Hi, Here is the relevant code. I would expect to obtain 10 pos: 10 pos: 11 while I obtain 10 pos: 0 pos: 11 import java.io.StringReader; import java.util.Collection; import java.util.Iterator; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.Term; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.TopDocs; import org.apache.lucene.search.spans.PayloadSpans; import org.apache.lucene.search.spans.SpanNearQuery; import org.apache.lucene.search.spans.SpanQuery; import org.apache.lucene.search.spans.SpanTermQuery; public class Test { public static void main (String args[]) throws Exception{ IndexWriter writer = new IndexWriter(args[0], new TestPayloadAnalyzer(), IndexWriter.MaxFieldLength.LIMITED); Document doc = new Document(); doc.add(new Field("content", new StringReader("a b c d e f g h i j a k"))); writer.addDocument(doc); writer.close(); IndexSearcher is = new IndexSearcher(args[0]); SpanTermQuery stq1 = new SpanTermQuery(new Term("content", "a" )); SpanTermQuery stq2 = new SpanTermQuery(new Term("content", "k" )); SpanQuery[] sqs = {stq1,stq2}; SpanNearQuery snq = new SpanNearQuery(sqs,1,true); PayloadSpans spans = snq.getPayloadSpans(is.getIndexReader()); TopDocs topDocs = is.search(snq,1); for (int i = 0; i < topDocs.scoreDocs.length; i++) { while (spans.next()) { System.out.println(spans.start()); Collection<byte[]> payloads = spans.getPayload(); for (Iterator<byte[]> it = payloads.iterator(); it.hasNext();) { System.out.println(new String(it.next())); } } } } } ------------------------------------------------------------------------------------------------------------------------------------- import java.io.IOException; import java.io.Reader; import org.apache.lucene.analysis.Analyzer; import 
org.apache.lucene.analysis.LowerCaseTokenizer; import org.apache.lucene.analysis.Token; import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.index.Payload; public class TestPayloadAnalyzer extends Analyzer { public TokenStream tokenStream(String fieldName, Reader reader) { TokenStream result = new LowerCaseTokenizer(reader); result = new PayloadFilter(result, fieldName); return result; } } class PayloadFilter extends TokenFilter { String fieldName; int pos; public PayloadFilter(TokenStream input, String fieldName) { super(input); this.fieldName = fieldName; pos = 0; } public Token next() throws IOException { Token result = input.next(); if (result != null) { String token = new String(result.termBuffer(), 0, result.termLength ()); result.setPayload(new Payload(("pos: " + pos).getBytes())); pos += result.getPositionIncrement(); } return result; } } Jonathan > Add Payload retrieval to Spans > ------------------------------ > > Key: LUCENE-1001 > URL: https://issues.apache.org/jira/browse/LUCENE-1001 > Project: Lucene - Java > Issue Type: New Feature > Components: Search > Reporter: Grant Ingersoll > Assignee: Grant Ingersoll > Priority: Minor > Fix For: 2.4 > > Attachments: LUCENE-1001.patch, LUCENE-1001.patch, LUCENE-1001.patch, > LUCENE-1001.patch, LUCENE-1001.patch, LUCENE-1001.patch, LUCENE-1001.patch, > LUCENE-1001.patch > > > It will be nice to have access to payloads when doing SpanQuerys. > See http://www.gossamer-threads.com/lists/lucene/java-dev/52270 and > http://www.gossamer-threads.com/lists/lucene/java-dev/51134 > Current API, added to Spans.java is below. I will try to post a patch as > soon as I can figure out how to make it work for unordered spans (I believe I > have all the other cases working). > {noformat} > /** > * Returns the payload data for the current span. > * This is invalid until {@link #next()} is called for > * the first time. 
> * This method must not be called more than once after each call > * of {@link #next()}. However, payloads are loaded lazily, > * so if the payload data for the current position is not needed, > * this method may not be called at all for performance reasons.<br> > * <br> > * <p><font color="#FF0000"> > * WARNING: The status of the <b>Payloads</b> feature is experimental. > * The APIs introduced here might change in the future and will not be > * supported anymore in such a case.</font> > * > * @return a List of byte arrays containing the data of this payload > * @throws IOException > */ > // TODO: Remove warning after API has been finalized > List/*<byte[]>*/ getPayload() throws IOException; > /** > * Checks if a payload can be loaded at this position. > * <p/> > * Payloads can only be loaded once per call to > * {@link #next()}. > * <p/> > * <p><font color="#FF0000"> > * WARNING: The status of the <b>Payloads</b> feature is experimental. > * The APIs introduced here might change in the future and will not be > * supported anymore in such a case.</font> > * > * @return true if there is a payload available at this position that can > be loaded > */ > // TODO: Remove warning after API has been finalized > public boolean isPayloadAvailable(); > {noformat} -- This message is automatically generated by JIRA. - You can reply to this email to add a comment to the issue online. --------------------------------------------------------------------- To unsubscribe, e-mail: [EMAIL PROTECTED] For additional commands, e-mail: [EMAIL PROTECTED]