Hi All,
I'm trying to create an index with payloads using Lucene 4.5 but the payloads
don't seem to be getting into the index.
I looked into the code and it looks like storePayloads argument to
addOrUpdateInternal when called from addOrUpdate in FieldInfos is set to false.
Is this my problem? If so should I be using a different indexer?
The indexing and reading code is below.
Thanks,
Kyle
The indexing code:import java.io.File;import java.io.IOException;import
java.nio.ByteBuffer;
import org.apache.commons.logging.Log;import
org.apache.commons.logging.LogFactory;import
org.apache.lucene.analysis.Analyzer;import
org.apache.lucene.analysis.AnalyzerWrapper;import
org.apache.lucene.analysis.TokenFilter;import
org.apache.lucene.analysis.TokenStream;import
org.apache.lucene.analysis.standard.StandardAnalyzer;import
org.apache.lucene.analysis.tokenattributes.PayloadAttribute;import
org.apache.lucene.document.Document;import
org.apache.lucene.document.Field;import
org.apache.lucene.document.FieldType;import
org.apache.lucene.document.TextField;import
org.apache.lucene.index.IndexWriter;import
org.apache.lucene.index.IndexWriterConfig;import
org.apache.lucene.index.IndexWriterConfig.OpenMode;import
org.apache.lucene.index.IndexableField;import
org.apache.lucene.store.Directory;import
org.apache.lucene.store.FSDirectory;import
org.apache.lucene.util.BytesRef;import org.apache.lucene.util.Version;
public class TestIndex2 { private static Log LOG =
LogFactory.getLog(TestIndex2.class); MetricAnalyzer analyzer;
Directory directory; IndexWriterConfig configIndex; IndexWriter iwriter;
public TestIndex2(File fileOut) throws IOException {
analyzer = new MetricAnalyzer(Analyzer.PER_FIELD_REUSE_STRATEGY);
directory = FSDirectory.open(fileOut); configIndex =
new IndexWriterConfig(Version.LUCENE_45, analyzer);
configIndex.setOpenMode(OpenMode.CREATE); iwriter = new
IndexWriter(directory, configIndex); } // end TestIndex2
public void close() { if (iwriter != null) {
try { iwriter.close(); } catch
(IOException e) { LOG.error("Unable to close
IndexWriter", e); } } if
(directory != null) { try {
directory.close(); } catch (IOException e) {
LOG.error("Unable to close Directory", e);
} } } // end close BytesRef encodeMetrics(long[]
metrics) { ByteBuffer buffer =
ByteBuffer.allocate(metrics.length * 8); for (long metric : metrics)
{ buffer.putLong(metric); } return new
BytesRef(buffer.array()); } // end encodeMetrics public void
index() throws IOException { FieldType type = new
FieldType(TextField.TYPE_NOT_STORED);
type.setStoreTermVectorPayloads(true);
type.setStoreTermVectorPositions(true); type.setStoreTermVectors(true);
Document doc = new Document();
long[] metrics = { 1, 2, 3 };
analyzer.setPayload(encodeMetrics(metrics));
IndexableField indexField = new Field("test", "one", type);
doc.add(indexField); indexField = new Field("test", "two", type);
doc.add(indexField);
indexField = new Field("test_query", "one", type);
doc.add(indexField); indexField = new Field("test_query", "two",
type); doc.add(indexField);
iwriter.addDocument(doc); } // end index public class
MetricTokenFilter extends TokenFilter { private
MetricAnalyzer analyzer; private PayloadAttribute payloadAttr;
protected MetricTokenFilter(MetricAnalyzer analyzer,
TokenStream input) { super(input);
this.analyzer = analyzer; this.payloadAttr =
addAttribute(PayloadAttribute.class); } // end MetricTokenFilter
@Override public boolean
incrementToken() throws IOException {
payloadAttr.setPayload(analyzer.getPayload()); return
input.incrementToken(); } // end incrementToken } //
end class MetricTokenFilter public class MetricAnalyzer extends
AnalyzerWrapper { BytesRef payload;
protected MetricAnalyzer(Analyzer.ReuseStrategy reuseStrategy) {
super(reuseStrategy); } // end MetricAnalyzer
protected void setPayload(BytesRef payload) { this.payload
= payload; } protected BytesRef getPayload() { return payload; }
@Override protected Analyzer
getWrappedAnalyzer(String fieldName) { return new
StandardAnalyzer(Version.LUCENE_45); } // end getWrappedAnalyzer
@Override protected TokenStreamComponents
wrapComponents(String fieldName, TokenStreamComponents components) {
if (fieldName.endsWith("_query")) { return
components; } else {
MetricTokenFilter filter = new MetricTokenFilter(this,
components.getTokenStream()); return new
TokenStreamComponents(components.getTokenizer(), filter); }
} // end wrapComponents
} // end class MetricAnalyzer public static void
main(String[] args) throws IOException { File fileOut = new
File("test_index"); TestIndex2 iwriter = new TestIndex2(fileOut);
try { iwriter.index(); } finally {
iwriter.close(); } } // end main
} // end class TestIndex2
The reading code:import java.io.File;import java.io.IOException;import
java.nio.ByteBuffer;import java.util.List;
import org.apache.lucene.index.AtomicReaderContext;import
org.apache.lucene.index.DirectoryReader;import
org.apache.lucene.index.DocsAndPositionsEnum;import
org.apache.lucene.index.IndexReader;import org.apache.lucene.index.Terms;import
org.apache.lucene.index.TermsEnum;import
org.apache.lucene.search.DocIdSetIterator;import
org.apache.lucene.store.Directory;import
org.apache.lucene.store.FSDirectory;import org.apache.lucene.util.Bits;import
org.apache.lucene.util.BytesRef;
public class TestQuery2 {
public static long[] decodeMetrics(BytesRef metrics, int count, long[]
reuse) { if (reuse == null) { reuse = new
long[count]; } ByteBuffer buffer =
ByteBuffer.wrap(metrics.bytes); for (int x=0; x<count; x++) {
reuse[x] = buffer.getLong(); } return
reuse; } // end decodeMetrics public static void report (IndexReader
ireader, String field) throws IOException { int count = 3;
List<AtomicReaderContext> leaves = ireader.leaves();
for (AtomicReaderContext leaf : leaves) { Terms t =
leaf.reader().terms(field); if (t == null) {
System.out.println("Invalid field: '"+field+"'");
return; } TermsEnum terms =
t.iterator(null); BytesRef term; long[]
metrics = null; while ((term = terms.next()) != null) {
int docCount = terms.docFreq(); Bits
bits = new Bits.MatchAllBits(docCount);
System.out.print ("\t" + term.utf8ToString() +
"\t");
boolean first = true;
DocsAndPositionsEnum docPositions = terms.docsAndPositions(bits, null,
DocsAndPositionsEnum.FLAG_PAYLOADS); if
(docPositions == null) {
System.out.println("No doc positions"); } else {
while (docPositions.nextDoc() !=
DocIdSetIterator.NO_MORE_DOCS) {
for (int pos=0; pos<docPositions.freq(); pos++) {
docPositions.nextPosition();
BytesRef payload =
docPositions.getPayload(); if
(payload != null) {
metrics = TestQuery2.decodeMetrics(payload, count, metrics);
for (int x=0; x<count; x++) {
if (!first) {
System.out.print(",");
} else {
first = false;
}
System.out.print(metrics[x]);
} }
}
} }
System.out.println(); } } } // end report
public static void main(String[] args) throws IOException {
File fileOut = new File("test_index"); Directory directory =
FSDirectory.open(fileOut);
DirectoryReader ireader = DirectoryReader.open(directory);
try { TestQuery2.report(ireader, "test");
} finally { directory.close(); }
} // end main } // end class TestQuery2