Hi guys,
I am still learning Lucene, and I ran into a very weird problem yesterday:
no matter what I did, I always got a multi-file index instead of a compound
file. By default, Lucene uses the compound file format, but I still got
multiple files even when I called setUseCompoundFile(true) explicitly...
My code is adapted from the book "Lucene in Action" (2nd ed.) and the demo
code on lucene.apache.org... How can I fix this?
package act.indexing;
import java.io.File;
import java.io.FileFilter;
import java.io.FileReader;
import java.io.IOException;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.index.LogMergePolicy;
import org.apache.lucene.index.MergePolicy;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
public class Indexer {
public Indexer(String indexDir, boolean create) throws IOException {
Directory dir = FSDirectory.open(new File(indexDir));
Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_30);
IndexWriterConfig iwc = new
IndexWriterConfig(Version.LUCENE_30, analyzer);
if (create) {
iwc.setOpenMode(OpenMode.CREATE);
} else {
iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
}
writer = new IndexWriter(dir, iwc);
writer.setUseCompoundFile(true);
}
public int index(String dataDir, FileFilter filter)
throws Exception{
File[] files = new File(dataDir).listFiles();
for (File f : files) {
if ( !f.isDirectory() && !f.isHidden()
&& f.exists() && f.canRead()
&& (filter == null || filter.accept(f))
) {
indexFiles(f);
}
}
return writer.numDocs();
}
public void close() throws IOException {
writer.optimize();
writer.close();
}
private IndexWriter writer;
private void indexFiles(File f) throws Exception {
System.out.println("Indexing " + f.getCanonicalPath());
Document doc = getDocument(f);
writer.addDocument(doc);
}
private Document getDocument(File f) throws Exception {
Document doc = new Document();
doc.add( new Field("contents", new FileReader(f)) );
doc.add( new Field("filename", f.getName(), Field.Store.YES,
Field.Index.NOT_ANALYZED) );
doc.add( new Field("fullpath", f.getCanonicalPath(),
Field.Store.YES, Field.Index.NOT_ANALYZED) );
return doc;
}
private static class TextFilesFilter implements FileFilter {
public boolean accept(File file) {
return file.getName().toLowerCase().endsWith(".txt");
}
}
public static void main(String[] args) throws Exception {
String usage = "Usage: java " + "act.indexing.Indexer"
+ "[-index INDEX_PATH] " + "[-data
DATA_PATH] " + "[-update]";
String index_path = null;
String data_path = null;
boolean create = true;
for (int i = 0; i < args.length; i++) {
if ("-index".equals(args[i])) {
index_path = args[i+1];
}
else if ("-data".equals(args[i])) {
data_path = args[i+1];
}
else if ("-update".equals(args[i])) {
create = false;
}
}
if (index_path == null || data_path == null) {
System.out.println(usage);
System.exit(1);
}
long start = System.currentTimeMillis();
Indexer indexer = new Indexer(index_path, create);
int docsIndexed = 0;
try {
docsIndexed = indexer.index(data_path, new
TextFilesFilter());
} catch(Exception e) {
e.printStackTrace();
} finally {
indexer.close();
}
long end = System.currentTimeMillis();
System.out.println("Indexing " + docsIndexed + " files took " +
(end-start) + " milliseconds");
}
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]