Hello Everybody , I am trying to highlight some results . I index the body (the text) of my documents in the field "contents"and when I try to highilight using highlighter.getBestFragment(...) I get a NullPointerException .
But when,for exemple I try to highlight the fileName it works properly. I know since I "store" Everything in one field with the fileReader or (ParsingReader) my text is tokenized which is different from a file name . Here's my code ,please help me . package xxxxxx; import java.io.File; import java.io.FileFilter; import java.io.FileReader; import java.io.IOException; import java.io.Reader; import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.store.Directory; import org.apache.lucene.store.FSDirectory; import org.apache.lucene.util.Version; import org.apache.tika.parser.ParsingReader; public class Indexer { static long start = 0; public static void main(String[] args) throws Exception { System.out.println("l'index se trouve à " + args[0]); System.out.println("le dossier ou s'effectue l'indexation est :" + args[1]); if (args.length != 2) { throw new IllegalArgumentException("Usage: java " + Indexer.class.getName() + " "); } String indexDir = args[0]; String dataDir = args[1]; start = System.currentTimeMillis(); Indexer indexer = new Indexer(indexDir); int numIndexed; try { numIndexed = indexer.index(dataDir, new TextFilesFilter()); } finally { indexer.close(); } long end = System.currentTimeMillis(); System.out.println("Indexing " + numIndexed + " files took " + (end - start) + " milliseconds"); } private IndexWriter writer; public Indexer(String indexDir) throws IOException, InterruptedException { Directory dir = FSDirectory.open(new File(indexDir)); writer = new IndexWriter(dir, new StandardAnalyzer(Version.LUCENE_30), true, IndexWriter.MaxFieldLength.UNLIMITED); writer.setUseCompoundFile(true); } public void close() throws IOException { writer.optimize(); writer.close(); } public int index(String dataDir, FileFilter filter) throws Exception { File[] files = new File(dataDir).listFiles(); for (File f : files) { if (!f.isDirectory() && !f.isHidden() && f.exists() && f.canRead() && (filter == null || filter.accept(f))) { if (!(f.getCanonicalPath().endsWith("~"))) { indexFile(f); } } else { index(f.toString(), filter); } } return writer.numDocs(); } private static class TextFilesFilter implements FileFilter { public boolean accept(File path) { return true; } } protected Document getDocument(File f) throws Exception { // FileReader frf = new FileReader(f); Document doc = new Document(); Reader reader = new ParsingReader(f); doc.add(new Field("contents", reader, Field.TermVector.WITH_POSITIONS_OFFSETS)); doc.add(new Field("filename", f.getName(), Field.Store.YES, Field.Index.ANALYZED )); doc.add(new Field("fullpath", f.getCanonicalPath(),Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS)); return doc; } private void indexFile(File f) throws Exception { System.out.println("Indexing " + f.getCanonicalPath()); Document doc = getDocument(f); writer.addDocument(doc); System.out.println(System.currentTimeMillis() - start); } } --------------------------------------------------------------------------------------- Lucene ,highlighting and NullPointerException Hello Everybody , I am trying to highlight some results . I index the body (the text) of my documents in the field "contents"and when I try to highilight using highlighter.getBestFragment(...) I get a NullPointerException . But when,for exemple I try to highlight the fileName it works properly. I know since I "store" Everything in one field with the fileReader or (ParsingReader) my text is tokenized which is different from a file name . Here's my code ,please help me . package xxxxxxxxxxxxxxxxxxxx; import java.io.File; import java.io.IOException; import java.util.ArrayList; import java.util.List; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.queryParser.MultiFieldQueryParser; import org.apache.lucene.queryParser.ParseException; import org.apache.lucene.queryParser.QueryParser; import org.apache.lucene.search.DisjunctionMaxQuery; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.TopDocs; import org.apache.lucene.search.highlight.Highlighter; import org.apache.lucene.search.highlight.InvalidTokenOffsetsException; import org.apache.lucene.search.highlight.QueryScorer; import org.apache.lucene.search.highlight.SimpleSpanFragmenter; import org.apache.lucene.search.highlight.TokenSources; import org.apache.lucene.store.Directory; import org.apache.lucene.store.FSDirectory; import org.apache.lucene.util.Version; public class Searcher { public static void main(String[] args) throws IllegalArgumentException, IOException, ParseException, InvalidTokenOffsetsException { System.out.println("endroit ou se situe l'index " + args[0]); System.out.println(args[1]); if (args.length != 2) { throw new IllegalArgumentException("Usage: java " + Searcher.class.getName() + " "); } String indexDir = args[0]; String q = args[1]; search(indexDir, q); } public static void search(String indexDir, String q) throws IOException, ParseException, InvalidTokenOffsetsException { Directory dir = FSDirectory.open(new File(indexDir)); IndexSearcher indexSearcher = new IndexSearcher(dir); QueryParser parserC = new QueryParser(Version.LUCENE_30, "contents", new StandardAnalyzer(Version.LUCENE_30)); // QueryParser parserN = new QueryParser(Version.LUCENE_30, "filename", new StandardAnalyzer(Version.LUCENE_30)); QueryParser parserP = new QueryParser(Version.LUCENE_30, "fullpath", new StandardAnalyzer(Version.LUCENE_30)); parserC.setDefaultOperator(QueryParser.Operator.OR); // parserN.setDefaultOperator(QueryParser.Operator.OR); parserC.setPhraseSlop(10); // parserN.setPhraseSlop(10); DisjunctionMaxQuery dmq = new DisjunctionMaxQuery(6); Query query = new MultiFieldQueryParser(Version.LUCENE_30, new String[]{"contents", "filename"}, new CustomAnalyzer()).parse(q); Query queryC = parserC.parse(q); //Query queryN = parserN.parse(q); dmq.add(queryC); //dmq.add(queryN); // dmq.add(query) ; QueryScorer scorer = new QueryScorer(dmq, "contents"); Highlighter highlighter = new Highlighter(scorer); highlighter.setTextFragmenter(new SimpleSpanFragmenter(scorer)); System.out.println(query.toString()); long start = System.currentTimeMillis(); TopDocs hits = indexSearcher.search(dmq, 15); System.out.println(hits.totalHits); long end = System.currentTimeMillis(); System.err.println("Found " + hits.totalHits + " document(s) (in " + (end - start) + " milliseconds) that matched query '" + q + "':"); for (ScoreDoc scoreDoc : hits.scoreDocs) { Document doc = indexSearcher.doc(scoreDoc.doc); System.out.print(scoreDoc.score); System.out.println(doc.get("fullpath")); String contents = doc.get("contents"); // I am pretty sure the mistake is here , contents is always Null //But what can I do to make this thing work ? TokenStream stream = TokenSources.getAnyTokenStream(indexSearcher.getIndexReader(), scoreDoc.doc, "contents", doc, new StandardAnalyzer(Version.LUCENE_30)); String fragment = highlighter.getBestFragment(stream, contents); System.out.println(fragment); } indexSearcher.close(); } } ----------------------------------------------------------------------------------------- run: endroit ou se situe l'index /home/ghart/index le humus (contents:le filename:le) (contents:humus filename:humus) 6 Found 6 document(s) (in 21 milliseconds) that matched query 'le humus': 2.0974472/home/ghart/test/hook Exception in thread "main" java.lang.NullPointerException at org.apache.lucene.search.highlight.SimpleSpanFragmenter.start(SimpleSpanFragmenter.java:103) at org.apache.lucene.search.highlight.Highlighter.getBestTextFragments(Highlighter.java:216) at org.apache.lucene.search.highlight.Highlighter.getBestFragments(Highlighter.java:158) at org.apache.lucene.search.highlight.Highlighter.getBestFragment(Highlighter.java:104) at indexer.Searcher.search(Searcher.java:98) at indexer.Searcher.main(Searcher.java:45) Java Result: 1 GÉNÉRATION TERMINÉE (durée totale 1 seconde) -- View this message in context: http://lucene.472066.n3.nabble.com/Lucene-highlighting-and-NullPointerException-tp2634525p2634525.html Sent from the Lucene - General mailing list archive at Nabble.com.