Hi all,
I am writing a program that highlights a searched term and writes
the result into a demo.html file. At the moment demo.html shows
only the first few pages of the book. Is there any way I can make it show
the whole book? (Would increasing the fragment size up to the file size help?)
I have attached the program I am using (please test it with a large XML
file, as it already runs fine with shorter ones).
Thanks in advance.
import org.apache.lucene.search.*;
import org.apache.lucene.analysis.*;
import org.apache.lucene.document.*;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.SimpleAnalyzer;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.search.highlight.*;
import org.apache.lucene.search.spans.SpanTermQuery;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileWriter;
import java.io.InputStream;
import java.io.Reader;
import java.io.StringReader;
import java.io.StringWriter;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
public class PhraseTestFile {
private IndexSearcher searcher;
private RAMDirectory directory;
@SuppressWarnings("deprecation")
public PhraseTestFile() throws Exception {
directory = new RAMDirectory();
Analyzer analyzer = new StandardAnalyzer() {
public TokenStream tokenStream(String fieldName, Reader reader)
{
return new LowerCaseTokenizer(reader);
}
public int getPositionIncrementGap(String fieldName) {
return 100;
}
};
IndexWriter writer = new IndexWriter(directory, analyzer, true);
Document doc = new Document();
String text= convertXMLFileToString("/root/Desktop/alldaisybuks/Ten_Days_That_Shook_the_World/Ten_Days_That_Shook_the_World_f1.xml");
System.out.println("The file size is : "+ text.length() );
doc.add(new Field("contents", text, Field.Store.YES,Field.Index.TOKENIZED));
writer.addDocument(doc);
writer.optimize();
writer.close();
searcher = new IndexSearcher(directory);
String searchit="Committee";
Term t= new Term("contents",searchit);
//trying fuzzy query
Query query= new FuzzyQuery(t);
// Try a parsed query
Query parsedQuery = new QueryParser("contents",
analyzer).parse(searchit);
Hits hits = searcher.search(parsedQuery);
System.out.println("We found " + hits.length() + " hits.");
// Highlight the results
CachingTokenFilter tokenStream = new
CachingTokenFilter(analyzer.tokenStream( "contents", new
StringReader(text)));
SimpleHTMLFormatter formatter = new SimpleHTMLFormatter("<span class=\"highlight\">", "</span>");
SpanScorer sc = new SpanScorer(parsedQuery, "contents", tokenStream,"contents");
Highlighter highlighter = new Highlighter(formatter, sc);
highlighter.setTextFragmenter(new SimpleFragmenter(text.length()));//(new SimpleSpanFragmenter(sc));
tokenStream.reset();
String rv = highlighter.getBestFragments(tokenStream, text, text.length(),
"...");
String rv1=highlighter.getBestFragment(tokenStream,text);
FileWriter fwriter = new FileWriter("/root/Desktop/demo"); fwriter.write("<html>"); fwriter.write("<style>\n" +
".highlight {\n" +
" background: orange;\n" + "}\n" + "</style>"); fwriter.write("<body>"); fwriter.write(rv); fwriter.write("</body></html>"); fwriter.close();
System.out.println(rv1);
}
public String convertXMLFileToString(String fileName)
{
try{
DocumentBuilderFactory documentBuilderFactory = DocumentBuilderFactory.newInstance();
InputStream inputStream = new FileInputStream(new File(fileName));
org.w3c.dom.Document doc = documentBuilderFactory.newDocumentBuilder().parse(inputStream);
StringWriter stw = new StringWriter();
Transformer serializer = TransformerFactory.newInstance().newTransformer();
serializer.transform(new DOMSource(doc), new StreamResult(stw));
return stw.toString();
}
catch (Exception e) {
e.printStackTrace();
}
return null;
}
public static void main(String[] args) {
System.out.println("Starting...");
try {
new PhraseTestFile();
} catch(Exception ex) {
ex.printStackTrace();
}
}
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]