Hi! Erik,
Yes, HighlightIt.java and HighlightTest.java work. I did attach the
file. Anyway, here is the source:
1 <HTML><!-- -*-java-*- -->
2 <!-- Lucene Search Demo via CompiledPageServlet -->
3 <!-- Copyright (c) 1998,2000 Douglass R. Cutting. -->
4
5 <java type=import>
6 javax.servlet.*
7 javax.servlet.http.*
8 java.io.StringWriter
9 java.io.StringReader
10 java.io.IOException
11 org.apache.lucene.analysis.*
12 org.apache.lucene.document.*
13 org.apache.lucene.index.*
14 org.apache.lucene.search.*
15 org.apache.lucene.queryParser.*
16 org.apache.lucene.demo.*
17 org.apache.lucene.demo.html.Entities
18 org.apache.lucene.search.highlight.*
19 org.apache.lucene.analysis.standard.StandardAnalyzer
20 org.apache.lucene.search.highlight.Formatter
21 org.apache.lucene.search.highlight.Highlighter
22 org.apache.lucene.search.highlight.QueryScorer
23 org.apache.lucene.search.highlight.SimpleFragmenter
24 </java>
25
26 <java>
27 // get index from request
28 String indexName = request.getParameter("index");
29 if (indexName == null) // default to "index"
30 indexName = "/opt/dynamo/prod/hww-doc/hww/help/index";
31 IndexReader ireader = getReader(indexName);
32
33 Searcher searcher = new IndexSearcher(ireader);
34
35 // get query from request
36 String queryString = request.getParameter("query");
37 if (queryString == null)
38 throw new ServletException("no query specified");
39
40 int start = 0; // first hit to
display
41 String startString = request.getParameter("start");
42 if (startString != null)
43 start = Integer.parseInt(startString);
44
45 int hitsPerPage = 10; // number of hits to
display
46 String hitsString = request.getParameter("hitsPerPage");
47 if (hitsString != null)
48 hitsPerPage = Integer.parseInt(hitsString);
49
50 boolean showSummaries = true; // show summaries?
51 if ("false".equals(request.getParameter("showSummaries")))
52 showSummaries = false;
53
54 Query query = null;
55 try { // parse query
56 query = QueryParser.parse(queryString, "contents", analyzer);
57 } catch (ParseException e) { // error parsing query
58 </java>
59 <HEAD><TITLE>Error Parsing Query</TITLE></HEAD><BODY>
60 <p>While parsing `queryString`: `e.getMessage()`
61 <java>
62 return;
63 }
64
65 String servletPath = request.getRequestURI(); // getServletPath
should work
66 int j = servletPath.indexOf('?'); // here but doesn't,
so we
67 if (j != -1) // remove query by
hand...
68 servletPath = servletPath.substring(0, j);
69
70 </java>
71
72 <head><title>Lucene Search Results</title></head><body>
73
74 <center>
75 <form name=search action=`servletPath` method=get>
76 <input name=query size=44 value='`queryString`'>
77 <input type=hidden name=index value="`indexName`">
78 <input type=hidden name=hitsPerPage value=`hitsPerPage`>
79 <input type=hidden name=showSummaries value=`showSummaries`>
80 <input type=submit value=Search>
81 </form>
82 </center>
83 <java>
84 Hits hits = searcher.search(query); // perform query
85 int end = Math.min(hits.length(), start + hitsPerPage);
86 </java>
87
88
89
90 <p>Hits <b><java type=print>start+1</java>-<java
type=print>end</java></b>
91 (out of <java type=print>hits.length()</java> total matching documents):
92
93 <ul>
94 <java>
95 SimpleHTMLFormatter formatter =
96 new SimpleHTMLFormatter();
97 QueryScorer scorer = new QueryScorer(query);
98 query.rewrite(ireader);
99 Highlighter highlighter = new Highlighter(formatter, scorer);
100 highlighter.setTextFragmenter(new SimpleFragmenter(50));
101 String FIELD_NAME = "contents";
102 String result = "not specific";
103 String text;
104 for (int i = start; i < end; i++) { // display the hits
105 Document doc = hits.doc(i);
106 text = hits.doc(i).get(FIELD_NAME);
107 int maxNumFragmentsRequired = 5;
108 String fragmentSeparator = "...";
109 if ( text != null){
110 TokenStream tokenStream = new
StandardAnalyzer().tokenStream(FIELD_NAME, new java.io.StringReader(text));
111 result =
highlighter.getBestFragments(tokenStream,text,maxNumFragmentsRequired,fragmentSeparator);
112 System.out.println("text" + text + "start" + start + "end" +end +
"i" +i +"result=" +result);
113 }
114
115 String title = doc.get("title");
116 if (title.equals("")) // use url for docs
w/o title
117 title = doc.get("path");
118 </java>
119 <p><b><java type=print>(int)(hits.score(i) * 100.0f)</java>%
120 <a href="`doc.get("path")`">
121 <java type=print>Entities.encode(title)</java>
122 </b></a>
123 <hr>
124 <ul><i>Summary</i>:</ul>
125 <java>
126 if (text != null) { // maybe show summary
127 </java>
128 <java type=print>result</java>
129 }
130 elseif (showSummaries)
131 {
132 </java>
133 <java type=print>Entities.encode(doc.get("summary"))</java>
134 <java>
135 }
136 }
137 </java>
138 </ul>
139
140 <java>
141 if (end < hits.length()) { // insert next page
button
142 </java>
143 <center>
144 <form name=search action=`servletPath` method=get>
145 <input type=hidden name=query value='`queryString`'>
146 <input type=hidden name=start value=`end`>
147 <input type=hidden name=index value="`indexName`">
148 <input type=hidden name=hitsPerPage value=`hitsPerPage`>
149 <input type=hidden name=showSummaries value=`showSummaries`>
150 <input type=submit value=Next>
151 </form>
152 </center>
153 <java>
154 }
155 </java>
156
157 </body>
158
159 <java type=class>
160
161 Analyzer analyzer = new StopAnalyzer(); // used to tokenize
queries
162
163 /** Keep a cache of open IndexReader's, so that an index does not
have to
164 opened for each query. The cache re-opens an index when it has
changed
165 so that additions and deletions are visible ASAP. */
166
167 static Hashtable indexCache = new Hashtable(); // name->CachedIndex
168
169 class CachedIndex { // an entry in the
cache
170 IndexReader reader; // an open reader
171 long modified; // reader's modified
date
172
173 CachedIndex(String name) throws IOException {
174 modified = IndexReader.lastModified(name); // get modified date
175 reader = IndexReader.open(name); // open reader
176 }
177 }
178
179 IndexReader getReader(String name) throws ServletException {
180 CachedIndex index = // look in cache
181 (CachedIndex)indexCache.get(name);
182
183 try {
184 if (index != null && // check up-to-date
185 (index.modified == IndexReader.lastModified(name)))
186 return index.reader; // cache hit
187 else {
188 index = new CachedIndex(name); // cache miss
189 }
190 } catch (IOException e) {
191 StringWriter writer = new StringWriter();
192 PrintWriter pw = new PrintWriter(writer);
193 throw new ServletException("Could not open index " + name + ": " +
194 e.getClass().getName() + "--" +
195 e.getMessage());
196 }
197
198 indexCache.put(name, index); // add to cache
199 return index.reader;
200 }
201 </java>
-----Original Message-----
From: Erik Hatcher [mailto:[EMAIL PROTECTED]
Sent: Wednesday, April 06, 2005 6:45 PM
To: [email protected]
Subject: Re: HTML pages highlighter
What file do those line numbers correspond to? I'm lost.
Did the Lucene in Action highlighting code work for you?
Erik
On Apr 6, 2005, at 6:16 PM, Yagnesh Shah wrote:
> Hi! Erik,
> Yes basic seems to be working.
> a) My problem is that there is a chance that the query is not present in
> the stored content of a file, so sometimes I am getting empty strings at
> line#106, so I had to put a special check at line#109 and line#126. I
> guess this is not a problem. What do you think?
> b) When I click on a doc path that was generated by line#120 and
> line#121, the files that it opens do not have the searched query
> highlighted. Any suggestions for this? How can I do it?
>
>
> -----Original Message-----
> From: Erik Hatcher [mailto:[EMAIL PROTECTED]
> Sent: Monday, April 04, 2005 8:45 PM
> To: [email protected]
> Subject: Re: HTML pages highlighter
>
>
> On Apr 4, 2005, at 5:35 PM, Yagnesh Shah wrote:
>> I ended up purchasing your book "Lucene in Action". I have downloaded
>> your code samples. I am able to retrieve "result" only some of the time.
>> Below is the code I have taken from Search.jhtml in the Lucene demo. I
>> have 2 problems:
>>
>> a) I am unable to display "result" using
>> b) When I click on the title to retrieve document I do not see my
>> query highlighted.
>
> First things first.... get something very very simple working and
> expand from there. Here is the simple code from our HighlightIt.java:
>
> TermQuery query = new TermQuery(new Term("f", "ipsum"));
> QueryScorer scorer = new QueryScorer(query);
> SimpleHTMLFormatter formatter =
> new SimpleHTMLFormatter("<span class=\"highlight\">",
> "</span>");
> Highlighter highlighter = new Highlighter(formatter, scorer);
> Fragmenter fragmenter = new SimpleFragmenter(50);
> highlighter.setTextFragmenter(fragmenter);
>
> TokenStream tokenStream = new StandardAnalyzer()
> .tokenStream("f", new StringReader(text));
>
> String result =
> highlighter.getBestFragments(tokenStream, text, 5, "...");
>
> One trick is that you must ensure the query you are passing to
> QueryScorer has been rewritten. In our simple TermQuery case, that is
> not necessary, but in a general application it is. You can call
> query.rewrite(reader) where reader is your IndexReader instance. This
> ensures that range, fuzzy, and wildcard queries are expanded and
> highlightable.
>
> I'm not sure what is wrong with the code you are trying. But again,
> start simple, just try out our HighlightIt or our HighlightTest. If
> those work fine for you then move on to integrating further with your
> index. Besides the Query.rewrite() trick, you have to be sure that the
> text you want to highlight is available. If you're pulling it from the
> index, it must be in a stored field, otherwise you need to retrieve it
> from elsewhere.
>
> Erik
>
>
>>
>> <java>
>>
>> Searcher searcher = new IndexSearcher(getReader(indexName));
>>
>> // get query from request
>> String queryString = request.getParameter("query");
>>
>> query = QueryParser.parse(queryString, "contents", analyzer);
>> Hits hits = searcher.search(query);
>> SimpleHTMLFormatter formatter =
>> new SimpleHTMLFormatter();
>> Highlighter highlighter = new Highlighter(formatter, new
>> QueryScorer(query));
>> highlighter.setTextFragmenter(new SimpleFragmenter(50));
>> String FIELD_NAME = "contents";
>>
>> for (int i = start; i < end; i++) { // display the hits
>> Document doc = hits.doc(i);
>> String text = hits.doc(i).get(FIELD_NAME);
>> int maxNumFragmentsRequired = 5;
>> String fragmentSeparator = "...";
>> if ( text != null){
>> TokenStream tokenStream = new
>> StandardAnalyzer().tokenStream(FIELD_NAME, new
>> java.io.StringReader(text));
>> String result =
>> highlighter.getBestFragments
>> (tokenStream,text,maxNumFragmentsRequired,fragmentSeparator);
>> System.out.println("result=" +result);
>> }
>>
>> String title = doc.get("title");
>> if (title.equals("")) // use url for docs
>> w/o title
>> title = doc.get("path");
>> </java>
>> <p><b><java type=print>(int)(hits.score(i) * 100.0f)</java>%
>> <a href="`doc.get("path")`">
>> <java type=print>Entities.encode(title)</java>
>> </b></a>
>> <java>
>> if (showSummaries) { // maybe show
>> summary
>> </java>
>> <ul><i>Summary</i>:
>> <java type=print>Entities.encode(doc.get("summary"))</java>
>> </ul>
>> <java>
>> }
>> }
>> </java>
>>
>>
>>
>> -----Original Message-----
>> From: Erik Hatcher [mailto:[EMAIL PROTECTED]
>> Sent: Thursday, March 31, 2005 8:04 PM
>> To: [email protected]
>> Subject: Re: HTML pages highlighter
>>
>>
>>
>> On Mar 31, 2005, at 6:36 PM, Yagnesh Shah wrote:
>>> try {
>>> fis = new FileInputStream(f);
>>> HTMLParser parser = new HTMLParser(fis);
>>>
>>> // Add the tag-stripped contents as a Reader-valued Text field
>>> so it will
>>> // get tokenized and indexed.
>>> // doc.add(new Field("contents", parser.getReader()));
>>> LineNumberReader reader = new
>>> LineNumberReader(parser.getReader());
>>> for (String l = reader.readLine(); l != null; l =
>>> reader.readLine())
>>> // System.out.println(l);
>>> doc.add(Field.Text("contents", l));
>>
>> Notice that your loop here is adding a "contents" field for *every*
>> line read since that is where the first semi-colon is.
>>
>> Look at using Luke to explore your index. Try indexing just a dummy
>> String:
>>
>> doc.add(Field.Text("contents", "some dummy text"));
>>
>> to show that it works. Always always always simplify a complicated
>> situation by doing the most obvious thing that _should_ work.
>>
>> Also, the demo Lucene code is not really designed to be used in a
>> production application (sadly), so you're better off borrowing code
>> from the many articles or our book to begin with.
>>
>> Erik
>>
>>
>>>
>>> // Add the summary as a field that is stored and returned with
>>> // hit documents for display.
>>> doc.add(new Field("summary", parser.getSummary(),
>>> Field.Store.YES, Field.Index.NO));
>>>
>>> // Add the title as a field that it can be searched and that is
>>> stored.
>>> doc.add(new Field("title", parser.getTitle(), Field.Store.YES,
>>> Field.Index.TOKENIZED));
>>> }
>>>
>>>
>>>
>>> -----Original Message-----
>>> From: Erik Hatcher [mailto:[EMAIL PROTECTED]
>>> Sent: Wednesday, March 30, 2005 7:38 PM
>>> To: [email protected]
>>> Subject: Re: HTML pages highlighter
>>>
>>>
>>>
>>> On Mar 30, 2005, at 4:46 PM, Yagnesh Shah wrote:
>>>
>>>> Hi! Eric,
>>>
>>> Erik - with a 'k' - Sorry, I let it slide once though :)
>>>
>>>> I tried to modify that with this but I get a compile error. Do you
>>>> have
>>>> any code snippet of highlighting code to pull the contents from the
>>>> original source?
>>>
>>> I have a whole book full of code examples :)
>>> http://www.lucenebook.com - Grab the source code and look in
>>> src/lia/tools at Highlight*.java
>>>
>>>> Or do you know how I can store the field?
>>>>
>>>> doc.add(new Field("contents", parser.getReader(),
>>>> Field.Store.YES, Field.Index.NO));
>>>
>>> You cannot store it with a Reader. You need to use
>>> Field.Text(String,
>>> String), or one of the other variations.
>>>
>>> Erik
>>>
>>>
>>> ---------------------------------------------------------------------
>>> To unsubscribe, e-mail: [EMAIL PROTECTED]
>>> For additional commands, e-mail: [EMAIL PROTECTED]
>>>
>>>
>>> ---------------------------------------------------------------------
>>> To unsubscribe, e-mail: [EMAIL PROTECTED]
>>> For additional commands, e-mail: [EMAIL PROTECTED]
>>
>>
>> ---------------------------------------------------------------------
>> To unsubscribe, e-mail: [EMAIL PROTECTED]
>> For additional commands, e-mail: [EMAIL PROTECTED]
>>
>>
>> ---------------------------------------------------------------------
>> To unsubscribe, e-mail: [EMAIL PROTECTED]
>> For additional commands, e-mail: [EMAIL PROTECTED]
>
>
> ---------------------------------------------------------------------
> To unsubscribe, e-mail: [EMAIL PROTECTED]
> For additional commands, e-mail: [EMAIL PROTECTED]
>
>
> ---------------------------------------------------------------------
> To unsubscribe, e-mail: [EMAIL PROTECTED]
> For additional commands, e-mail: [EMAIL PROTECTED]
---------------------------------------------------------------------
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]
---------------------------------------------------------------------
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]