cutting 01/10/19 10:15:19 Modified: . build.properties build.xml src/java overview.html src/test/org/apache/lucene HighFreqTerms.java Log: Added source code into distribution. Revision Changes Path 1.11 +1 -1 jakarta-lucene/build.properties Index: build.properties =================================================================== RCS file: /home/cvs/jakarta-lucene/build.properties,v retrieving revision 1.10 retrieving revision 1.11 diff -u -r1.10 -r1.11 --- build.properties 2001/10/10 16:36:11 1.10 +++ build.properties 2001/10/19 17:15:18 1.11 @@ -3,7 +3,7 @@ # --------------------------------------------------------- name=lucene Name=Lucene -version=1.2-rc1 +version=1.2-dev year=2000-2001 final.name=${name}-${version} debug=off 1.13 +10 -3 jakarta-lucene/build.xml Index: build.xml =================================================================== RCS file: /home/cvs/jakarta-lucene/build.xml,v retrieving revision 1.12 retrieving revision 1.13 diff -u -r1.12 -r1.13 --- build.xml 2001/10/10 16:36:11 1.12 +++ build.xml 2001/10/19 17:15:18 1.13 @@ -299,8 +299,8 @@ <mkdir dir="${dist.dir}/docs"/> <mkdir dir="${dist.dir}/docs/api"/> <mkdir dir="${dist.dir}/demo"/> - <mkdir dir="${dist.dir}/demo/src"/> <mkdir dir="${dist.dir}/demo/classes"/> + <mkdir dir="${dist.dir}/src"/> <copy todir="${dist.dir}/docs"> <fileset dir="${docs.dir}"/> @@ -311,9 +311,16 @@ <copy todir="${dist.dir}/demo/classes"> <fileset dir="${build.demo.classes}"/> </copy> - <copy todir="${dist.dir}/demo/src"> - <fileset dir="${build.demo.src}"/> + + <copy todir="${dist.dir}/src"> + <fileset dir="src"/> + </copy> + <copy todir="${dist.dir}/lib"> + <fileset dir="lib"/> </copy> + <copy todir="${dist.dir}/" file="build.xml"/> + <copy todir="${dist.dir}/" file="build.properties"/> + <copy file="${build.dir}/${final.name}.jar" todir="${dist.dir}"/> </target> 1.3 +168 -168 jakarta-lucene/src/java/overview.html Index: overview.html =================================================================== RCS file: /home/cvs/jakarta-lucene/src/java/overview.html,v retrieving revision 1.2 retrieving revision 1.3 diff -u -r1.2 -r1.3 --- overview.html 2001/10/02 18:31:22 1.2 +++ overview.html 2001/10/19 17:15:19 1.3 @@ -1,168 +1,168 @@ -<html> -<head> - <title>Jakarta Lucene API</title> -</head> -<body> - -<h1>Jakarta Lucene API</h1> -The Jakarta Lucene API is divided into several packages: -<ul> -<li> -<b><a href="org/apache/lucene/util/package-summary.html">com.lucene.util</a></b> -contains a few handy data structures, e.g., <a href="org/apache/lucene/util/BitVector.html">BitVector</a> -and <a href="org/apache/lucene/util/PriorityQueue.html">PriorityQueue</a>.</li> - -<li> -<b><a href="org/apache/lucene/store/package-summary.html">com.lucene.store</a></b> -defines an abstract class for storing persistent data, the <a href="org/apache/lucene/store/Directory.html">Directory</a>, -a collection of named files written by an <a href="org/apache/lucene/store/OutputStream.html">OutputStream</a> -and read by an <a href="org/apache/lucene/store/InputStream.html">InputStream</a>. -Two implementations are provided, <a href="org/apache/lucene/store/FSDirectory.html">FSDirectory</a>, -which uses a file system directory to store files, and <a href="org/apache/lucene/store/RAMDirectory.html">RAMDirectory</a> -which implements files as memory-resident data structures.</li> - -<li> -<b><a href="org/apache/lucene/document/package-summary.html">com.lucene.document</a></b> -provides a simple <a href="org/apache/lucene/document/Document.html">Document</a> -class. A document is simply a set of named <a href="org/apache/lucene/document/Field.html">Field</a>'s, -whose values may be strings or instances of <a href="http://java.sun.com/products/jdk/1.2/docs/api/java/io/Reader.html">java.io.Reader</a>.</li> - -<li> -<b><a href="org/apache/lucene/analysis/package-summary.html">com.lucene.analysis</a></b> -defines an abstract <a href="org/apache/lucene/analysis/Analyzer.html">Analyzer</a> -API for converting text from a <a href="http://java.sun.com/products/jdk/1.2/docs/api/java/io/Reader.html">java.io.Reader</a> -into a <a href="org/apache/lucene/analysis/TokenStream.html">TokenStream</a>, -an enumeration of <a href="org/apache/lucene/analysis/Token.html">Token</a>'s. -A TokenStream is composed by applying <a href="org/apache/lucene/analysis/TokenFilter.html">TokenFilter</a>'s -to the output of a <a href="org/apache/lucene/analysis/Tokenizer.html">Tokenizer</a>. -A few simple implemenations are provided, including <a href="org/apache/lucene/analysis/StopAnalyzer.html">StopAnalyzer</a> -and the grammar-based <a href="org/apache/lucene/analysis/standard/StandardAnalyzer.html">StandardAnalyzer</a>.</li> - -<li> -<b><a href="org/apache/lucene/index/package-summary.html">com.lucene.index</a></b> -provides two primary classes: <a href="org/apache/lucene/index/IndexWriter.html">IndexWriter</a>, -which creates and adds documents to indices; and <a href="org/apache/lucene/index/IndexReader.html">IndexReader</a>, -which accesses the data in the index.</li> - -<li> -<b><a href="org/apache/lucene/search/package-summary.html">com.lucene.search</a></b> -provides data structures to represent queries (<a href="org/apache/lucene/search/TermQuery.html">TermQuery</a> -for individual words, <a href="org/apache/lucene/search/PhraseQuery.html">PhraseQuery</a> -for phrases, and <a href="org/apache/lucene/search/BooleanQuery.html">BooleanQuery</a> -for boolean combinations of queries) and the abstract <a href="org/apache/lucene/search/Searcher.html">Searcher</a> -which turns queries into <a href="org/apache/lucene/search/Hits.html">Hits</a>. -<a href="org/apache/lucene/search/IndexSearcher.html">IndexSearcher</a> -implements search over a single IndexReader.</li> - -<li> -<b><a href="org/apache/lucene/queryParser/package-summary.html">com.lucene.queryParser</a></b> -uses <a href="http://www.suntest.com/JavaCC/">JavaCC</a> to implement a -<a href="org/apache/lucene/queryParser/QueryParser.html">QueryParser</a>.</li> -</ul> -To use Lucene, an application should: -<ol> -<li> -Create <a href="org/apache/lucene/document/Document.html">Document</a>'s by -adding -<a href="org/apache/lucene/document/Field.html">Field</a>'s.</li> - -<li> -Create an <a href="org/apache/lucene/index/IndexWriter.html">IndexWriter</a> -and add documents to to it with <a href="org/apache/lucene/index/IndexWriter.html#addDocument(com.lucene.document.Document)">addDocument()</a>;</li> - -<li> -Call <a href="org/apache/lucene/queryParser/QueryParser.html#parse(java.lang.String)">QueryParser.parse()</a> -to build a query from a string; and</li> - -<li> -Create an <a href="org/apache/lucene/search/IndexSearcher.html">IndexSearcher</a> -and pass the query to it's <a href="org/apache/lucene/search/Searcher.html#search(com.lucene.search.Query)">search()</a> -method.</li> -</ol> -Some simple examples of code which does this are: -<ul> -<li> - <a href="../../demo/src/org/apache/lucene/FileDocument.java">FileDocument.java</a> contains -code to create a Document for a file.</li> - -<li> - <a href="../../demo/src/org/apache/lucene/IndexFiles.java">IndexFiles.java</a> creates an -index for all the files contained in a directory.</li> - -<li> - <a href="../../demo/src/org/apache/lucene/DeleteFiles.java">DeleteFiles.java</a> deletes some -of these files from the index.</li> - -<li> - <a href="../../demo/src/org/apache/lucene/SearchFiles.java">SearchFiles.java</a> prompts for -queries and searches an index.</li> -</ul> -To demonstrate these, try something like: -<blockquote><tt>> <b>java -cp lucene.jar:demo/classes org.apache.lucene.IndexFiles rec.food.recipes/soups</b></tt> -<br><tt>adding rec.food.recipes/soups/abalone-chowder</tt> -<br><tt> </tt>[ ... ] -<p><tt>> <b>java -cp lucene.jar:demo/classes org.apache.lucene.IndexFilesSearchFiles</b></tt> -<br><tt>Query: <b>chowder</b></tt> -<br><tt>Searching for: chowder</tt> -<br><tt>34 total matching documents</tt> -<br><tt>0. rec.food.recipes/soups/spam-chowder</tt> -<br><tt> </tt>[ ... thirty-four documents contain the word "chowder", -"spam-chowder" with the greatest density.] -<p><tt>Query: <b>path:chowder</b></tt> -<br><tt>Searching for: path:chowder</tt> -<br><tt>31 total matching documents</tt> -<br><tt>0. rec.food.recipes/soups/abalone-chowder</tt> -<br><tt> </tt>[ ... only thrity-one have "chowder" in the "path" -field. ] -<p><tt>Query: <b>path:"clam chowder"</b></tt> -<br><tt>Searching for: path:"clam chowder"</tt> -<br><tt>10 total matching documents</tt> -<br><tt>0. rec.food.recipes/soups/clam-chowder</tt> -<br><tt> </tt>[ ... only ten have "clam chowder" in the "path" field. -] -<p><tt>Query: <b>path:"clam chowder" AND manhattan</b></tt> -<br><tt>Searching for: +path:"clam chowder" +manhattan</tt> -<br><tt>2 total matching documents</tt> -<br><tt>0. rec.food.recipes/soups/clam-chowder</tt> -<br><tt> </tt>[ ... only two also have "manhattan" in the contents. -] -<br> [ Note: "+" and "-" are canonical, but "AND", "OR" -and "NOT" may be used. ]</blockquote> -The <a href="../../demo/src/org/apache/lucene/IndexHTML.java">IndexHtml</a> demo is more sophisticated. -It incrementally maintains an index of HTML files, adding new files as -they appear, deleting old files as they disappear and re-indexing files -as they change. -<blockquote><tt>> <b>java -cp lucene.jar:demo/classes org.apache.lucene.IndexFilesIndexHTML -create java/jdk1.1.6/docs/relnotes</b></tt> -<br><tt>adding java/jdk1.1.6/docs/relnotes/SMICopyright.html</tt> -<br><tt> </tt>[ ... create an index containing all the relnotes ] -<p><tt>> <b>rm java/jdk1.1.6/docs/relnotes/smicopyright.html</b></tt> -<p><tt>> <b>java -cp lucene.jar:demo/classes org.apache.lucene.IndexFilesIndexHTML java/jdk1.1.6/docs/relnotes</b></tt> -<br><tt>deleting java/jdk1.1.6/docs/relnotes/SMICopyright.html</tt></blockquote> -HTML indexes are searched using SUN's <a href="http://jserv.javasoft.com/products/webserver/index.html">JavaWebServer</a> -(JWS) and <a href="../../demo/src/org/apache/lucene/Search.jhtml">Search.jhtml</a>. To use -this: -<ul> -<li> -copy <tt>Search.html</tt> and <tt>Search.jhtml</tt> to JWS's <tt>public_html</tt> -directory;</li> - -<li> -copy lucene.jar to JWS's lib directory;</li> - -<li> -create and maintain your indexes with demo.IndexHTML in JWS's top-level -directory;</li> - -<li> -launch JWS, with the <tt>demo</tt> directory on CLASSPATH (only one class -is actually needed);</li> - -<li> -visit <a href="../../demo/src/org/apache/lucene/Search.html">Search.html</a>.</li> -</ul> -Note that indexes can be updated while searches are going on. <tt>Search.jhtml</tt> -will re-open the index when it is updated so that the latest version is -immediately available. -<br> -</body> -</html> +<html> +<head> + <title>Jakarta Lucene API</title> +</head> +<body> + +<h1>Jakarta Lucene API</h1> +The Jakarta Lucene API is divided into several packages: +<ul> +<li> +<b><a href="org/apache/lucene/util/package-summary.html">com.lucene.util</a></b> +contains a few handy data structures, e.g., <a href="org/apache/lucene/util/BitVector.html">BitVector</a> +and <a href="org/apache/lucene/util/PriorityQueue.html">PriorityQueue</a>.</li> + +<li> +<b><a href="org/apache/lucene/store/package-summary.html">com.lucene.store</a></b> +defines an abstract class for storing persistent data, the <a href="org/apache/lucene/store/Directory.html">Directory</a>, +a collection of named files written by an <a href="org/apache/lucene/store/OutputStream.html">OutputStream</a> +and read by an <a href="org/apache/lucene/store/InputStream.html">InputStream</a>. +Two implementations are provided, <a href="org/apache/lucene/store/FSDirectory.html">FSDirectory</a>, +which uses a file system directory to store files, and <a href="org/apache/lucene/store/RAMDirectory.html">RAMDirectory</a> +which implements files as memory-resident data structures.</li> + +<li> +<b><a href="org/apache/lucene/document/package-summary.html">com.lucene.document</a></b> +provides a simple <a href="org/apache/lucene/document/Document.html">Document</a> +class. A document is simply a set of named <a href="org/apache/lucene/document/Field.html">Field</a>'s, +whose values may be strings or instances of <a href="http://java.sun.com/products/jdk/1.2/docs/api/java/io/Reader.html">java.io.Reader</a>.</li> + +<li> +<b><a href="org/apache/lucene/analysis/package-summary.html">com.lucene.analysis</a></b> +defines an abstract <a href="org/apache/lucene/analysis/Analyzer.html">Analyzer</a> +API for converting text from a <a href="http://java.sun.com/products/jdk/1.2/docs/api/java/io/Reader.html">java.io.Reader</a> +into a <a href="org/apache/lucene/analysis/TokenStream.html">TokenStream</a>, +an enumeration of <a href="org/apache/lucene/analysis/Token.html">Token</a>'s. +A TokenStream is composed by applying <a href="org/apache/lucene/analysis/TokenFilter.html">TokenFilter</a>'s +to the output of a <a href="org/apache/lucene/analysis/Tokenizer.html">Tokenizer</a>. +A few simple implemenations are provided, including <a href="org/apache/lucene/analysis/StopAnalyzer.html">StopAnalyzer</a> +and the grammar-based <a href="org/apache/lucene/analysis/standard/StandardAnalyzer.html">StandardAnalyzer</a>.</li> + +<li> +<b><a href="org/apache/lucene/index/package-summary.html">com.lucene.index</a></b> +provides two primary classes: <a href="org/apache/lucene/index/IndexWriter.html">IndexWriter</a>, +which creates and adds documents to indices; and <a href="org/apache/lucene/index/IndexReader.html">IndexReader</a>, +which accesses the data in the index.</li> + +<li> +<b><a href="org/apache/lucene/search/package-summary.html">com.lucene.search</a></b> +provides data structures to represent queries (<a href="org/apache/lucene/search/TermQuery.html">TermQuery</a> +for individual words, <a href="org/apache/lucene/search/PhraseQuery.html">PhraseQuery</a> +for phrases, and <a href="org/apache/lucene/search/BooleanQuery.html">BooleanQuery</a> +for boolean combinations of queries) and the abstract <a href="org/apache/lucene/search/Searcher.html">Searcher</a> +which turns queries into <a href="org/apache/lucene/search/Hits.html">Hits</a>. +<a href="org/apache/lucene/search/IndexSearcher.html">IndexSearcher</a> +implements search over a single IndexReader.</li> + +<li> +<b><a href="org/apache/lucene/queryParser/package-summary.html">com.lucene.queryParser</a></b> +uses <a href="http://www.suntest.com/JavaCC/">JavaCC</a> to implement a +<a href="org/apache/lucene/queryParser/QueryParser.html">QueryParser</a>.</li> +</ul> +To use Lucene, an application should: +<ol> +<li> +Create <a href="org/apache/lucene/document/Document.html">Document</a>'s by +adding +<a href="org/apache/lucene/document/Field.html">Field</a>'s.</li> + +<li> +Create an <a href="org/apache/lucene/index/IndexWriter.html">IndexWriter</a> +and add documents to to it with <a href="org/apache/lucene/index/IndexWriter.html#addDocument(com.lucene.document.Document)">addDocument()</a>;</li> + +<li> +Call <a href="org/apache/lucene/queryParser/QueryParser.html#parse(java.lang.String)">QueryParser.parse()</a> +to build a query from a string; and</li> + +<li> +Create an <a href="org/apache/lucene/search/IndexSearcher.html">IndexSearcher</a> +and pass the query to it's <a href="org/apache/lucene/search/Searcher.html#search(com.lucene.search.Query)">search()</a> +method.</li> +</ol> +Some simple examples of code which does this are: +<ul> +<li> + <a href="../../src/demo/org/apache/lucene/FileDocument.java">FileDocument.java</a> contains +code to create a Document for a file.</li> + +<li> + <a href="../../src/demo/org/apache/lucene/IndexFiles.java">IndexFiles.java</a> creates an +index for all the files contained in a directory.</li> + +<li> + <a href="../../src/demo/org/apache/lucene/DeleteFiles.java">DeleteFiles.java</a> deletes some +of these files from the index.</li> + +<li> + <a href="../../src/demo/org/apache/lucene/SearchFiles.java">SearchFiles.java</a> prompts for +queries and searches an index.</li> +</ul> +To demonstrate these, try something like: +<blockquote><tt>> <b>java -cp lucene.jar:demo/classes org.apache.lucene.IndexFiles rec.food.recipes/soups</b></tt> +<br><tt>adding rec.food.recipes/soups/abalone-chowder</tt> +<br><tt> </tt>[ ... ] +<p><tt>> <b>java -cp lucene.jar:demo/classes org.apache.lucene.IndexFilesSearchFiles</b></tt> +<br><tt>Query: <b>chowder</b></tt> +<br><tt>Searching for: chowder</tt> +<br><tt>34 total matching documents</tt> +<br><tt>0. rec.food.recipes/soups/spam-chowder</tt> +<br><tt> </tt>[ ... thirty-four documents contain the word "chowder", +"spam-chowder" with the greatest density.] +<p><tt>Query: <b>path:chowder</b></tt> +<br><tt>Searching for: path:chowder</tt> +<br><tt>31 total matching documents</tt> +<br><tt>0. rec.food.recipes/soups/abalone-chowder</tt> +<br><tt> </tt>[ ... only thrity-one have "chowder" in the "path" +field. ] +<p><tt>Query: <b>path:"clam chowder"</b></tt> +<br><tt>Searching for: path:"clam chowder"</tt> +<br><tt>10 total matching documents</tt> +<br><tt>0. rec.food.recipes/soups/clam-chowder</tt> +<br><tt> </tt>[ ... only ten have "clam chowder" in the "path" field. +] +<p><tt>Query: <b>path:"clam chowder" AND manhattan</b></tt> +<br><tt>Searching for: +path:"clam chowder" +manhattan</tt> +<br><tt>2 total matching documents</tt> +<br><tt>0. rec.food.recipes/soups/clam-chowder</tt> +<br><tt> </tt>[ ... only two also have "manhattan" in the contents. +] +<br> [ Note: "+" and "-" are canonical, but "AND", "OR" +and "NOT" may be used. ]</blockquote> +The <a href="../../src/demo/org/apache/lucene/IndexHTML.java">IndexHtml</a> demo is more sophisticated. +It incrementally maintains an index of HTML files, adding new files as +they appear, deleting old files as they disappear and re-indexing files +as they change. +<blockquote><tt>> <b>java -cp lucene.jar:demo/classes org.apache.lucene.IndexFilesIndexHTML -create java/jdk1.1.6/docs/relnotes</b></tt> +<br><tt>adding java/jdk1.1.6/docs/relnotes/SMICopyright.html</tt> +<br><tt> </tt>[ ... create an index containing all the relnotes ] +<p><tt>> <b>rm java/jdk1.1.6/docs/relnotes/smicopyright.html</b></tt> +<p><tt>> <b>java -cp lucene.jar:demo/classes org.apache.lucene.IndexFilesIndexHTML java/jdk1.1.6/docs/relnotes</b></tt> +<br><tt>deleting java/jdk1.1.6/docs/relnotes/SMICopyright.html</tt></blockquote> +HTML indexes are searched using SUN's <a href="http://jserv.javasoft.com/products/webserver/index.html">JavaWebServer</a> +(JWS) and <a href="../../src/demo/org/apache/lucene/Search.jhtml">Search.jhtml</a>. To use +this: +<ul> +<li> +copy <tt>Search.html</tt> and <tt>Search.jhtml</tt> to JWS's <tt>public_html</tt> +directory;</li> + +<li> +copy lucene.jar to JWS's lib directory;</li> + +<li> +create and maintain your indexes with demo.IndexHTML in JWS's top-level +directory;</li> + +<li> +launch JWS, with the <tt>demo</tt> directory on CLASSPATH (only one class +is actually needed);</li> + +<li> +visit <a href="../../src/demo/org/apache/lucene/Search.html">Search.html</a>.</li> +</ul> +Note that indexes can be updated while searches are going on. <tt>Search.jhtml</tt> +will re-open the index when it is updated so that the latest version is +immediately available. +<br> +</body> +</html> 1.3 +18 -25 jakarta-lucene/src/test/org/apache/lucene/HighFreqTerms.java Index: HighFreqTerms.java =================================================================== RCS file: /home/cvs/jakarta-lucene/src/test/org/apache/lucene/HighFreqTerms.java,v retrieving revision 1.2 retrieving revision 1.3 diff -u -r1.2 -r1.3 --- HighFreqTerms.java 2001/09/18 17:35:57 1.2 +++ HighFreqTerms.java 2001/10/19 17:15:19 1.3 @@ -64,37 +64,30 @@ class HighFreqTerms { public static int numTerms = 100; - public static void main(String[] args) { - try { - Directory directory = new FSDirectory("demo index", false); - IndexReader reader = IndexReader.open(directory); + public static void main(String[] args) throws Exception { + IndexReader reader = IndexReader.open("index"); - TermInfoQueue tiq = new TermInfoQueue(numTerms); - TermEnum terms = reader.terms(); + TermInfoQueue tiq = new TermInfoQueue(numTerms); + TermEnum terms = reader.terms(); - int minFreq = 0; - while (terms.next()) { - if (terms.docFreq() > minFreq) { - tiq.put(new TermInfo(terms.term(), terms.docFreq())); - if (tiq.size() > numTerms) { // if tiq overfull - tiq.pop(); // remove lowest in tiq - minFreq = ((TermInfo)tiq.top()).docFreq; // reset minFreq - } - } + int minFreq = 0; + while (terms.next()) { + if (terms.docFreq() > minFreq) { + tiq.put(new TermInfo(terms.term(), terms.docFreq())); + if (tiq.size() > numTerms) { // if tiq overfull + tiq.pop(); // remove lowest in tiq + minFreq = ((TermInfo)tiq.top()).docFreq; // reset minFreq + } } + } - while (tiq.size() != 0) { - TermInfo termInfo = (TermInfo)tiq.pop(); - System.out.println(termInfo.term + " " + termInfo.docFreq); - } + while (tiq.size() != 0) { + TermInfo termInfo = (TermInfo)tiq.pop(); + System.out.println(termInfo.term + " " + termInfo.docFreq); + } - reader.close(); - directory.close(); + reader.close(); - } catch (Exception e) { - System.out.println(" caught a " + e.getClass() + - "\n with message: " + e.getMessage()); - } } }