demo DeleteFiles.java HTMLDocument.java SearchFiles.java IndexHTML.java FileDocument.java

dnaber Tue, 03 Aug 2004 14:49:58 -0700

dnaber      2004/08/03 14:49:24

  Modified:    src/demo/org/apache/lucene/demo DeleteFiles.java
                        HTMLDocument.java SearchFiles.java IndexHTML.java
                        FileDocument.java
  Log:
  Store the path as a Keyword field. Also rename HTMLDocument's "url" to "path"
  and store it as a Keyword field, too. DeleteFiles now takes such a path
  as its command line parameter and works on the index directory "index", like the
  other demos.
  
  This fixes bug http://issues.apache.org/bugzilla/show_bug.cgi?id=30330
  
  Revision  Changes    Path
  1.3       +18 -9     jakarta-lucene/src/demo/org/apache/lucene/demo/DeleteFiles.java
  
  Index: DeleteFiles.java
  ===================================================================
  RCS file: /home/cvs/jakarta-lucene/src/demo/org/apache/lucene/demo/DeleteFiles.java,v
  retrieving revision 1.2
  retrieving revision 1.3
  diff -u -r1.2 -r1.3
  --- DeleteFiles.java  29 Mar 2004 22:48:00 -0000      1.2
  +++ DeleteFiles.java  3 Aug 2004 21:49:24 -0000       1.3
  @@ -16,27 +16,36 @@
    * limitations under the License.
    */
   
  -import java.io.IOException;
  -
   import org.apache.lucene.store.Directory;
   import org.apache.lucene.store.FSDirectory;
   import org.apache.lucene.index.IndexReader;
   import org.apache.lucene.index.Term;
  +//import org.apache.lucene.index.Term;
   
   class DeleteFiles {
  +  
     public static void main(String[] args) {
  +    String usage = "java org.apache.lucene.demo.DeleteFiles <unique_term>";
  +    if (args.length == 0) {
  +      System.err.println("Usage: " + usage);
  +      System.exit(1);
  +    }
       try {
  -      Directory directory = FSDirectory.getDirectory("demo index", false);
  +      Directory directory = FSDirectory.getDirectory("index", false);
         IndexReader reader = IndexReader.open(directory);
   
  -//       Term term = new Term("path", "pizza");
  -//       int deleted = reader.delete(term);
  +      Term term = new Term("path", args[0]);
  +      int deleted = reader.delete(term);
   
  -//       System.out.println("deleted " + deleted +
  -//                    " documents containing " + term);
  +      System.out.println("deleted " + deleted +
  +                      " documents containing " + term);
   
  -      for (int i = 0; i < reader.maxDoc(); i++)
  -     reader.delete(i);
  +      // one can also delete documents by their internal id:
  +      /*
  +      for (int i = 0; i < reader.maxDoc(); i++) {
  +        System.out.println("Deleting document with id " + i);
  +        reader.delete(i);
  +      }*/
   
         reader.close();
         directory.close();
  
  
  
  1.3       +4 -3      jakarta-lucene/src/demo/org/apache/lucene/demo/HTMLDocument.java
  
  Index: HTMLDocument.java
  ===================================================================
  RCS file: /home/cvs/jakarta-lucene/src/demo/org/apache/lucene/demo/HTMLDocument.java,v
  retrieving revision 1.2
  retrieving revision 1.3
  diff -u -r1.2 -r1.3
  --- HTMLDocument.java 29 Mar 2004 22:48:00 -0000      1.2
  +++ HTMLDocument.java 3 Aug 2004 21:49:24 -0000       1.3
  @@ -45,9 +45,10 @@
       // make a new, empty document
       Document doc = new Document();
   
  -    // Add the url as a field named "url".  Use an UnIndexed field, so
  -    // that the url is just stored with the document, but is not searchable.
  -    doc.add(Field.UnIndexed("url", f.getPath().replace(dirSep, '/')));
  +    // Add the url as a field named "path".  Use a Keyword field, so 
  +    // that it's searchable, but so that no attempt is made
  +    // to tokenize the field into words.
  +    doc.add(Field.Keyword("path", f.getPath().replace(dirSep, '/')));
   
       // Add the last modified date of the file a field named "modified".  Use a
       // Keyword field, so that it's searchable, but so that no attempt is made
  
  
  
  1.3       +6 -9      jakarta-lucene/src/demo/org/apache/lucene/demo/SearchFiles.java
  
  Index: SearchFiles.java
  ===================================================================
  RCS file: /home/cvs/jakarta-lucene/src/demo/org/apache/lucene/demo/SearchFiles.java,v
  retrieving revision 1.2
  retrieving revision 1.3
  diff -u -r1.2 -r1.3
  --- SearchFiles.java  29 Mar 2004 22:48:00 -0000      1.2
  +++ SearchFiles.java  3 Aug 2004 21:49:24 -0000       1.3
  @@ -16,7 +16,6 @@
    * limitations under the License.
    */
   
  -import java.io.IOException;
   import java.io.BufferedReader;
   import java.io.InputStreamReader;
   
  @@ -56,15 +55,13 @@
            Document doc = hits.doc(i);
            String path = doc.get("path");
            if (path != null) {
  -              System.out.println(i + ". " + path);
  -         } else {
  -              String url = doc.get("url");
  -           if (url != null) {
  -             System.out.println(i + ". " + url);
  -             System.out.println("   - " + doc.get("title"));
  -           } else {
  -             System.out.println(i + ". " + "No path nor URL for this document");
  +              System.out.println((i+1) + ". " + path);
  +              String title = doc.get("title");
  +           if (title != null) {
  +             System.out.println("   Title: " + doc.get("title"));
              }
  +         } else {
  +           System.out.println((i+1) + ". " + "No path for this document");
            }
          }
   
  
  
  
  1.4       +2 -2      jakarta-lucene/src/demo/org/apache/lucene/demo/IndexHTML.java
  
  Index: IndexHTML.java
  ===================================================================
  RCS file: /home/cvs/jakarta-lucene/src/demo/org/apache/lucene/demo/IndexHTML.java,v
  retrieving revision 1.3
  retrieving revision 1.4
  diff -u -r1.3 -r1.4
  --- IndexHTML.java    29 Mar 2004 22:48:00 -0000      1.3
  +++ IndexHTML.java    3 Aug 2004 21:49:24 -0000       1.4
  @@ -144,12 +144,12 @@
          uidIter.next();                         // keep matching docs
        } else if (!deleting) {                   // add new docs
          Document doc = HTMLDocument.Document(file);
  -       System.out.println("adding " + doc.get("url"));
  +       System.out.println("adding " + doc.get("path"));
        writer.addDocument(doc);
        }
         } else {                                         // creating a new index
        Document doc = HTMLDocument.Document(file);
  -     System.out.println("adding " + doc.get("url"));
  +     System.out.println("adding " + doc.get("path"));
        writer.addDocument(doc);                  // add docs unconditionally
         }
       }
  
  
  
  1.3       +4 -3      jakarta-lucene/src/demo/org/apache/lucene/demo/FileDocument.java
  
  Index: FileDocument.java
  ===================================================================
  RCS file: /home/cvs/jakarta-lucene/src/demo/org/apache/lucene/demo/FileDocument.java,v
  retrieving revision 1.2
  retrieving revision 1.3
  diff -u -r1.2 -r1.3
  --- FileDocument.java 29 Mar 2004 22:48:00 -0000      1.2
  +++ FileDocument.java 3 Aug 2004 21:49:24 -0000       1.3
  @@ -47,9 +47,10 @@
       // make a new, empty document
       Document doc = new Document();
   
  -    // Add the path of the file as a field named "path".  Use a Text field, so
  -    // that the index stores the path, and so that the path is searchable
  -    doc.add(Field.Text("path", f.getPath()));
  +    // Add the path of the file as a field named "path".  Use a
  +    // Keyword field, so that it's searchable, but so that no attempt is made
  +    // to tokenize the field into words.
  +    doc.add(Field.Keyword("path", f.getPath()));
   
       // Add the last modified date of the file a field named "modified".  Use a
       // Keyword field, so that it's searchable, but so that no attempt is made


---------------------------------------------------------------------
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]

cvs commit: jakarta-lucene/src/demo/org/apache/lucene/demo DeleteFiles.java HTMLDocument.java SearchFiles.java IndexHTML.java FileDocument.java

Reply via email to