ehatcher    2004/01/25 05:16:00

  Modified:    contributions/miscellaneous/src/java/org/apache/lucene/misc
                        HighFreqTerms.java
  Log:
  #26396 - HighFreqTerms fixup from Jean-François Halleux
  
  Revision  Changes    Path
  1.3       +54 -72    jakarta-lucene-sandbox/contributions/miscellaneous/src/java/org/apache/lucene/misc/HighFreqTerms.java
  
  Index: HighFreqTerms.java
  ===================================================================
  RCS file: /home/cvs/jakarta-lucene-sandbox/contributions/miscellaneous/src/java/org/apache/lucene/misc/HighFreqTerms.java,v
  retrieving revision 1.2
  retrieving revision 1.3
  diff -u -r1.2 -r1.3
  --- HighFreqTerms.java        6 Jan 2004 01:17:37 -0000       1.2
  +++ HighFreqTerms.java        25 Jan 2004 13:16:00 -0000      1.3
  @@ -3,7 +3,7 @@
   /* ====================================================================
    * The Apache Software License, Version 1.1
    *
  - * Copyright (c) 2001 The Apache Software Foundation.  All rights
  + * Copyright (c) 2001,2004 The Apache Software Foundation.  All rights
    * reserved.
    *
    * Redistribution and use in source and binary forms, with or without
  @@ -54,10 +54,10 @@
    * <http://www.apache.org/>.
    */
   
  -import org.apache.lucene.util.PriorityQueue;
   import org.apache.lucene.index.IndexReader;
   import org.apache.lucene.index.Term;
   import org.apache.lucene.index.TermEnum;
  +import org.apache.lucene.util.PriorityQueue;
   
   /**
    * <code>HighFreqTerms</code> class extracts terms and their frequencies out
  @@ -65,77 +65,59 @@
    *
    * @version $Id$
    */
  -public class HighFreqTerms
  -{
  -    public static int numTerms = 100;
  -
  -    public static void main(String[] args) throws Exception
  -    {
  -        IndexReader reader = null;
  -        if (args.length == 1)
  -        {
  -            reader = IndexReader.open(args[0]);
  -        }
  -        else
  -        {
  -            usage();
  -            System.exit(1);
  -        }
  -
  -        TermInfoQueue tiq = new TermInfoQueue(numTerms);
  -        TermEnum terms = reader.terms();
  -
  -        int minFreq = 0;
  -        while (terms.next())
  -        {
  -            if (terms.docFreq() > minFreq)
  -            {
  -                tiq.put(new TermInfo(terms.term(), terms.docFreq()));
  -                if (tiq.size() > numTerms)                // if tiq overfull
  -                {
  -                    tiq.pop();                                    // remove lowest in tiq
  -                    minFreq = ((TermInfo)tiq.top()).docFreq; // reset minFreq
  -                }
  -            }
  -        }
  -
  -        while (tiq.size() != 0)
  -        {
  -            TermInfo termInfo = (TermInfo)tiq.pop();
  -            System.out.println(termInfo.term + " " + termInfo.docFreq);
  -        }
  -
  -        reader.close();
  -    }
  -
  -    private static void usage()
  -    {
  -        System.out.println("\n\n" +
  -            "java org.apache.lucene.misc.HighFreqTerms <index dir>\n\n");
  -    }
  +public class HighFreqTerms {
  +     
  +     // The top numTerms will be displayed
  +     public static final int numTerms = 100;
  +
  +     public static void main(String[] args) throws Exception {
  +             IndexReader reader = null;
  +             if (args.length == 1) {
  +                     reader = IndexReader.open(args[0]);
  +             } else {
  +                     usage();
  +                     System.exit(1);
  +             }
  +
  +             TermInfoQueue tiq = new TermInfoQueue(numTerms);
  +             TermEnum terms = reader.terms();
  +
  +             while (terms.next()) {
  +                     tiq.insert(new TermInfo(terms.term(), terms.docFreq()));
  +             }
  +
  +             while (tiq.size() != 0) {
  +                     TermInfo termInfo = (TermInfo) tiq.pop();
  +                     System.out.println(termInfo.term + " " + termInfo.docFreq);
  +             }
  +
  +             reader.close();
  +     }
  +
  +     private static void usage() {
  +             System.out.println(
  +                     "\n\n"
  +                             + "java org.apache.lucene.misc.HighFreqTerms <index dir>\n\n");
  +     }
   }
   
  -final class TermInfo
  -{
  -    TermInfo(Term t, int df)
  -    {
  -        term = t;
  -        docFreq = df;
  -    }
  -    int docFreq;
  -    Term term;
  +final class TermInfo {
  +     TermInfo(Term t, int df) {
  +             term = t;
  +             docFreq = df;
  +     }
  +     int docFreq;
  +     Term term;
   }
   
  -final class TermInfoQueue extends PriorityQueue
  -{
  -    TermInfoQueue(int size)
  -    {
  -        initialize(size);
  -    }
  -    protected final boolean lessThan(Object a, Object b)
  -    {
  -        TermInfo termInfoA = (TermInfo)a;
  -        TermInfo termInfoB = (TermInfo)b;
  -        return termInfoA.docFreq < termInfoB.docFreq;
  -    }
  +final class TermInfoQueue extends PriorityQueue {
  +     TermInfoQueue(int size) {
  +             initialize(size);
  +     }
  +
  +     protected final boolean lessThan(Object a, Object b) {
  +             TermInfo termInfoA = (TermInfo) a;
  +             TermInfo termInfoB = (TermInfo) b;
  +             return termInfoA.docFreq < termInfoB.docFreq;
  +     }
   }
  
  
  

---------------------------------------------------------------------
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]

Reply via email to