Re: lucene 4.0.0

Brendan Grainger Fri, 24 May 2013 10:41:30 -0700

Hi Mary,

I've been out of the loop with Lucene and Java for a bit, so this might
not be entirely correct, but here is an example of how it might be accomplished
(also you can see it in this gist: https://gist.github.com/rainkinz/5645139).
The output looks like this:


** Also note I'm using Lucene 4.3, however I set the version to be
Version.LUCENE_40 for you. I don't think the APIs are different in this
case.

---------------------------------------------------
Term 'mary' appears 5 in the index
in doc 0 the term mary appears 1 times at positions 1
in doc 2 the term mary appears 1 times at positions 3
in doc 4 the term mary appears 1 times at positions 1
in doc 8 the term mary appears 1 times at positions 3
in doc 9 the term mary appears 1 times at positions 6
etc



import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.*;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.Version;

import java.io.IOException;
import java.util.Random;

public class CountingTerms {

  private static final Version VERSION = Version.LUCENE_40;

  private static final String[] terms = "hi am mary and i have a problem
with lucene".split(" ");

  private final Directory indexDir = new RAMDirectory();

  private String randomTerms() {
    Random rand = new Random();
    StringBuilder sb = new StringBuilder();
    int numTerms = rand.nextInt(terms.length);
    for (int i = 0; i < numTerms; i++) {
      sb.append(terms[rand.nextInt(terms.length)]).append(" ");
    }
    return sb.toString();
  }

  private void addDocs(IndexWriter writer) throws IOException {
    for (int i = 0; i < 10; i++) {
      Document doc = new Document();
      String randomStr = randomTerms();
      puts("Adding random str: " + randomStr);
      IndexableField field = new TextField("text", randomStr,
Field.Store.YES);
      doc.add(field);
      writer.addDocument(doc);
    }
  }

  private void countTerms() throws IOException {
    DirectoryReader indexReader = DirectoryReader.open(indexDir);
    AtomicReader reader = indexReader.leaves().get(0).reader();

    Fields fields = reader.fields();
    Terms terms = fields.terms("text");
    TermsEnum termsEnum = terms.iterator(null);
    BytesRef term;

    while ((term = termsEnum.next()) != null) {
      puts("---------------------------------------------------");
      puts("Term '" + term.utf8ToString() + "' appears " +
termsEnum.totalTermFreq() + " in the index");
      DocsAndPositionsEnum docPosEnum =
termsEnum.docsAndPositions(reader.getLiveDocs(),
              null,
              DocsAndPositionsEnum.FLAG_OFFSETS);
      int docid;
      while ((docid = docPosEnum.nextDoc()) !=
DocsAndPositionsEnum.NO_MORE_DOCS) {

        int freq = docPosEnum.freq();
        int[] positions = new int[freq];
        for (int i = 0; i < freq; i++) {
          int position = docPosEnum.nextPosition();
          positions[i]=position;
        }

        puts("in doc " + docid + " the term " + term.utf8ToString() + "
appears " + freq + " times at positions " + ppArray(positions));
      }

    }

    indexReader.close();
  }

  private String ppArray(int[] arr) {
    StringBuilder sb = new StringBuilder();
    for (int i = 0; i < arr.length; i++) {
      sb.append(arr[i]);
      if (i + 1 < arr.length) sb.append(", ");
    }
    return sb.toString();
  }

  private void puts(Object msg) {
    System.out.println(msg);
  }

  private void index() throws IOException {
    IndexWriter indexWriter = new IndexWriter(indexDir,
            new IndexWriterConfig(VERSION, new
WhitespaceAnalyzer(VERSION)));
    addDocs(indexWriter);
    indexWriter.commit();
    indexWriter.close();
  }

  public static void main(String[] args) throws Exception {
    CountingTerms ct = new CountingTerms();
    ct.index();
    ct.countTerms();
  }

}



On Fri, May 24, 2013 at 12:14 PM, mary meriem <[email protected]> wrote:

> hii am mary and i have a problem with lucene, Actually a work with lucene
> 4.0.0, my problem is, how can I more listed all the terms, the display
> position for each term in each document and their frequency?please help
>




-- 
Brendan Grainger
www.kuripai.com

Re: lucene 4.0.0

Reply via email to