Hi Mary, I've been out of the loop with Lucene and Java for a bit, so this might not be entirely correct, but here is an example of how it might be accomplished (you can also see it in this gist: https://gist.github.com/rainkinz/5645139). The output looks like this:
** Also note I'm using Lucene 4.3, however I set the version to be Version.LUCENE_40 for you. I don't think the APIs are different in this case.

---------------------------------------------------
Term 'mary' appears 5 in the index
in doc 0 the term mary appears 1 times at positions 1
in doc 2 the term mary appears 1 times at positions 3
in doc 4 the term mary appears 1 times at positions 1
in doc 8 the term mary appears 1 times at positions 3
in doc 9 the term mary appears 1 times at positions 6
etc

import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.*;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.Version;

import java.io.IOException;
import java.util.Random;

/**
 * Small demo: indexes ten documents of random words into an in-memory
 * directory, then lists every term of the "text" field together with its
 * total frequency, the documents it occurs in, and its positions in each
 * document.
 */
public class CountingTerms {

    private static final Version VERSION = Version.LUCENE_40;

    /** Name of the single indexed field. */
    private static final String FIELD = "text";

    /** Words the random documents are assembled from. */
    private static final String[] VOCABULARY =
            "hi am mary and i have a problem with lucene".split(" ");

    private final Directory indexDir = new RAMDirectory();

    /** One generator for the whole run instead of a fresh one per document. */
    private final Random random = new Random();

    /**
     * Builds a space-separated string of randomly chosen vocabulary words.
     *
     * @return between 0 and {@code VOCABULARY.length - 1} words, each followed
     *         by a space; may be the empty string
     */
    private String randomTerms() {
        StringBuilder sb = new StringBuilder();
        int numTerms = random.nextInt(VOCABULARY.length);
        for (int i = 0; i < numTerms; i++) {
            sb.append(VOCABULARY[random.nextInt(VOCABULARY.length)]).append(" ");
        }
        return sb.toString();
    }

    /**
     * Adds ten documents, each with one stored "text" field of random words.
     *
     * @param writer the writer the documents are added to
     * @throws IOException if the writer fails to add a document
     */
    private void addDocs(IndexWriter writer) throws IOException {
        for (int i = 0; i < 10; i++) {
            Document doc = new Document();
            String randomStr = randomTerms();
            puts("Adding random str: " + randomStr);
            IndexableField field = new TextField(FIELD, randomStr, Field.Store.YES);
            doc.add(field);
            writer.addDocument(doc);
        }
    }

    /**
     * Prints, for every term of the "text" field, its total frequency in the
     * index and, per document, its frequency and positions.
     *
     * @throws IOException if the index cannot be opened or read
     */
    private void countTerms() throws IOException {
        DirectoryReader indexReader = DirectoryReader.open(indexDir);
        try {
            // Use the merged view over all segments. Reading only
            // leaves().get(0) would silently skip every other segment as soon
            // as the index has more than one.
            Terms fieldTerms = MultiFields.getTerms(indexReader, FIELD);
            if (fieldTerms == null) {
                // Happens when no document produced a token for the field,
                // e.g. every random string came out empty.
                puts("No terms indexed for field '" + FIELD + "'");
                return;
            }
            Bits liveDocs = MultiFields.getLiveDocs(indexReader);

            TermsEnum termsEnum = fieldTerms.iterator(null);
            BytesRef term;
            while ((term = termsEnum.next()) != null) {
                String text = term.utf8ToString();
                puts("---------------------------------------------------");
                puts("Term '" + text + "' appears " + termsEnum.totalTermFreq() + " in the index");

                DocsAndPositionsEnum docPosEnum =
                        termsEnum.docsAndPositions(liveDocs, null, DocsAndPositionsEnum.FLAG_OFFSETS);
                if (docPosEnum == null) {
                    // No positions available for this field; nothing to list.
                    continue;
                }

                int docid;
                while ((docid = docPosEnum.nextDoc()) != DocsAndPositionsEnum.NO_MORE_DOCS) {
                    int freq = docPosEnum.freq();
                    int[] positions = new int[freq];
                    for (int i = 0; i < freq; i++) {
                        positions[i] = docPosEnum.nextPosition();
                    }
                    puts("in doc " + docid + " the term " + text + " appears " + freq
                            + " times at positions " + ppArray(positions));
                }
            }
        } finally {
            // Close the reader even when enumeration throws.
            indexReader.close();
        }
    }

    /**
     * Formats an int array as a comma-separated list without brackets.
     *
     * @param arr the values to format
     * @return e.g. {@code "1, 3, 6"}; the empty string for an empty array
     */
    private String ppArray(int[] arr) {
        StringBuilder sb = new StringBuilder();
        for (int i = 0; i < arr.length; i++) {
            sb.append(arr[i]);
            if (i + 1 < arr.length) {
                sb.append(", ");
            }
        }
        return sb.toString();
    }

    /** Prints {@code msg} on its own line to standard output. */
    private void puts(Object msg) {
        System.out.println(msg);
    }

    /**
     * Creates the index in {@code indexDir} using a whitespace analyzer.
     *
     * @throws IOException if writing the index fails
     */
    private void index() throws IOException {
        IndexWriter indexWriter = new IndexWriter(indexDir,
                new IndexWriterConfig(VERSION, new WhitespaceAnalyzer(VERSION)));
        try {
            addDocs(indexWriter);
            indexWriter.commit();
        } finally {
            // Release the writer (and its write lock) even if indexing failed.
            indexWriter.close();
        }
    }

    public static void main(String[] args) throws Exception {
        CountingTerms ct = new CountingTerms();
        ct.index();
        ct.countTerms();
    }
}

On Fri, May 24, 2013 at 12:14 PM, mary meriem <mel-mer...@hotmail.fr> wrote:
> hii am mary and i have a problem with lucene, Actually a work with lucene
> 4.0.0, my problem is, how can I more listed all the terms, the display
> position for each term in each document and their frequency?please help
> -- Brendan Grainger www.kuripai.com