package npg1.search;

import java.util.Arrays;
import java.util.StringTokenizer;
import org.apache.lucene.index.IndexReader;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Vector;

/**
 * Collects the distinct category codes stored in the "code" field of the
 * documents in a Lucene index.
 */
public class Categories {

	/**
	 * Reads every live document in the index and gathers the unique,
	 * whitespace-separated tokens found in its "code" field.
	 *
	 * @param index_dir path of the Lucene index directory
	 * @return a Vector of the unique codes (Strings) in natural sorted order,
	 *         or null if the index could not be opened or read
	 */
	public Vector getCategories(String index_dir) {

		IndexReader reader = null;

		try {
			reader = IndexReader.open(index_dir);

			// Document numbers run from 0 to maxDoc()-1. numDocs() excludes
			// deleted documents, so using it as the loop bound would skip
			// the highest-numbered live documents once anything is deleted.
			int max_doc = reader.maxDoc();

			HashSet categories = new HashSet();

			for (int i = 0; i < max_doc; i++) {

				// document(i) throws for a deleted slot, so skip those.
				if (reader.isDeleted(i)) {
					continue;
				}

				String codes = reader.document(i).get("code");

				if (codes != null && codes.length() > 0) {

					// Default delimiters: the field is split on whitespace.
					StringTokenizer codelist = new StringTokenizer(codes);

					while (codelist.hasMoreTokens()) {
						categories.add(codelist.nextToken());
					}
				}
			}

			Object[] cat_array = categories.toArray();
			Arrays.sort(cat_array);
			return new Vector(Arrays.asList(cat_array));
		} catch (Exception e) {
			// Callers are given null on failure; report the cause
			// instead of discarding it silently.
			System.err.println("Unable to read categories from index '" + index_dir + "': " + e);
			return null;
		} finally {
			// Always release the index files held by the reader.
			if (reader != null) {
				try {
					reader.close();
				} catch (Exception ignored) {
					// Nothing useful can be done if closing fails.
				}
			}
		}
	}

	/**
	 * Command-line entry point: prints the unique categories of an index.
	 *
	 * @param args args[0] is the index directory, relative or absolute
	 */
	public static void main(String[] args) {

		if (args == null || args.length < 1) {
			System.err.println("Usage: java npg1.search.Categories <index_dir>");
			System.exit(1);
		}

		Categories cats = new Categories();
		Vector categories = cats.getCategories(args[0]);

		// getCategories signals failure with null.
		if (categories == null) {
			System.err.println("Could not read categories from index: " + args[0]);
			System.exit(1);
		}

		System.out.println("Found the following " + categories.size() + " unique categories");

		for (int i = 0; i < categories.size(); i++) {
			System.out.println(categories.get(i));
		}
	}

}


