package net.charabia.normalizer;

/* ====================================================================
 * Copyright (c) 2002 Rodrigo Reyes.  All rights reserved.
 * ====================================================================
 */

import java.util.*;

/**
 * A Transducer is a complete set of rules that are applied at each
 * character position of the string to process.
 *
 * <p> Acting as a transducer, only one rule is effectively applied at
 * a given position of the string.
 */

public class Transducer
{
    /**
     * Rules indexed by the first character they can match. Each key is
     * a Character and each value is a Vector of Rule, kept in insertion
     * order. For example, if the matching string of a rule begins with
     * "ing", the rule is stored under the Character 'i'. A rule that
     * starts with a positive range such as "[aeiou]" is stored once
     * under each character of the range.
     */
    private Hashtable m_rules = new Hashtable();

    /**
     * Rules that cannot be indexed by a single first character. In the
     * current implementation these are the rules whose first matcher is
     * a negative range. They are tried only after the indexed rules for
     * the current character have all failed.
     */
    private Vector m_unrangedrules = new Vector();

    /**
     * Stores the so-called options. Those options are checked prior to
     * applying the rules, in order to apply some pre-processing on the
     * string (like lower-casing). Keys and values are stored
     * lower-cased.
     */
    private Hashtable m_options = new Hashtable();


    /**
     * Adds a rule to the Transducer. Within one bucket, each rule is
     * added at a lower priority than the previous one. It is therefore
     * possible for a rule to mask another one if the latter is subsumed
     * by a rule stored earlier. For example, a rule "ed" would mask a
     * rule "eds" if added in the order ("ed", "eds") but not in the
     * order ("eds", "ed").
     *
     * <p> Rules indexed by their first character are always tried
     * before the rules starting with a negative range, whatever the
     * order in which they were added.
     *
     * @param rule the rule to be stored.
     */
    public void addRule(Rule rule)
    {
        TokenMatcher first = (TokenMatcher) ((Vector) rule.getMatched()).elementAt(0);

        if (first instanceof RangeMatcher)
        {
            RangeMatcher range = (RangeMatcher) first;
            if (range.isNegative())
            {
                // A negative range cannot be enumerated: keep the rule
                // in the fallback list.
                m_unrangedrules.add(rule);
            }
            else
            {
                // Register the rule under every character of the range.
                char[] chars = range.getChars();
                for (int i = 0; i < chars.length; i++)
                {
                    indexRule(chars[i], rule);
                }
            }
        }
        else if (first instanceof CharMatcher)
        {
            indexRule(((CharMatcher) first).getChar(), rule);
        }
        // NOTE(review): a rule whose first matcher is neither a
        // RangeMatcher nor a CharMatcher is silently ignored here.
        // Confirm whether other TokenMatcher kinds exist and should be
        // added to m_unrangedrules instead.
    }

    /**
     * Appends a rule to the bucket of the given first character,
     * creating the bucket if it does not exist yet.
     */
    private void indexRule(char first, Rule rule)
    {
        Character key = new Character(first);
        Vector bucket = (Vector) m_rules.get(key);
        if (bucket == null)
        {
            bucket = new Vector();
            m_rules.put(key, bucket);
        }
        bucket.add(rule);
    }

    /**
     * Sets a pre-processing option. The options read by this class are
     * "tolowercase", "touppercase" and "uniquify"; each one is enabled
     * when its value is "true". Key and value are compared
     * case-insensitively.
     *
     * @param key the option name.
     * @param value the option value.
     */
    public void setOption(String key, String value)
    {
        // Use a fixed locale: with the default locale, a Turkish
        // environment lower-cases 'I' to a dotless i, so a key such as
        // "UNIQUIFY" would never match the "uniquify" lookup.
        m_options.put(key.toLowerCase(Locale.ENGLISH), value.toLowerCase(Locale.ENGLISH));
    }

    /**
     * Process a string by applying the transducer rules on it.
     *
     * <p> The options are applied first. Then, at each position, the
     * rules indexed under the current character are tried, followed by
     * the fallback rules. When no rule matches, the current character
     * is passed to the buffer unchanged and the index moves forward by
     * one.
     *
     * @param input the input string to be processed
     * @return the result of the processing, that is the string normalized.
     */
    public String process(String input)
    {
        input = applyOptions(input);
        TransducerBuffer buffer = new TransducerBuffer(input);

        while (!buffer.isComplete())
        {
            Vector indexed = (Vector) m_rules.get(new Character(buffer.getChar()));
            boolean applied = applyRules(indexed, buffer)
                || applyRules(m_unrangedrules, buffer);
            if (!applied)
            {
                // No rule matched here: copy the character as is.
                // A matching rule is presumably responsible for moving
                // the buffer forward itself -- TODO confirm in Rule.
                buffer.inject(buffer.getChar());
                buffer.setIndex(buffer.getIndex() + 1);
            }
        }
        return buffer.getOutput();
    }

    /**
     * Tries the given rules in order and stops at the first one that
     * matches.
     *
     * @param rules the rules to try, may be null.
     * @param buffer the buffer the rules are matched against.
     * @return true if a rule matched, false if none did or if rules is null.
     */
    private boolean applyRules(Vector rules, TransducerBuffer buffer)
    {
        if (rules == null)
        {
            return false;
        }

        for (Iterator iter = rules.iterator(); iter.hasNext(); )
        {
            Rule rule = (Rule) iter.next();
            if (rule.matches(buffer))
            {
                return true;
            }
        }
        return false;
    }

    /**
     * Applies the enabled options to a word, in this order:
     * "tolowercase", "touppercase", then "uniquify" (collapse each run
     * of identical consecutive characters into a single character).
     *
     * @param word the word to pre-process.
     * @return the pre-processed word.
     */
    private String applyOptions(String word)
    {
        if ("true".equals(m_options.get("tolowercase")))
        {
            word = word.toLowerCase();
        }

        if ("true".equals(m_options.get("touppercase")))
        {
            word = word.toUpperCase();
        }

        if ("true".equals(m_options.get("uniquify")))
        {
            StringBuffer buf = new StringBuffer(word.length());
            for (int i = 0; i < word.length(); i++)
            {
                char cur = word.charAt(i);
                // Compare with the real previous character rather than
                // a 0 sentinel, so that a leading '\u0000' is kept.
                if (i == 0 || cur != word.charAt(i - 1))
                {
                    buf.append(cur);
                }
            }
            word = buf.toString();
        }

        return word;
    }

}
