/**
 * Dutch stemmer: reduces Dutch words to a stem by applying ordered suffix,
 * prefix and infix rewrite rules.
 *
 * @author    Maurits van Wijland
 */

package org.apache.lucene.analysis.nl;

import java.util.*;
import java.lang.*;
import org.apache.lucene.analysis.*;


public class DutchStemmer {
  // Condition codes attached to a rule and evaluated by wordMeetCondition().
  // A condition value of 0 means "no condition" (always satisfied).
  // Dispatches to endsWithC(): the candidate stem must not end in a vowel.
  final static int EndsWithC =  1;
  // Dispatches to endsWithV(): the candidate stem must end in a vowel.
  final static int EndsWithV =  2;
  // Dispatches to endsWithVX(): the second-to-last character must be a vowel.
  final static int EndsWithVX = 3;
  // Dispatches to greaterthan2(String): the candidate must be longer than 2 chars.
  final static int greaterthan2 = 4;
  
  // Rule tables, (re)built by initRules(). They are static, i.e. shared by
  // all stemmer instances.
  // rv1..rv4: suffix rules applied in that order by stem() (steps 1-4).
  static RuleVector rv1;
  static RuleVector rv2;
  static RuleVector rv3;
  static RuleVector rv4;
  // rv1a: "ge" rule applied with ReplacePrefix() by stem().
  static RuleVector rv1a;
  // rv1b: "ge" rule applied with ReplaceInfix() by stem().
  static RuleVector rv1b;
  // rv1c: suffix rules applied after rule 501 (rv1a) or 502 (rv1b) fired.
  static RuleVector rv1c;
  // rv7: kt/ft/pt suffix rules, applied after the prefix/infix steps.
  static RuleVector rv7;
  // rv6: final clean-up suffix rules (doubled consonants, v->f, z->s), applied
  // only when an earlier step reported a positive rule id.
  static RuleVector rv6;
  // Not referenced anywhere in this class.
  Rule testRule;
  // Working result: written by the Replace*/DuplicateV methods and returned
  // by stem(). Because it is instance state, one instance should not be used
  // by several threads at once.
  String stemmed;

  /**
   * Creates a stemmer with an empty working result and (re)builds the
   * static rule tables by calling {@link #initRules()}.
   */
  public DutchStemmer() {
    // A literal is enough here; new String("") only allocated a needless copy.
    stemmed = "";
    initRules();
  }


  /**
   * (Re)creates all static rule tables used by {@link #stem(String)}.
   *
   * Each entry is added as (id, text to match, replacement, minimum root
   * size, condition). NOTE(review): that argument meaning is inferred from
   * the Rule getters used by the Replace* methods; RuleVector.addElement
   * itself is not visible in this file -- confirm there.
   *
   * Every table ends with an entry whose id is 0; the Replace* methods stop
   * scanning when they reach it. Rules are tried in the order listed, so the
   * order of the entries matters.
   */
  public static void initRules() {

    /* Step 1 rule set. */
    rv1 = new RuleVector();
    rv1.addElement(100, "'s",    "",     -1, 0);
    rv1.addElement(101, "ts",    "ts",    0, 0);
    rv1.addElement(102, "s",     "",      0, EndsWithC);
    rv1.addElement(103, "ies",   "ie",    0, 0);
    rv1.addElement(104, "eres",  "er",    0, EndsWithC);
    rv1.addElement(105, "ares",  "ar",    0, EndsWithC);
    rv1.addElement(106, "es",    "e",     0, EndsWithC);
    rv1.addElement(107, "ës",    "e",     0, EndsWithC);
    rv1.addElement(108, "aus",   "au",    0, EndsWithV);
    rv1.addElement(109, "heden", "heid",  0, 0);
    rv1.addElement(110, "nden",  "nd",   -1, 0);
    rv1.addElement(111, "nde",   "nd",   -1, 0);
    rv1.addElement(112, "den",   "",      0, EndsWithC);
    rv1.addElement(113, "ien",   "i",    -1, EndsWithV);
    rv1.addElement(114, "jen",   "j",    -1, EndsWithV);
    rv1.addElement(115, "en",    "",      0, EndsWithC);
    rv1.addElement(0,   "",      "",      0, 0);

    /* Step 2 rule set. */
    rv2 = new RuleVector();
    rv2.addElement(201, "'tje",  "",     -1, 0);
    rv2.addElement(202, "etje",  "",      0, EndsWithC);
    rv2.addElement(203, "rntje", "rn",   -1, 0);
    rv2.addElement(204, "tje",   "",      0, EndsWithVX);
    rv2.addElement(205, "inkje", "ing",  -1, 0);
    rv2.addElement(206, "mpje",  "m",    -1, 0);
    rv2.addElement(207, "'je",   "",      0, 0);
    rv2.addElement(208, "je",    "",      0, EndsWithC);
    rv2.addElement(209, "ge",    "g",     0, 0);
    rv2.addElement(210, "lijke", "lijk",  0, 0);
    rv2.addElement(211, "ische", "isch",  0, 0);
    rv2.addElement(212, "de",    "",      0, EndsWithC);
    rv2.addElement(213, "te",    "t",     0, 0);
    rv2.addElement(214, "se",    "s",     0, 0);
    rv2.addElement(215, "re",    "r",     0, 0);
    rv2.addElement(216, "le",    "l",     0, 0);
    rv2.addElement(217, "ene",   "en",    0, EndsWithC);
    rv2.addElement(218, "ieve",  "ief",   0, 0);
    rv2.addElement(0,   "",      "",      0, 0);

    /* Step 3 rule set. */
    rv3 = new RuleVector();
    rv3.addElement(301, "atie",  "eer",   0, 0);
    rv3.addElement(302, "iteit", "",      0, 0);
    rv3.addElement(303, "heid",  "",      0, 0);
    rv3.addElement(306, "sel",   "",      0, 0);
    rv3.addElement(307, "ster",  "",      0, 0);
    rv3.addElement(308, "rder",  "r",    -1, 0);
    rv3.addElement(312, "ing",   "",      0, 0);
    rv3.addElement(313, "isme",  "",      0, 0);
    rv3.addElement(314, "erij",  "",      0, 0);
    rv3.addElement(315, "arij",  "aar",   0, EndsWithC);
    rv3.addElement(316, "fie",   "f",     1, 0);
    rv3.addElement(317, "gie",   "g",     1, 0);
    rv3.addElement(318, "tst",   "t",     0, EndsWithC);
    rv3.addElement(319, "dst",   "d",     0, EndsWithC);
    rv3.addElement(0,   "",      "",      0, 0);

    /* Step 4 rule set. Note: the three "achtig*" entries share id 410. */
    rv4 = new RuleVector();
    rv4.addElement(401, "ioneel",   "ie",   0, 0);
    rv4.addElement(402, "atief",    "eer",  0, 0);
    rv4.addElement(403, "baar",     "",     0, 0);
    rv4.addElement(404, "naar",     "n",    0, EndsWithV);
    rv4.addElement(405, "laar",     "l",    0, EndsWithV);
    rv4.addElement(406, "raar",     "r",    0, EndsWithV);
    rv4.addElement(407, "tant",     "teer", 0, 0);
    rv4.addElement(408, "lijker",   "lijk", 0, 0);
    rv4.addElement(409, "lijkst",   "lijk", 0, 0);
    rv4.addElement(410, "achtig",   "",     0, 0);
    rv4.addElement(410, "achtiger", "",     0, 0);
    rv4.addElement(410, "achtigst", "",     0, 0);
    rv4.addElement(411, "eriger",   "",     0, EndsWithC);
    rv4.addElement(412, "erigst",   "",     0, EndsWithC);
    rv4.addElement(413, "iger",     "",     0, EndsWithC);
    rv4.addElement(414, "igst",     "",     0, EndsWithC);
    rv4.addElement(415, "erig",     "",     0, EndsWithC);
    rv4.addElement(416, "ig",       "",     0, EndsWithC);
    rv4.addElement(417, "end",      "",     0, EndsWithC);
    rv4.addElement(0,   "",         "",     0, 0);

    /* "ge" rule used by ReplacePrefix(). */
    rv1a = new RuleVector();
    rv1a.addElement(501, "ge", "", 0, greaterthan2);
    rv1a.addElement(0,   "",   "", 0, 0);

    /* "ge" rule used by ReplaceInfix(). */
    rv1b = new RuleVector();
    rv1b.addElement(502, "ge", "", 0, greaterthan2);
    rv1b.addElement(0,   "",   "", 0, 0);

    /* Suffix rules applied after rule 501 or 502 fired. */
    rv1c = new RuleVector();
    rv1c.addElement(503, "nd", "nd", 0, 0);
    rv1c.addElement(504, "d",  "",   0, EndsWithC);
    rv1c.addElement(505, "ht", "ht", 0, 0);
    rv1c.addElement(506, "t",  "",   0, EndsWithC);
    rv1c.addElement(0,   "",   "",   0, 0);

    /* kt -> k, ft -> f, pt -> p; all three entries share id 701. */
    rv7 = new RuleVector();
    final String beforeT = "kfp";
    for (int k = 0; k < beforeT.length(); k++) {
      final String consonant = String.valueOf(beforeT.charAt(k));
      rv7.addElement(701, consonant + "t", consonant, -1, 0);
    }
    rv7.addElement(0, "", "", 0, 0);

    /* Clean-up rules: undouble a final consonant (ids 601..620, in the
       order of the letters below), then v -> f and z -> s. */
    rv6 = new RuleVector();
    final String doubled = "bcdfghjklmnpqrstvwxz";
    for (int k = 0; k < doubled.length(); k++) {
      final String consonant = String.valueOf(doubled.charAt(k));
      rv6.addElement(601 + k, consonant + consonant, consonant, -1, 0);
    }
    rv6.addElement(621, "v", "f", -1, 0);
    rv6.addElement(622, "z", "s", -1, 0);
    rv6.addElement(0,   "",  "",   0, 0);
  }

  /**
   * Placeholder: currently returns the word unchanged.
   * NOTE(review): the name suggests diacritics removal was intended here,
   * but no such processing is implemented.
   *
   * @param word the word to process
   * @return the same {@code word} reference, unmodified
   */
  public String RemoveDia( String word )
  {
    final String unchanged = word;
    return unchanged;
  }
  
  /**
   * Computes a rough syllable count for {@code stem}: the number of times a
   * run of vowels is followed by a consonant. A vowel run that reaches the
   * end of the string is not counted.
   *
   * @param stem the text to measure
   * @return the number of vowel-run/consonant transitions (0 for "")
   */
  public int measure(String stem)
    {
        final int len = stem.length();
        int transitions = 0;
        int pos = 0;

        while (pos < len) {
            // Skip leading consonants. vowel() needs the previous character;
            // position 0 has none, so 'a' stands in for it.
            for (; pos < len; pos++) {
                final char before = (pos > 0) ? stem.charAt(pos - 1) : 'a';
                if (vowel(stem.charAt(pos), before)) {
                    break;
                }
            }
            // Skip the vowel run; '?' stands in for the missing previous
            // character at position 0.
            for (; pos < len; pos++) {
                final char before = (pos > 0) ? stem.charAt(pos - 1) : '?';
                if (!vowel(stem.charAt(pos), before)) {
                    break;
                }
            }
            // Only count when the vowel run was closed by a consonant.
            if (pos < len) {
                transitions++;
                pos++;
            }
        }
        return transitions;
    }

  /**
   * Size of a candidate stem as used by the rule "minimum root size" checks;
   * simply delegates to {@link #measure(String)}.
   *
   * @param stem the candidate stem
   * @return {@code measure(stem)}
   */
  public int wordSize(String stem)
    {
        final int size = measure(stem);
        return size;
    }

    /**
     * Tells whether {@code ch} counts as a vowel, given the character that
     * precedes it.
     *
     * - a, e, i, o, u, &euml; and &iuml; are always vowels;
     * - y is a vowel unless the preceding character is one of those vowels;
     * - j is a vowel only directly after i (the Dutch "ij");
     * - everything else is a consonant.
     *
     * The original code tested the numeric values 137 and 139, which are the
     * code page 437/850 positions of &euml; and &iuml;. Java chars are
     * Unicode, where these letters are U+00EB and U+00EF, so they were never
     * recognised. Both encodings are accepted now; the legacy values are kept
     * so that existing behaviour for those code points does not change.
     *
     * @param ch   the character to classify
     * @param prev the character before {@code ch}; callers pass a stand-in
     *             when {@code ch} is the first character
     * @return {@code true} when {@code ch} is treated as a vowel
     */
    private boolean vowel(char ch, char prev)
    {
        if (isFixedVowel(ch)) {
            return true;
        }
        if (ch == 'y') {
            return !isFixedVowel(prev);
        }
        if (ch == 'j') {
            return prev == 'i';
        }
        return false;
    }

    /** True for the characters that are vowels regardless of context. */
    private static boolean isFixedVowel(char c)
    {
        switch (c)
        {
            case 'a':
            case 'e':
            case 'i':
            case 'o':
            case 'u':
            case '\u00EB': // e with diaeresis
            case '\u00EF': // i with diaeresis
            case 137:      // legacy code page value for e with diaeresis
            case 139:      // legacy code page value for i with diaeresis
                return true;
            default:
                return false;
        }
    }
    
    /**
     * Tells whether {@code sequence} contains the character {@code ch}.
     *
     * @param sequence the characters to look in
     * @param ch       the character to look for
     * @return {@code true} when {@code ch} occurs at least once
     */
    public boolean strSearch(String sequence, char ch)
    {
        return sequence.indexOf(ch) >= 0;
    }

    /**
     * Tells whether {@code word} contains at least one character that
     * {@code vowel()} classifies as a vowel.
     *
     * @param word the word to inspect
     * @return {@code true} when a vowel is found, {@code false} otherwise
     *         (including for the empty string)
     */
    public boolean containsVowel(String word)
    {
        for (int pos = 0; pos < word.length(); pos++) {
            // The first character has no predecessor; 'a' stands in for it.
            final char before = (pos > 0) ? word.charAt(pos - 1) : 'a';
            if (vowel(word.charAt(pos), before)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Checks whether the ending of {@code word} meets the condition for
     * doubling its last vowel (see {@link #DuplicateV(String)}).
     *
     * - fewer than 2 characters: never;
     * - exactly 2 characters: the last one is not in "aeiouwxy" and the first
     *   one is in "aeou";
     * - 3 or more characters: normally the "CVC" test -- last not in
     *   "aeiouwxy", second-to-last in "aeou", third-to-last not in "aeiou".
     *   When the word is longer than 3 characters, ends in 'e' + a character
     *   not in "aeiouwxy", and the third-to-last character is in "AEIOU",
     *   a length-dependent test on the preceding characters is used instead.
     *
     * NOTE(review): the original comment labels that special case "CeC",
     * i.e. a consonant before the 'e', yet the code looks the third-to-last
     * character up in the upper-case vowels "AEIOU". For lower-case input
     * the special case therefore never applies. Confirm the intent.
     *
     * @param word the candidate stem
     * @return {@code true} when the vowel should be doubled
     */
    public boolean DupVCond(String word )
    {
        final int n = word.length();
        if (n < 2) {
            return false;
        }

        final boolean lastAllowsDoubling = !strSearch("aeiouwxy", word.charAt(n - 1));
        final char secondLast = word.charAt(n - 2);

        if (n == 2) {
            return lastAllowsDoubling && strSearch("aeou", secondLast);
        }

        final char thirdLast = word.charAt(n - 3);
        final boolean eSpecialCase = lastAllowsDoubling
                && strSearch("e", secondLast)
                && strSearch("AEIOU", thirdLast)
                && n > 3;

        if (!eSpecialCase) {
            /* CVC */
            return lastAllowsDoubling
                    && strSearch("aeou", secondLast)
                    && !strSearch("aeiou", thirdLast);
        }

        final char fourthLast = word.charAt(n - 4);
        if (n == 4) {
            /* !VCeC */
            return !strSearch("aeiou", fourthLast);
        }
        if (n == 5) {
            /* !(aiou)CeC */
            return !strSearch("aiou", fourthLast);
        }

        /* n >= 6: ! (aiou)CeC or !C(aiou)XCeC.
           Equivalent to the original !(!a || (!b && c)). */
        final boolean a = strSearch("aiou", fourthLast);
        final boolean b = strSearch("aiou", word.charAt(n - 5));
        final boolean c = strSearch("aeiou", word.charAt(n - 6));
        return a && (b || !c);
    }// DupVCond

   /**
    * Evaluates a rule condition code against {@code word}.
    *
    * @param word the candidate text the condition is checked on
    * @param Cond 0 for "no condition", or one of EndsWithC, EndsWithV,
    *             EndsWithVX, greaterthan2
    * @return {@code true} when the condition holds; {@code false} when it
    *         does not or when the code is unknown
    */
   private boolean wordMeetCondition(String word, int Cond) {
        if (Cond == 0) {
            return true;
        } else if (Cond == EndsWithC) {
            return endsWithC(word);
        } else if (Cond == EndsWithV) {
            return endsWithV(word);
        } else if (Cond == EndsWithVX) {
            return endsWithVX(word);
        } else if (Cond == greaterthan2) {
            return greaterthan2(word);
        }
        return false;
   }


    /**
     * Tells whether {@code word} ends in a vowel. Words shorter than two
     * characters always yield {@code false}.
     *
     * @param word the word to inspect
     * @return {@code true} when the last character is a vowel in the context
     *         of the character before it
     */
    public boolean endsWithV(String word)
    {
        final int n = word.length();
        return n > 1 && vowel(word.charAt(n - 1), word.charAt(n - 2));
    }
    
    /**
     * Tells whether {@code word} ends in a vowel followed by exactly one
     * more character (the "VX" pattern), i.e. whether the second-to-last
     * character is a vowel.
     *
     * The original passed the second-to-last character to {@code vowel()}
     * both as the character and as its own predecessor. That made 'j' never
     * a vowel (so "ij" + X was rejected) and 'y' always one. The real
     * predecessor is used now. For a two-character word there is none, so
     * 'a' is used as the stand-in, the same convention measure() and
     * containsVowel() use for the first character.
     *
     * @param word the word to inspect
     * @return {@code true} when the second-to-last character is a vowel;
     *         {@code false} for words shorter than two characters
     */
    public boolean endsWithVX(String word)
    {
        final int n = word.length();
        if (n < 2) {
            return false;
        }
        final char before = (n > 2) ? word.charAt(n - 3) : 'a';
        return vowel(word.charAt(n - 2), before);
    }
    
    /**
     * Tells whether {@code word} ends in a consonant, i.e. whether its last
     * character is not a vowel. The empty string yields {@code true}, as
     * before.
     *
     * The original read {@code charAt(length - 2)} for every non-empty word
     * and therefore threw StringIndexOutOfBoundsException on a one-character
     * word. A single character has no predecessor, so 'a' is used as the
     * stand-in, the same convention measure() and containsVowel() use for
     * the first character.
     *
     * @param word the word to inspect
     * @return {@code false} when the last character is a vowel,
     *         {@code true} otherwise
     */
    public boolean endsWithC(String word)
    {
        final int n = word.length();
        if (n == 0) {
            return true;
        }
        final char before = (n > 1) ? word.charAt(n - 2) : 'a';
        return !vowel(word.charAt(n - 1), before);
    }
    
    /**
     * Tells whether {@code word} is longer than two characters.
     *
     * @param word the word to inspect
     * @return {@code true} when {@code word.length() > 2}
     */
    public boolean greaterthan2 (String word)
    {
        return word.length() > 2;
    }


   /**
    * Doubles the vowel in front of the final character of {@code word}
    * (for example "lop" becomes "loop") and stores the result in
    * {@code stemmed}.
    *
    * Special handling: when that vowel is &euml; it is followed by a plain
    * 'e' instead of a second &euml;, and if the character before the
    * &euml; is 'i', the &euml; itself becomes 'e' as well.
    *
    * Words shorter than two characters are stored unchanged.
    *
    * Fixes compared with the original: the look-back for the 'i' before an
    * &euml; is now guarded, so a two-character word starting with &euml; no
    * longer throws StringIndexOutOfBoundsException; unused locals and the
    * repeated buffer copies were removed.
    *
    * @param word the word whose vowel is to be doubled
    * @return 1 when the word was modified, -1 when it was too short
    */
   public int  DuplicateV( String word )
   {
        final int originalLength = word.length();
        if (originalLength < 2) {
            stemmed = word;
            return -1;
        }

        // Append a copy of the last character to make room; the slot in
        // front of it is overwritten with the doubled vowel below.
        final StringBuffer buffer = new StringBuffer(word);
        buffer.append(word.charAt(originalLength - 1));
        final int length = buffer.length();

        final char vowelToDouble = buffer.charAt(length - 3);
        if (vowelToDouble == '\u00EB') {
            buffer.setCharAt(length - 2, 'e');
            // length >= 4 guards the look-back for two-character input.
            if (length >= 4 && buffer.charAt(length - 4) == 'i') {
                buffer.setCharAt(length - 3, 'e');
            }
        } else {
            buffer.setCharAt(length - 2, vowelToDouble);
        }

        stemmed = buffer.toString();
        return 1;
   }


   /**
    * Applies the first matching suffix rule of {@code rules} to {@code word}
    * and stores the outcome in {@code stemmed}.
    *
    * Rules are tried in order until the entry with id 0 is reached. A rule
    * fires when the word ends with the rule's suffix, the rule's minimum
    * root size is smaller than {@code wordSize()} of the remaining stem, and
    * the rule's condition holds for that stem. The suffix is then replaced
    * by the rule's replacement text.
    *
    * Fix compared with the original: the candidate stem of a rule that
    * matched but did not fire is no longer kept around. Previously, if the
    * vector had no id-0 terminator, such a half-applied stem and the id of
    * the last rule could be reported even though no rule fired. Now the word
    * is left unchanged and 0 is returned in that situation. For vectors that
    * end with the id-0 entry the behaviour is the same as before.
    *
    * @param word  the word to rewrite
    * @param rules the ordered rule table
    * @return the id of the rule that fired, 0 when none fired, or -1 when
    *         {@code rules} is empty or {@code word} has fewer than 4
    *         characters (the word is then stored unchanged)
    */
   public int ReplaceSuffix(String word, RuleVector rules)
   {
        if (rules.isEmpty() || word.length() < 4) {
            stemmed = word;
            return -1;
        }

        int firedId = 0;
        String result = word;

        for (int i = 0; i < rules.size(); i++) {
            final Rule rule = (Rule) rules.elementAt(i);
            if (rule.getId() == 0) {
                break; // terminator entry: nothing fired
            }

            final String suffix = rule.getSuffix();
            if (!word.endsWith(suffix)) {
                continue;
            }

            final String stem = word.substring(0, word.length() - suffix.length());
            if (rule.getMinrootsize() < wordSize(stem)
                    && wordMeetCondition(stem, rule.getCondition())) {
                final String replace = rule.getReplace();
                result = (replace.length() > 0) ? stem.concat(replace) : stem;
                firedId = rule.getId();
                break;
            }
        }

        stemmed = result;
        return firedId;
   }


    /**
     * Applies the first matching prefix rule of {@code rules} to {@code str}
     * and stores the outcome in {@code stemmed}. The rule's "suffix" field
     * holds the prefix to match.
     *
     * A rule fires when {@code str} starts with the prefix, the rule's
     * minimum root size is smaller than {@code wordSize(str)}, and the rule's
     * condition holds for the text that results from replacing the prefix.
     *
     * Fix compared with the original: the prefix match is tested before the
     * replacement is built. Previously the replacement was computed first,
     * which threw when the prefix was longer than the word and could leave a
     * wrongly truncated result (with the last rule's id) when the vector had
     * no id-0 terminator. For vectors that end with the id-0 entry the
     * behaviour is the same as before.
     *
     * @param str   the word to rewrite
     * @param rules the ordered rule table
     * @return the id of the rule that fired, 0 when none fired, or -1 when
     *         {@code rules} is empty or {@code str} has fewer than 4
     *         characters (the word is then stored unchanged)
     */
    private int ReplacePrefix(String str, RuleVector rules)
    {
        if (rules.isEmpty() || str.length() < 4) {
            stemmed = str;
            return -1;
        }

        int firedId = 0;
        String result = str;

        for (int i = 0; i < rules.size(); i++) {
            final Rule rule = (Rule) rules.elementAt(i);
            if (rule.getId() == 0) {
                break; // terminator entry: nothing fired
            }

            final String prefix = rule.getSuffix();
            if (!str.startsWith(prefix)) {
                continue;
            }

            if (rule.getMinrootsize() < wordSize(str)) {
                final String candidate = rule.getReplace() + str.substring(prefix.length());
                if (wordMeetCondition(candidate, rule.getCondition())) {
                    result = candidate;
                    firedId = rule.getId();
                    break;
                }
            }
        }

        stemmed = result;
        return firedId;
    }

// TODO: take a closer look at this function...
   /**
    * Applies the first matching infix rule of {@code rules} to {@code word}
    * and stores the outcome in {@code stemmed}. The rule's "suffix" field
    * holds the text to look for; it is searched from index 1 onwards, so a
    * match at the very start of the word is ignored, and only the first
    * occurrence is replaced.
    *
    * A rule fires when the infix is found, the rule's minimum root size is
    * smaller than {@code wordSize()} of the rewritten word, and the rule's
    * condition holds. Note that the condition is evaluated on the original
    * {@code word}, not on the rewritten text (kept as in the original code).
    *
    * Fix compared with the original: when the infix was found but the rule
    * did not fire, the rewritten text was still stored in {@code stemmed}
    * while the returned id said that no rule had fired. The word is now only
    * changed when a rule actually fires.
    *
    * @param word  the word to rewrite
    * @param rules the ordered rule table
    * @return the id of the rule that fired, 0 when none fired, or -1 when
    *         {@code rules} is empty or {@code word} has fewer than 4
    *         characters (the word is then stored unchanged)
    */
   public int ReplaceInfix(String word, RuleVector rules)
   {
        if (rules.isEmpty() || word.length() < 4) {
            stemmed = word;
            return -1;
        }

        int firedId = 0;
        String result = word;

        for (int i = 0; i < rules.size(); i++) {
            final Rule rule = (Rule) rules.elementAt(i);
            if (rule.getId() == 0) {
                break; // terminator entry: nothing fired
            }

            final String infix = rule.getSuffix();
            final int at = word.indexOf(infix, 1);
            if (at == -1) {
                continue;
            }

            final String candidate = word.substring(0, at)
                    + rule.getReplace()
                    + word.substring(at + infix.length());

            if (rule.getMinrootsize() < wordSize(candidate)
                    && wordMeetCondition(word, rule.getCondition())) {
                result = candidate;
                firedId = rule.getId();
                break;
            }
        }

        stemmed = result;
        return firedId;
   }


/**
 * Stems a word by running it through the rule tables in a fixed order:
 * suffix steps rv1 to rv4, the "ge" prefix rule (rv1a), the "ge" infix rule
 * (rv1b), the kt/ft/pt rules (rv7) and, if any step reported a positive rule
 * id, the final clean-up rules (rv6).
 *
 * After each of the four suffix steps, certain rule ids trigger a vowel
 * doubling when {@link #DupVCond(String)} holds for the intermediate result.
 * After rule 501 or 502 fired, the rv1c suffix rules are applied as well.
 *
 * The intermediate result is kept in the instance field {@code stemmed},
 * which every helper overwrites; one instance should therefore not be used
 * by several threads at the same time.
 *
 * @param word the word to stem
 * @return the stemmed word
 */
public String stem( String word )
   {
   boolean anyRuleFired = false; // decides whether the rv6 clean-up runs
   int fired;                    // id reported by the most recent step

   stemmed = word;

   // Step 1
   fired = ReplaceSuffix(word, rv1);
   if (fired > 0) {
      anyRuleFired = true;
   }
   switch (fired) {
      case 105:
      case 115:
         if (DupVCond(stemmed)) {
            DuplicateV(stemmed);
         }
         break;
      default:
         break;
   }

   // Step 2
   fired = ReplaceSuffix(stemmed, rv2);
   if (fired > 0) {
      anyRuleFired = true;
   }
   switch (fired) {
      case 216:
      case 217:
         if (DupVCond(stemmed)) {
            DuplicateV(stemmed);
         }
         break;
      default:
         break;
   }

   // Step 3
   fired = ReplaceSuffix(stemmed, rv3);
   if (fired > 0) {
      anyRuleFired = true;
   }
   switch (fired) {
      case 302:
      case 312:
      case 313:
      case 314:
      case 316:
      case 317:
         if (DupVCond(stemmed)) {
            DuplicateV(stemmed);
         }
         break;
      default:
         break;
   }

   // Step 4
   fired = ReplaceSuffix(stemmed, rv4);
   if (fired > 0) {
      anyRuleFired = true;
   }
   switch (fired) {
      case 411:
      case 412:
      case 413:
      case 414:
      case 415:
      case 416:
      case 417:
         if (DupVCond(stemmed)) {
            DuplicateV(stemmed);
         }
         break;
      default:
         break;
   }

   // "ge" prefix
   fired = ReplacePrefix(stemmed, rv1a);
   if (fired > 0) {
      anyRuleFired = true;
   }
   if (fired == 501) {
      stemmed = RemoveDia(stemmed);
      ReplaceSuffix(stemmed, rv1c);
   }

   // "ge" infix
   fired = ReplaceInfix(stemmed, rv1b);
   if (fired > 0) {
      anyRuleFired = true;
   }
   if (fired == 502) {
      ReplaceSuffix(stemmed, rv1c);
   }

   // kt/ft/pt
   fired = ReplaceSuffix(stemmed, rv7);
   if (fired > 0) {
      anyRuleFired = true;
   }

   // Final clean-up, only when some step reported a positive rule id.
   if (anyRuleFired) {
      ReplaceSuffix(stemmed, rv6);
   }

   return stemmed;

   } /* Stem */


}