
import java.io.Writer;

import javax.swing.text.html.HTMLEditorKit;
import javax.swing.text.html.HTML;
import javax.swing.text.MutableAttributeSet;


/**
 * This class is an extension of javax.swing.text.html.HTMLEditorKit.ParserCallback
 * with overridden methods that only store text and ignore html tags.
 * Text found inside any of the configured "ignored" elements (by default
 * <code>style</code> and <code>script</code>) is dropped, including text
 * inside elements nested within an ignored element.
 * This class is used by the HtmlParser.
 * @version $Revision: 1.1 $, $Date: 2002/11/18 08:27:36 $
 * @author  Ronnie Kolehmainen
 * @see     HtmlParser
 */
public class HtmlDocument extends HTMLEditorKit.ParserCallback {

    /** Destination of the extracted text; taken from the parser's textwriter. */
    Writer text;

    /** True while text events should be forwarded to {@link #text}. */
    boolean write = true;

    /* Default ignoretags. These tags contain nothing of interest */
    String [] ignoretags = new String [] { "style",
                                           "script" };

    /**
     * Number of ignored elements that have been opened but not yet closed.
     * Text is written only while this is zero, so that tags nested inside
     * an ignored element cannot switch writing back on.
     */
    private int ignoreDepth = 0;


    /**
     * Constructor.
     * @param parser the HtmlParser using this document; its textwriter
     *               receives the text, and its ignoredtags (when non-null)
     *               replace the default ignored tags
     */
    public HtmlDocument(HtmlParser parser)
    {
        super();
        this.text = parser.textwriter;
        if (parser.ignoredtags != null) {
            ignoretags = parser.ignoredtags;
        }
    }


    /**
     * Overridden method. Determines if the text following the tag should be
     * taken care of or ignored, i e is this an ignored tag or not.
     * Entering an ignored tag suppresses all text until that tag is closed.
     * @param t   the tag being opened
     * @param a   the attributes of the tag (not used)
     * @param pos the position in the document (not used)
     */
    public void handleStartTag(HTML.Tag t, MutableAttributeSet a, int pos)
    {
        if (isIgnored(t)) {
            ignoreDepth++;
        }
        write = (ignoreDepth == 0);
    }


    /**
     * Overridden method. Leaves an ignored element if the closed tag is one,
     * and re-enables writing once no ignored element remains open.
     * @param t   the tag being closed
     * @param pos the position in the document (not used)
     */
    public void handleEndTag(HTML.Tag t, int pos)
    {
        // Guard against a stray end tag driving the counter negative.
        if (ignoreDepth > 0 && isIgnored(t)) {
            ignoreDepth--;
        }
        write = (ignoreDepth == 0);
    }


    /**
     * Overridden method. Here we write contents into internal Writer for
     * later retrieval if we are not inside an ignored tag. Each chunk of
     * text is followed by a single space.
     * @param data the text
     * @param pos  the position in the document (not used)
     * @see HtmlParser#setIgnoreTags(String[])
     */
    public void handleText(char[] data, int pos)
    {
        if (!write) {
            return;
        }
        try {
            text.write(new String(data) + " ");
        } catch (Exception ignored) {
            // Deliberately best effort: this callback cannot throw checked
            // exceptions, and a failed write must not abort the parse.
        }
    }


    /**
     * Overridden method. Comments are skipped: the comment body is handed
     * to this method only and is never written. The write state is left
     * untouched so that text following the comment is still collected.
     * @param data the comment contents (ignored)
     * @param pos  the position in the document (not used)
     */
    public void handleComment(char[] data, int pos)
    {
        // Intentionally empty.
    }


    /**
     * Tells whether the given tag is one of the ignored tags. The comparison
     * is case insensitive.
     * @param t the tag to test
     * @return true if the tag name matches an entry of ignoretags
     */
    private boolean isIgnored(HTML.Tag t)
    {
        if (t == null || ignoretags == null) {
            return false;
        }
        String name = t.toString();
        for (int i = 0; i < ignoretags.length; i++) {
            if (name.equalsIgnoreCase(ignoretags[i])) {
                return true;
            }
        }
        return false;
    }

}
