// The -unicode switch works by ignoring the higher order byte of the unicode
// input. Since the underlying machinery for regexp matching works by building
// a character table, it cannot accept unicode and so, neither can this.
// However, for two-byte Unicode (UTF-16) input that contains ASCII characters
// only (which is the case with most files on the English version of
// Windows 98), this should work.

package com.seeingwithc.email;

import java.io.*;
import java.util.*;

import org.apache.oro.text.awk.*;
import org.apache.oro.text.regex.*;

import com.seeingwithc.io.*;

/**
 * Command-line tool that scans a file, or every file reachable below a
 * directory, for e-mail addresses and prints each match to standard output.
 *
 * Usage: <code>java Emails filename [-comma] [-unicode]</code>
 * <ul>
 *   <li><code>-comma</code>: print all matches on one line separated by
 *       ", " instead of one match per line.</li>
 *   <li><code>-unicode</code>: for files accepted by
 *       {@link #validUnicode(String)}, read them through
 *       <code>UnicodeIgnoreByteReader</code> instead of a plain
 *       <code>InputStreamReader</code>.</li>
 * </ul>
 *
 * All state is held in static fields, so the options chosen in
 * {@link #main(String[])} apply to every later {@link #handleFile} call.
 */
public class Emails {

	/** Matcher shared by every scanned file. */
	private static AwkMatcher matcher=new AwkMatcher();
	/** Compiler used once in main() to build {@link #pattern}. */
	private static PatternCompiler compiler=new AwkCompiler();
	/** Compiled e-mail pattern; null until main() has compiled it. */
	private static Pattern pattern=null;
	/** True when the -unicode option was given. */
	private static boolean acceptUnicode=false;
	/** True once at least one match has been printed (drives the ", " separator). */
	private static boolean hasPrinted=false;

	/**
	 * Entry point: compiles the e-mail pattern, parses the options and walks
	 * the file or directory tree named by <code>args[0]</code>.
	 *
	 * @param args <code>args[0]</code> is the file or directory to scan;
	 *             <code>args[1]</code> and <code>args[2]</code> may each be
	 *             <code>-comma</code> or <code>-unicode</code> (case-insensitive)
	 */
	public static void main(String args[])
	{
		if (args.length < 1) {
			System.out.println("USAGE: java Emails filename [-comma] [-unicode]");
			System.exit(1);
		}

		boolean comma=false;

		String strRe="[a-z0-9\\.\\-\\_]+@(([a-z0-9\\-\\_])+\\.)+(" +
					"com|net|org|edu|int|mil|gov|arpa|biz|" +
					"aero|name|coop|info|pro|museum|tv|[a-z]{2})";

		try {
			pattern = compiler.compile(strRe,AwkCompiler.CASE_INSENSITIVE_MASK);
		} catch(MalformedPatternException e) {
			System.out.println("Internal error.\n"+e);
			return;
		}

		// Only the two positions after the file name are inspected for options.
		for (int i=1; i<args.length && i<=2; i++) {
			if (args[i].equalsIgnoreCase("-comma")) {
				comma=true;
			}
			if (args[i].equalsIgnoreCase("-unicode")) {
				acceptUnicode=true;
			}
		}

		// Explicit stack instead of recursion for the directory walk.
		Stack list=new Stack();
		String dir=args[0];
		// A bare drive specification such as "C:" gets a trailing slash appended.
		if (dir.length()==2 && dir.charAt(1)==':') {
			dir=dir+"/";
		}
		list.push(dir);

		while (!list.empty()) {
			String name=(String)list.pop();
			File myFile=new File(name);
			if (!myFile.exists()) {
				continue;
			}
			try {
				if (myFile.isFile()) {
					handleFile(myFile.getCanonicalPath(),comma);
				}
			}
			catch (IOException ioe) {
				// unable to get canonical path; ignore
			}
			if (myFile.isDirectory()) {
				String[] fileNames = myFile.list();
				// File.list() returns null when the directory cannot be read
				// (I/O error, missing permission); skip it instead of crashing.
				if (fileNames == null) {
					continue;
				}
				for (int i=0;i<fileNames.length;i++) {
					list.push(myFile.getAbsolutePath()+"/"+fileNames[i]);
				}
			}
		}
	}

	/**
	 * Scans one file for e-mail addresses and prints every match to standard
	 * output. Files that cannot be opened or read are skipped silently. The
	 * file is always closed before this method returns.
	 *
	 * @param fileName path of the file to scan
	 * @param comma    when true, matches are printed on one line separated by
	 *                 ", "; otherwise each match is printed on its own line
	 */
	public static void handleFile(String fileName, boolean comma)
	{
		InputStream in=null;
		Reader rin=null;
		try {
			boolean ignoreHighByte = acceptUnicode && validUnicode(fileName);
			in = new FileInputStream(fileName);
			if (ignoreHighByte) {
				rin = new UnicodeIgnoreByteReader(in,"ISO-8859-1");
			}
			else {
				try {
					rin = new InputStreamReader(in,"ISO-8859-1");
				}
				catch (UnsupportedEncodingException uee) {
					// Fall back to the platform default charset.
					rin = new InputStreamReader(in);
				}
			}
		}
		catch (Exception e) {
			// Best effort: the file is skipped, but a stream that was already
			// opened must not be leaked.
			if (in!=null) {
				try {
					in.close();
				}
				catch (IOException ignored) {
					// nothing more can be done
				}
			}
			return;
		}

		AwkStreamInput input=new AwkStreamInput(rin);

		try {
			while (matcher.contains(input, pattern)) {
				MatchResult result = matcher.getMatch();
				if (comma) {
					if (!hasPrinted) {
						System.out.print(result);
					}
					else {
						System.out.print(", "+result);
					}
				}
				else {
					System.out.println(result);
				}
				hasPrinted=true;
			}
		}
		catch (IOException ioe) {
			// just ignore: a read error ends the scan of this file.
		}
		finally {
			// Closing the reader also releases the underlying file stream; without
			// this a directory walk would leak one file handle per scanned file.
			try {
				rin.close();
			}
			catch (IOException ignored) {
				// nothing more can be done
			}
		}
	}

	/**
	 * Reports whether one character can be read from the file when it is
	 * decoded with the charset named "Unicode".
	 *
	 * @param fileName path of the file to probe
	 * @return false if opening or reading fails with an IOException or an
	 *         InternalError; true otherwise (including for an empty file,
	 *         since the value returned by read() is not inspected)
	 */
	public static boolean validUnicode(String fileName)
	{
		InputStream in = null;
		try {
			// Open the stream separately so it can still be closed if the
			// InputStreamReader constructor rejects the charset name.
			in = new FileInputStream(fileName);
			InputStreamReader isr = new InputStreamReader(in,"Unicode");
			isr.read();
		}
		catch (IOException ioe) {
			return false;
		}
		catch (InternalError ierr) {
			// NOTE(review): presumably raised by some JVM converters on bad
			// input; kept from the original, confirm whether still needed.
			return false;
		}
		finally {
			if (in!=null) {
				try {
					in.close();
				}
				catch (IOException ioe) {
					// ignore
				}
			}
		}
		return true;
	}
}

