import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDDocumentInformation;
import org.apache.pdfbox.pdmodel.PDDocumentCatalog;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.PDResources;
import org.apache.pdfbox.pdmodel.font.PDFont;
import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotation;
import org.apache.pdfbox.pdmodel.font.PDSimpleFont;
import org.apache.pdfbox.cos.COSDictionary;
import org.apache.pdfbox.cos.COSName;
import org.apache.pdfbox.cos.COSBase;
import org.apache.pdfbox.cos.COSStream;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.Collection;
import java.util.Iterator;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.BufferedReader;

/* This is an ugly hack, that started from an example file from 'pdfbox'.
 *
 * Purpose is to display the "ToUnicode" tables in the PDF file, which
 * is provided as an argument.
 *
 * See PDF Reference, version 1.7 (2006)
 * Sect. 5.9.2 -- ToUnicode CMaps
 *
 * To compile: requires 'pdfbox' library in classpath.
 * Note that a couple of warnings appear when compiling.  There is no nice way
 * to fix that, as the authors of 'pdfbox' don't seem to use Java generics.
 *
 * To run, requires 'pdfbox' and 'commons-logging' libraries.  Something like
 *      java -classpath /usr/share/java/pdfbox.jar:/usr/share/java/commons-logging.jar:. PDFView path_to_pdf_file.pdf
 */

/**
 * Command-line tool that prints, for the PDF file named by the first
 * argument: the entries of the document information dictionary, a summary
 * of every page, the base font name of every font in each page's resources
 * and, for simple fonts, the content of the font's "ToUnicode" stream.
 */
final class
PDFView
{
	/**
	 * Charset name used to decode ToUnicode streams.  ISO-8859-1 maps every
	 * byte to exactly one character, so decoding never fails and does not
	 * depend on the platform default charset.
	 */
	private static final String CMAP_CHARSET = "ISO-8859-1";

	/**
	 * Program entry point.
	 *
	 * Errors are reported on standard error and end the run early; no
	 * exception is propagated for a missing argument, a failed load or a
	 * failure while printing the catalog.
	 *
	 * @param args command-line arguments; args[0] is the path to the PDF file
	 */
	public static void
	main( String[] args )
	{
		if( args.length == 0 )
		{
			System.err.println( "Path to PDF file is missing." );
			return;
		}
		String filename = args[0];

		System.out.println( "Loading document: " + filename );
		PDDocument doc;
		try
		{
			doc = PDDocument.load( filename );
		}
		catch( Exception e )
		{
			System.err.println( e.toString() );
			return;
		}

		// From here on the document is open: whatever happens below, it has
		// to be closed again before leaving main().
		try
		{
			PDDocumentInformation info = doc.getDocumentInformation();
			COSDictionary dict = info.getDictionary();
			for( Map.Entry<COSName,COSBase> o : dict.entrySet() )
			{
				System.out.println( o.toString() );
			}

			try
			{
				PDDocumentCatalog cat = doc.getDocumentCatalog();
				printCatalog( cat );
			}
			catch( Exception e )
			{
				System.err.println( e.toString() );
			}
		}
		finally
		{
			try
			{
				doc.close();
			}
			catch( IOException e )
			{
				System.err.println( e.toString() );
			}
		}
	}

	/**
	 * Prints the catalog, the total page count and then the details of
	 * each page in the order returned by the catalog.
	 *
	 * @param cat document catalog to print
	 * @throws IOException if printing a page fails with an I/O error
	 */
	static void
	printCatalog( PDDocumentCatalog cat ) throws IOException
	{
		System.out.println( "Catalog:" + cat );

		List<PDPage> pageList = cat.getAllPages();
		System.out.println( "Total Pages: " + pageList.size() );

		// The printed page number is the zero-based index into the page list.
		int i = 0;
		for( PDPage page : pageList )
		{
			System.out.println( "==============================" );
			System.out.println( "Page " + Integer.toString( i ) );
			printPageInfo( page );
			i++;
		}
	}

	/**
	 * Prints the boxes, resources, rotation and contents of a page, the
	 * number of its annotations, and information about each of its fonts.
	 *
	 * Failures while reading annotations or fonts are reported on standard
	 * error.  A failure on one font does not prevent the remaining fonts
	 * from being printed.
	 *
	 * @param page page to describe
	 * @throws IOException if one of the page accessors fails with an I/O error
	 */
	static void
	printPageInfo( PDPage page ) throws IOException
	{
		System.out.println( "Page:        " + page);
		System.out.println( "\tCropBox:   " + page.getCropBox());
		System.out.println( "\tMediaBox:  " + page.getMediaBox());
		System.out.println( "\tResources: " + page.getResources());
		System.out.println( "\tRotation:  " + page.getRotation());
		System.out.println( "\tArtBox:    " + page.getArtBox());
		System.out.println( "\tBleedBox:  " + page.getBleedBox());
		System.out.println( "\tContents:  " + page.getContents());
		System.out.println( "\tTrimBox:   " + page.getTrimBox());
		try
		{
			List<PDAnnotation> la = page.getAnnotations();
			System.out.println("\t# Annotations: " + la.size());
		}
		catch( Exception e )
		{
			System.err.println( e.toString() );
		}

		PDResources res = page.findResources();
		if( res == null )
		{
			System.err.println( "Page has no resources; no fonts to list." );
			return;
		}

		Map<String,PDFont> fonts;
		try
		{
			fonts = res.getFonts();
		}
		catch( Exception e )
		{
			System.err.println( e.toString() );
			return;
		}
		if( fonts == null )
		{
			return;
		}

		for( PDFont f : fonts.values() )
		{
			// Each font gets its own try block, so that a problem with one
			// font is reported and the loop carries on with the next one.
			try
			{
				System.out.println( f.getBaseFont() );
				if( f instanceof PDSimpleFont )
				{
					printFontInfo( (PDSimpleFont)f );
				}
			}
			catch( Exception e )
			{
				System.err.println( e.toString() );
			}
		}
	}

	/**
	 * Prints the "ToUnicode" stream of a simple font: the stream object
	 * itself, the text lines read from getUnfilteredStream(), and finally
	 * every entry of the stream's dictionary.
	 *
	 * If the font has no ToUnicode entry, or the entry is not a stream, a
	 * one-line note is printed instead.
	 *
	 * @param f font whose ToUnicode table is printed
	 * @throws IOException if reading the stream fails
	 */
	static void
	printFontInfo( PDSimpleFont f ) throws IOException
	{
		Object toUnicode = f.getToUnicode();
		if( toUnicode == null )
		{
			System.out.println( "\tNo ToUnicode entry." );
			return;
		}
		if( !( toUnicode instanceof COSStream ) )
		{
			System.out.println( "\tToUnicode is not a stream: " + toUnicode );
			return;
		}

		COSStream tu = (COSStream)toUnicode;
		Set<Map.Entry<COSName,COSBase>> es = tu.entrySet();

		System.out.println( tu.toString() );

		BufferedReader br = new BufferedReader(
			new InputStreamReader( tu.getUnfilteredStream(), CMAP_CHARSET ) );
		try
		{
			System.out.println( "ToUnicode instructions:" );
			for( String rln = br.readLine(); rln != null; rln = br.readLine() )
			{
				System.out.println( rln );
			}
		}
		finally
		{
			// Closing the reader also closes the underlying stream.
			br.close();
		}

		for( Map.Entry<COSName,COSBase> entry : es )
		{
			COSName k = entry.getKey();
			COSBase b = entry.getValue();
			System.out.println( "\t" + k.getName() );
			System.out.println( "\t\t" + b.toString() );
		}
	}
}

