jav...

Andreas Lehmkuehler Fri, 06 Apr 2012 05:45:37 -0700

Hi,

I just realized that the headers of all the new files aren't o.k.; see [1] for an example.


@Adam

Do you have the time to fix this? If not, would you give me permission to change the headers in question?


BR
Andreas Lehmkühler

[1]http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/ConformingPDDocument.java?view=markup&pathrev=1142109



Am 02.07.2011 00:28, schrieb a...@apache.org:

Author: adam
Date: Fri Jul  1 22:28:23 2011
New Revision: 1142109

URL: http://svn.apache.org/viewvc?rev=1142109&view=rev
Log:
PDFBOX-1000: Conforming parser.  Initial commit to make it easier for others to 
test & contribute.

Added:
     
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSDictionaryLateBinding.java
     pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSUnread.java
     
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/ConformingPDFParser.java
     
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/ConformingPDDocument.java
     
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/common/XrefEntry.java
     
pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdfparser/ConformingPDFParserTest.java
     pdfbox/trunk/pdfbox/src/test/resources/org/apache/pdfbox/pdfparser/
     
pdfbox/trunk/pdfbox/src/test/resources/org/apache/pdfbox/pdfparser/gdb-refcard.pdf
   (with props)
Modified:
     pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSDictionary.java
     
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/BaseParser.java
     
pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/TestPDDocumentCatalog.java

Modified: 
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSDictionary.java
URL: 
http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSDictionary.java?rev=1142109&r1=1142108&r2=1142109&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSDictionary.java 
(original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSDictionary.java 
Fri Jul  1 22:28:23 2011
@@ -43,7 +43,7 @@ public class COSDictionary extends COSBa
       * The name-value pairs of this dictionary. The pairs are kept in the
       * order they were added to the dictionary.
       */
-    private final Map<COSName, COSBase>  items =
+    protected final Map<COSName, COSBase>  items =
          new LinkedHashMap<COSName, COSBase>();

      /**
@@ -1410,12 +1410,18 @@ public class COSDictionary extends COSBa
      /**
       * {@inheritDoc}
       */
-    public String toString()
-    {
+    @Override
+    public String toString() {
          StringBuilder retVal = new StringBuilder("COSDictionary{");
-        for( COSName key : items.keySet() )
-        {
-            retVal.append("(" + key + ":" + getDictionaryObject(key).toString() + 
") ");
+        for(COSName key : items.keySet()) {
+            retVal.append("(");
+            retVal.append(key);
+            retVal.append(":");
+            if(getDictionaryObject(key) != null)
+                retVal.append(getDictionaryObject(key).toString());
+            else
+                retVal.append("<null>");
+            retVal.append(") ");
          }
          retVal.append("}");
          return retVal.toString();

Added: 
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSDictionaryLateBinding.java
URL: 
http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSDictionaryLateBinding.java?rev=1142109&view=auto
==============================================================================
--- 
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSDictionaryLateBinding.java
 (added)
+++ 
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSDictionaryLateBinding.java
 Fri Jul  1 22:28:23 2011
@@ -0,0 +1,61 @@
+/*
+ *  Copyright 2011 adam.
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ *  under the License.
+ */
+
+package org.apache.pdfbox.cos;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.pdfbox.pdfparser.ConformingPDFParser;
+
+/**
+ *
+ * @author adam
+ */
+public class COSDictionaryLateBinding extends COSDictionary {
+    public static final Log log = 
LogFactory.getLog(COSDictionaryLateBinding.class);
+    ConformingPDFParser parser;
+
+    public COSDictionaryLateBinding(ConformingPDFParser parser) {
+        super();
+        this.parser = parser;
+    }
+
+    /**
+     * This will get an object from this dictionary.  If the object is a 
reference then it will
+     * dereference it and get it from the document.  If the object is COSNull 
then
+     * null will be returned.
+     * @param key The key to the object that we are getting.
+     * @return The object that matches the key.
+     */
+    @Override
+    public COSBase getDictionaryObject(COSName key) {
+        COSBase retval = items.get(key);
+        if(retval instanceof COSObject) {
+            int objectNumber = 
((COSObject)retval).getObjectNumber().intValue();
+            int generation = 
((COSObject)retval).getGenerationNumber().intValue();
+            try {
+                retval = parser.getObject(objectNumber, generation);
+            } catch(Exception e) {
+                log.warn("Unable to read information for object " + 
objectNumber);
+            }
+        }
+        if(retval instanceof COSNull) {
+            retval = null;
+        }
+        return retval;
+    }
+}

Added: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSUnread.java
URL: 
http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSUnread.java?rev=1142109&view=auto
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSUnread.java 
(added)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSUnread.java Fri 
Jul  1 22:28:23 2011
@@ -0,0 +1,100 @@
+/*
+ *  Copyright 2011 adam.
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ *  under the License.
+ */
+
+package org.apache.pdfbox.cos;
+
+import org.apache.pdfbox.exceptions.COSVisitorException;
+import org.apache.pdfbox.pdfparser.ConformingPDFParser;
+
+/**
+ *
+ * @author adam
+ */
+public class COSUnread extends COSBase {
+    private long objectNumber;
+    private long generation;
+    private ConformingPDFParser parser;
+
+    public COSUnread() {
+        super();
+    }
+
+    public COSUnread(long objectNumber, long generation) {
+        this();
+        this.objectNumber = objectNumber;
+        this.generation = generation;
+    }
+
+    public COSUnread(long objectNumber, long generation, ConformingPDFParser 
parser) {
+        this(objectNumber, generation);
+        this.parser = parser;
+    }
+
+    @Override
+    public Object accept(ICOSVisitor visitor) throws COSVisitorException {
+        // TODO: read the object using the parser (if available) and visit 
that object
+        throw new UnsupportedOperationException("COSUnread can not be 
written/visited.");
+    }
+
+    @Override
+    public String toString() {
+        return "COSUnread{" + objectNumber + "," + generation + "}";
+    }
+
+    /**
+     * @return the objectNumber
+     */
+    public long getObjectNumber() {
+        return objectNumber;
+    }
+
+    /**
+     * @param objectNumber the objectNumber to set
+     */
+    public void setObjectNumber(long objectNumber) {
+        this.objectNumber = objectNumber;
+    }
+
+    /**
+     * @return the generation
+     */
+    public long getGeneration() {
+        return generation;
+    }
+
+    /**
+     * @param generation the generation to set
+     */
+    public void setGeneration(long generation) {
+        this.generation = generation;
+    }
+
+    /**
+     * @return the parser
+     */
+    public ConformingPDFParser getParser() {
+        return parser;
+    }
+
+    /**
+     * @param parser the parser to set
+     */
+    public void setParser(ConformingPDFParser parser) {
+        this.parser = parser;
+    }
+
+}

Modified: 
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/BaseParser.java
URL: 
http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/BaseParser.java?rev=1142109&r1=1142108&r2=1142109&view=diff
==============================================================================
--- 
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/BaseParser.java 
(original)
+++ 
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/BaseParser.java 
Fri Jul  1 22:28:23 2011
@@ -110,6 +110,10 @@ public abstract class BaseParser
       */
      protected final boolean forceParsing;

+    public BaseParser() {
+        this.forceParsing = FORCE_PARSING;
+    }
+
      /**
       * Constructor.
       *
@@ -876,7 +880,7 @@ public abstract class BaseParser
              throw new IOException("expected='/' actual='" + (char)c + "'-" + c + " 
" + pdfSource );
          }
          // costruisce il nome
-        StringBuffer buffer = new StringBuffer();
+        StringBuilder buffer = new StringBuilder();
          c = pdfSource.read();
          while( c != -1 )
          {
@@ -1063,7 +1067,7 @@ public abstract class BaseParser
          {
              if( Character.isDigit(c) || c == '-' || c == '+' || c == '.')
              {
-                StringBuffer buf = new StringBuffer();
+                StringBuilder buf = new StringBuilder();
                  int ic = pdfSource.read();
                  c = (char)ic;
                  while( Character.isDigit( c )||
@@ -1118,7 +1122,7 @@ public abstract class BaseParser
      protected String readString() throws IOException
      {
          skipSpaces();
-        StringBuffer buffer = new StringBuffer();
+        StringBuilder buffer = new StringBuilder();
          int c = pdfSource.read();
          while( !isEndOfName((char)c)&&  !isClosing(c)&&  c != -1 )
          {
@@ -1148,7 +1152,7 @@ public abstract class BaseParser
          {
              c = pdfSource.read();
          }
-        StringBuffer buffer = new StringBuffer( theString.length() );
+        StringBuilder buffer = new StringBuilder( theString.length() );
          int charsRead = 0;
          while( !isEOL(c)&&  c != -1&&  charsRead<  theString.length() )
          {
@@ -1194,7 +1198,7 @@ public abstract class BaseParser

          //average string size is around 2 and the normal string buffer size is
          //about 16 so lets save some space.
-        StringBuffer buffer = new StringBuffer(length);
+        StringBuilder buffer = new StringBuilder(length);
          while( !isWhitespace(c)&&  !isClosing(c)&&  c != -1&&  buffer.length()<  
length&&
                  c != '['&&
                  c != '<'&&
@@ -1250,7 +1254,7 @@ public abstract class BaseParser
              throw new IOException( "Error: End-of-File, expected line");
          }

-        StringBuffer buffer = new StringBuffer( 11 );
+        StringBuilder buffer = new StringBuilder( 11 );

          int c;
          while ((c = pdfSource.read()) != -1)
@@ -1300,10 +1304,9 @@ public abstract class BaseParser
      }

      /**
-     * This will tell if the next byte is whitespace or not.
-     *
+     * This will tell if the next byte is whitespace or not.  These values are
+     * specified in table 1 (page 12) of ISO 32000-1:2008.
       * @param c The character to check against whitespace
-     *
       * @return true if the next byte in the stream is a whitespace character.
       */
      protected boolean isWhitespace( int c )

Added: 
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/ConformingPDFParser.java
URL: 
http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/ConformingPDFParser.java?rev=1142109&view=auto
==============================================================================
--- 
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/ConformingPDFParser.java
 (added)
+++ 
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/ConformingPDFParser.java
 Fri Jul  1 22:28:23 2011
@@ -0,0 +1,696 @@
+/*
+ *  Copyright 2010 adam.
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ *  under the License.
+ */
+
+package org.apache.pdfbox.pdfparser;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Set;
+import org.apache.pdfbox.cos.COSArray;
+import org.apache.pdfbox.cos.COSBase;
+import org.apache.pdfbox.cos.COSDictionary;
+import org.apache.pdfbox.cos.COSDocument;
+import org.apache.pdfbox.cos.COSFloat;
+import org.apache.pdfbox.cos.COSInteger;
+import org.apache.pdfbox.cos.COSName;
+import org.apache.pdfbox.cos.COSNumber;
+import org.apache.pdfbox.cos.COSObject;
+import org.apache.pdfbox.cos.COSString;
+import org.apache.pdfbox.cos.COSUnread;
+import org.apache.pdfbox.io.RandomAccess;
+import org.apache.pdfbox.io.RandomAccessFile;
+import org.apache.pdfbox.pdmodel.ConformingPDDocument;
+import org.apache.pdfbox.pdmodel.PDDocument;
+import org.apache.pdfbox.pdmodel.common.XrefEntry;
+import org.apache.pdfbox.persistence.util.COSObjectKey;
+
+/**
+ *
+ * @author<a href="a...@apache.org">Adam Nichols</a>
+ */
+public class ConformingPDFParser extends BaseParser {
+    protected RandomAccess inputFile;
+    List<XrefEntry>  xrefEntries;
+    private long currentOffset;
+    private ConformingPDDocument doc = null;
+    private boolean throwNonConformingException = true;
+    private boolean recursivlyRead = true;
+
+    /**
+     * Constructor.
+     *
+     * @param input The input stream that contains the PDF document.
+     *
+     * @throws IOException If there is an error initializing the stream.
+     */
+    public ConformingPDFParser(File inputFile) throws IOException {
+        this.inputFile = new RandomAccessFile(inputFile, "r");
+    }
+
+    /**
+     * This will parse the stream and populate the COSDocument object.  This 
will close
+     * the stream when it is done parsing.
+     *
+     * @throws IOException If there is an error reading from the stream or 
corrupt data
+     * is found.
+     */
+    public void parse() throws IOException {
+        document = new COSDocument();
+        doc = new ConformingPDDocument(document);
+        currentOffset = inputFile.length()-1;
+        long xRefTableLocation = parseTrailerInformation();
+        currentOffset = xRefTableLocation;
+        parseXrefTable();
+        // now that we read the xref table and put null references in the doc,
+        // we can deference those objects now.
+        boolean oldValue = recursivlyRead;
+        recursivlyRead = false;
+        List<COSObjectKey>  keys = doc.getObjectKeysFromPool();
+        for(COSObjectKey key : keys) {
+            // getObject will put it into the document's object pool for us
+            getObject(key.getNumber(), key.getGeneration());
+        }
+        recursivlyRead = oldValue;
+    }
+
+    /**
+     * This will get the document that was parsed.  parse() must be called 
before this is called.
+     * When you are done with this document you must call close() on it to 
release
+     * resources.
+     *
+     * @return The document that was parsed.
+     *
+     * @throws IOException If there is an error getting the document.
+     */
+    public COSDocument getDocument() throws IOException {
+        if( document == null ) {
+            throw new IOException( "You must call parse() before calling 
getDocument()" );
+        }
+        return document;
+    }
+
+    /**
+     * This will get the PD document that was parsed.  When you are done with
+     * this document you must call close() on it to release resources.
+     *
+     * @return The document at the PD layer.
+     *
+     * @throws IOException If there is an error getting the document.
+     */
+    public PDDocument getPDDocument() throws IOException {
+        return doc;
+    }
+
+    private boolean parseXrefTable() throws IOException {
+        String currentLine = readLine();
+        if(throwNonConformingException) {
+            if(!"xref".equals(currentLine))
+                throw new AssertionError("xref table not found.\nExpected: 
xref\nFound: "+currentLine);
+        }
+
+        int objectNumber = readInt();
+        int entries = readInt();
+        xrefEntries = new ArrayList<XrefEntry>(entries);
+        for(int i=0; i<entries; i++)
+            xrefEntries.add(new XrefEntry(objectNumber++, readInt(), 
readInt(), readLine()));
+
+        return true;
+    }
+
+    protected long parseTrailerInformation() throws IOException, 
NumberFormatException {
+        long xrefLocation = -1;
+        consumeWhitespaceBackwards();
+        String currentLine = readLineBackwards();
+        if(throwNonConformingException) {
+            if(!"%%EOF".equals(currentLine))
+                throw new AssertionError("Invalid EOF marker.\nExpected: 
%%EOF\nFound: "+currentLine);
+        }
+
+        xrefLocation = readLongBackwards();
+        currentLine = readLineBackwards();
+        if(throwNonConformingException) {
+            if(!"startxref".equals(currentLine))
+                throw new AssertionError("Invalid trailer.\nExpected: 
startxref\nFound: "+currentLine);
+        }
+
+        document.setTrailer(readDictionaryBackwards());
+        consumeWhitespaceBackwards();
+        currentLine = readLineBackwards();
+        if(throwNonConformingException) {
+            if(!"trailer".equals(currentLine))
+                throw new AssertionError("Invalid trailer.\nExpected: 
trailer\nFound: "+currentLine);
+        }
+
+        return xrefLocation;
+    }
+
+    protected byte readByteBackwards() throws IOException {
+        inputFile.seek(currentOffset);
+        byte singleByte = (byte)inputFile.read();
+        currentOffset--;
+        return singleByte;
+    }
+
+    protected byte readByte() throws IOException {
+        inputFile.seek(currentOffset);
+        byte singleByte = (byte)inputFile.read();
+        currentOffset++;
+        return singleByte;
+    }
+
+    protected String readBackwardUntilWhitespace() throws IOException {
+        StringBuilder sb = new StringBuilder();
+        byte singleByte = readByteBackwards();
+        while(!isWhitespace(singleByte)) {
+            sb.insert(0, (char)singleByte);
+            singleByte = readByteBackwards();
+        }
+        return sb.toString();
+    }
+
+    /**
+     * This will read all bytes (backwards) until a non-whitespace character is
+     * found.  To save you an extra read, the non-whitespace character is
+     * returned.  If the current character is not whitespace, this method will
+     * just return the current char.
+     * @return the first non-whitespace character found
+     * @throws IOException if there is an error reading from the file
+     */
+    protected byte consumeWhitespaceBackwards() throws IOException {
+        inputFile.seek(currentOffset);
+        byte singleByte = (byte)inputFile.read();
+        if(!isWhitespace(singleByte))
+            return singleByte;
+
+        // we have some whitespace, let's consume it
+        while(isWhitespace(singleByte)) {
+            singleByte = readByteBackwards();
+        }
+        // readByteBackwards will decrement the currentOffset to point the byte
+        // before the one just read, so we increment it back to the current 
byte
+        currentOffset++;
+        return singleByte;
+    }
+
+    /**
+     * This will read all bytes until a non-whitespace character is
+     * found.  To save you an extra read, the non-whitespace character is
+     * returned.  If the current character is not whitespace, this method will
+     * just return the current char.
+     * @return the first non-whitespace character found
+     * @throws IOException if there is an error reading from the file
+     */
+    protected byte consumeWhitespace() throws IOException {
+        inputFile.seek(currentOffset);
+        byte singleByte = (byte)inputFile.read();
+        if(!isWhitespace(singleByte))
+            return singleByte;
+
+        // we have some whitespace, let's consume it
+        while(isWhitespace(singleByte)) {
+            singleByte = readByte();
+        }
+        // readByte() will increment the currentOffset to point the byte
+        // after the one just read, so we decrement it back to the current byte
+        currentOffset--;
+        return singleByte;
+    }
+
+    /**
+     * This will consume any whitespace, read in bytes until whitespace is 
found
+     * again and then parse the characters which have been read as a long.  The
+     * current offset will then point at the first whitespace character which
+     * preceeds the number.
+     * @return the parsed number
+     * @throws IOException if there is an error reading from the file
+     * @throws NumberFormatException if the bytes read can not be converted to 
a number
+     */
+    protected long readLongBackwards() throws IOException, 
NumberFormatException {
+        StringBuilder sb = new StringBuilder();
+        consumeWhitespaceBackwards();
+        byte singleByte = readByteBackwards();
+        while(!isWhitespace(singleByte)) {
+            sb.insert(0, (char)singleByte);
+            singleByte = readByteBackwards();
+        }
+        if(sb.length() == 0)
+            throw new AssertionError("Number not found.  Expected number at offset: 
" + currentOffset);
+        return Long.parseLong(sb.toString());
+    }
+
+    @Override
+    protected int readInt() throws IOException {
+        StringBuilder sb = new StringBuilder();
+        consumeWhitespace();
+        byte singleByte = readByte();
+        while(!isWhitespace(singleByte)) {
+            sb.append((char)singleByte);
+            singleByte = readByte();
+        }
+        if(sb.length() == 0)
+            throw new AssertionError("Number not found.  Expected number at offset: 
" + currentOffset);
+        return Integer.parseInt(sb.toString());
+    }
+
+    /**
+     * This will read in a number and return the COS version of the number (be
+     * it a COSInteger or a COSFloat).
+     * @return the COSNumber which was read/parsed
+     * @throws IOException
+     */
+    protected COSNumber readNumber() throws IOException {
+        StringBuilder sb = new StringBuilder();
+        consumeWhitespace();
+        byte singleByte = readByte();
+        while(!isWhitespace(singleByte)) {
+            sb.append((char)singleByte);
+            singleByte = readByte();
+        }
+        if(sb.length() == 0)
+            throw new AssertionError("Number not found.  Expected number at offset: 
" + currentOffset);
+        return parseNumber(sb.toString());
+    }
+
+    protected COSNumber parseNumber(String number) throws IOException {
+        if(number.matches("^[0-9]+$"))
+            return COSInteger.get(number);
+        return new COSFloat(Float.parseFloat(number));
+    }
+
+    protected COSBase processCosObject(String string) throws IOException {
+        if(string != null&&  string.endsWith(">")) {
+            // string of hex codes
+            return COSString.createFromHexString(string.replaceAll("^<", 
"").replaceAll(">$", ""));
+        }
+        return null;
+    }
+
+    protected COSBase readObjectBackwards() throws IOException {
+        COSBase obj = null;
+        consumeWhitespaceBackwards();
+        String lastSection = readBackwardUntilWhitespace();
+        if("R".equals(lastSection)) {
+            // indirect reference
+            long gen = readLongBackwards();
+            long number = readLongBackwards();
+            // We just put a placeholder in the pool for now, we'll read the 
data later
+            doc.putObjectInPool(new COSUnread(), number, gen);
+            obj = new COSUnread(number, gen, this);
+        } else if(">>".equals(lastSection)) {
+            // dictionary
+            throw new RuntimeException("Not yet implemented");
+        } else if(lastSection != null&&  lastSection.endsWith("]")) {
+            // array
+            COSArray array = new COSArray();
+            lastSection = lastSection.replaceAll("]$", "");
+            while(!lastSection.startsWith("[")) {
+                if(lastSection.matches("^\\s*<.*>\\s*$")) // it's a hex string
+                    array.add(COSString.createFromHexString(lastSection.replaceAll("^\\s*<", 
"").replaceAll(">\\s*$", "")));
+                lastSection = readBackwardUntilWhitespace();
+            }
+            lastSection = lastSection.replaceAll("^\\[", "");
+            if(lastSection.matches("^\\s*<.*>\\s*$")) // it's a hex string
+                array.add(COSString.createFromHexString(lastSection.replaceAll("^\\s*<", 
"").replaceAll(">\\s*$", "")));
+            obj = array;
+        } else if(lastSection != null&&  lastSection.endsWith(">")) {
+            // string of hex codes
+            obj = processCosObject(lastSection);
+        } else {
+            // try a number, otherwise fall back on a string
+            try {
+                Long.parseLong(lastSection);
+                obj = COSNumber.get(lastSection);
+            } catch(NumberFormatException e) {
+                throw new RuntimeException("Not yet implemented");
+            }
+        }
+
+        return obj;
+    }
+
+    protected COSName readNameBackwards() throws IOException {
+        String name = readBackwardUntilWhitespace();
+        name = name.replaceAll("^/", "");
+        return COSName.getPDFName(name);
+    }
+
+    public COSBase getObject(long objectNumber, long generation) throws 
IOException {
+        // we could optionally, check to see if parse() have been called&
+        // throw an exception here, but I don't think that's really necessary
+        XrefEntry entry = xrefEntries.get((int)objectNumber);
+        currentOffset = entry.getByteOffset();
+        return readObject(objectNumber, generation);
+    }
+
+    /**
+     * This will read an object from the inputFile at whatever our 
currentOffset
+     * is.  If the object and generation are not the expected values and this
+     * object is set to throw an exception for non-conforming documents, then 
an
+     * exception will be thrown.
+     * @param objectNumber the object number you expect to read
+     * @param generation the generation you expect this object to be
+     * @return
+     */
+    public COSBase readObject(long objectNumber, long generation) throws 
IOException {
+        // when recursivly reading, we always pull the object from the 
filesystem
+        if(document != null&&  recursivlyRead) {
+            // check to see if it is in the document cache before hitting the 
filesystem
+            COSBase obj = doc.getObjectFromPool(objectNumber, generation);
+            if(obj != null)
+                return obj;
+        }
+
+        int actualObjectNumber = readInt();
+        if(objectNumber != actualObjectNumber)
+            if(throwNonConformingException)
+                throw new AssertionError("Object numer expected was " +
+                        objectNumber + " but actual was " + 
actualObjectNumber);
+        consumeWhitespace();
+
+        int actualGeneration = readInt();
+        if(generation != actualGeneration)
+            if(throwNonConformingException)
+                throw new AssertionError("Generation expected was " +
+                        generation + " but actual was " + actualGeneration);
+        consumeWhitespace();
+
+        String obj = readWord();
+        if(!"obj".equals(obj))
+            if(throwNonConformingException)
+                throw new AssertionError("Expected keyword 'obj' but found " + 
obj);
+
+        // put placeholder object in doc to prevent infinite recursion
+        // e.g. read Root ->  dereference object ->  read object which has 
/Parent ->  GOTO read Root
+        doc.putObjectInPool(new COSObject(null), objectNumber, generation);
+        COSBase object = readObject();
+        doc.putObjectInPool(object, objectNumber, generation);
+        return object;
+    }
+
+    /**
+     * This actually reads the object data.
+     * @return the object which is read
+     * @throws IOException
+     */
+    /**
+     * Reads the next object starting at the current offset.
+     *
+     * The first word that is read decides how the object is parsed: a leading
+     * {@code <<} yields a COSDictionary, {@code /} a COSName, {@code -} or a
+     * digit a number (or, for the form "n g R", a reference), {@code [} a
+     * COSArray and {@code (} a COSString.  A word starting with {@code ]}
+     * makes the method return null, which the array branch uses as its
+     * end-of-array signal.
+     *
+     * NOTE(review): the string branch stops at the first ')' byte it reads;
+     * no handling of escaped or nested parentheses is visible here.
+     *
+     * @return the object that was read, or null if the end of an array was found
+     * @throws IOException if there is an error reading from the file
+     */
+    protected COSBase readObject() throws IOException {
+        consumeWhitespace();
+        String string = readWord();
+        if(string.startsWith("<<")) {
+            // this is a dictionary
+            COSDictionary dictionary = new COSDictionary();
+            boolean atEndOfDictionary = false;
+            // remove the marker for the beginning of the dictionary
+            string = string.replaceAll("^<<", "");
+
+            // nothing (or exactly one word character) is left after the marker:
+            // take the next word as the first key
+            // NOTE(review): a single leftover word character is discarded here - confirm intent
+            if("".equals(string) || string.matches("^\\w$"))
+                string = readWord().trim();
+            while(!atEndOfDictionary) {
+                // each entry is a name followed by an arbitrary object
+                COSName name = COSName.getPDFName(string);
+                COSBase object = readObject();
+                dictionary.setItem(name, object);
+
+                // a '>' after the value means the closing ">>" has been reached
+                byte singleByte = consumeWhitespace();
+                if(singleByte == '>') {
+                    readByte(); // get rid of the second '>'
+                    atEndOfDictionary = true;
+                }
+                if(!atEndOfDictionary)
+                    string = readWord().trim();
+            }
+            return dictionary;
+        } else if(string.startsWith("/")) {
+            // it's a dictionary label, i.e. /Type or /Pages or something
+            // similar
+            COSBase name = COSName.getPDFName(string);
+            return name;
+        } else if(string.startsWith("-")) {
+            // it's a negative number
+            return parseNumber(string);
+        } else if(string.charAt(0)>= '0'&&  string.charAt(0)<= '9' ) {
+            // it's a COSInt or COSFloat, or a weak reference (i.e. "3 0 R")
+            // we'll have to peek ahead a little to see if it's a reference or
+            // not; tempOffset remembers where to rewind to afterwards
+            long tempOffset = this.currentOffset;
+            consumeWhitespace();
+            String tempString = readWord();
+            if(tempString.matches("^[0-9]+$")) {
+                // it is an int, might be a weak reference...
+                tempString = readWord();
+                if(!"R".equals(tempString)) {
+                    // it's just a number, not a weak reference
+                    this.currentOffset = tempOffset;
+                    return parseNumber(string);
+                }
+            } else {
+                // it's just a number, not a weak reference
+                this.currentOffset = tempOffset;
+                return parseNumber(string);
+            }
+
+            // it is a reference: rewind to just after the object number and
+            // parse the generation number and the 'R' keyword
+            this.currentOffset = tempOffset;
+            int number = Integer.parseInt(string);
+            int gen = readInt();
+            String r = readWord();
+
+            // only reported when the parser is set to throw on non-conforming input
+            if(!"R".equals(r))
+                if(throwNonConformingException)
+                    throw new AssertionError("Expected keyword 'R' but found " 
+ r);
+
+            if(recursivlyRead) {
+                // seek to the object, read it, seek back to current location
+                long tempLocation = this.currentOffset;
+                this.currentOffset = 
this.xrefEntries.get(number).getByteOffset();
+                COSBase returnValue = readObject(number, gen);
+                this.currentOffset = tempLocation;
+                return returnValue;
+            } else {
+                // Put a COSObject wrapping a COSUnread there as a placeholder;
+                // it carries the object and generation number of the reference
+                COSObject obj = new COSObject(new COSUnread());
+                obj.setObjectNumber(COSInteger.get(number));
+                obj.setGenerationNumber(COSInteger.get(gen));
+                return obj;
+            }
+        } else if(string.startsWith("]")) {
+            // end of an array, just return null
+            if("]".equals(string))
+                return null;
+            // more characters followed the ']': move the offset back by the
+            // length of the word before returning
+            int oldLength = string.length();
+            this.currentOffset -= oldLength;
+            return null;
+        } else if(string.startsWith("[")) {
+            // array of values
+            // we'll just pay attention to the first part (this is in case
+            // there is no whitespace between the "[" and the first element)
+            int oldLength = string.length();
+            string = "[";
+            // string now has length 1, so this moves the offset back by oldLength
+            this.currentOffset -= (oldLength - string.length() + 1);
+
+            // elements are read until readObject() returns null (see the "]" branch)
+            COSArray array = new COSArray();
+            COSBase object = readObject();
+            while(object != null) {
+                array.add(object);
+                object = readObject();
+            }
+            return array;
+        } else if(string.startsWith("(")) {
+            // this is a string (not hex encoded), strip off the '(' and read
+            // until ')'
+            StringBuilder sb = new StringBuilder(string.substring(1));
+            byte singleByte = readByte();
+            while(singleByte != ')') {
+                sb.append((char)singleByte);
+                singleByte = readByte();
+            }
+            return new COSString(sb.toString());
+        } else {
+            // any other leading character is not supported by this parser yet
+            throw new RuntimeException("Not yet implemented: " + string
+                    + " loation=" + this.currentOffset);
+        }
+    }
+
+    /**
+     * Reads the next string from the stream.  Leading whitespace is consumed
+     * first; characters are then collected until an end-of-name character, a
+     * closing character or the end of the stream is met.  The character that
+     * stopped the read is pushed back onto the stream unless it is the end of
+     * the stream.
+     *
+     * @return The string that was read from the stream.
+     * @throws IOException If there is an error reading from the stream.
+     */
+    @Override
+    protected String readString() throws IOException {
+        consumeWhitespace();
+        StringBuilder word = new StringBuilder();
+        int next;
+        for(next = pdfSource.read();
+                !(isEndOfName((char)next) || isClosing(next) || next == -1);
+                next = pdfSource.read()) {
+            word.append((char)next);
+        }
+        // give the terminating character back so the caller can still see it
+        if(next != -1) {
+            pdfSource.unread(next);
+        }
+        return word.toString();
+    }
+
+    /**
+     * Reads a dictionary backwards, starting at its closing {@code >>} and
+     * stopping once its opening {@code <<} has been consumed.
+     *
+     * Entries are met in reverse order while reading; the returned dictionary
+     * has them re-added in the opposite order, i.e. the order in which they
+     * appear in the file.
+     *
+     * @return the dictionary that was read
+     * @throws IOException if there is an error reading from the file
+     * @throws AssertionError if the dictionary does not end with {@code >>}
+     *         and the parser is set to throw on non-conforming input
+     */
+    protected COSDictionary readDictionaryBackwards() throws IOException {
+        // consume the last two '>' chars which signify the end of the
+        // dictionary
+        consumeWhitespaceBackwards();
+        for(int i = 0; i < 2; i++) {
+            byte closing = readByteBackwards();
+            if(throwNonConformingException && closing != '>') {
+                throw new AssertionError("Expected '>>' at the end of the dictionary"
+                        + " but found '" + (char)closing + "'");
+            }
+        }
+
+        // read entries until the "<<" which opens the dictionary shows up
+        COSDictionary backwardsDictionary = new COSDictionary();
+        boolean atEndOfDictionary = false;
+        while(!atEndOfDictionary) {
+            byte singleByte = consumeWhitespaceBackwards();
+            if(singleByte == '<') {
+                // a single '<' is not enough, the byte before it must be '<' too
+                inputFile.seek(currentOffset-1);
+                atEndOfDictionary = ((byte)inputFile.read()) == '<';
+            }
+            if(!atEndOfDictionary) {
+                // going backwards the value comes first, then its name
+                COSBase object = readObjectBackwards();
+                COSName name = readNameBackwards();
+                backwardsDictionary.setItem(name, object);
+            }
+        }
+
+        // the dictionaries preserve the order keys were added, as such we
+        // shall add them in the proper order, not the reverse order; the keys
+        // are copied into an array once instead of once per lookup
+        COSDictionary dict = new COSDictionary();
+        COSName[] backwardsKeys = backwardsDictionary.keySet().toArray(new COSName[0]);
+        for(int i = backwardsKeys.length-1; i >= 0; i--) {
+            dict.setItem(backwardsKeys[i], backwardsDictionary.getItem(backwardsKeys[i]));
+        }
+
+        // consume the last two '<' chars
+        readByteBackwards();
+        readByteBackwards();
+
+        return dict;
+    }
+
+    /**
+     * This will read a line starting at the current offset and going
+     * backwards until it finds a newline.  This should only be used if we are
+     * certain that the data will only be text, and not binary data.
+     *
+     * The line terminator is consumed but is not part of the returned string.
+     *
+     * @return the string which was read, in forward (file) order
+     * @throws IOException if there was an error reading data from the file
+     */
+    protected String readLineBackwards() throws IOException {
+        // bytes arrive last-to-first: collect them by appending and reverse
+        // once at the end instead of inserting at the front for every byte
+        StringBuilder reversed = new StringBuilder();
+        boolean endOfObject = false;
+
+        do {
+            byte singleByte = readByteBackwards();
+            if(singleByte == '\n') {
+                // if there's a preceding \r, we'll eat that as well
+                inputFile.seek(currentOffset);
+                if((byte)inputFile.read() == '\r')
+                    currentOffset--;
+                endOfObject = true;
+            } else if(singleByte == '\r') {
+                endOfObject = true;
+            } else {
+                reversed.append((char)singleByte);
+            }
+        } while(!endOfObject);
+
+        return reversed.reverse().toString();
+    }
+
+    /**
+     * This will read a line starting at the current offset and going
+     * forward until it finds a newline.  This should only be used if we are
+     * certain that the data will only be text, and not binary data.
+     *
+     * A line ends at a line feed, at a carriage return, or at a carriage
+     * return immediately followed by a line feed.  The line terminator is
+     * consumed but is not part of the returned string.
+     *
+     * @return the string which was read
+     * @throws IOException if there was an error reading data from the file
+     */
+    @Override
+    protected String readLine() throws IOException {
+        StringBuilder sb = new StringBuilder();
+        boolean endOfLine = false;
+
+        do {
+            byte singleByte = readByte();
+            if(singleByte == '\n') {
+                // kept from the previous implementation: a \r directly after
+                // the \n is swallowed as well
+                inputFile.seek(currentOffset);
+                if((byte)inputFile.read() == '\r')
+                    currentOffset++;
+                endOfLine = true;
+            } else if(singleByte == '\r') {
+                // \r\n is a single end-of-line marker: skip the \n too, so the
+                // next call does not return a spurious empty line
+                inputFile.seek(currentOffset);
+                if((byte)inputFile.read() == '\n')
+                    currentOffset++;
+                endOfLine = true;
+            } else {
+                sb.append((char)singleByte);
+            }
+        } while(!endOfLine);
+
+        return sb.toString();
+    }
+
+    /**
+     * Reads one word.  Bytes are collected until a whitespace byte is read or,
+     * once at least one byte has been collected, until a byte which begins
+     * the next element ('/', '[', ']' or '>') is met.  A '>' which directly
+     * follows a single '>' is kept, so ">>" is read as one word.
+     *
+     * @return the word which was read; empty if the first byte is whitespace
+     * @throws IOException if there was an error reading data from the file
+     */
+    protected String readWord() throws IOException {
+        StringBuilder word = new StringBuilder();
+        while(true) {
+            byte current = readByte();
+            if(this.isWhitespace(current)) {
+                // no offset adjustment is made for the whitespace byte
+                break;
+            }
+
+            // the first byte always belongs to this word; from the second
+            // byte on, some characters indicate that the next word has begun
+            if(word.length() > 0) {
+                boolean beginsNextWord = current == '/' || current == '['
+                        || current == ']'
+                        || (current == '>' && !">".equals(word.toString()));
+                if(beginsNextWord) {
+                    // we're stopping on a non-whitespace char, decrement the
+                    // counter so we don't miss this character
+                    this.currentOffset--;
+                    break;
+                }
+            }
+            word.append((char)current);
+        }
+        return word.toString();
+    }
+
+    /**
+     * Tells whether references met by {@code readObject()} are resolved right
+     * away instead of being returned as unread placeholders.
+     *
+     * @return the current value of the recursivlyRead flag
+     */
+    public boolean isRecursivlyRead() {
+        return this.recursivlyRead;
+    }
+
+    /**
+     * Sets whether references met by {@code readObject()} are resolved right
+     * away instead of being returned as unread placeholders.
+     *
+     * @param recursivlyRead the new value of the recursivlyRead flag
+     */
+    public void setRecursivlyRead(boolean recursivlyRead) {
+        this.recursivlyRead = recursivlyRead;
+    }
+}

Added: 
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/ConformingPDDocument.java
URL: 
http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/ConformingPDDocument.java?rev=1142109&view=auto
==============================================================================
--- 
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/ConformingPDDocument.java
 (added)
+++ 
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/ConformingPDDocument.java
 Fri Jul  1 22:28:23 2011
@@ -0,0 +1,115 @@
+/*
+ *  Copyright 2011 adam.
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ *  under the License.
+ */
+
+package org.apache.pdfbox.pdmodel;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import org.apache.pdfbox.cos.COSBase;
+import org.apache.pdfbox.cos.COSDocument;
+import org.apache.pdfbox.pdfparser.ConformingPDFParser;
+import org.apache.pdfbox.persistence.util.COSObjectKey;
+
+/**
+ * A {@link PDDocument} which additionally keeps a pool of objects keyed by
+ * object number and generation, and a reference to the
+ * {@link ConformingPDFParser} associated with it.
+ *
+ * @author adam
+ */
+public class ConformingPDDocument extends PDDocument {
+    /**
+     * Maps ObjectKeys to a COSObject. Note that references to these objects
+     * are also stored in COSDictionary objects that map a name to a specific
+     * object.
+     */
+    private final Map<COSObjectKey, COSBase> objectPool =
+        new HashMap<COSObjectKey, COSBase>();
+    private ConformingPDFParser parser;
+
+    /**
+     * Creates an empty document.
+     * @throws IOException If the parent constructor fails.
+     */
+    public ConformingPDDocument() throws IOException {
+        super();
+    }
+
+    /**
+     * Creates a document on top of an existing COSDocument.
+     * @param doc The COSDocument handed to the parent constructor.
+     * @throws IOException If the parent constructor fails.
+     */
+    public ConformingPDDocument(COSDocument doc) throws IOException {
+        super(doc);
+    }
+
+    /**
+     * This will load a document from a file using a ConformingPDFParser.
+     * @param input The File which contains the document.
+     * @return The document that was loaded.
+     * @throws IOException If there is an error reading from the file.
+     */
+    public static PDDocument load(File input) throws IOException {
+        ConformingPDFParser parser = new ConformingPDFParser(input);
+        parser.parse();
+        return parser.getPDDocument();
+    }
+
+    /**
+     * This will get an object from the pool.
+     * @param key The object key.
+     * @return The object in the pool, or null if the pool has no object for
+     * this key.
+     * @throws IOException Declared for callers; not thrown by this lookup.
+     */
+    public COSBase getObjectFromPool(COSObjectKey key) throws IOException {
+        return objectPool.get(key);
+    }
+
+    /**
+     * This will get the keys of all objects currently in the pool.
+     * @return A new list holding every key of the pool; changing the list
+     * does not change the pool.
+     * @throws IOException Declared for callers; not thrown by this method.
+     */
+    public List<COSObjectKey> getObjectKeysFromPool() throws IOException {
+        return new ArrayList<COSObjectKey>(objectPool.keySet());
+    }
+
+    /**
+     * This will get an object from the pool.
+     * @param number the object number
+     * @param generation the generation of this object you wish to load
+     * @return The object in the pool, or null if the pool has no object for
+     * this number and generation.
+     * @throws IOException Declared for callers; not thrown by this lookup.
+     */
+    public COSBase getObjectFromPool(long number, long generation) throws IOException {
+        return objectPool.get(new COSObjectKey(number, generation));
+    }
+
+    /**
+     * This will store an object in the pool, replacing any object already
+     * stored for the same number and generation.
+     * @param object the object to store
+     * @param number the object number
+     * @param generation the generation of the object
+     */
+    public void putObjectInPool(COSBase object, long number, long generation) {
+        objectPool.put(new COSObjectKey(number, generation), object);
+    }
+
+    /**
+     * @return the parser, or null if none has been set
+     */
+    public ConformingPDFParser getParser() {
+        return parser;
+    }
+
+    /**
+     * @param parser the parser to set
+     */
+    public void setParser(ConformingPDFParser parser) {
+        this.parser = parser;
+    }
+}

Added: 
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/common/XrefEntry.java
URL: 
http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/common/XrefEntry.java?rev=1142109&view=auto
==============================================================================
--- 
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/common/XrefEntry.java
 (added)
+++ 
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/common/XrefEntry.java
 Fri Jul  1 22:28:23 2011
@@ -0,0 +1,43 @@
+/*
+ *  Copyright 2011 adam.
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ *  under the License.
+ */
+
+package org.apache.pdfbox.pdmodel.common;
+
+/**
+ * One entry of a cross-reference table: the number of an object, the byte
+ * offset and generation recorded for it, and whether the entry is in use.
+ *
+ * @author adam
+ */
+public class XrefEntry {
+    private int objectNumber = 0;
+    private int byteOffset = 0;
+    private int generation = 0;
+    private boolean inUse = true;
+
+    /**
+     * Creates an entry for object 0 at offset 0 with generation 0 which is
+     * marked as in use.
+     */
+    public XrefEntry() {
+    }
+
+    /**
+     * Creates an entry from the values of one cross-reference line.
+     * @param objectNumber the number of the object the entry belongs to
+     * @param byteOffset the byte offset recorded in the entry
+     * @param generation the generation number recorded in the entry
+     * @param inUse the keyword of the entry; "n" marks the entry as in use,
+     * any other value (including null) as not in use
+     */
+    public XrefEntry(int objectNumber, int byteOffset, int generation, String inUse) {
+        this.objectNumber = objectNumber;
+        this.byteOffset = byteOffset;
+        this.generation = generation;
+        this.inUse = "n".equals(inUse);
+    }
+
+    /**
+     * @return the number of the object this entry belongs to
+     */
+    public int getObjectNumber() {
+        return objectNumber;
+    }
+
+    /**
+     * @return the byte offset recorded in this entry
+     */
+    public int getByteOffset() {
+        return byteOffset;
+    }
+
+    /**
+     * @return the generation number recorded in this entry
+     */
+    public int getGeneration() {
+        return generation;
+    }
+
+    /**
+     * @return true if this entry is marked as in use
+     */
+    public boolean isInUse() {
+        return inUse;
+    }
+}

Added: 
pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdfparser/ConformingPDFParserTest.java
URL: 
http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdfparser/ConformingPDFParserTest.java?rev=1142109&view=auto
==============================================================================
--- 
pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdfparser/ConformingPDFParserTest.java
 (added)
+++ 
pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdfparser/ConformingPDFParserTest.java
 Fri Jul  1 22:28:23 2011
@@ -0,0 +1,73 @@
+/*
+ *  Copyright 2010 adam.
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ *  under the License.
+ */
+
+package org.apache.pdfbox.pdfparser;
+
+import java.io.File;
+import java.net.URL;
+import org.apache.pdfbox.cos.COSDictionary;
+import org.junit.After;
+import org.junit.AfterClass;
+import org.junit.Before;
+import org.junit.BeforeClass;
+import org.junit.Test;
+import static org.junit.Assert.*;
+
+/**
+ * Tests for {@link ConformingPDFParser}.
+ *
+ * @author adam
+ */
+public class ConformingPDFParserTest {
+
+    public ConformingPDFParserTest() {
+    }
+
+    // The fixture hooks below are intentionally empty: the test needs no
+    // shared set-up or tear-down.
+
+    @BeforeClass
+    public static void setUpClass() throws Exception {
+    }
+
+    @AfterClass
+    public static void tearDownClass() throws Exception {
+    }
+
+    @Before
+    public void setUp() {
+    }
+
+    @After
+    public void tearDown() {
+    }
+
+    /**
+     * Test of parse method, of class ConformingPDFParser: parses
+     * gdb-refcard.pdf and checks that the trailer holds exactly the Root,
+     * Info and Size entries.
+     */
+    @Test
+    public void testParse() throws Exception {
+        URL inputUrl = ConformingPDFParser.class.getResource("gdb-refcard.pdf");
+        // fail with a readable message instead of a NullPointerException
+        assertNotNull("Test resource gdb-refcard.pdf was not found", inputUrl);
+        File inputFile = new File(inputUrl.toURI());
+        ConformingPDFParser instance = new ConformingPDFParser(inputFile);
+        instance.parse();
+
+        COSDictionary trailer = instance.getDocument().getTrailer();
+        assertNotNull(trailer);
+        assertEquals(3, trailer.size());
+        assertNotNull(trailer.getDictionaryObject("Root"));
+        assertNotNull(trailer.getDictionaryObject("Info"));
+        assertNotNull(trailer.getDictionaryObject("Size"));
+    }
+}
\ No newline at end of file

Modified: 
pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/TestPDDocumentCatalog.java
URL: 
http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/TestPDDocumentCatalog.java?rev=1142109&r1=1142108&r2=1142109&view=diff
==============================================================================
--- 
pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/TestPDDocumentCatalog.java
 (original)
+++ 
pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/TestPDDocumentCatalog.java
 Fri Jul  1 22:28:23 2011
@@ -16,7 +16,6 @@
   */
  package org.apache.pdfbox.pdmodel;

-import java.io.File;
  import junit.framework.TestCase;

  public class TestPDDocumentCatalog extends TestCase {
@@ -62,13 +61,29 @@ public class TestPDDocumentCatalog exten
              doc = 
PDDocument.load(TestPDDocumentCatalog.class.getResourceAsStream("page_label.pdf"));
              PDDocumentCatalog cat = doc.getDocumentCatalog();
              // getLabelsByPageIndices() should not throw an exception
-            String[] labels = cat.getPageLabels().getLabelsByPageIndices();
+            cat.getPageLabels().getLabelsByPageIndices();
          } catch(Exception e) {
-            e.printStackTrace();
              fail("Threw exception!");
          } finally {
              if(doc != null)
                  doc.close();
          }
      }
+
+    /**
+     * Test case for
+     *<a href="https://issues.apache.org/jira/browse/PDFBOX-911";
+     *>PDFBOX-911</a>  - Method PDDocument.getNumberOfPages() returns wrong
+     * number of pages
+     */
+    public void testGetNumberOfPages() throws Exception {
+        PDDocument doc = null;
+        try {
+            doc = 
PDDocument.load(TestPDDocumentCatalog.class.getResource("test.unc.pdf"));
+            assertEquals(4, doc.getNumberOfPages());
+        } finally {
+            if(doc != null)
+                doc.close();
+        }
+    }
  }

Added: 
pdfbox/trunk/pdfbox/src/test/resources/org/apache/pdfbox/pdfparser/gdb-refcard.pdf
URL: 
http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/test/resources/org/apache/pdfbox/pdfparser/gdb-refcard.pdf?rev=1142109&view=auto
==============================================================================
Binary file - no diff available.

Propchange: 
pdfbox/trunk/pdfbox/src/test/resources/org/apache/pdfbox/pdfparser/gdb-refcard.pdf
------------------------------------------------------------------------------
     svn:mime-type = application/octet-stream

Re: svn commit: r1142109 - in /pdfbox/trunk/pdfbox/src: main/java/org/apache/pdfbox/cos/ main/java/org/apache/pdfbox/pdfparser/ main/java/org/apache/pdfbox/pdmodel/ main/java/org/apache/pdfbox/pdmodel/common/ test/java/org/apache/pdfbox/pdfparser/ test/jav...

Reply via email to