jav...

Adam Nichols Sat, 07 Apr 2012 20:04:36 -0700

Headers should all be fixed as of revision 1310946.  I updated all the
headers which were non-conforming (pdmodel/common/XrefEntry.java,
pdmodel/ConformingPDDocument.java, cos/COSDictionaryLateBinding.java and
cos/COSUnread.java).


If I missed any, let me know and I'll take care of it.

Thanks,
Adam

On 04/06/2012 08:45 AM, Andreas Lehmkuehler wrote:
> Hi,
> 
> I just realized that the headers of all new files aren't o.k., e.g. see [1]
> 
> @Adam
> Do you have the time to fix this? If not, do you give me permission
> to change the headers in question?
> 
> BR
> Andreas Lehmkühler
> 
> [1]
> http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/ConformingPDDocument.java?view=markup&pathrev=1142109
> 
> 
> 
> Am 02.07.2011 00:28, schrieb a...@apache.org:
>> Author: adam
>> Date: Fri Jul  1 22:28:23 2011
>> New Revision: 1142109
>>
>> URL: http://svn.apache.org/viewvc?rev=1142109&view=rev
>> Log:
>> PDFBOX-1000: Conforming parser.  Initial commit to make it easier for
>> others to test&  contribute.
>>
>> Added:
>>     
>> pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSDictionaryLateBinding.java
>>
>>     
>> pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSUnread.java
>>     
>> pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/ConformingPDFParser.java
>>
>>     
>> pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/ConformingPDDocument.java
>>
>>     
>> pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/common/XrefEntry.java
>>
>>     
>> pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdfparser/ConformingPDFParserTest.java
>>
>>      pdfbox/trunk/pdfbox/src/test/resources/org/apache/pdfbox/pdfparser/
>>     
>> pdfbox/trunk/pdfbox/src/test/resources/org/apache/pdfbox/pdfparser/gdb-refcard.pdf
>>   
>> (with props)
>> Modified:
>>     
>> pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSDictionary.java
>>
>>     
>> pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/BaseParser.java
>>
>>     
>> pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/TestPDDocumentCatalog.java
>>
>>
>> Modified:
>> pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSDictionary.java
>>
>> URL:
>> http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSDictionary.java?rev=1142109&r1=1142108&r2=1142109&view=diff
>>
>> ==============================================================================
>>
>> ---
>> pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSDictionary.java
>> (original)
>> +++
>> pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSDictionary.java
>> Fri Jul  1 22:28:23 2011
>> @@ -43,7 +43,7 @@ public class COSDictionary extends COSBa
>>        * The name-value pairs of this dictionary. The pairs are kept
>> in the
>>        * order they were added to the dictionary.
>>        */
>> -    private final Map<COSName, COSBase>  items =
>> +    protected final Map<COSName, COSBase>  items =
>>           new LinkedHashMap<COSName, COSBase>();
>>
>>       /**
>> @@ -1410,12 +1410,18 @@ public class COSDictionary extends COSBa
>>       /**
>>        * {@inheritDoc}
>>        */
>> -    public String toString()
>> -    {
>> +    @Override
>> +    public String toString() {
>>           StringBuilder retVal = new StringBuilder("COSDictionary{");
>> -        for( COSName key : items.keySet() )
>> -        {
>> -            retVal.append("(" + key + ":" +
>> getDictionaryObject(key).toString() + ") ");
>> +        for(COSName key : items.keySet()) {
>> +            retVal.append("(");
>> +            retVal.append(key);
>> +            retVal.append(":");
>> +            if(getDictionaryObject(key) != null)
>> +                retVal.append(getDictionaryObject(key).toString());
>> +            else
>> +                retVal.append("<null>");
>> +            retVal.append(") ");
>>           }
>>           retVal.append("}");
>>           return retVal.toString();
>>
>> Added:
>> pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSDictionaryLateBinding.java
>>
>> URL:
>> http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSDictionaryLateBinding.java?rev=1142109&view=auto
>>
>> ==============================================================================
>>
>> ---
>> pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSDictionaryLateBinding.java
>> (added)
>> +++
>> pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSDictionaryLateBinding.java
>> Fri Jul  1 22:28:23 2011
>> @@ -0,0 +1,61 @@
>> +/*
>> + *  Copyright 2011 adam.
>> + *
>> + *  Licensed under the Apache License, Version 2.0 (the "License");
>> + *  you may not use this file except in compliance with the License.
>> + *  You may obtain a copy of the License at
>> + *
>> + *       http://www.apache.org/licenses/LICENSE-2.0
>> + *
>> + *  Unless required by applicable law or agreed to in writing, software
>> + *  distributed under the License is distributed on an "AS IS" BASIS,
>> + *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
>> implied.
>> + *  See the License for the specific language governing permissions and
>> + *  limitations under the License.
>> + *  under the License.
>> + */
>> +
>> +package org.apache.pdfbox.cos;
>> +
>> +import org.apache.commons.logging.Log;
>> +import org.apache.commons.logging.LogFactory;
>> +import org.apache.pdfbox.pdfparser.ConformingPDFParser;
>> +
>> +/**
>> + *
>> + * @author adam
>> + */
>> +public class COSDictionaryLateBinding extends COSDictionary {
>> +    public static final Log log =
>> LogFactory.getLog(COSDictionaryLateBinding.class);
>> +    ConformingPDFParser parser;
>> +
>> +    public COSDictionaryLateBinding(ConformingPDFParser parser) {
>> +        super();
>> +        this.parser = parser;
>> +    }
>> +
>> +    /**
>> +     * This will get an object from this dictionary.  If the object
>> is a reference then it will
>> +     * dereference it and get it from the document.  If the object is
>> COSNull then
>> +     * null will be returned.
>> +     * @param key The key to the object that we are getting.
>> +     * @return The object that matches the key.
>> +     */
>> +    @Override
>> +    public COSBase getDictionaryObject(COSName key) {
>> +        COSBase retval = items.get(key);
>> +        if(retval instanceof COSObject) {
>> +            int objectNumber =
>> ((COSObject)retval).getObjectNumber().intValue();
>> +            int generation =
>> ((COSObject)retval).getGenerationNumber().intValue();
>> +            try {
>> +                retval = parser.getObject(objectNumber, generation);
>> +            } catch(Exception e) {
>> +                log.warn("Unable to read information for object " +
>> objectNumber);
>> +            }
>> +        }
>> +        if(retval instanceof COSNull) {
>> +            retval = null;
>> +        }
>> +        return retval;
>> +    }
>> +}
>>
>> Added:
>> pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSUnread.java
>> URL:
>> http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSUnread.java?rev=1142109&view=auto
>>
>> ==============================================================================
>>
>> ---
>> pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSUnread.java
>> (added)
>> +++
>> pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSUnread.java
>> Fri Jul  1 22:28:23 2011
>> @@ -0,0 +1,100 @@
>> +/*
>> + *  Copyright 2011 adam.
>> + *
>> + *  Licensed under the Apache License, Version 2.0 (the "License");
>> + *  you may not use this file except in compliance with the License.
>> + *  You may obtain a copy of the License at
>> + *
>> + *       http://www.apache.org/licenses/LICENSE-2.0
>> + *
>> + *  Unless required by applicable law or agreed to in writing, software
>> + *  distributed under the License is distributed on an "AS IS" BASIS,
>> + *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
>> implied.
>> + *  See the License for the specific language governing permissions and
>> + *  limitations under the License.
>> + *  under the License.
>> + */
>> +
>> +package org.apache.pdfbox.cos;
>> +
>> +import org.apache.pdfbox.exceptions.COSVisitorException;
>> +import org.apache.pdfbox.pdfparser.ConformingPDFParser;
>> +
>> +/**
>> + *
>> + * @author adam
>> + */
>> +public class COSUnread extends COSBase {
>> +    private long objectNumber;
>> +    private long generation;
>> +    private ConformingPDFParser parser;
>> +
>> +    public COSUnread() {
>> +        super();
>> +    }
>> +
>> +    public COSUnread(long objectNumber, long generation) {
>> +        this();
>> +        this.objectNumber = objectNumber;
>> +        this.generation = generation;
>> +    }
>> +
>> +    public COSUnread(long objectNumber, long generation,
>> ConformingPDFParser parser) {
>> +        this(objectNumber, generation);
>> +        this.parser = parser;
>> +    }
>> +
>> +    @Override
>> +    public Object accept(ICOSVisitor visitor) throws
>> COSVisitorException {
>> +        // TODO: read the object using the parser (if available) and
>> visit that object
>> +        throw new UnsupportedOperationException("COSUnread can not be
>> written/visited.");
>> +    }
>> +
>> +    @Override
>> +    public String toString() {
>> +        return "COSUnread{" + objectNumber + "," + generation + "}";
>> +    }
>> +
>> +    /**
>> +     * @return the objectNumber
>> +     */
>> +    public long getObjectNumber() {
>> +        return objectNumber;
>> +    }
>> +
>> +    /**
>> +     * @param objectNumber the objectNumber to set
>> +     */
>> +    public void setObjectNumber(long objectNumber) {
>> +        this.objectNumber = objectNumber;
>> +    }
>> +
>> +    /**
>> +     * @return the generation
>> +     */
>> +    public long getGeneration() {
>> +        return generation;
>> +    }
>> +
>> +    /**
>> +     * @param generation the generation to set
>> +     */
>> +    public void setGeneration(long generation) {
>> +        this.generation = generation;
>> +    }
>> +
>> +    /**
>> +     * @return the parser
>> +     */
>> +    public ConformingPDFParser getParser() {
>> +        return parser;
>> +    }
>> +
>> +    /**
>> +     * @param parser the parser to set
>> +     */
>> +    public void setParser(ConformingPDFParser parser) {
>> +        this.parser = parser;
>> +    }
>> +
>> +}
>>
>> Modified:
>> pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/BaseParser.java
>>
>> URL:
>> http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/BaseParser.java?rev=1142109&r1=1142108&r2=1142109&view=diff
>>
>> ==============================================================================
>>
>> ---
>> pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/BaseParser.java
>> (original)
>> +++
>> pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/BaseParser.java
>> Fri Jul  1 22:28:23 2011
>> @@ -110,6 +110,10 @@ public abstract class BaseParser
>>        */
>>       protected final boolean forceParsing;
>>
>> +    public BaseParser() {
>> +        this.forceParsing = FORCE_PARSING;
>> +    }
>> +
>>       /**
>>        * Constructor.
>>        *
>> @@ -876,7 +880,7 @@ public abstract class BaseParser
>>               throw new IOException("expected='/' actual='" + (char)c
>> + "'-" + c + " " + pdfSource );
>>           }
>>           // costruisce il nome
>> -        StringBuffer buffer = new StringBuffer();
>> +        StringBuilder buffer = new StringBuilder();
>>           c = pdfSource.read();
>>           while( c != -1 )
>>           {
>> @@ -1063,7 +1067,7 @@ public abstract class BaseParser
>>           {
>>               if( Character.isDigit(c) || c == '-' || c == '+' || c ==
>> '.')
>>               {
>> -                StringBuffer buf = new StringBuffer();
>> +                StringBuilder buf = new StringBuilder();
>>                   int ic = pdfSource.read();
>>                   c = (char)ic;
>>                   while( Character.isDigit( c )||
>> @@ -1118,7 +1122,7 @@ public abstract class BaseParser
>>       protected String readString() throws IOException
>>       {
>>           skipSpaces();
>> -        StringBuffer buffer = new StringBuffer();
>> +        StringBuilder buffer = new StringBuilder();
>>           int c = pdfSource.read();
>>           while( !isEndOfName((char)c)&&  !isClosing(c)&&  c != -1 )
>>           {
>> @@ -1148,7 +1152,7 @@ public abstract class BaseParser
>>           {
>>               c = pdfSource.read();
>>           }
>> -        StringBuffer buffer = new StringBuffer( theString.length() );
>> +        StringBuilder buffer = new StringBuilder( theString.length() );
>>           int charsRead = 0;
>>           while( !isEOL(c)&&  c != -1&&  charsRead<  theString.length() )
>>           {
>> @@ -1194,7 +1198,7 @@ public abstract class BaseParser
>>
>>           //average string size is around 2 and the normal string
>> buffer size is
>>           //about 16 so lets save some space.
>> -        StringBuffer buffer = new StringBuffer(length);
>> +        StringBuilder buffer = new StringBuilder(length);
>>           while( !isWhitespace(c)&&  !isClosing(c)&&  c != -1&& 
>> buffer.length()<  length&&
>>                   c != '['&&
>>                   c != '<'&&
>> @@ -1250,7 +1254,7 @@ public abstract class BaseParser
>>               throw new IOException( "Error: End-of-File, expected
>> line");
>>           }
>>
>> -        StringBuffer buffer = new StringBuffer( 11 );
>> +        StringBuilder buffer = new StringBuilder( 11 );
>>
>>           int c;
>>           while ((c = pdfSource.read()) != -1)
>> @@ -1300,10 +1304,9 @@ public abstract class BaseParser
>>       }
>>
>>       /**
>> -     * This will tell if the next byte is whitespace or not.
>> -     *
>> +     * This will tell if the next byte is whitespace or not.  These
>> values are
>> +     * specified in table 1 (page 12) of ISO 32000-1:2008.
>>        * @param c The character to check against whitespace
>> -     *
>>        * @return true if the next byte in the stream is a whitespace
>> character.
>>        */
>>       protected boolean isWhitespace( int c )
>>
>> Added:
>> pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/ConformingPDFParser.java
>>
>> URL:
>> http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/ConformingPDFParser.java?rev=1142109&view=auto
>>
>> ==============================================================================
>>
>> ---
>> pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/ConformingPDFParser.java
>> (added)
>> +++
>> pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/ConformingPDFParser.java
>> Fri Jul  1 22:28:23 2011
>> @@ -0,0 +1,696 @@
>> +/*
>> + *  Copyright 2010 adam.
>> + *
>> + *  Licensed under the Apache License, Version 2.0 (the "License");
>> + *  you may not use this file except in compliance with the License.
>> + *  You may obtain a copy of the License at
>> + *
>> + *       http://www.apache.org/licenses/LICENSE-2.0
>> + *
>> + *  Unless required by applicable law or agreed to in writing, software
>> + *  distributed under the License is distributed on an "AS IS" BASIS,
>> + *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
>> implied.
>> + *  See the License for the specific language governing permissions and
>> + *  limitations under the License.
>> + *  under the License.
>> + */
>> +
>> +package org.apache.pdfbox.pdfparser;
>> +
>> +import java.io.File;
>> +import java.io.IOException;
>> +import java.util.ArrayList;
>> +import java.util.List;
>> +import java.util.Set;
>> +import org.apache.pdfbox.cos.COSArray;
>> +import org.apache.pdfbox.cos.COSBase;
>> +import org.apache.pdfbox.cos.COSDictionary;
>> +import org.apache.pdfbox.cos.COSDocument;
>> +import org.apache.pdfbox.cos.COSFloat;
>> +import org.apache.pdfbox.cos.COSInteger;
>> +import org.apache.pdfbox.cos.COSName;
>> +import org.apache.pdfbox.cos.COSNumber;
>> +import org.apache.pdfbox.cos.COSObject;
>> +import org.apache.pdfbox.cos.COSString;
>> +import org.apache.pdfbox.cos.COSUnread;
>> +import org.apache.pdfbox.io.RandomAccess;
>> +import org.apache.pdfbox.io.RandomAccessFile;
>> +import org.apache.pdfbox.pdmodel.ConformingPDDocument;
>> +import org.apache.pdfbox.pdmodel.PDDocument;
>> +import org.apache.pdfbox.pdmodel.common.XrefEntry;
>> +import org.apache.pdfbox.persistence.util.COSObjectKey;
>> +
>> +/**
>> + *
>> + * @author<a href="a...@apache.org">Adam Nichols</a>
>> + */
>> +public class ConformingPDFParser extends BaseParser {
>> +    protected RandomAccess inputFile;
>> +    List<XrefEntry>  xrefEntries;
>> +    private long currentOffset;
>> +    private ConformingPDDocument doc = null;
>> +    private boolean throwNonConformingException = true;
>> +    private boolean recursivlyRead = true;
>> +
>> +    /**
>> +     * Constructor.
>> +     *
>> +     * @param input The input stream that contains the PDF document.
>> +     *
>> +     * @throws IOException If there is an error initializing the stream.
>> +     */
>> +    public ConformingPDFParser(File inputFile) throws IOException {
>> +        this.inputFile = new RandomAccessFile(inputFile, "r");
>> +    }
>> +
>> +    /**
>> +     * This will parse the stream and populate the COSDocument
>> object.  This will close
>> +     * the stream when it is done parsing.
>> +     *
>> +     * @throws IOException If there is an error reading from the
>> stream or corrupt data
>> +     * is found.
>> +     */
>> +    public void parse() throws IOException {
>> +        document = new COSDocument();
>> +        doc = new ConformingPDDocument(document);
>> +        currentOffset = inputFile.length()-1;
>> +        long xRefTableLocation = parseTrailerInformation();
>> +        currentOffset = xRefTableLocation;
>> +        parseXrefTable();
>> +        // now that we read the xref table and put null references in
>> the doc,
>> +        // we can deference those objects now.
>> +        boolean oldValue = recursivlyRead;
>> +        recursivlyRead = false;
>> +        List<COSObjectKey>  keys = doc.getObjectKeysFromPool();
>> +        for(COSObjectKey key : keys) {
>> +            // getObject will put it into the document's object pool
>> for us
>> +            getObject(key.getNumber(), key.getGeneration());
>> +        }
>> +        recursivlyRead = oldValue;
>> +    }
>> +
>> +    /**
>> +     * This will get the document that was parsed.  parse() must be
>> called before this is called.
>> +     * When you are done with this document you must call close() on
>> it to release
>> +     * resources.
>> +     *
>> +     * @return The document that was parsed.
>> +     *
>> +     * @throws IOException If there is an error getting the document.
>> +     */
>> +    public COSDocument getDocument() throws IOException {
>> +        if( document == null ) {
>> +            throw new IOException( "You must call parse() before
>> calling getDocument()" );
>> +        }
>> +        return document;
>> +    }
>> +
>> +    /**
>> +     * This will get the PD document that was parsed.  When you are
>> done with
>> +     * this document you must call close() on it to release resources.
>> +     *
>> +     * @return The document at the PD layer.
>> +     *
>> +     * @throws IOException If there is an error getting the document.
>> +     */
>> +    public PDDocument getPDDocument() throws IOException {
>> +        return doc;
>> +    }
>> +
>> +    private boolean parseXrefTable() throws IOException {
>> +        String currentLine = readLine();
>> +        if(throwNonConformingException) {
>> +            if(!"xref".equals(currentLine))
>> +                throw new AssertionError("xref table not
>> found.\nExpected: xref\nFound: "+currentLine);
>> +        }
>> +
>> +        int objectNumber = readInt();
>> +        int entries = readInt();
>> +        xrefEntries = new ArrayList<XrefEntry>(entries);
>> +        for(int i=0; i<entries; i++)
>> +            xrefEntries.add(new XrefEntry(objectNumber++, readInt(),
>> readInt(), readLine()));
>> +
>> +        return true;
>> +    }
>> +
>> +    protected long parseTrailerInformation() throws IOException,
>> NumberFormatException {
>> +        long xrefLocation = -1;
>> +        consumeWhitespaceBackwards();
>> +        String currentLine = readLineBackwards();
>> +        if(throwNonConformingException) {
>> +            if(!"%%EOF".equals(currentLine))
>> +                throw new AssertionError("Invalid EOF
>> marker.\nExpected: %%EOF\nFound: "+currentLine);
>> +        }
>> +
>> +        xrefLocation = readLongBackwards();
>> +        currentLine = readLineBackwards();
>> +        if(throwNonConformingException) {
>> +            if(!"startxref".equals(currentLine))
>> +                throw new AssertionError("Invalid trailer.\nExpected:
>> startxref\nFound: "+currentLine);
>> +        }
>> +
>> +        document.setTrailer(readDictionaryBackwards());
>> +        consumeWhitespaceBackwards();
>> +        currentLine = readLineBackwards();
>> +        if(throwNonConformingException) {
>> +            if(!"trailer".equals(currentLine))
>> +                throw new AssertionError("Invalid trailer.\nExpected:
>> trailer\nFound: "+currentLine);
>> +        }
>> +
>> +        return xrefLocation;
>> +    }
>> +
>> +    protected byte readByteBackwards() throws IOException {
>> +        inputFile.seek(currentOffset);
>> +        byte singleByte = (byte)inputFile.read();
>> +        currentOffset--;
>> +        return singleByte;
>> +    }
>> +
>> +    protected byte readByte() throws IOException {
>> +        inputFile.seek(currentOffset);
>> +        byte singleByte = (byte)inputFile.read();
>> +        currentOffset++;
>> +        return singleByte;
>> +    }
>> +
>> +    protected String readBackwardUntilWhitespace() throws IOException {
>> +        StringBuilder sb = new StringBuilder();
>> +        byte singleByte = readByteBackwards();
>> +        while(!isWhitespace(singleByte)) {
>> +            sb.insert(0, (char)singleByte);
>> +            singleByte = readByteBackwards();
>> +        }
>> +        return sb.toString();
>> +    }
>> +
>> +    /**
>> +     * This will read all bytes (backwards) until a non-whitespace
>> character is
>> +     * found.  To save you an extra read, the non-whitespace
>> character is
>> +     * returned.  If the current character is not whitespace, this
>> method will
>> +     * just return the current char.
>> +     * @return the first non-whitespace character found
>> +     * @throws IOException if there is an error reading from the file
>> +     */
>> +    protected byte consumeWhitespaceBackwards() throws IOException {
>> +        inputFile.seek(currentOffset);
>> +        byte singleByte = (byte)inputFile.read();
>> +        if(!isWhitespace(singleByte))
>> +            return singleByte;
>> +
>> +        // we have some whitespace, let's consume it
>> +        while(isWhitespace(singleByte)) {
>> +            singleByte = readByteBackwards();
>> +        }
>> +        // readByteBackwards will decrement the currentOffset to
>> point the byte
>> +        // before the one just read, so we increment it back to the
>> current byte
>> +        currentOffset++;
>> +        return singleByte;
>> +    }
>> +
>> +    /**
>> +     * This will read all bytes until a non-whitespace character is
>> +     * found.  To save you an extra read, the non-whitespace
>> character is
>> +     * returned.  If the current character is not whitespace, this
>> method will
>> +     * just return the current char.
>> +     * @return the first non-whitespace character found
>> +     * @throws IOException if there is an error reading from the file
>> +     */
>> +    protected byte consumeWhitespace() throws IOException {
>> +        inputFile.seek(currentOffset);
>> +        byte singleByte = (byte)inputFile.read();
>> +        if(!isWhitespace(singleByte))
>> +            return singleByte;
>> +
>> +        // we have some whitespace, let's consume it
>> +        while(isWhitespace(singleByte)) {
>> +            singleByte = readByte();
>> +        }
>> +        // readByte() will increment the currentOffset to point the byte
>> +        // after the one just read, so we decrement it back to the
>> current byte
>> +        currentOffset--;
>> +        return singleByte;
>> +    }
>> +
>> +    /**
>> +     * This will consume any whitespace, read in bytes until
>> whitespace is found
>> +     * again and then parse the characters which have been read as a
>> long.  The
>> +     * current offset will then point at the first whitespace
>> character which
>> +     * preceeds the number.
>> +     * @return the parsed number
>> +     * @throws IOException if there is an error reading from the file
>> +     * @throws NumberFormatException if the bytes read can not be
>> converted to a number
>> +     */
>> +    protected long readLongBackwards() throws IOException,
>> NumberFormatException {
>> +        StringBuilder sb = new StringBuilder();
>> +        consumeWhitespaceBackwards();
>> +        byte singleByte = readByteBackwards();
>> +        while(!isWhitespace(singleByte)) {
>> +            sb.insert(0, (char)singleByte);
>> +            singleByte = readByteBackwards();
>> +        }
>> +        if(sb.length() == 0)
>> +            throw new AssertionError("Number not found.  Expected
>> number at offset: " + currentOffset);
>> +        return Long.parseLong(sb.toString());
>> +    }
>> +
>> +    @Override
>> +    protected int readInt() throws IOException {
>> +        StringBuilder sb = new StringBuilder();
>> +        consumeWhitespace();
>> +        byte singleByte = readByte();
>> +        while(!isWhitespace(singleByte)) {
>> +            sb.append((char)singleByte);
>> +            singleByte = readByte();
>> +        }
>> +        if(sb.length() == 0)
>> +            throw new AssertionError("Number not found.  Expected
>> number at offset: " + currentOffset);
>> +        return Integer.parseInt(sb.toString());
>> +    }
>> +
>> +    /**
>> +     * This will read in a number and return the COS version of the
>> number (be
>> +     * it a COSInteger or a COSFloat).
>> +     * @return the COSNumber which was read/parsed
>> +     * @throws IOException
>> +     */
>> +    protected COSNumber readNumber() throws IOException {
>> +        StringBuilder sb = new StringBuilder();
>> +        consumeWhitespace();
>> +        byte singleByte = readByte();
>> +        while(!isWhitespace(singleByte)) {
>> +            sb.append((char)singleByte);
>> +            singleByte = readByte();
>> +        }
>> +        if(sb.length() == 0)
>> +            throw new AssertionError("Number not found.  Expected
>> number at offset: " + currentOffset);
>> +        return parseNumber(sb.toString());
>> +    }
>> +
>> +    protected COSNumber parseNumber(String number) throws IOException {
>> +        if(number.matches("^[0-9]+$"))
>> +            return COSInteger.get(number);
>> +        return new COSFloat(Float.parseFloat(number));
>> +    }
>> +
>> +    protected COSBase processCosObject(String string) throws
>> IOException {
>> +        if(string != null&&  string.endsWith(">")) {
>> +            // string of hex codes
>> +            return
>> COSString.createFromHexString(string.replaceAll("^<",
>> "").replaceAll(">$", ""));
>> +        }
>> +        return null;
>> +    }
>> +
>> +    protected COSBase readObjectBackwards() throws IOException {
>> +        COSBase obj = null;
>> +        consumeWhitespaceBackwards();
>> +        String lastSection = readBackwardUntilWhitespace();
>> +        if("R".equals(lastSection)) {
>> +            // indirect reference
>> +            long gen = readLongBackwards();
>> +            long number = readLongBackwards();
>> +            // We just put a placeholder in the pool for now, we'll
>> read the data later
>> +            doc.putObjectInPool(new COSUnread(), number, gen);
>> +            obj = new COSUnread(number, gen, this);
>> +        } else if(">>".equals(lastSection)) {
>> +            // dictionary
>> +            throw new RuntimeException("Not yet implemented");
>> +        } else if(lastSection != null&&  lastSection.endsWith("]")) {
>> +            // array
>> +            COSArray array = new COSArray();
>> +            lastSection = lastSection.replaceAll("]$", "");
>> +            while(!lastSection.startsWith("[")) {
>> +                if(lastSection.matches("^\\s*<.*>\\s*$")) // it's a
>> hex string
>> +                   
>> array.add(COSString.createFromHexString(lastSection.replaceAll("^\\s*<",
>> "").replaceAll(">\\s*$", "")));
>> +                lastSection = readBackwardUntilWhitespace();
>> +            }
>> +            lastSection = lastSection.replaceAll("^\\[", "");
>> +            if(lastSection.matches("^\\s*<.*>\\s*$")) // it's a hex
>> string
>> +               
>> array.add(COSString.createFromHexString(lastSection.replaceAll("^\\s*<",
>> "").replaceAll(">\\s*$", "")));
>> +            obj = array;
>> +        } else if(lastSection != null&&  lastSection.endsWith(">")) {
>> +            // string of hex codes
>> +            obj = processCosObject(lastSection);
>> +        } else {
>> +            // try a number, otherwise fall back on a string
>> +            try {
>> +                Long.parseLong(lastSection);
>> +                obj = COSNumber.get(lastSection);
>> +            } catch(NumberFormatException e) {
>> +                throw new RuntimeException("Not yet implemented");
>> +            }
>> +        }
>> +
>> +        return obj;
>> +    }
>> +
>> +    protected COSName readNameBackwards() throws IOException {
>> +        String name = readBackwardUntilWhitespace();
>> +        name = name.replaceAll("^/", "");
>> +        return COSName.getPDFName(name);
>> +    }
>> +
>> +    public COSBase getObject(long objectNumber, long generation)
>> throws IOException {
>> +        // we could optionally, check to see if parse() have been
>> called&
>> +        // throw an exception here, but I don't think that's really
>> necessary
>> +        XrefEntry entry = xrefEntries.get((int)objectNumber);
>> +        currentOffset = entry.getByteOffset();
>> +        return readObject(objectNumber, generation);
>> +    }
>> +
>> +    /**
>> +     * This will read an object from the inputFile at whatever our
>> currentOffset
>> +     * is.  If the object and generation are not the expected values
>> and this
>> +     * object is set to throw an exception for non-conforming
>> documents, then an
>> +     * exception will be thrown.
>> +     * @param objectNumber the object number you expect to read
>> +     * @param generation the generation you expect this object to be
>> +     * @return
>> +     */
>> +    public COSBase readObject(long objectNumber, long generation)
>> throws IOException {
>> +        // when recursivly reading, we always pull the object from
>> the filesystem
>> +        if(document != null&&  recursivlyRead) {
>> +            // check to see if it is in the document cache before
>> hitting the filesystem
>> +            COSBase obj = doc.getObjectFromPool(objectNumber,
>> generation);
>> +            if(obj != null)
>> +                return obj;
>> +        }
>> +
>> +        int actualObjectNumber = readInt();
>> +        if(objectNumber != actualObjectNumber)
>> +            if(throwNonConformingException)
>> +                throw new AssertionError("Object numer expected was " +
>> +                        objectNumber + " but actual was " +
>> actualObjectNumber);
>> +        consumeWhitespace();
>> +
>> +        int actualGeneration = readInt();
>> +        if(generation != actualGeneration)
>> +            if(throwNonConformingException)
>> +                throw new AssertionError("Generation expected was " +
>> +                        generation + " but actual was " +
>> actualGeneration);
>> +        consumeWhitespace();
>> +
>> +        String obj = readWord();
>> +        if(!"obj".equals(obj))
>> +            if(throwNonConformingException)
>> +                throw new AssertionError("Expected keyword 'obj' but
>> found " + obj);
>> +
>> +        // put placeholder object in doc to prevent infinite recursion
>> +        // e.g. read Root ->  dereference object ->  read object
>> which has /Parent ->  GOTO read Root
>> +        doc.putObjectInPool(new COSObject(null), objectNumber,
>> generation);
>> +        COSBase object = readObject();
>> +        doc.putObjectInPool(object, objectNumber, generation);
>> +        return object;
>> +    }
>> +
>> +    /**
>> +     * This actually reads the object data.
>> +     * @return the object which is read
>> +     * @throws IOException
>> +     */
>> +    protected COSBase readObject() throws IOException {
>> +        consumeWhitespace();
>> +        String string = readWord();
>> +        if(string.startsWith("<<")) {
>> +            // this is a dictionary
>> +            COSDictionary dictionary = new COSDictionary();
>> +            boolean atEndOfDictionary = false;
>> +            // remove the marker for the beginning of the dictionary
>> +            string = string.replaceAll("^<<", "");
>> +
>> +            if("".equals(string) || string.matches("^\\w$"))
>> +                string = readWord().trim();
>> +            while(!atEndOfDictionary) {
>> +                COSName name = COSName.getPDFName(string);
>> +                COSBase object = readObject();
>> +                dictionary.setItem(name, object);
>> +
>> +                byte singleByte = consumeWhitespace();
>> +                if(singleByte == '>') {
>> +                    readByte(); // get rid of the second '>'
>> +                    atEndOfDictionary = true;
>> +                }
>> +                if(!atEndOfDictionary)
>> +                    string = readWord().trim();
>> +            }
>> +            return dictionary;
>> +        } else if(string.startsWith("/")) {
>> +            // it's a dictionary label. i.e. /Type or /Pages or
>> something similar
>> +            COSBase name = COSName.getPDFName(string);
>> +            return name;
>> +        } else if(string.startsWith("-")) {
>> +            // it's a negative number
>> +            return parseNumber(string);
>> +        } else if(string.charAt(0)>= '0'&&  string.charAt(0)<= '9' ) {
>> +            // it's a COSInt or COSFloat, or a weak reference (i.e.
>> "3 0 R")
>> +            // we'll have to peek ahead a little to see if it's a
>> reference or not
>> +            long tempOffset = this.currentOffset;
>> +            consumeWhitespace();
>> +            String tempString = readWord();
>> +            if(tempString.matches("^[0-9]+$")) {
>> +                // it is an int, might be a weak reference...
>> +                tempString = readWord();
>> +                if(!"R".equals(tempString)) {
>> +                    // it's just a number, not a weak reference
>> +                    this.currentOffset = tempOffset;
>> +                    return parseNumber(string);
>> +                }
>> +            } else {
>> +                // it's just a number, not a weak reference
>> +                this.currentOffset = tempOffset;
>> +                return parseNumber(string);
>> +            }
>> +
>> +            // it wasn't a number, so we need to parse the
>> weak-reference
>> +            this.currentOffset = tempOffset;
>> +            int number = Integer.parseInt(string);
>> +            int gen = readInt();
>> +            String r = readWord();
>> +
>> +            if(!"R".equals(r))
>> +                if(throwNonConformingException)
>> +                    throw new AssertionError("Expected keyword 'R'
>> but found " + r);
>> +
>> +            if(recursivlyRead) {
>> +                // seek to the object, read it, seek back to current
>> location
>> +                long tempLocation = this.currentOffset;
>> +                this.currentOffset =
>> this.xrefEntries.get(number).getByteOffset();
>> +                COSBase returnValue = readObject(number, gen);
>> +                this.currentOffset = tempLocation;
>> +                return returnValue;
>> +            } else {
>> +                // Put a COSUnread there as a placeholder
>> +                COSObject obj = new COSObject(new COSUnread());
>> +                obj.setObjectNumber(COSInteger.get(number));
>> +                obj.setGenerationNumber(COSInteger.get(gen));
>> +                return obj;
>> +            }
>> +        } else if(string.startsWith("]")) {
>> +            // end of an array, just return null
>> +            if("]".equals(string))
>> +                return null;
>> +            int oldLength = string.length();
>> +            this.currentOffset -= oldLength;
>> +            return null;
>> +        } else if(string.startsWith("[")) {
>> +            // array of values
>> +            // we'll just pay attention to the first part (this is in
>> case there
>> +            // is no whitespace between the "[" and the first element)
>> +            int oldLength = string.length();
>> +            string = "[";
>> +            this.currentOffset -= (oldLength - string.length() + 1);
>> +
>> +            COSArray array = new COSArray();
>> +            COSBase object = readObject();
>> +            while(object != null) {
>> +                array.add(object);
>> +                object = readObject();
>> +            }
>> +            return array;
>> +        } else if(string.startsWith("(")) {
>> +            // this is a string (not hex encoded), strip off the '('
>> and read until ')'
>> +            StringBuilder sb = new StringBuilder(string.substring(1));
>> +            byte singleByte = readByte();
>> +            while(singleByte != ')') {
>> +                sb.append((char)singleByte);
>> +                singleByte = readByte();
>> +            }
>> +            return new COSString(sb.toString());
>> +        } else {
>> +            throw new RuntimeException("Not yet implemented: " + string
>> +                    + " loation=" + this.currentOffset);
>> +        }
>> +    }
>> +
>> +    /**
>> +     * This will read the next string from the stream.
>> +     * @return The string that was read from the stream.
>> +     * @throws IOException If there is an error reading from the stream.
>> +     */
>> +    @Override
>> +    protected String readString() throws IOException {
>> +        consumeWhitespace();
>> +        StringBuilder buffer = new StringBuilder();
>> +        int c = pdfSource.read();
>> +        while(!isEndOfName((char)c)&&  !isClosing(c)&&  c != -1) {
>> +            buffer.append( (char)c );
>> +            c = pdfSource.read();
>> +        }
>> +        if (c != -1) {
>> +            pdfSource.unread(c);
>> +        }
>> +        return buffer.toString();
>> +    }
>> +
>> +    protected COSDictionary readDictionaryBackwards() throws
>> IOException {
>> +        COSDictionary dict = new COSDictionary();
>> +
>> +        // consume the last two '>' chars which signify the end of
>> the dictionary
>> +        consumeWhitespaceBackwards();
>> +        byte singleByte = readByteBackwards();
>> +        if(throwNonConformingException) {
>> +            if(singleByte != '>')
>> +                throw new AssertionError("");
>> +        }
>> +        singleByte = readByteBackwards();
>> +        if(throwNonConformingException) {
>> +            if(singleByte != '>')
>> +                throw new AssertionError("");
>> +        }
>> +
>> +        // check to see if we're at the end of the dictionary
>> +        boolean atEndOfDictionary = false;
>> +        singleByte = consumeWhitespaceBackwards();
>> +        if(singleByte == '<') {
>> +            inputFile.seek(currentOffset-1);
>> +            atEndOfDictionary =  ((byte)inputFile.read()) == '<';
>> +        }
>> +
>> +        COSDictionary backwardsDictionary = new COSDictionary();
>> +        // while we're not at the end of the dictionary, read in entries
>> +        while(!atEndOfDictionary) {
>> +            COSBase object = readObjectBackwards();
>> +            COSName name = readNameBackwards();
>> +            backwardsDictionary.setItem(name, object);
>> +
>> +            singleByte = consumeWhitespaceBackwards();
>> +            if(singleByte == '<') {
>> +                inputFile.seek(currentOffset-1);
>> +                atEndOfDictionary =  ((byte)inputFile.read()) == '<';
>> +            }
>> +        }
>> +
>> +        // the dictionaries preserve the order keys were added, as
>> such we shall
>> +        // add them in the proper order, not the reverse order
>> +        Set<COSName>  backwardsKeys = backwardsDictionary.keySet();
>> +        for(int i = backwardsKeys.size()-1; i>=0; i--)
>> +            dict.setItem((COSName)backwardsKeys.toArray()[i],
>> backwardsDictionary.getItem((COSName)backwardsKeys.toArray()[i]));
>> +
>> +        // consume the last two '<' chars
>> +        readByteBackwards();
>> +        readByteBackwards();
>> +
>> +        return dict;
>> +    }
>> +
>> +    /**
>> +     * This will read a line starting with the byte at offset and going
>> +     * backwards until it finds a newline.  This should only be used
>> if we are
>> +     * certain that the data will only be text, and not binary data.
>> +     *
>> +     * @param offset the location of the file where we should start
>> reading
>> +     * @return the string which was read
>> +     * @throws IOException if there was an error reading data from
>> the file
>> +     */
>> +    protected String readLineBackwards() throws IOException {
>> +        StringBuilder sb = new StringBuilder();
>> +        boolean endOfObject = false;
>> +
>> +        do {
>> +            // first we read the %%EOF marker
>> +            byte singleByte = readByteBackwards();
>> +            if(singleByte == '\n') {
>> +                // if there's a preceding \r, we'll eat that as well
>> +                inputFile.seek(currentOffset);
>> +                if((byte)inputFile.read() == '\r')
>> +                    currentOffset--;
>> +                endOfObject = true;
>> +            } else if(singleByte == '\r') {
>> +                endOfObject = true;
>> +            } else {
>> +                sb.insert(0, (char)singleByte);
>> +            }
>> +        } while(!endOfObject);
>> +
>> +        return sb.toString();
>> +    }
>> +
>> +    /**
>> +     * This will read a line starting with the byte at offset and going
>> +     * forward until it finds a newline.  This should only be used if
>> we are
>> +     * certain that the data will only be text, and not binary data.
>> +     * @param offset the location of the file where we should start
>> reading
>> +     * @return the string which was read
>> +     * @throws IOException if there was an error reading data from
>> the file
>> +     */
>> +    @Override
>> +    protected String readLine() throws IOException {
>> +        StringBuilder sb = new StringBuilder();
>> +        boolean endOfLine = false;
>> +
>> +        do {
>> +            // first we read the %%EOF marker
>> +            byte singleByte = readByte();
>> +            if(singleByte == '\n') {
>> +                // if there's a preceding \r, we'll eat that as well
>> +                inputFile.seek(currentOffset);
>> +                if((byte)inputFile.read() == '\r')
>> +                    currentOffset++;
>> +                endOfLine = true;
>> +            } else if(singleByte == '\r') {
>> +                endOfLine = true;
>> +            } else {
>> +                sb.append((char)singleByte);
>> +            }
>> +        } while(!endOfLine);
>> +
>> +        return sb.toString();
>> +    }
>> +
>> +    protected String readWord() throws IOException {
>> +        StringBuilder sb = new StringBuilder();
>> +        boolean stop = true;
>> +        do {
>> +            byte singleByte = readByte();
>> +            stop = this.isWhitespace(singleByte);
>> +
>> +            // there are some additional characters which indicate
>> the next element/word has begun
>> +            // ignore the first char we read, b/c the first char is
>> the beginning of this object, not the next one
>> +            if(!stop&&  sb.length()>  0) {
>> +                stop = singleByte == '/' || singleByte == '['
>> +                        || singleByte == ']'
>> +                        || (singleByte == '>'&& 
>> !">".equals(sb.toString()));
>> +                if(stop) // we're stopping on a non-whitespace char,
>> decrement the
>> +                    this.currentOffset--; // counter so we don't miss
>> this character
>> +            }
>> +            if(!stop)
>> +                sb.append((char)singleByte);
>> +        } while(!stop);
>> +
>> +        return sb.toString();
>> +    }
>> +
>> +    /**
>> +     * @return the recursivlyRead
>> +     */
>> +    public boolean isRecursivlyRead() {
>> +        return recursivlyRead;
>> +    }
>> +
>> +    /**
>> +     * @param recursivlyRead the recursivlyRead to set
>> +     */
>> +    public void setRecursivlyRead(boolean recursivlyRead) {
>> +        this.recursivlyRead = recursivlyRead;
>> +    }
>> +}
>>
>> Added:
>> pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/ConformingPDDocument.java
>>
>> URL:
>> http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/ConformingPDDocument.java?rev=1142109&view=auto
>>
>> ==============================================================================
>>
>> ---
>> pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/ConformingPDDocument.java
>> (added)
>> +++
>> pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/ConformingPDDocument.java
>> Fri Jul  1 22:28:23 2011
>> @@ -0,0 +1,115 @@
>> +/*
>> + *  Copyright 2011 adam.
>> + *
>> + *  Licensed under the Apache License, Version 2.0 (the "License");
>> + *  you may not use this file except in compliance with the License.
>> + *  You may obtain a copy of the License at
>> + *
>> + *       http://www.apache.org/licenses/LICENSE-2.0
>> + *
>> + *  Unless required by applicable law or agreed to in writing, software
>> + *  distributed under the License is distributed on an "AS IS" BASIS,
>> + *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
>> implied.
>> + *  See the License for the specific language governing permissions and
>> + *  limitations under the License.
>> + *  under the License.
>> + */
>> +
>> +package org.apache.pdfbox.pdmodel;
>> +
>> +import java.io.File;
>> +import java.io.IOException;
>> +import java.util.ArrayList;
>> +import java.util.HashMap;
>> +import java.util.List;
>> +import java.util.Map;
>> +import org.apache.pdfbox.cos.COSBase;
>> +import org.apache.pdfbox.cos.COSDocument;
>> +import org.apache.pdfbox.pdfparser.ConformingPDFParser;
>> +import org.apache.pdfbox.persistence.util.COSObjectKey;
>> +
>> +/**
>> + *
>> + * @author adam
>> + */
>> +public class ConformingPDDocument extends PDDocument {
>> +    /**
>> +     * Maps ObjectKeys to a COSObject. Note that references to these
>> objects
>> +     * are also stored in COSDictionary objects that map a name to a
>> specific object.
>> +     */
>> +    private final Map<COSObjectKey, COSBase>  objectPool =
>> +        new HashMap<COSObjectKey, COSBase>();
>> +    private ConformingPDFParser parser = null;
>> +
>> +    public ConformingPDDocument() throws IOException {
>> +        super();
>> +    }
>> +
>> +    public ConformingPDDocument(COSDocument doc) throws IOException {
>> +        super(doc);
>> +    }
>> +
>> +    /**
>> +     * This will load a document from an input stream.
>> +     * @param input The File which contains the document.
>> +     * @return The document that was loaded.
>> +     * @throws IOException If there is an error reading from the stream.
>> +     */
>> +    public static PDDocument load(File input) throws IOException {
>> +        ConformingPDFParser parser = new ConformingPDFParser(input);
>> +        parser.parse();
>> +        return parser.getPDDocument();
>> +    }
>> +
>> +    /**
>> +     * This will get an object from the pool.
>> +     * @param key The object key.
>> +     * @return The object in the pool or a new one if it has not been
>> parsed yet.
>> +     * @throws IOException If there is an error getting the proxy
>> object.
>> +     */
>> +    public COSBase getObjectFromPool(COSObjectKey key) throws
>> IOException {
>> +        return objectPool.get(key);
>> +    }
>> +
>> +    /**
>> +     * This will get an object from the pool.
>> +     * @param key The object key.
>> +     * @return The object in the pool or a new one if it has not been
>> parsed yet.
>> +     * @throws IOException If there is an error getting the proxy
>> object.
>> +     */
>> +    public List<COSObjectKey>  getObjectKeysFromPool() throws
>> IOException {
>> +        List<COSObjectKey>  keys = new ArrayList<COSObjectKey>();
>> +        for(COSObjectKey key : objectPool.keySet())
>> +            keys.add(key);
>> +        return keys;
>> +    }
>> +
>> +    /**
>> +     * This will get an object from the pool.
>> +     * @param number the object number
>> +     * @param generation the generation of this object you wish to load
>> +     * @return The object in the pool
>> +     * @throws IOException If there is an error getting the proxy
>> object.
>> +     */
>> +    public COSBase getObjectFromPool(long number, long generation)
>> throws IOException {
>> +        return objectPool.get(new COSObjectKey(number, generation));
>> +    }
>> +
>> +    public void putObjectInPool(COSBase object, long number, long
>> generation) {
>> +        objectPool.put(new COSObjectKey(number, generation), object);
>> +    }
>> +
>> +    /**
>> +     * @return the parser
>> +     */
>> +    public ConformingPDFParser getParser() {
>> +        return parser;
>> +    }
>> +
>> +    /**
>> +     * @param parser the parser to set
>> +     */
>> +    public void setParser(ConformingPDFParser parser) {
>> +        this.parser = parser;
>> +    }
>> +}
>>
>> Added:
>> pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/common/XrefEntry.java
>>
>> URL:
>> http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/common/XrefEntry.java?rev=1142109&view=auto
>>
>> ==============================================================================
>>
>> ---
>> pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/common/XrefEntry.java
>> (added)
>> +++
>> pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/common/XrefEntry.java
>> Fri Jul  1 22:28:23 2011
>> @@ -0,0 +1,43 @@
>> +/*
>> + *  Copyright 2011 adam.
>> + *
>> + *  Licensed under the Apache License, Version 2.0 (the "License");
>> + *  you may not use this file except in compliance with the License.
>> + *  You may obtain a copy of the License at
>> + *
>> + *       http://www.apache.org/licenses/LICENSE-2.0
>> + *
>> + *  Unless required by applicable law or agreed to in writing, software
>> + *  distributed under the License is distributed on an "AS IS" BASIS,
>> + *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
>> implied.
>> + *  See the License for the specific language governing permissions and
>> + *  limitations under the License.
>> + *  under the License.
>> + */
>> +
>> +package org.apache.pdfbox.pdmodel.common;
>> +
>> +/**
>> + *
>> + * @author adam
>> + */
>> +public class XrefEntry {
>> +    private int objectNumber = 0;
>> +    private int byteOffset = 0;
>> +    private int generation = 0;
>> +    private boolean inUse = true;
>> +
>> +    public XrefEntry() {
>> +    }
>> +
>> +    public XrefEntry(int objectNumber, int byteOffset, int
>> generation, String inUse) {
>> +        this.objectNumber = objectNumber;
>> +        this.byteOffset = byteOffset;
>> +        this.generation = generation;
>> +        this.inUse = "n".equals(inUse);
>> +    }
>> +
>> +    public int getByteOffset() {
>> +        return byteOffset;
>> +    }
>> +}
>>
>> Added:
>> pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdfparser/ConformingPDFParserTest.java
>>
>> URL:
>> http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdfparser/ConformingPDFParserTest.java?rev=1142109&view=auto
>>
>> ==============================================================================
>>
>> ---
>> pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdfparser/ConformingPDFParserTest.java
>> (added)
>> +++
>> pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdfparser/ConformingPDFParserTest.java
>> Fri Jul  1 22:28:23 2011
>> @@ -0,0 +1,73 @@
>> +/*
>> + *  Copyright 2010 adam.
>> + *
>> + *  Licensed under the Apache License, Version 2.0 (the "License");
>> + *  you may not use this file except in compliance with the License.
>> + *  You may obtain a copy of the License at
>> + *
>> + *       http://www.apache.org/licenses/LICENSE-2.0
>> + *
>> + *  Unless required by applicable law or agreed to in writing, software
>> + *  distributed under the License is distributed on an "AS IS" BASIS,
>> + *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
>> implied.
>> + *  See the License for the specific language governing permissions and
>> + *  limitations under the License.
>> + *  under the License.
>> + */
>> +
>> +package org.apache.pdfbox.pdfparser;
>> +
>> +import java.io.File;
>> +import java.net.URL;
>> +import org.apache.pdfbox.cos.COSDictionary;
>> +import org.junit.After;
>> +import org.junit.AfterClass;
>> +import org.junit.Before;
>> +import org.junit.BeforeClass;
>> +import org.junit.Test;
>> +import static org.junit.Assert.*;
>> +
>> +/**
>> + *
>> + * @author adam
>> + */
>> +public class ConformingPDFParserTest {
>> +
>> +    public ConformingPDFParserTest() {
>> +    }
>> +
>> +    @BeforeClass
>> +    public static void setUpClass() throws Exception {
>> +    }
>> +
>> +    @AfterClass
>> +    public static void tearDownClass() throws Exception {
>> +    }
>> +
>> +    @Before
>> +    public void setUp() {
>> +    }
>> +
>> +    @After
>> +    public void tearDown() {
>> +    }
>> +
>> +    /**
>> +     * Test of parse method, of class ConformingPDFParser.
>> +     */
>> +    @Test
>> +    public void testParse() throws Exception {
>> +        URL inputUrl =
>> ConformingPDFParser.class.getResource("gdb-refcard.pdf");
>> +        File inputFile = new File(inputUrl.toURI());
>> +        ConformingPDFParser instance = new
>> ConformingPDFParser(inputFile);
>> +        instance.parse();
>> +
>> +        COSDictionary trailer = instance.getDocument().getTrailer();
>> +        assertNotNull(trailer);
>> +        System.out.println("Trailer: " +
>> instance.getDocument().getTrailer().toString());
>> +        assertEquals(3, trailer.size());
>> +        assertNotNull(trailer.getDictionaryObject("Root"));
>> +        assertNotNull(trailer.getDictionaryObject("Info"));
>> +        assertNotNull(trailer.getDictionaryObject("Size"));
>> +    }
>> +}
>> \ No newline at end of file
>>
>> Modified:
>> pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/TestPDDocumentCatalog.java
>>
>> URL:
>> http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/TestPDDocumentCatalog.java?rev=1142109&r1=1142108&r2=1142109&view=diff
>>
>> ==============================================================================
>>
>> ---
>> pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/TestPDDocumentCatalog.java
>> (original)
>> +++
>> pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/TestPDDocumentCatalog.java
>> Fri Jul  1 22:28:23 2011
>> @@ -16,7 +16,6 @@
>>    */
>>   package org.apache.pdfbox.pdmodel;
>>
>> -import java.io.File;
>>   import junit.framework.TestCase;
>>
>>   public class TestPDDocumentCatalog extends TestCase {
>> @@ -62,13 +61,29 @@ public class TestPDDocumentCatalog exten
>>               doc =
>> PDDocument.load(TestPDDocumentCatalog.class.getResourceAsStream("page_label.pdf"));
>>
>>               PDDocumentCatalog cat = doc.getDocumentCatalog();
>>               // getLabelsByPageIndices() should not throw an exception
>> -            String[] labels =
>> cat.getPageLabels().getLabelsByPageIndices();
>> +            cat.getPageLabels().getLabelsByPageIndices();
>>           } catch(Exception e) {
>> -            e.printStackTrace();
>>               fail("Threw exception!");
>>           } finally {
>>               if(doc != null)
>>                   doc.close();
>>           }
>>       }
>> +
>> +    /**
>> +     * Test case for
>> +     * <a href="https://issues.apache.org/jira/browse/PDFBOX-911"
>> +     *>PDFBOX-911</a>  - Method PDDocument.getNumberOfPages() returns
>> wrong
>> +     * number of pages
>> +     */
>> +    public void testGetNumberOfPages() throws Exception {
>> +        PDDocument doc = null;
>> +        try {
>> +            doc =
>> PDDocument.load(TestPDDocumentCatalog.class.getResource("test.unc.pdf"));
>> +            assertEquals(4, doc.getNumberOfPages());
>> +        } finally {
>> +            if(doc != null)
>> +                doc.close();
>> +        }
>> +    }
>>   }
>>
>> Added:
>> pdfbox/trunk/pdfbox/src/test/resources/org/apache/pdfbox/pdfparser/gdb-refcard.pdf
>>
>> URL:
>> http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/test/resources/org/apache/pdfbox/pdfparser/gdb-refcard.pdf?rev=1142109&view=auto
>>
>> ==============================================================================
>>
>> Binary file - no diff available.
>>
>> Propchange:
>> pdfbox/trunk/pdfbox/src/test/resources/org/apache/pdfbox/pdfparser/gdb-refcard.pdf
>>
>> ------------------------------------------------------------------------------
>>
>>      svn:mime-type = application/octet-stream
>>
>>
>

Re: svn commit: r1142109 - in /pdfbox/trunk/pdfbox/src: main/java/org/apache/pdfbox/cos/ main/java/org/apache/pdfbox/pdfparser/ main/java/org/apache/pdfbox/pdmodel/ main/java/org/apache/pdfbox/pdmodel/common/ test/java/org/apache/pdfbox/pdfparser/ test/jav...

Reply via email to