Headers should all be fixed as of revision 1310946. I updated all the headers which were non-conforming (pdmodel/common/XrefEntry.java, pdmodel/ConformingPDDocument.java, cos/COSDictionaryLateBinding.java, cos/COSUnread.java).
If I missed any, let me know and I'll take care of it. Thanks, Adam On 04/06/2012 08:45 AM, Andreas Lehmkuehler wrote: > Hi, > > I just realized that the headers of all new files aren't o.k., e.g. see [1] > > @Adam > Do you have the time to fix this. If not, do you give me the permission > to change the headers in question? > > BR > Andreas Lehmkühler > > [1] > http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/ConformingPDDocument.java?view=markup&pathrev=1142109 > > > > Am 02.07.2011 00:28, schrieb a...@apache.org: >> Author: adam >> Date: Fri Jul 1 22:28:23 2011 >> New Revision: 1142109 >> >> URL: http://svn.apache.org/viewvc?rev=1142109&view=rev >> Log: >> PDFBOX-1000: Conforming parser. Initial commit to make it easier for >> others to test& contribute. >> >> Added: >> >> pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSDictionaryLateBinding.java >> >> >> pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSUnread.java >> >> pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/ConformingPDFParser.java >> >> >> pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/ConformingPDDocument.java >> >> >> pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/common/XrefEntry.java >> >> >> pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdfparser/ConformingPDFParserTest.java >> >> pdfbox/trunk/pdfbox/src/test/resources/org/apache/pdfbox/pdfparser/ >> >> pdfbox/trunk/pdfbox/src/test/resources/org/apache/pdfbox/pdfparser/gdb-refcard.pdf >> >> (with props) >> Modified: >> >> pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSDictionary.java >> >> >> pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/BaseParser.java >> >> >> pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/TestPDDocumentCatalog.java >> >> >> Modified: >> pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSDictionary.java >> >> URL: >> 
http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSDictionary.java?rev=1142109&r1=1142108&r2=1142109&view=diff >> >> ============================================================================== >> >> --- >> pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSDictionary.java >> (original) >> +++ >> pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSDictionary.java >> Fri Jul 1 22:28:23 2011 >> @@ -43,7 +43,7 @@ public class COSDictionary extends COSBa >> * The name-value pairs of this dictionary. The pairs are kept >> in the >> * order they were added to the dictionary. >> */ >> - private final Map<COSName, COSBase> items = >> + protected final Map<COSName, COSBase> items = >> new LinkedHashMap<COSName, COSBase>(); >> >> /** >> @@ -1410,12 +1410,18 @@ public class COSDictionary extends COSBa >> /** >> * {@inheritDoc} >> */ >> - public String toString() >> - { >> + @Override >> + public String toString() { >> StringBuilder retVal = new StringBuilder("COSDictionary{"); >> - for( COSName key : items.keySet() ) >> - { >> - retVal.append("(" + key + ":" + >> getDictionaryObject(key).toString() + ") "); >> + for(COSName key : items.keySet()) { >> + retVal.append("("); >> + retVal.append(key); >> + retVal.append(":"); >> + if(getDictionaryObject(key) != null) >> + retVal.append(getDictionaryObject(key).toString()); >> + else >> + retVal.append("<null>"); >> + retVal.append(") "); >> } >> retVal.append("}"); >> return retVal.toString(); >> >> Added: >> pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSDictionaryLateBinding.java >> >> URL: >> http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSDictionaryLateBinding.java?rev=1142109&view=auto >> >> ============================================================================== >> >> --- >> pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSDictionaryLateBinding.java >> (added) >> +++ >> 
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSDictionaryLateBinding.java >> Fri Jul 1 22:28:23 2011 >> @@ -0,0 +1,61 @@ >> +/* >> + * Copyright 2011 adam. >> + * >> + * Licensed under the Apache License, Version 2.0 (the "License"); >> + * you may not use this file except in compliance with the License. >> + * You may obtain a copy of the License at >> + * >> + * http://www.apache.org/licenses/LICENSE-2.0 >> + * >> + * Unless required by applicable law or agreed to in writing, software >> + * distributed under the License is distributed on an "AS IS" BASIS, >> + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or >> implied. >> + * See the License for the specific language governing permissions and >> + * limitations under the License. >> + * under the License. >> + */ >> + >> +package org.apache.pdfbox.cos; >> + >> +import org.apache.commons.logging.Log; >> +import org.apache.commons.logging.LogFactory; >> +import org.apache.pdfbox.pdfparser.ConformingPDFParser; >> + >> +/** >> + * >> + * @author adam >> + */ >> +public class COSDictionaryLateBinding extends COSDictionary { >> + public static final Log log = >> LogFactory.getLog(COSDictionaryLateBinding.class); >> + ConformingPDFParser parser; >> + >> + public COSDictionaryLateBinding(ConformingPDFParser parser) { >> + super(); >> + this.parser = parser; >> + } >> + >> + /** >> + * This will get an object from this dictionary. If the object >> is a reference then it will >> + * dereference it and get it from the document. If the object is >> COSNull then >> + * null will be returned. >> + * @param key The key to the object that we are getting. >> + * @return The object that matches the key. 
>> + */ >> + @Override >> + public COSBase getDictionaryObject(COSName key) { >> + COSBase retval = items.get(key); >> + if(retval instanceof COSObject) { >> + int objectNumber = >> ((COSObject)retval).getObjectNumber().intValue(); >> + int generation = >> ((COSObject)retval).getGenerationNumber().intValue(); >> + try { >> + retval = parser.getObject(objectNumber, generation); >> + } catch(Exception e) { >> + log.warn("Unable to read information for object " + >> objectNumber); >> + } >> + } >> + if(retval instanceof COSNull) { >> + retval = null; >> + } >> + return retval; >> + } >> +} >> >> Added: >> pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSUnread.java >> URL: >> http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSUnread.java?rev=1142109&view=auto >> >> ============================================================================== >> >> --- >> pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSUnread.java >> (added) >> +++ >> pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSUnread.java >> Fri Jul 1 22:28:23 2011 >> @@ -0,0 +1,100 @@ >> +/* >> + * Copyright 2011 adam. >> + * >> + * Licensed under the Apache License, Version 2.0 (the "License"); >> + * you may not use this file except in compliance with the License. >> + * You may obtain a copy of the License at >> + * >> + * http://www.apache.org/licenses/LICENSE-2.0 >> + * >> + * Unless required by applicable law or agreed to in writing, software >> + * distributed under the License is distributed on an "AS IS" BASIS, >> + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or >> implied. >> + * See the License for the specific language governing permissions and >> + * limitations under the License. >> + * under the License. 
>> + */ >> + >> +package org.apache.pdfbox.cos; >> + >> +import org.apache.pdfbox.exceptions.COSVisitorException; >> +import org.apache.pdfbox.pdfparser.ConformingPDFParser; >> + >> +/** >> + * >> + * @author adam >> + */ >> +public class COSUnread extends COSBase { >> + private long objectNumber; >> + private long generation; >> + private ConformingPDFParser parser; >> + >> + public COSUnread() { >> + super(); >> + } >> + >> + public COSUnread(long objectNumber, long generation) { >> + this(); >> + this.objectNumber = objectNumber; >> + this.generation = generation; >> + } >> + >> + public COSUnread(long objectNumber, long generation, >> ConformingPDFParser parser) { >> + this(objectNumber, generation); >> + this.parser = parser; >> + } >> + >> + @Override >> + public Object accept(ICOSVisitor visitor) throws >> COSVisitorException { >> + // TODO: read the object using the parser (if available) and >> visit that object >> + throw new UnsupportedOperationException("COSUnread can not be >> written/visited."); >> + } >> + >> + @Override >> + public String toString() { >> + return "COSUnread{" + objectNumber + "," + generation + "}"; >> + } >> + >> + /** >> + * @return the objectNumber >> + */ >> + public long getObjectNumber() { >> + return objectNumber; >> + } >> + >> + /** >> + * @param objectNumber the objectNumber to set >> + */ >> + public void setObjectNumber(long objectNumber) { >> + this.objectNumber = objectNumber; >> + } >> + >> + /** >> + * @return the generation >> + */ >> + public long getGeneration() { >> + return generation; >> + } >> + >> + /** >> + * @param generation the generation to set >> + */ >> + public void setGeneration(long generation) { >> + this.generation = generation; >> + } >> + >> + /** >> + * @return the parser >> + */ >> + public ConformingPDFParser getParser() { >> + return parser; >> + } >> + >> + /** >> + * @param parser the parser to set >> + */ >> + public void setParser(ConformingPDFParser parser) { >> + this.parser = parser; 
>> + } >> + >> +} >> >> Modified: >> pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/BaseParser.java >> >> URL: >> http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/BaseParser.java?rev=1142109&r1=1142108&r2=1142109&view=diff >> >> ============================================================================== >> >> --- >> pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/BaseParser.java >> (original) >> +++ >> pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/BaseParser.java >> Fri Jul 1 22:28:23 2011 >> @@ -110,6 +110,10 @@ public abstract class BaseParser >> */ >> protected final boolean forceParsing; >> >> + public BaseParser() { >> + this.forceParsing = FORCE_PARSING; >> + } >> + >> /** >> * Constructor. >> * >> @@ -876,7 +880,7 @@ public abstract class BaseParser >> throw new IOException("expected='/' actual='" + (char)c >> + "'-" + c + " " + pdfSource ); >> } >> // costruisce il nome >> - StringBuffer buffer = new StringBuffer(); >> + StringBuilder buffer = new StringBuilder(); >> c = pdfSource.read(); >> while( c != -1 ) >> { >> @@ -1063,7 +1067,7 @@ public abstract class BaseParser >> { >> if( Character.isDigit(c) || c == '-' || c == '+' || c == >> '.') >> { >> - StringBuffer buf = new StringBuffer(); >> + StringBuilder buf = new StringBuilder(); >> int ic = pdfSource.read(); >> c = (char)ic; >> while( Character.isDigit( c )|| >> @@ -1118,7 +1122,7 @@ public abstract class BaseParser >> protected String readString() throws IOException >> { >> skipSpaces(); >> - StringBuffer buffer = new StringBuffer(); >> + StringBuilder buffer = new StringBuilder(); >> int c = pdfSource.read(); >> while( !isEndOfName((char)c)&& !isClosing(c)&& c != -1 ) >> { >> @@ -1148,7 +1152,7 @@ public abstract class BaseParser >> { >> c = pdfSource.read(); >> } >> - StringBuffer buffer = new StringBuffer( theString.length() ); >> + StringBuilder buffer = new StringBuilder( theString.length() ); >> int 
charsRead = 0; >> while( !isEOL(c)&& c != -1&& charsRead< theString.length() ) >> { >> @@ -1194,7 +1198,7 @@ public abstract class BaseParser >> >> //average string size is around 2 and the normal string >> buffer size is >> //about 16 so lets save some space. >> - StringBuffer buffer = new StringBuffer(length); >> + StringBuilder buffer = new StringBuilder(length); >> while( !isWhitespace(c)&& !isClosing(c)&& c != -1&& >> buffer.length()< length&& >> c != '['&& >> c != '<'&& >> @@ -1250,7 +1254,7 @@ public abstract class BaseParser >> throw new IOException( "Error: End-of-File, expected >> line"); >> } >> >> - StringBuffer buffer = new StringBuffer( 11 ); >> + StringBuilder buffer = new StringBuilder( 11 ); >> >> int c; >> while ((c = pdfSource.read()) != -1) >> @@ -1300,10 +1304,9 @@ public abstract class BaseParser >> } >> >> /** >> - * This will tell if the next byte is whitespace or not. >> - * >> + * This will tell if the next byte is whitespace or not. These >> values are >> + * specified in table 1 (page 12) of ISO 32000-1:2008. >> * @param c The character to check against whitespace >> - * >> * @return true if the next byte in the stream is a whitespace >> character. >> */ >> protected boolean isWhitespace( int c ) >> >> Added: >> pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/ConformingPDFParser.java >> >> URL: >> http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/ConformingPDFParser.java?rev=1142109&view=auto >> >> ============================================================================== >> >> --- >> pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/ConformingPDFParser.java >> (added) >> +++ >> pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/ConformingPDFParser.java >> Fri Jul 1 22:28:23 2011 >> @@ -0,0 +1,696 @@ >> +/* >> + * Copyright 2010 adam. 
>> + * >> + * Licensed under the Apache License, Version 2.0 (the "License"); >> + * you may not use this file except in compliance with the License. >> + * You may obtain a copy of the License at >> + * >> + * http://www.apache.org/licenses/LICENSE-2.0 >> + * >> + * Unless required by applicable law or agreed to in writing, software >> + * distributed under the License is distributed on an "AS IS" BASIS, >> + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or >> implied. >> + * See the License for the specific language governing permissions and >> + * limitations under the License. >> + * under the License. >> + */ >> + >> +package org.apache.pdfbox.pdfparser; >> + >> +import java.io.File; >> +import java.io.IOException; >> +import java.util.ArrayList; >> +import java.util.List; >> +import java.util.Set; >> +import org.apache.pdfbox.cos.COSArray; >> +import org.apache.pdfbox.cos.COSBase; >> +import org.apache.pdfbox.cos.COSDictionary; >> +import org.apache.pdfbox.cos.COSDocument; >> +import org.apache.pdfbox.cos.COSFloat; >> +import org.apache.pdfbox.cos.COSInteger; >> +import org.apache.pdfbox.cos.COSName; >> +import org.apache.pdfbox.cos.COSNumber; >> +import org.apache.pdfbox.cos.COSObject; >> +import org.apache.pdfbox.cos.COSString; >> +import org.apache.pdfbox.cos.COSUnread; >> +import org.apache.pdfbox.io.RandomAccess; >> +import org.apache.pdfbox.io.RandomAccessFile; >> +import org.apache.pdfbox.pdmodel.ConformingPDDocument; >> +import org.apache.pdfbox.pdmodel.PDDocument; >> +import org.apache.pdfbox.pdmodel.common.XrefEntry; >> +import org.apache.pdfbox.persistence.util.COSObjectKey; >> + >> +/** >> + * >> + * @author<a href="a...@apache.org">Adam Nichols</a> >> + */ >> +public class ConformingPDFParser extends BaseParser { >> + protected RandomAccess inputFile; >> + List<XrefEntry> xrefEntries; >> + private long currentOffset; >> + private ConformingPDDocument doc = null; >> + private boolean throwNonConformingException = true; >> + 
private boolean recursivlyRead = true; >> + >> + /** >> + * Constructor. >> + * >> + * @param input The input stream that contains the PDF document. >> + * >> + * @throws IOException If there is an error initializing the stream. >> + */ >> + public ConformingPDFParser(File inputFile) throws IOException { >> + this.inputFile = new RandomAccessFile(inputFile, "r"); >> + } >> + >> + /** >> + * This will parse the stream and populate the COSDocument >> object. This will close >> + * the stream when it is done parsing. >> + * >> + * @throws IOException If there is an error reading from the >> stream or corrupt data >> + * is found. >> + */ >> + public void parse() throws IOException { >> + document = new COSDocument(); >> + doc = new ConformingPDDocument(document); >> + currentOffset = inputFile.length()-1; >> + long xRefTableLocation = parseTrailerInformation(); >> + currentOffset = xRefTableLocation; >> + parseXrefTable(); >> + // now that we read the xref table and put null references in >> the doc, >> + // we can deference those objects now. >> + boolean oldValue = recursivlyRead; >> + recursivlyRead = false; >> + List<COSObjectKey> keys = doc.getObjectKeysFromPool(); >> + for(COSObjectKey key : keys) { >> + // getObject will put it into the document's object pool >> for us >> + getObject(key.getNumber(), key.getGeneration()); >> + } >> + recursivlyRead = oldValue; >> + } >> + >> + /** >> + * This will get the document that was parsed. parse() must be >> called before this is called. >> + * When you are done with this document you must call close() on >> it to release >> + * resources. >> + * >> + * @return The document that was parsed. >> + * >> + * @throws IOException If there is an error getting the document. 
>> + */ >> + public COSDocument getDocument() throws IOException { >> + if( document == null ) { >> + throw new IOException( "You must call parse() before >> calling getDocument()" ); >> + } >> + return document; >> + } >> + >> + /** >> + * This will get the PD document that was parsed. When you are >> done with >> + * this document you must call close() on it to release resources. >> + * >> + * @return The document at the PD layer. >> + * >> + * @throws IOException If there is an error getting the document. >> + */ >> + public PDDocument getPDDocument() throws IOException { >> + return doc; >> + } >> + >> + private boolean parseXrefTable() throws IOException { >> + String currentLine = readLine(); >> + if(throwNonConformingException) { >> + if(!"xref".equals(currentLine)) >> + throw new AssertionError("xref table not >> found.\nExpected: xref\nFound: "+currentLine); >> + } >> + >> + int objectNumber = readInt(); >> + int entries = readInt(); >> + xrefEntries = new ArrayList<XrefEntry>(entries); >> + for(int i=0; i<entries; i++) >> + xrefEntries.add(new XrefEntry(objectNumber++, readInt(), >> readInt(), readLine())); >> + >> + return true; >> + } >> + >> + protected long parseTrailerInformation() throws IOException, >> NumberFormatException { >> + long xrefLocation = -1; >> + consumeWhitespaceBackwards(); >> + String currentLine = readLineBackwards(); >> + if(throwNonConformingException) { >> + if(!"%%EOF".equals(currentLine)) >> + throw new AssertionError("Invalid EOF >> marker.\nExpected: %%EOF\nFound: "+currentLine); >> + } >> + >> + xrefLocation = readLongBackwards(); >> + currentLine = readLineBackwards(); >> + if(throwNonConformingException) { >> + if(!"startxref".equals(currentLine)) >> + throw new AssertionError("Invalid trailer.\nExpected: >> startxref\nFound: "+currentLine); >> + } >> + >> + document.setTrailer(readDictionaryBackwards()); >> + consumeWhitespaceBackwards(); >> + currentLine = readLineBackwards(); >> + if(throwNonConformingException) { >> + 
if(!"trailer".equals(currentLine)) >> + throw new AssertionError("Invalid trailer.\nExpected: >> trailer\nFound: "+currentLine); >> + } >> + >> + return xrefLocation; >> + } >> + >> + protected byte readByteBackwards() throws IOException { >> + inputFile.seek(currentOffset); >> + byte singleByte = (byte)inputFile.read(); >> + currentOffset--; >> + return singleByte; >> + } >> + >> + protected byte readByte() throws IOException { >> + inputFile.seek(currentOffset); >> + byte singleByte = (byte)inputFile.read(); >> + currentOffset++; >> + return singleByte; >> + } >> + >> + protected String readBackwardUntilWhitespace() throws IOException { >> + StringBuilder sb = new StringBuilder(); >> + byte singleByte = readByteBackwards(); >> + while(!isWhitespace(singleByte)) { >> + sb.insert(0, (char)singleByte); >> + singleByte = readByteBackwards(); >> + } >> + return sb.toString(); >> + } >> + >> + /** >> + * This will read all bytes (backwards) until a non-whitespace >> character is >> + * found. To save you an extra read, the non-whitespace >> character is >> + * returned. If the current character is not whitespace, this >> method will >> + * just return the current char. >> + * @return the first non-whitespace character found >> + * @throws IOException if there is an error reading from the file >> + */ >> + protected byte consumeWhitespaceBackwards() throws IOException { >> + inputFile.seek(currentOffset); >> + byte singleByte = (byte)inputFile.read(); >> + if(!isWhitespace(singleByte)) >> + return singleByte; >> + >> + // we have some whitespace, let's consume it >> + while(isWhitespace(singleByte)) { >> + singleByte = readByteBackwards(); >> + } >> + // readByteBackwards will decrement the currentOffset to >> point the byte >> + // before the one just read, so we increment it back to the >> current byte >> + currentOffset++; >> + return singleByte; >> + } >> + >> + /** >> + * This will read all bytes until a non-whitespace character is >> + * found. 
To save you an extra read, the non-whitespace >> character is >> + * returned. If the current character is not whitespace, this >> method will >> + * just return the current char. >> + * @return the first non-whitespace character found >> + * @throws IOException if there is an error reading from the file >> + */ >> + protected byte consumeWhitespace() throws IOException { >> + inputFile.seek(currentOffset); >> + byte singleByte = (byte)inputFile.read(); >> + if(!isWhitespace(singleByte)) >> + return singleByte; >> + >> + // we have some whitespace, let's consume it >> + while(isWhitespace(singleByte)) { >> + singleByte = readByte(); >> + } >> + // readByte() will increment the currentOffset to point the byte >> + // after the one just read, so we decrement it back to the >> current byte >> + currentOffset--; >> + return singleByte; >> + } >> + >> + /** >> + * This will consume any whitespace, read in bytes until >> whitespace is found >> + * again and then parse the characters which have been read as a >> long. The >> + * current offset will then point at the first whitespace >> character which >> + * preceeds the number. >> + * @return the parsed number >> + * @throws IOException if there is an error reading from the file >> + * @throws NumberFormatException if the bytes read can not be >> converted to a number >> + */ >> + protected long readLongBackwards() throws IOException, >> NumberFormatException { >> + StringBuilder sb = new StringBuilder(); >> + consumeWhitespaceBackwards(); >> + byte singleByte = readByteBackwards(); >> + while(!isWhitespace(singleByte)) { >> + sb.insert(0, (char)singleByte); >> + singleByte = readByteBackwards(); >> + } >> + if(sb.length() == 0) >> + throw new AssertionError("Number not found. 
Expected >> number at offset: " + currentOffset); >> + return Long.parseLong(sb.toString()); >> + } >> + >> + @Override >> + protected int readInt() throws IOException { >> + StringBuilder sb = new StringBuilder(); >> + consumeWhitespace(); >> + byte singleByte = readByte(); >> + while(!isWhitespace(singleByte)) { >> + sb.append((char)singleByte); >> + singleByte = readByte(); >> + } >> + if(sb.length() == 0) >> + throw new AssertionError("Number not found. Expected >> number at offset: " + currentOffset); >> + return Integer.parseInt(sb.toString()); >> + } >> + >> + /** >> + * This will read in a number and return the COS version of the >> number (be >> + * it a COSInteger or a COSFloat). >> + * @return the COSNumber which was read/parsed >> + * @throws IOException >> + */ >> + protected COSNumber readNumber() throws IOException { >> + StringBuilder sb = new StringBuilder(); >> + consumeWhitespace(); >> + byte singleByte = readByte(); >> + while(!isWhitespace(singleByte)) { >> + sb.append((char)singleByte); >> + singleByte = readByte(); >> + } >> + if(sb.length() == 0) >> + throw new AssertionError("Number not found. 
Expected >> number at offset: " + currentOffset); >> + return parseNumber(sb.toString()); >> + } >> + >> + protected COSNumber parseNumber(String number) throws IOException { >> + if(number.matches("^[0-9]+$")) >> + return COSInteger.get(number); >> + return new COSFloat(Float.parseFloat(number)); >> + } >> + >> + protected COSBase processCosObject(String string) throws >> IOException { >> + if(string != null&& string.endsWith(">")) { >> + // string of hex codes >> + return >> COSString.createFromHexString(string.replaceAll("^<", >> "").replaceAll(">$", "")); >> + } >> + return null; >> + } >> + >> + protected COSBase readObjectBackwards() throws IOException { >> + COSBase obj = null; >> + consumeWhitespaceBackwards(); >> + String lastSection = readBackwardUntilWhitespace(); >> + if("R".equals(lastSection)) { >> + // indirect reference >> + long gen = readLongBackwards(); >> + long number = readLongBackwards(); >> + // We just put a placeholder in the pool for now, we'll >> read the data later >> + doc.putObjectInPool(new COSUnread(), number, gen); >> + obj = new COSUnread(number, gen, this); >> + } else if(">>".equals(lastSection)) { >> + // dictionary >> + throw new RuntimeException("Not yet implemented"); >> + } else if(lastSection != null&& lastSection.endsWith("]")) { >> + // array >> + COSArray array = new COSArray(); >> + lastSection = lastSection.replaceAll("]$", ""); >> + while(!lastSection.startsWith("[")) { >> + if(lastSection.matches("^\\s*<.*>\\s*$")) // it's a >> hex string >> + >> array.add(COSString.createFromHexString(lastSection.replaceAll("^\\s*<", >> "").replaceAll(">\\s*$", ""))); >> + lastSection = readBackwardUntilWhitespace(); >> + } >> + lastSection = lastSection.replaceAll("^\\[", ""); >> + if(lastSection.matches("^\\s*<.*>\\s*$")) // it's a hex >> string >> + >> array.add(COSString.createFromHexString(lastSection.replaceAll("^\\s*<", >> "").replaceAll(">\\s*$", ""))); >> + obj = array; >> + } else if(lastSection != null&& 
lastSection.endsWith(">")) { >> + // string of hex codes >> + obj = processCosObject(lastSection); >> + } else { >> + // try a number, otherwise fall back on a string >> + try { >> + Long.parseLong(lastSection); >> + obj = COSNumber.get(lastSection); >> + } catch(NumberFormatException e) { >> + throw new RuntimeException("Not yet implemented"); >> + } >> + } >> + >> + return obj; >> + } >> + >> + protected COSName readNameBackwards() throws IOException { >> + String name = readBackwardUntilWhitespace(); >> + name = name.replaceAll("^/", ""); >> + return COSName.getPDFName(name); >> + } >> + >> + public COSBase getObject(long objectNumber, long generation) >> throws IOException { >> + // we could optionally, check to see if parse() have been >> called& >> + // throw an exception here, but I don't think that's really >> necessary >> + XrefEntry entry = xrefEntries.get((int)objectNumber); >> + currentOffset = entry.getByteOffset(); >> + return readObject(objectNumber, generation); >> + } >> + >> + /** >> + * This will read an object from the inputFile at whatever our >> currentOffset >> + * is. If the object and generation are not the expected values >> and this >> + * object is set to throw an exception for non-conforming >> documents, then an >> + * exception will be thrown. 
>> + * @param objectNumber the object number you expect to read >> + * @param generation the generation you expect this object to be >> + * @return >> + */ >> + public COSBase readObject(long objectNumber, long generation) >> throws IOException { >> + // when recursivly reading, we always pull the object from >> the filesystem >> + if(document != null&& recursivlyRead) { >> + // check to see if it is in the document cache before >> hitting the filesystem >> + COSBase obj = doc.getObjectFromPool(objectNumber, >> generation); >> + if(obj != null) >> + return obj; >> + } >> + >> + int actualObjectNumber = readInt(); >> + if(objectNumber != actualObjectNumber) >> + if(throwNonConformingException) >> + throw new AssertionError("Object numer expected was " + >> + objectNumber + " but actual was " + >> actualObjectNumber); >> + consumeWhitespace(); >> + >> + int actualGeneration = readInt(); >> + if(generation != actualGeneration) >> + if(throwNonConformingException) >> + throw new AssertionError("Generation expected was " + >> + generation + " but actual was " + >> actualGeneration); >> + consumeWhitespace(); >> + >> + String obj = readWord(); >> + if(!"obj".equals(obj)) >> + if(throwNonConformingException) >> + throw new AssertionError("Expected keyword 'obj' but >> found " + obj); >> + >> + // put placeholder object in doc to prevent infinite recursion >> + // e.g. read Root -> dereference object -> read object >> which has /Parent -> GOTO read Root >> + doc.putObjectInPool(new COSObject(null), objectNumber, >> generation); >> + COSBase object = readObject(); >> + doc.putObjectInPool(object, objectNumber, generation); >> + return object; >> + } >> + >> + /** >> + * This actually reads the object data. 
>> + * @return the object which is read >> + * @throws IOException >> + */ >> + protected COSBase readObject() throws IOException { >> + consumeWhitespace(); >> + String string = readWord(); >> + if(string.startsWith("<<")) { >> + // this is a dictionary >> + COSDictionary dictionary = new COSDictionary(); >> + boolean atEndOfDictionary = false; >> + // remove the marker for the beginning of the dictionary >> + string = string.replaceAll("^<<", ""); >> + >> + if("".equals(string) || string.matches("^\\w$")) >> + string = readWord().trim(); >> + while(!atEndOfDictionary) { >> + COSName name = COSName.getPDFName(string); >> + COSBase object = readObject(); >> + dictionary.setItem(name, object); >> + >> + byte singleByte = consumeWhitespace(); >> + if(singleByte == '>') { >> + readByte(); // get rid of the second '>' >> + atEndOfDictionary = true; >> + } >> + if(!atEndOfDictionary) >> + string = readWord().trim(); >> + } >> + return dictionary; >> + } else if(string.startsWith("/")) { >> + // it's a dictionary label. i.e. /Type or /Pages or >> something similar >> + COSBase name = COSName.getPDFName(string); >> + return name; >> + } else if(string.startsWith("-")) { >> + // it's a negitive number >> + return parseNumber(string); >> + } else if(string.charAt(0)>= '0'&& string.charAt(0)<= '9' ) { >> + // it's a COSInt or COSFloat, or a weak reference (i.e. >> "3 0 R") >> + // we'll have to peek ahead a little to see if it's a >> reference or not >> + long tempOffset = this.currentOffset; >> + consumeWhitespace(); >> + String tempString = readWord(); >> + if(tempString.matches("^[0-9]+$")) { >> + // it is an int, might be a weak reference... 
>> + tempString = readWord(); >> + if(!"R".equals(tempString)) { >> + // it's just a number, not a weak reference >> + this.currentOffset = tempOffset; >> + return parseNumber(string); >> + } >> + } else { >> + // it's just a number, not a weak reference >> + this.currentOffset = tempOffset; >> + return parseNumber(string); >> + } >> + >> + // it wasn't a number, so we need to parse the >> weak-reference >> + this.currentOffset = tempOffset; >> + int number = Integer.parseInt(string); >> + int gen = readInt(); >> + String r = readWord(); >> + >> + if(!"R".equals(r)) >> + if(throwNonConformingException) >> + throw new AssertionError("Expected keyword 'R' >> but found " + r); >> + >> + if(recursivlyRead) { >> + // seek to the object, read it, seek back to current >> location >> + long tempLocation = this.currentOffset; >> + this.currentOffset = >> this.xrefEntries.get(number).getByteOffset(); >> + COSBase returnValue = readObject(number, gen); >> + this.currentOffset = tempLocation; >> + return returnValue; >> + } else { >> + // Put a COSUnknown there as a placeholder >> + COSObject obj = new COSObject(new COSUnread()); >> + obj.setObjectNumber(COSInteger.get(number)); >> + obj.setGenerationNumber(COSInteger.get(gen)); >> + return obj; >> + } >> + } else if(string.startsWith("]")) { >> + // end of an array, just return null >> + if("]".equals(string)) >> + return null; >> + int oldLength = string.length(); >> + this.currentOffset -= oldLength; >> + return null; >> + } else if(string.startsWith("[")) { >> + // array of values >> + // we'll just pay attention to the first part (this is in >> case there >> + // is no whitespace between the "[" and the first element) >> + int oldLength = string.length(); >> + string = "["; >> + this.currentOffset -= (oldLength - string.length() + 1); >> + >> + COSArray array = new COSArray(); >> + COSBase object = readObject(); >> + while(object != null) { >> + array.add(object); >> + object = readObject(); >> + } >> + return array; >> + 
} else if(string.startsWith("(")) { >> + // this is a string (not hex encoded), strip off the '(' >> and read until ')' >> + StringBuilder sb = new StringBuilder(string.substring(1)); >> + byte singleByte = readByte(); >> + while(singleByte != ')') { >> + sb.append((char)singleByte); >> + singleByte = readByte(); >> + } >> + return new COSString(sb.toString()); >> + } else { >> + throw new RuntimeException("Not yet implemented: " + string >> + + " loation=" + this.currentOffset); >> + } >> + } >> + >> + /** >> + * This will read the next string from the stream. >> + * @return The string that was read from the stream. >> + * @throws IOException If there is an error reading from the stream. >> + */ >> + @Override >> + protected String readString() throws IOException { >> + consumeWhitespace(); >> + StringBuilder buffer = new StringBuilder(); >> + int c = pdfSource.read(); >> + while(!isEndOfName((char)c)&& !isClosing(c)&& c != -1) { >> + buffer.append( (char)c ); >> + c = pdfSource.read(); >> + } >> + if (c != -1) { >> + pdfSource.unread(c); >> + } >> + return buffer.toString(); >> + } >> + >> + protected COSDictionary readDictionaryBackwards() throws >> IOException { >> + COSDictionary dict = new COSDictionary(); >> + >> + // consume the last two '>' chars which signify the end of >> the dictionary >> + consumeWhitespaceBackwards(); >> + byte singleByte = readByteBackwards(); >> + if(throwNonConformingException) { >> + if(singleByte != '>') >> + throw new AssertionError(""); >> + } >> + singleByte = readByteBackwards(); >> + if(throwNonConformingException) { >> + if(singleByte != '>') >> + throw new AssertionError(""); >> + } >> + >> + // check to see if we're at the end of the dictionary >> + boolean atEndOfDictionary = false; >> + singleByte = consumeWhitespaceBackwards(); >> + if(singleByte == '<') { >> + inputFile.seek(currentOffset-1); >> + atEndOfDictionary = ((byte)inputFile.read()) == '<'; >> + } >> + >> + COSDictionary backwardsDictionary = new 
COSDictionary(); >> + // while we're not at the end of the dictionary, read in entries >> + while(!atEndOfDictionary) { >> + COSBase object = readObjectBackwards(); >> + COSName name = readNameBackwards(); >> + backwardsDictionary.setItem(name, object); >> + >> + singleByte = consumeWhitespaceBackwards(); >> + if(singleByte == '<') { >> + inputFile.seek(currentOffset-1); >> + atEndOfDictionary = ((byte)inputFile.read()) == '<'; >> + } >> + } >> + >> + // the dictionaries preserve the order keys were added, as >> such we shall >> + // add them in the proper order, not the reverse order >> + Set<COSName> backwardsKeys = backwardsDictionary.keySet(); >> + for(int i = backwardsKeys.size()-1; i>=0; i--) >> + dict.setItem((COSName)backwardsKeys.toArray()[i], >> backwardsDictionary.getItem((COSName)backwardsKeys.toArray()[i])); >> + >> + // consume the last two '<' chars >> + readByteBackwards(); >> + readByteBackwards(); >> + >> + return dict; >> + } >> + >> + /** >> + * This will read a line starting with the byte at offset and going >> + * backwards until it finds a newline. This should only be used >> if we are >> + * certain that the data will only be text, and not binary data. 
>> + * >> + * @param offset the location of the file where we should start >> reading >> + * @return the string which was read >> + * @throws IOException if there was an error reading data from >> the file >> + */ >> + protected String readLineBackwards() throws IOException { >> + StringBuilder sb = new StringBuilder(); >> + boolean endOfObject = false; >> + >> + do { >> + // first we read the %%EOF marker >> + byte singleByte = readByteBackwards(); >> + if(singleByte == '\n') { >> + // if ther's a preceeding \r, we'll eat that as well >> + inputFile.seek(currentOffset); >> + if((byte)inputFile.read() == '\r') >> + currentOffset--; >> + endOfObject = true; >> + } else if(singleByte == '\r') { >> + endOfObject = true; >> + } else { >> + sb.insert(0, (char)singleByte); >> + } >> + } while(!endOfObject); >> + >> + return sb.toString(); >> + } >> + >> + /** >> + * This will read a line starting with the byte at offset and going >> + * forward until it finds a newline. This should only be used if >> we are >> + * certain that the data will only be text, and not binary data. 
>> + * @param offset the location of the file where we should start >> reading >> + * @return the string which was read >> + * @throws IOException if there was an error reading data from >> the file >> + */ >> + @Override >> + protected String readLine() throws IOException { >> + StringBuilder sb = new StringBuilder(); >> + boolean endOfLine = false; >> + >> + do { >> + // first we read the %%EOF marker >> + byte singleByte = readByte(); >> + if(singleByte == '\n') { >> + // if ther's a preceeding \r, we'll eat that as well >> + inputFile.seek(currentOffset); >> + if((byte)inputFile.read() == '\r') >> + currentOffset++; >> + endOfLine = true; >> + } else if(singleByte == '\r') { >> + endOfLine = true; >> + } else { >> + sb.append((char)singleByte); >> + } >> + } while(!endOfLine); >> + >> + return sb.toString(); >> + } >> + >> + protected String readWord() throws IOException { >> + StringBuilder sb = new StringBuilder(); >> + boolean stop = true; >> + do { >> + byte singleByte = readByte(); >> + stop = this.isWhitespace(singleByte); >> + >> + // there are some additional characters which indicate >> the next element/word has begun >> + // ignore the first char we read, b/c the first char is >> the beginnging of this object, not the next one >> + if(!stop&& sb.length()> 0) { >> + stop = singleByte == '/' || singleByte == '[' >> + || singleByte == ']' >> + || (singleByte == '>'&& >> !">".equals(sb.toString())); >> + if(stop) // we're stopping on a non-whitespace char, >> decrement the >> + this.currentOffset--; // counter so we don't miss >> this character >> + } >> + if(!stop) >> + sb.append((char)singleByte); >> + } while(!stop); >> + >> + return sb.toString(); >> + } >> + >> + /** >> + * @return the recursivlyRead >> + */ >> + public boolean isRecursivlyRead() { >> + return recursivlyRead; >> + } >> + >> + /** >> + * @param recursivlyRead the recursivlyRead to set >> + */ >> + public void setRecursivlyRead(boolean recursivlyRead) { >> + this.recursivlyRead = 
recursivlyRead; >> + } >> +} >> >> Added: >> pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/ConformingPDDocument.java >> >> URL: >> http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/ConformingPDDocument.java?rev=1142109&view=auto >> >> ============================================================================== >> >> --- >> pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/ConformingPDDocument.java >> (added) >> +++ >> pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/ConformingPDDocument.java >> Fri Jul 1 22:28:23 2011 >> @@ -0,0 +1,115 @@ >> +/* >> + * Copyright 2011 adam. >> + * >> + * Licensed under the Apache License, Version 2.0 (the "License"); >> + * you may not use this file except in compliance with the License. >> + * You may obtain a copy of the License at >> + * >> + * http://www.apache.org/licenses/LICENSE-2.0 >> + * >> + * Unless required by applicable law or agreed to in writing, software >> + * distributed under the License is distributed on an "AS IS" BASIS, >> + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or >> implied. >> + * See the License for the specific language governing permissions and >> + * limitations under the License. >> + * under the License. >> + */ >> + >> +package org.apache.pdfbox.pdmodel; >> + >> +import java.io.File; >> +import java.io.IOException; >> +import java.util.ArrayList; >> +import java.util.HashMap; >> +import java.util.List; >> +import java.util.Map; >> +import org.apache.pdfbox.cos.COSBase; >> +import org.apache.pdfbox.cos.COSDocument; >> +import org.apache.pdfbox.pdfparser.ConformingPDFParser; >> +import org.apache.pdfbox.persistence.util.COSObjectKey; >> + >> +/** >> + * >> + * @author adam >> + */ >> +public class ConformingPDDocument extends PDDocument { >> + /** >> + * Maps ObjectKeys to a COSObject. 
Note that references to these >> objects >> + * are also stored in COSDictionary objects that map a name to a >> specific object. >> + */ >> + private final Map<COSObjectKey, COSBase> objectPool = >> + new HashMap<COSObjectKey, COSBase>(); >> + private ConformingPDFParser parser = null; >> + >> + public ConformingPDDocument() throws IOException { >> + super(); >> + } >> + >> + public ConformingPDDocument(COSDocument doc) throws IOException { >> + super(doc); >> + } >> + >> + /** >> + * This will load a document from an input stream. >> + * @param input The File which contains the document. >> + * @return The document that was loaded. >> + * @throws IOException If there is an error reading from the stream. >> + */ >> + public static PDDocument load(File input) throws IOException { >> + ConformingPDFParser parser = new ConformingPDFParser(input); >> + parser.parse(); >> + return parser.getPDDocument(); >> + } >> + >> + /** >> + * This will get an object from the pool. >> + * @param key The object key. >> + * @return The object in the pool or a new one if it has not been >> parsed yet. >> + * @throws IOException If there is an error getting the proxy >> object. >> + */ >> + public COSBase getObjectFromPool(COSObjectKey key) throws >> IOException { >> + return objectPool.get(key); >> + } >> + >> + /** >> + * This will get an object from the pool. >> + * @param key The object key. >> + * @return The object in the pool or a new one if it has not been >> parsed yet. >> + * @throws IOException If there is an error getting the proxy >> object. >> + */ >> + public List<COSObjectKey> getObjectKeysFromPool() throws >> IOException { >> + List<COSObjectKey> keys = new ArrayList<COSObjectKey>(); >> + for(COSObjectKey key : objectPool.keySet()) >> + keys.add(key); >> + return keys; >> + } >> + >> + /** >> + * This will get an object from the pool. 
>> + * @param number the object number >> + * @param generation the generation of this object you wish to load >> + * @return The object in the pool >> + * @throws IOException If there is an error getting the proxy >> object. >> + */ >> + public COSBase getObjectFromPool(long number, long generation) >> throws IOException { >> + return objectPool.get(new COSObjectKey(number, generation)); >> + } >> + >> + public void putObjectInPool(COSBase object, long number, long >> generation) { >> + objectPool.put(new COSObjectKey(number, generation), object); >> + } >> + >> + /** >> + * @return the parser >> + */ >> + public ConformingPDFParser getParser() { >> + return parser; >> + } >> + >> + /** >> + * @param parser the parser to set >> + */ >> + public void setParser(ConformingPDFParser parser) { >> + this.parser = parser; >> + } >> +} >> >> Added: >> pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/common/XrefEntry.java >> >> URL: >> http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/common/XrefEntry.java?rev=1142109&view=auto >> >> ============================================================================== >> >> --- >> pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/common/XrefEntry.java >> (added) >> +++ >> pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/common/XrefEntry.java >> Fri Jul 1 22:28:23 2011 >> @@ -0,0 +1,43 @@ >> +/* >> + * Copyright 2011 adam. >> + * >> + * Licensed under the Apache License, Version 2.0 (the "License"); >> + * you may not use this file except in compliance with the License. >> + * You may obtain a copy of the License at >> + * >> + * http://www.apache.org/licenses/LICENSE-2.0 >> + * >> + * Unless required by applicable law or agreed to in writing, software >> + * distributed under the License is distributed on an "AS IS" BASIS, >> + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or >> implied. 
>> + * See the License for the specific language governing permissions and >> + * limitations under the License. >> + * under the License. >> + */ >> + >> +package org.apache.pdfbox.pdmodel.common; >> + >> +/** >> + * >> + * @author adam >> + */ >> +public class XrefEntry { >> + private int objectNumber = 0; >> + private int byteOffset = 0; >> + private int generation = 0; >> + private boolean inUse = true; >> + >> + public XrefEntry() { >> + } >> + >> + public XrefEntry(int objectNumber, int byteOffset, int >> generation, String inUse) { >> + this.objectNumber = objectNumber; >> + this.byteOffset = byteOffset; >> + this.generation = generation; >> + this.inUse = "n".equals(inUse); >> + } >> + >> + public int getByteOffset() { >> + return byteOffset; >> + } >> +} >> >> Added: >> pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdfparser/ConformingPDFParserTest.java >> >> URL: >> http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdfparser/ConformingPDFParserTest.java?rev=1142109&view=auto >> >> ============================================================================== >> >> --- >> pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdfparser/ConformingPDFParserTest.java >> (added) >> +++ >> pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdfparser/ConformingPDFParserTest.java >> Fri Jul 1 22:28:23 2011 >> @@ -0,0 +1,73 @@ >> +/* >> + * Copyright 2010 adam. >> + * >> + * Licensed under the Apache License, Version 2.0 (the "License"); >> + * you may not use this file except in compliance with the License. >> + * You may obtain a copy of the License at >> + * >> + * http://www.apache.org/licenses/LICENSE-2.0 >> + * >> + * Unless required by applicable law or agreed to in writing, software >> + * distributed under the License is distributed on an "AS IS" BASIS, >> + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or >> implied. 
>> + * See the License for the specific language governing permissions and >> + * limitations under the License. >> + * under the License. >> + */ >> + >> +package org.apache.pdfbox.pdfparser; >> + >> +import java.io.File; >> +import java.net.URL; >> +import org.apache.pdfbox.cos.COSDictionary; >> +import org.junit.After; >> +import org.junit.AfterClass; >> +import org.junit.Before; >> +import org.junit.BeforeClass; >> +import org.junit.Test; >> +import static org.junit.Assert.*; >> + >> +/** >> + * >> + * @author adam >> + */ >> +public class ConformingPDFParserTest { >> + >> + public ConformingPDFParserTest() { >> + } >> + >> + @BeforeClass >> + public static void setUpClass() throws Exception { >> + } >> + >> + @AfterClass >> + public static void tearDownClass() throws Exception { >> + } >> + >> + @Before >> + public void setUp() { >> + } >> + >> + @After >> + public void tearDown() { >> + } >> + >> + /** >> + * Test of parse method, of class ConformingPDFParser. >> + */ >> + @Test >> + public void testParse() throws Exception { >> + URL inputUrl = >> ConformingPDFParser.class.getResource("gdb-refcard.pdf"); >> + File inputFile = new File(inputUrl.toURI()); >> + ConformingPDFParser instance = new >> ConformingPDFParser(inputFile); >> + instance.parse(); >> + >> + COSDictionary trailer = instance.getDocument().getTrailer(); >> + assertNotNull(trailer); >> + System.out.println("Trailer: " + >> instance.getDocument().getTrailer().toString()); >> + assertEquals(3, trailer.size()); >> + assertNotNull(trailer.getDictionaryObject("Root")); >> + assertNotNull(trailer.getDictionaryObject("Info")); >> + assertNotNull(trailer.getDictionaryObject("Size")); >> + } >> +} >> \ No newline at end of file >> >> Modified: >> pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/TestPDDocumentCatalog.java >> >> URL: >> http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/TestPDDocumentCatalog.java?rev=1142109&r1=1142108&r2=1142109&view=diff 
>> >> ============================================================================== >> >> --- >> pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/TestPDDocumentCatalog.java >> (original) >> +++ >> pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/TestPDDocumentCatalog.java >> Fri Jul 1 22:28:23 2011 >> @@ -16,7 +16,6 @@ >> */ >> package org.apache.pdfbox.pdmodel; >> >> -import java.io.File; >> import junit.framework.TestCase; >> >> public class TestPDDocumentCatalog extends TestCase { >> @@ -62,13 +61,29 @@ public class TestPDDocumentCatalog exten >> doc = >> PDDocument.load(TestPDDocumentCatalog.class.getResourceAsStream("page_label.pdf")); >> >> PDDocumentCatalog cat = doc.getDocumentCatalog(); >> // getLabelsByPageIndices() should not throw an exception >> - String[] labels = >> cat.getPageLabels().getLabelsByPageIndices(); >> + cat.getPageLabels().getLabelsByPageIndices(); >> } catch(Exception e) { >> - e.printStackTrace(); >> fail("Threw exception!"); >> } finally { >> if(doc != null) >> doc.close(); >> } >> } >> + >> + /** >> + * Test case for >> + *<a href="https://issues.apache.org/jira/browse/PDFBOX-911" >> + *>PDFBOX-911</a> - Method PDDocument.getNumberOfPages() returns >> wrong >> + * number of pages >> + */ >> + public void testGetNumberOfPages() throws Exception { >> + PDDocument doc = null; >> + try { >> + doc = >> PDDocument.load(TestPDDocumentCatalog.class.getResource("test.unc.pdf")); >> + assertEquals(4, doc.getNumberOfPages()); >> + } finally { >> + if(doc != null) >> + doc.close(); >> + } >> + } >> } >> >> Added: >> pdfbox/trunk/pdfbox/src/test/resources/org/apache/pdfbox/pdfparser/gdb-refcard.pdf >> >> URL: >> http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/test/resources/org/apache/pdfbox/pdfparser/gdb-refcard.pdf?rev=1142109&view=auto >> >> ============================================================================== >> >> Binary file - no diff available. 
>> >> Propchange: >> pdfbox/trunk/pdfbox/src/test/resources/org/apache/pdfbox/pdfparser/gdb-refcard.pdf >> >> ------------------------------------------------------------------------------ >> >> svn:mime-type = application/octet-stream >> >> >