Author: lehmi Date: Wed Oct 22 17:10:57 2014 New Revision: 1633653 URL: http://svn.apache.org/r1633653 Log: PDFBOX-2441: added check/repair of xref stream offset, rearranged some code
Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/NonSequentialPDFParser.java Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/NonSequentialPDFParser.java URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/NonSequentialPDFParser.java?rev=1633653&r1=1633652&r2=1633653&view=diff ============================================================================== --- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/NonSequentialPDFParser.java (original) +++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/NonSequentialPDFParser.java Wed Oct 22 17:10:57 2014 @@ -369,14 +369,11 @@ public class NonSequentialPDFParser exte long startXrefOffset = document.getStartXref(); // check the startxref offset - if (isLenient) + long fixedOffset = checkXRefOffset(startXrefOffset); + if (fixedOffset > -1) { - long fixedOffset = checkXRefOffset(startXrefOffset); - if (fixedOffset > -1) - { - startXrefOffset = fixedOffset; - } - document.setStartXref(startXrefOffset); + startXrefOffset = fixedOffset; + document.setStartXref(startXrefOffset); } long prev = startXrefOffset; // ---- parse whole chain of xref tables/object streams using PREV @@ -417,15 +414,22 @@ public class NonSequentialPDFParser exte if(trailer.containsKey(COSName.XREF_STM)) { int streamOffset = trailer.getInt(COSName.XREF_STM); + // check the xref stream reference + fixedOffset = checkXRefOffset(streamOffset); + if (fixedOffset > -1 && fixedOffset != streamOffset) + { + streamOffset = (int)fixedOffset; + trailer.setInt(COSName.XREF_STM, streamOffset); + } setPdfSource(streamOffset); skipSpaces(); parseXrefObjStream(prev, false); } prev = trailer.getInt(COSName.PREV); - if (isLenient && prev > -1) + if (prev > -1) { // check the xref table reference - long fixedOffset = checkXRefOffset(prev); + fixedOffset = checkXRefOffset(prev); if (fixedOffset > -1 && fixedOffset != prev) { prev = fixedOffset; @@ -437,10 
+441,10 @@ public class NonSequentialPDFParser exte { // parse xref stream prev = parseXrefObjStream(prev, true); - if (isLenient && prev > -1) + if (prev > -1) { // check the xref table reference - long fixedOffset = checkXRefOffset(prev); + fixedOffset = checkXRefOffset(prev); if (fixedOffset > -1 && fixedOffset != prev) { prev = fixedOffset; @@ -457,10 +461,7 @@ public class NonSequentialPDFParser exte document.setTrailer(trailer); document.setIsXRefStream(XRefType.STREAM == xrefTrailerResolver.getXrefType()); // check the offsets of all referenced objects - if (isLenient) - { - checkXrefOffsets(); - } + checkXrefOffsets(); // ---- prepare encryption if necessary COSBase trailerEncryptItem = document.getTrailer().getItem(COSName.ENCRYPT); @@ -1682,6 +1683,11 @@ public class NonSequentialPDFParser exte */ private long checkXRefOffset(long startXRefOffset) throws IOException { + // repair mode isn't available in non-lenient mode + if (!isLenient) + { + return startXRefOffset; + } setPdfSource(startXRefOffset); if (pdfSource.peek() == X && checkBytesAtOffset(XREF_TABLE)) { @@ -1778,6 +1784,11 @@ public class NonSequentialPDFParser exte */ private void checkXrefOffsets() throws IOException { + // repair mode isn't available in non-lenient mode + if (!isLenient) + { + return; + } Map<COSObjectKey, Long> xrefOffset = xrefTrailerResolver.getXrefTable(); if (xrefOffset != null) {