svn commit: r1575426 - /pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFStreamParser.java
Author: tilman Date: Fri Mar 7 22:02:25 2014 New Revision: 1575426 URL: http://svn.apache.org/r1575426 Log: PDFBOX-1164: add heuristic by Timo Boehme to detect wrongly assumed end of inline image Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFStreamParser.java Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFStreamParser.java URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFStreamParser.java?rev=1575426&r1=1575425&r2=1575426&view=diff == --- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFStreamParser.java (original) +++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFStreamParser.java Fri Mar 7 22:02:25 2014 @@ -19,6 +19,7 @@ package org.apache.pdfbox.pdfparser; import java.io.ByteArrayOutputStream; import java.io.IOException; import java.io.InputStream; +import java.io.PushbackInputStream; import java.util.ArrayList; import java.util.Iterator; import java.util.List; @@ -46,6 +47,8 @@ public class PDFStreamParser extends Bas { private List<Object> streamObjects = new ArrayList<Object>( 100 ); private RandomAccess file; +private final int maxBinCharTestLength = 5; +private final byte[] binCharTestArr = new byte[maxBinCharTestLength]; /** * Constructor that takes a stream to parse. @@ -391,10 +394,11 @@ public class PDFStreamParser extends Bas // PDF spec is kinda unclear about this. Should a whitespace // always appear before EI? Not sure, so that we just read // until EI<whitespace>. -// Be aware not all kind of whitespaces are allowed here. see PDFBOX1561 +// Be aware not all kind of whitespaces are allowed here. 
see PDFBOX-1561 while( !(lastByte == 'E' && currentByte == 'I' && - isSpaceOrReturn()) && + isSpaceOrReturn() && + hasNoFollowingBinData( pdfSource )) && !pdfSource.isEOF() ) { imageData.write( lastByte ); @@ -436,6 +440,37 @@ public class PDFStreamParser extends Bas } /** + * Looks up next 5 bytes if they contain only ASCII characters (no control + * sequences etc.). + * + * @return <code>true</code> if next 5 bytes are printable ASCII characters, + * otherwise <code>false</code> + */ +private boolean hasNoFollowingBinData(final PushbackInputStream pdfSource) +throws IOException +{ +// as suggested in PDFBOX-1164 +final int readBytes = pdfSource.read(binCharTestArr, 0, maxBinCharTestLength); +boolean noBinData = true; + +if (readBytes > 0) +{ +for (int bIdx = 0; bIdx < readBytes; bIdx++) +{ +final byte b = binCharTestArr[bIdx]; +if ((b < 0x09) || ((b > 0x0a) && (b < 0x20) && (b != 0x0d))) +{ +// control character or > 0x7f -> we have binary data +noBinData = false; +break; +} +} +pdfSource.unread(binCharTestArr, 0, readBytes); +} +return noBinData; +} + +/** * This will read an operator from the stream. * * @return The operator that was read from the stream.
svn commit: r1575427 - /pdfbox/branches/1.8/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFStreamParser.java
Author: tilman Date: Fri Mar 7 22:03:59 2014 New Revision: 1575427 URL: http://svn.apache.org/r1575427 Log: PDFBOX-1164: add heuristic by Timo Boehme to detect wrongly assumed end of inline image Modified: pdfbox/branches/1.8/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFStreamParser.java Modified: pdfbox/branches/1.8/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFStreamParser.java URL: http://svn.apache.org/viewvc/pdfbox/branches/1.8/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFStreamParser.java?rev=1575427&r1=1575426&r2=1575427&view=diff == --- pdfbox/branches/1.8/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFStreamParser.java (original) +++ pdfbox/branches/1.8/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFStreamParser.java Fri Mar 7 22:03:59 2014 @@ -19,6 +19,7 @@ package org.apache.pdfbox.pdfparser; import java.io.ByteArrayOutputStream; import java.io.IOException; import java.io.InputStream; +import java.io.PushbackInputStream; import java.util.ArrayList; import java.util.Iterator; import java.util.List; @@ -47,6 +48,8 @@ public class PDFStreamParser extends Bas { private List<Object> streamObjects = new ArrayList<Object>( 100 ); private RandomAccess file; +private final int maxBinCharTestLength = 5; +private final byte[] binCharTestArr = new byte[maxBinCharTestLength]; /** * Constructor that takes a stream to parse. @@ -393,7 +396,8 @@ public class PDFStreamParser extends Bas // Be aware not all kind of whitespaces are allowed here. see PDFBOX1561 while( !(lastByte == 'E' && currentByte == 'I' && - isSpaceOrReturn()) && + isSpaceOrReturn() && + hasNoFollowingBinData( pdfSource )) && !pdfSource.isEOF() ) { imageData.write( lastByte ); @@ -435,6 +439,37 @@ public class PDFStreamParser extends Bas } /** + * Looks up next 5 bytes if they contain only ASCII characters (no control + * sequences etc.). 
+ * + * @return <code>true</code> if next 5 bytes are printable ASCII characters, + * otherwise <code>false</code> + */ +private boolean hasNoFollowingBinData(final PushbackInputStream pdfSource) +throws IOException +{ +// as suggested in PDFBOX-1164 +final int readBytes = pdfSource.read(binCharTestArr, 0, maxBinCharTestLength); +boolean noBinData = true; + +if (readBytes > 0) +{ +for (int bIdx = 0; bIdx < readBytes; bIdx++) +{ +final byte b = binCharTestArr[bIdx]; +if ((b < 0x09) || ((b > 0x0a) && (b < 0x20) && (b != 0x0d))) +{ +// control character or > 0x7f -> we have binary data +noBinData = false; +break; +} +} +pdfSource.unread(binCharTestArr, 0, readBytes); +} +return noBinData; +} + +/** * This will read an operator from the stream. * * @return The operator that was read from the stream.
svn commit: r1575475 - /pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/color/PDPattern.java
Author: jahewson Date: Sat Mar 8 03:18:16 2014 New Revision: 1575475 URL: http://svn.apache.org/r1575475 Log: PDFBOX-1966: toShadingPaint and toTilingPaint should be private Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/color/PDPattern.java Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/color/PDPattern.java URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/color/PDPattern.java?rev=1575475&r1=1575474&r2=1575475&view=diff == --- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/color/PDPattern.java (original) +++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/color/PDPattern.java Sat Mar 8 03:18:16 2014 @@ -135,7 +135,7 @@ public final class PDPattern extends PDS } } -public Paint toTilingPaint(PDFRenderer renderer, PDTilingPattern tilingPattern, PDColor color) +private Paint toTilingPaint(PDFRenderer renderer, PDTilingPattern tilingPattern, PDColor color) throws IOException { if (tilingPattern.getPaintType() == PDTilingPattern.PAINT_COLORED) @@ -150,7 +150,7 @@ public final class PDPattern extends PDS } } -public Paint toShadingPaint(PDShadingPattern shadingPattern, int pageHeight) throws IOException +private Paint toShadingPaint(PDShadingPattern shadingPattern, int pageHeight) throws IOException { PDShading shadingResources = shadingPattern.getShading(); int shadingType = shadingResources != null ? shadingResources.getShadingType() : 0;