svn commit: r1575426 - /pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFStreamParser.java

2014-03-07 Thread tilman
Author: tilman
Date: Fri Mar  7 22:02:25 2014
New Revision: 1575426

URL: http://svn.apache.org/r1575426
Log:
PDFBOX-1164: add heuristic by Timo Boehme to detect wrongly assumed end of 
inline image 

Modified:

pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFStreamParser.java

Modified: 
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFStreamParser.java
URL: 
http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFStreamParser.java?rev=1575426&r1=1575425&r2=1575426&view=diff
==
--- 
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFStreamParser.java
 (original)
+++ 
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFStreamParser.java
 Fri Mar  7 22:02:25 2014
@@ -19,6 +19,7 @@ package org.apache.pdfbox.pdfparser;
 import java.io.ByteArrayOutputStream;
 import java.io.IOException;
 import java.io.InputStream;
+import java.io.PushbackInputStream;
 import java.util.ArrayList;
 import java.util.Iterator;
 import java.util.List;
@@ -46,6 +47,8 @@ public class PDFStreamParser extends Bas
 {
 private List<Object> streamObjects = new ArrayList<Object>( 100 );
 private RandomAccess file;
+private final int maxBinCharTestLength = 5;
+private final byte[] binCharTestArr = new byte[maxBinCharTestLength];
 
 /**
  * Constructor that takes a stream to parse.
@@ -391,10 +394,11 @@ public class PDFStreamParser extends Bas
 // PDF spec is kinda unclear about this. Should a whitespace
 // always appear before EI? Not sure, so that we just read
 // until EI<whitespace>.
-// Be aware not all kind of whitespaces are allowed here. see 
PDFBOX1561
+// Be aware not all kind of whitespaces are allowed here. see 
PDFBOX-1561
 while( !(lastByte == 'E' &&
  currentByte == 'I' &&
- isSpaceOrReturn()) &&
+ isSpaceOrReturn() &&
+ hasNoFollowingBinData( pdfSource )) &&
!pdfSource.isEOF() )
 {
 imageData.write( lastByte );
@@ -436,6 +440,37 @@ public class PDFStreamParser extends Bas
 }
 
 /**
+ * Looks up next 5 bytes if they contain only ASCII characters (no control
+ * sequences etc.).
+ *
+ * @return <code>true</code> if next 5 bytes are printable ASCII 
characters,
+ * otherwise <code>false</code>
+ */
+private boolean hasNoFollowingBinData(final PushbackInputStream pdfSource) 
+throws IOException
+{
+// as suggested in PDFBOX-1164
+final int readBytes = pdfSource.read(binCharTestArr, 0, 
maxBinCharTestLength);
+boolean noBinData = true;
+
+if (readBytes > 0)
+{
+for (int bIdx = 0; bIdx < readBytes; bIdx++)
+{
+final byte b = binCharTestArr[bIdx];
+if ((b < 0x09) || ((b > 0x0a) && (b < 0x20) && (b != 0x0d)))
+{
+// control character or > 0x7f -> we have binary data
+noBinData = false;
+break;
+}
+}
+pdfSource.unread(binCharTestArr, 0, readBytes);
+}
+return noBinData;
+}
+
+/**
  * This will read an operator from the stream.
  *
  * @return The operator that was read from the stream.




svn commit: r1575427 - /pdfbox/branches/1.8/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFStreamParser.java

2014-03-07 Thread tilman
Author: tilman
Date: Fri Mar  7 22:03:59 2014
New Revision: 1575427

URL: http://svn.apache.org/r1575427
Log:
PDFBOX-1164: add heuristic by Timo Boehme to detect wrongly assumed end of 
inline image

Modified:

pdfbox/branches/1.8/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFStreamParser.java

Modified: 
pdfbox/branches/1.8/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFStreamParser.java
URL: 
http://svn.apache.org/viewvc/pdfbox/branches/1.8/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFStreamParser.java?rev=1575427&r1=1575426&r2=1575427&view=diff
==
--- 
pdfbox/branches/1.8/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFStreamParser.java
 (original)
+++ 
pdfbox/branches/1.8/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFStreamParser.java
 Fri Mar  7 22:03:59 2014
@@ -19,6 +19,7 @@ package org.apache.pdfbox.pdfparser;
 import java.io.ByteArrayOutputStream;
 import java.io.IOException;
 import java.io.InputStream;
+import java.io.PushbackInputStream;
 import java.util.ArrayList;
 import java.util.Iterator;
 import java.util.List;
@@ -47,6 +48,8 @@ public class PDFStreamParser extends Bas
 {
 private List<Object> streamObjects = new ArrayList<Object>( 100 );
 private RandomAccess file;
+private final int maxBinCharTestLength = 5;
+private final byte[] binCharTestArr = new byte[maxBinCharTestLength];
 
 /**
  * Constructor that takes a stream to parse.
@@ -393,7 +396,8 @@ public class PDFStreamParser extends Bas
 // Be aware not all kind of whitespaces are allowed here. see 
PDFBOX1561
 while( !(lastByte == 'E' &&
  currentByte == 'I' &&
- isSpaceOrReturn()) &&
+ isSpaceOrReturn() &&
+ hasNoFollowingBinData( pdfSource )) &&
!pdfSource.isEOF() )
 {
 imageData.write( lastByte );
@@ -435,6 +439,37 @@ public class PDFStreamParser extends Bas
 }
 
 /**
+ * Looks up next 5 bytes if they contain only ASCII characters (no control
+ * sequences etc.).
+ *
+ * @return <code>true</code> if next 5 bytes are printable ASCII 
characters,
+ * otherwise <code>false</code>
+ */
+private boolean hasNoFollowingBinData(final PushbackInputStream pdfSource) 
+throws IOException
+{
+// as suggested in PDFBOX-1164
+final int readBytes = pdfSource.read(binCharTestArr, 0, 
maxBinCharTestLength);
+boolean noBinData = true;
+
+if (readBytes > 0)
+{
+for (int bIdx = 0; bIdx < readBytes; bIdx++)
+{
+final byte b = binCharTestArr[bIdx];
+if ((b < 0x09) || ((b > 0x0a) && (b < 0x20) && (b != 0x0d)))
+{
+// control character or > 0x7f -> we have binary data
+noBinData = false;
+break;
+}
+}
+pdfSource.unread(binCharTestArr, 0, readBytes);
+}
+return noBinData;
+}
+
+/**
  * This will read an operator from the stream.
  *
  * @return The operator that was read from the stream.




svn commit: r1575475 - /pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/color/PDPattern.java

2014-03-07 Thread jahewson
Author: jahewson
Date: Sat Mar  8 03:18:16 2014
New Revision: 1575475

URL: http://svn.apache.org/r1575475
Log:
PDFBOX-1966: toShadingPaint and toTilingPaint should be private

Modified:

pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/color/PDPattern.java

Modified: 
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/color/PDPattern.java
URL: 
http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/color/PDPattern.java?rev=1575475&r1=1575474&r2=1575475&view=diff
==
--- 
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/color/PDPattern.java
 (original)
+++ 
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/color/PDPattern.java
 Sat Mar  8 03:18:16 2014
@@ -135,7 +135,7 @@ public final class PDPattern extends PDS
 }
 }
 
-public Paint toTilingPaint(PDFRenderer renderer, PDTilingPattern 
tilingPattern, PDColor color)
+private Paint toTilingPaint(PDFRenderer renderer, PDTilingPattern 
tilingPattern, PDColor color)
 throws IOException
 {
 if (tilingPattern.getPaintType() == PDTilingPattern.PAINT_COLORED)
@@ -150,7 +150,7 @@ public final class PDPattern extends PDS
 }
 }
 
-public Paint toShadingPaint(PDShadingPattern shadingPattern, int 
pageHeight) throws IOException
+private Paint toShadingPaint(PDShadingPattern shadingPattern, int 
pageHeight) throws IOException
 {
 PDShading shadingResources = shadingPattern.getShading();
 int shadingType = shadingResources != null ? 
shadingResources.getShadingType() : 0;