Author: lehmi
Date: Thu May  8 15:49:41 2025
New Revision: 1925467

URL: http://svn.apache.org/viewvc?rev=1925467&view=rev
Log:
PDFBOX-5992: skip either a line break (CR, LF or CRLF) or any one-byte 
whitespace at the beginning of an inline image

Modified:
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/BaseParser.java
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFStreamParser.java

Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/BaseParser.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/BaseParser.java?rev=1925467&r1=1925466&r2=1925467&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/BaseParser.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/BaseParser.java Thu May  8 15:49:41 2025
@@ -402,9 +402,9 @@ public abstract class BaseParser
     }
 
     /**
-     * Skip the upcoming CRLF or LF which are supposed to follow a stream.
+     * Skip the upcoming CRLF or LF which are supposed to follow a stream. Trailing spaces are removed as well.
      * 
-     * @throws IOException
+     * @throws IOException if something went wrong
      */
     protected void skipWhiteSpaces() throws IOException
     {
@@ -418,24 +418,55 @@ public abstract class BaseParser
         {
             whitespace = source.read();
         }
+        if (!skipLinebreak(whitespace))
+        {
+            source.rewind(1);
+        }
+    }
 
-        if (isCR(whitespace))
+    /**
+     * Skip one line break, such as CR, LF or CRLF.
+     * 
+     * @return true if a line break was found and removed.
+     * 
+     * @throws IOException if something went wrong
+     */
+    protected boolean skipLinebreak() throws IOException
+    {
+        // a line break is a CR, or LF or CRLF
+        if (!skipLinebreak(source.read()))
         {
-            whitespace = source.read();
-            if (!isLF(whitespace))
+            source.rewind(1);
+            return false;
+        }
+        return true;
+    }
+
+    /**
+     * Skip one line break, such as CR, LF or CRLF.
+     * 
+     * @param linebreak the first character to be checked.
+     * 
+     * @return true if a line break was found and removed.
+     * 
+     * @throws IOException if something went wrong
+     */
+    private boolean skipLinebreak(int linebreak) throws IOException
+    {
+        // a line break is a CR, or LF or CRLF
+        if (isCR(linebreak))
+        {
+            int next = source.read();
+            if (!isLF(next))
             {
                 source.rewind(1);
-                //The spec says this is invalid but it happens in the real
-                //world so we must support it.
             }
         }
-        else if (!isLF(whitespace))
+        else if (!isLF(linebreak))
         {
-            //we are in an error.
-            //but again we will do a lenient parsing and just assume that everything
-            //is fine
-            source.rewind(1);
+            return false;
         }
+        return true;
     }
 
     /**

Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFStreamParser.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFStreamParser.java?rev=1925467&r1=1925466&r2=1925467&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFStreamParser.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFStreamParser.java Thu May  8 15:49:41 2025
@@ -273,9 +273,10 @@ public class PDFStreamParser extends Bas
                             "' at stream offset " + currentPosition);
                 }
                 ByteArrayOutputStream imageData = new ByteArrayOutputStream();
-                if( isWhitespace() )
+                // skip one line break (CR, LF or CRLF) or any one-byte whitespace
+                if (!skipLinebreak() && isWhitespace())
                 {
-                    //pull off the whitespace character
+                    // pull off the whitespace character
                     source.read();
                 }
                 int lastByte = source.read();


Reply via email to