Author: lehmi
Date: Thu May 8 15:49:41 2025
New Revision: 1925467
URL: http://svn.apache.org/viewvc?rev=1925467&view=rev
Log:
PDFBOX-5992: skip either a line break (CR, LF or CRLF) or any one-byte
whitespace at the beginning of an inline image
Modified:
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/BaseParser.java
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFStreamParser.java
Modified:
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/BaseParser.java
URL:
http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/BaseParser.java?rev=1925467&r1=1925466&r2=1925467&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/BaseParser.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/BaseParser.java Thu May 8 15:49:41 2025
@@ -402,9 +402,9 @@ public abstract class BaseParser
}
/**
- * Skip the upcoming CRLF or LF which are supposed to follow a stream.
+ * Skip the upcoming CRLF or LF which are supposed to follow a stream. Trailing spaces are removed as well.
*
- * @throws IOException
+ * @throws IOException if something went wrong
*/
protected void skipWhiteSpaces() throws IOException
{
@@ -418,24 +418,55 @@ public abstract class BaseParser
{
whitespace = source.read();
}
+ if (!skipLinebreak(whitespace))
+ {
+ source.rewind(1);
+ }
+ }
- if (isCR(whitespace))
+ /**
+ * Skip one line break, such as CR, LF or CRLF.
+ *
+ * @return true if a line break was found and removed.
+ *
+ * @throws IOException if something went wrong
+ */
+ protected boolean skipLinebreak() throws IOException
+ {
+ // a line break is a CR, or LF or CRLF
+ if (!skipLinebreak(source.read()))
{
- whitespace = source.read();
- if (!isLF(whitespace))
+ source.rewind(1);
+ return false;
+ }
+ return true;
+ }
+
+ /**
+ * Skip one line break, such as CR, LF or CRLF.
+ *
+ * @param linebreak the first character to be checked.
+ *
+ * @return true if a line break was found and removed.
+ *
+ * @throws IOException if something went wrong
+ */
+ private boolean skipLinebreak(int linebreak) throws IOException
+ {
+ // a line break is a CR, or LF or CRLF
+ if (isCR(linebreak))
+ {
+ int next = source.read();
+ if (!isLF(next))
{
source.rewind(1);
- //The spec says this is invalid but it happens in the real
- //world so we must support it.
}
}
- else if (!isLF(whitespace))
+ else if (!isLF(linebreak))
{
- //we are in an error.
- //but again we will do a lenient parsing and just assume that everything
- //is fine
- source.rewind(1);
+ return false;
}
+ return true;
}
/**
Modified:
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFStreamParser.java
URL:
http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFStreamParser.java?rev=1925467&r1=1925466&r2=1925467&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFStreamParser.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFStreamParser.java Thu May 8 15:49:41 2025
@@ -273,9 +273,10 @@ public class PDFStreamParser extends Bas
"' at stream offset " + currentPosition);
}
ByteArrayOutputStream imageData = new ByteArrayOutputStream();
- if( isWhitespace() )
+ // skip one line break (CR, LF or CRLF) or any one-byte whitespace
+ if (!skipLinebreak() && isWhitespace())
{
- //pull off the whitespace character
+ // pull off the whitespace character
source.read();
}
int lastByte = source.read();