Author: lehmi
Date: Fri May 9 05:56:35 2025
New Revision: 1925470
URL: http://svn.apache.org/viewvc?rev=1925470&view=rev
Log:
PDFBOX-5992: skip either a line break (CR, LF or CRLF) or any one-byte
whitespace at the beginning of an inline image
Modified:
pdfbox/branches/3.0/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/BaseParser.java
pdfbox/branches/3.0/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFStreamParser.java
Modified:
pdfbox/branches/3.0/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/BaseParser.java
URL:
http://svn.apache.org/viewvc/pdfbox/branches/3.0/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/BaseParser.java?rev=1925470&r1=1925469&r2=1925470&view=diff
==============================================================================
---
pdfbox/branches/3.0/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/BaseParser.java
(original)
+++
pdfbox/branches/3.0/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/BaseParser.java
Fri May 9 05:56:35 2025
@@ -390,6 +390,11 @@ public abstract class BaseParser
return true;
}
+ /**
+ * Skip the upcoming CRLF or LF which are supposed to follow a stream. Trailing spaces are removed as well.
+ *
+ * @throws IOException if something went wrong
+ */
protected void skipWhiteSpaces() throws IOException
{
//PDF Ref 3.2.7 A stream must be followed by either
@@ -404,24 +409,55 @@ public abstract class BaseParser
{
whitespace = source.read();
}
+ if (!skipLinebreak(whitespace))
+ {
+ source.rewind(1);
+ }
+ }
- if (ASCII_CR == whitespace)
+ /**
+ * Skip one line break, such as CR, LF or CRLF.
+ *
+ * @return true if a line break was found and removed.
+ *
+ * @throws IOException if something went wrong
+ */
+ protected boolean skipLinebreak() throws IOException
+ {
+ // a line break is a CR, or LF or CRLF
+ if (!skipLinebreak(source.read()))
{
- whitespace = source.read();
- if (ASCII_LF != whitespace)
+ source.rewind(1);
+ return false;
+ }
+ return true;
+ }
+
+ /**
+ * Skip one line break, such as CR, LF or CRLF.
+ *
+ * @param linebreak the first character to be checked.
+ *
+ * @return true if a line break was found and removed.
+ *
+ * @throws IOException if something went wrong
+ */
+ private boolean skipLinebreak(int linebreak) throws IOException
+ {
+ // a line break is a CR, or LF or CRLF
+ if (isCR(linebreak))
+ {
+ int next = source.read();
+ if (!isLF(next))
{
source.rewind(1);
- //The spec says this is invalid but it happens in the real
- //world so we must support it.
}
}
- else if (ASCII_LF != whitespace)
+ else if (!isLF(linebreak))
{
- //we are in an error.
- //but again we will do a lenient parsing and just assume that everything
- //is fine
- source.rewind(1);
+ return false;
}
+ return true;
}
/**
Modified:
pdfbox/branches/3.0/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFStreamParser.java
URL:
http://svn.apache.org/viewvc/pdfbox/branches/3.0/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFStreamParser.java?rev=1925470&r1=1925469&r2=1925470&view=diff
==============================================================================
---
pdfbox/branches/3.0/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFStreamParser.java
(original)
+++
pdfbox/branches/3.0/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFStreamParser.java
Fri May 9 05:56:35 2025
@@ -272,9 +272,10 @@ public class PDFStreamParser extends Bas
"' at stream offset " + currentPosition);
}
ByteArrayOutputStream imageData = new ByteArrayOutputStream();
- if( isWhitespace() )
+ // skip one line break (CR, LF or CRLF) or any one-byte whitespace
+ if (!skipLinebreak() && isWhitespace())
{
- //pull off the whitespace character
+ // pull off the whitespace character
source.read();
}
int lastByte = source.read();