Author: lehmi
Date: Sun Apr 20 11:11:57 2014
New Revision: 1588737
URL: http://svn.apache.org/r1588737
Log:
PDFBOX-2035: be more lenient when parsing CMaps
Modified:
pdfbox/branches/1.8/ (props changed)
pdfbox/branches/1.8/fontbox/src/main/java/org/apache/fontbox/cmap/CMapParser.java
pdfbox/branches/1.8/fontbox/src/test/java/org/apache/fontbox/cmap/TestCMapParser.java
pdfbox/branches/1.8/fontbox/src/test/resources/cmap/CMapTest
pdfbox/branches/1.8/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDFont.java
Propchange: pdfbox/branches/1.8/
------------------------------------------------------------------------------
Merged /pdfbox/trunk:r1588736
Modified:
pdfbox/branches/1.8/fontbox/src/main/java/org/apache/fontbox/cmap/CMapParser.java
URL:
http://svn.apache.org/viewvc/pdfbox/branches/1.8/fontbox/src/main/java/org/apache/fontbox/cmap/CMapParser.java?rev=1588737&r1=1588736&r2=1588737&view=diff
==============================================================================
--- pdfbox/branches/1.8/fontbox/src/main/java/org/apache/fontbox/cmap/CMapParser.java (original)
+++ pdfbox/branches/1.8/fontbox/src/main/java/org/apache/fontbox/cmap/CMapParser.java Sun Apr 20 11:11:57 2014
@@ -49,6 +49,7 @@ public class CMapParser
private static final String END_BASE_FONT_RANGE = "endbfrange";
private static final String END_CID_CHAR = "endcidchar";
private static final String END_CID_RANGE = "endcidrange";
+ private static final String END_CMAP = "endcmap";
private static final String WMODE = "WMode";
private static final String CMAP_NAME = "CMapName";
@@ -131,6 +132,11 @@ public class CMapParser
CMap useCMap = parse(resourceRoot, useStream);
result.useCmap(useCMap);
}
+ else if (op.op.equals(END_CMAP))
+ {
+ // end of CMap reached, stop reading as there isn't any interesting info anymore
+ break;
+ }
else if (op.op.equals(BEGIN_CODESPACE_RANGE))
{
Number cosCount = (Number) previousToken;
@@ -482,7 +488,9 @@ public class CMapParser
{
intValue = 10 + theNextByte - 'a';
}
- else if (theNextByte == 0x20)
+ // all kind of whitespaces may occur in malformed CMap files
+ // see PDFBOX-2035
+ else if (isWhitespaceOrEOF(theNextByte))
{
// skipping whitespaces
theNextByte = is.read();
@@ -569,11 +577,17 @@ public class CMapParser
buffer.append((char) nextByte);
nextByte = is.read();
- while (!isWhitespaceOrEOF(nextByte))
+ // newline separator may be missing in malformed CMap files
+ // see PDFBOX-2035
+ while (!isWhitespaceOrEOF(nextByte) && nextByte != '<')
{
buffer.append((char) nextByte);
nextByte = is.read();
}
+ if (nextByte == '<')
+ {
+ is.unread(nextByte);
+ }
retval = new Operator(buffer.toString());
break;
Modified:
pdfbox/branches/1.8/fontbox/src/test/java/org/apache/fontbox/cmap/TestCMapParser.java
URL:
http://svn.apache.org/viewvc/pdfbox/branches/1.8/fontbox/src/test/java/org/apache/fontbox/cmap/TestCMapParser.java?rev=1588737&r1=1588736&r2=1588737&view=diff
==============================================================================
--- pdfbox/branches/1.8/fontbox/src/test/java/org/apache/fontbox/cmap/TestCMapParser.java (original)
+++ pdfbox/branches/1.8/fontbox/src/test/java/org/apache/fontbox/cmap/TestCMapParser.java Sun Apr 20 11:11:57 2014
@@ -19,13 +19,13 @@ package org.apache.fontbox.cmap;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
+import java.util.Arrays;
import junit.framework.TestCase;
/**
* This will test the CMapParser implementation.
*
- * @version $Revision$
*/
public class TestCMapParser extends TestCase
{
@@ -42,33 +42,42 @@ public class TestCMapParser extends Test
CMapParser parser = new CMapParser();
CMap cMap = parser.parse( resourceDir, new FileInputStream(new
File(inDir,"CMapTest")));
+ // code space range
+ assertEquals("codeSpaceRanges size", 1,
cMap.getCodeSpaceRanges().size());
+ final byte[] expectedStart = {0, 0}; // 00 00
+ final byte[] expectedEnd = {2, -1}; // 02 FF
+ final byte[] actualStart = cMap.getCodeSpaceRanges().get(0).getStart();
+ final byte[] actualEnd = cMap.getCodeSpaceRanges().get(0).getEnd();
+ assertTrue("codeSpaceRange start", Arrays.equals(expectedStart,
actualStart));
+ assertTrue("codeSpaceRange end", Arrays.equals(expectedEnd,
actualEnd));
+
// char mappings
byte[] bytes1 = {0,1};
- assertTrue("A".equals(cMap.lookup(bytes1, 0, 2)));
+ assertEquals("bytes 00 01 from bfrange <0001> <0009> <0041>", "A",
cMap.lookup(bytes1, 0, 2));
byte[] bytes2 = {1,00};
String str2 = "0";
- assertTrue(str2.equals(cMap.lookup(bytes2, 0, 2)));
+ assertEquals("bytes 01 00 from bfrange <0100> <0109> <0030>", str2,
cMap.lookup(bytes2, 0, 2));
byte[] bytes3 = {0,10};
String str3 = "*";
- assertTrue(str3.equals(cMap.lookup(bytes3, 0, 2)));
+ assertEquals("bytes 00 0A from bfchar <000A> <002A>", str3,
cMap.lookup(bytes3, 0, 2));
byte[] bytes4 = {1,10};
String str4 = "+";
- assertTrue(str4.equals(cMap.lookup(bytes4, 0, 2)));
+ assertEquals("bytes 01 0A from bfchar <010A> <002B>", str4,
cMap.lookup(bytes4, 0, 2));
// CID mappings
int cid1 = 65;
- assertTrue("A".equals(cMap.lookupCID(cid1)));
+ assertEquals("CID 65 from cidrange <0000> <00ff> 0 ", "A",
cMap.lookupCID(cid1));
int cid2 = 280;
String strCID2 = "\u0118";
- assertTrue(strCID2.equals(cMap.lookupCID(cid2)));
+ assertEquals("CID 280 from cidrange <0100> <01ff> 256", strCID2,
cMap.lookupCID(cid2));
int cid3 = 520;
String strCID3 = "\u0208";
- assertTrue(strCID3.equals(cMap.lookupCID(cid3)));
+ assertEquals("CID 520 from cidchar <0208> 520", strCID3,
cMap.lookupCID(cid3));
}
}
Modified: pdfbox/branches/1.8/fontbox/src/test/resources/cmap/CMapTest
URL:
http://svn.apache.org/viewvc/pdfbox/branches/1.8/fontbox/src/test/resources/cmap/CMapTest?rev=1588737&r1=1588736&r2=1588737&view=diff
==============================================================================
--- pdfbox/branches/1.8/fontbox/src/test/resources/cmap/CMapTest (original)
+++ pdfbox/branches/1.8/fontbox/src/test/resources/cmap/CMapTest Sun Apr 20 11:11:57 2014
@@ -18,8 +18,7 @@
%%
%%EndComments
-1 begincodespacerange
- <0000> <02FF>
+1 begincodespacerange<0000> <02FF>
endcodespacerange
2 beginbfchar
Modified:
pdfbox/branches/1.8/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDFont.java
URL:
http://svn.apache.org/viewvc/pdfbox/branches/1.8/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDFont.java?rev=1588737&r1=1588736&r2=1588737&view=diff
==============================================================================
--- pdfbox/branches/1.8/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDFont.java (original)
+++ pdfbox/branches/1.8/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDFont.java Sun Apr 20 11:11:57 2014
@@ -25,6 +25,8 @@ import java.util.List;
import java.util.Map;
import org.apache.fontbox.afm.AFMParser;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
import org.apache.fontbox.afm.FontMetric;
import org.apache.fontbox.cmap.CMapParser;
import org.apache.fontbox.cmap.CMap;
@@ -59,6 +61,11 @@ public abstract class PDFont implements
{
/**
+ * Log instance.
+ */
+ private static final Log LOG = LogFactory.getLog(PDFont.class);
+
+ /**
* The cos dictionary for this font.
*/
protected COSDictionary font;
@@ -606,7 +613,10 @@ public abstract class PDFont implements
cmapObjects.put( targetCmap.getName(), targetCmap );
}
}
- catch (IOException exception) {}
+ catch (IOException exception)
+ {
+ LOG.error("An error occurs while reading a CMap", exception);
+ }
}
return targetCmap;
}