Author: lehmi
Date: Sun Oct 14 11:26:09 2012
New Revision: 1398055
URL: http://svn.apache.org/viewvc?rev=1398055&view=rev
Log:
PDFBOX-1408: use the toUnicodeMap to determine the width of the space character
as proposed by Juraj Lonc
Modified:
pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/cmap/CMap.java
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDFont.java
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDSimpleFont.java
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/PDFStreamEngine.java
Modified: pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/cmap/CMap.java
URL:
http://svn.apache.org/viewvc/pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/cmap/CMap.java?rev=1398055&r1=1398054&r2=1398055&view=diff
==============================================================================
--- pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/cmap/CMap.java
(original)
+++ pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/cmap/CMap.java Sun
Oct 14 11:26:09 2012
@@ -51,6 +51,9 @@ public class CMap
private final Map<String,Integer> char2CIDMappings = new
HashMap<String,Integer>();
private final List<CIDRange> cidRanges = new LinkedList<CIDRange>();
+ private static final String SPACE = " ";
+ private int spaceMapping = -1;
+
/**
* Creates a new instance of CMap.
*/
@@ -132,13 +135,19 @@ public class CMap
*
* @return The string that matches the lookup.
*/
- public String lookupCID(int cid) {
- if (cid2charMappings.containsKey(cid)) {
+ public String lookupCID(int cid)
+ {
+ if (cid2charMappings.containsKey(cid))
+ {
return cid2charMappings.get(cid);
- } else {
- for (CIDRange range : cidRanges) {
+ }
+ else
+ {
+ for (CIDRange range : cidRanges)
+ {
int ch = range.unmap(cid);
- if (ch != -1) {
+ if (ch != -1)
+ {
return Character.toString((char) ch);
}
}
@@ -150,18 +159,27 @@ public class CMap
* This will perform a lookup into the CID map.
*
* @param code The code used to lookup.
+ * @param offset the offset into the array.
+ * @param length the length of the subarray.
*
* @return The CID that matches the lookup.
*/
- public int lookupCID(byte[] code, int offset, int length) {
- if (isInCodeSpaceRanges(code,offset,length)) {
+ public int lookupCID(byte[] code, int offset, int length)
+ {
+ if (isInCodeSpaceRanges(code,offset,length))
+ {
int codeAsInt = getCodeFromArray(code, offset, length);
- if (char2CIDMappings.containsKey(codeAsInt)) {
+ if (char2CIDMappings.containsKey(codeAsInt))
+ {
return char2CIDMappings.get(codeAsInt);
- } else {
- for (CIDRange range : cidRanges) {
+ }
+ else
+ {
+ for (CIDRange range : cidRanges)
+ {
int ch = range.map((char)codeAsInt);
- if (ch != -1) {
+ if (ch != -1)
+ {
return ch;
}
}
@@ -202,13 +220,17 @@ public class CMap
int srcLength = src.length;
int intSrc = getCodeFromArray(src, 0, srcLength);
+ if ( SPACE.equals(dest) )
+ {
+ spaceMapping = intSrc;
+ }
if( srcLength == 1 )
{
singleByteMappings.put( intSrc, dest );
}
else if( srcLength == 2 )
{
- doubleByteMappings.put( intSrc , dest );
+ doubleByteMappings.put( intSrc, dest );
}
else
{
@@ -233,11 +255,13 @@ public class CMap
/**
* This will add a CID Range.
*
- * @param src The CID Range to be added.
- * @param dest The starting cid.
+ * @param from starting character of the CID range.
+ * @param to ending character of the CID range.
+ * @param cid the cid to be started with.
*
*/
- public void addCIDRange(char from, char to, int cid) {
+ public void addCIDRange(char from, char to, int cid)
+ {
cidRanges.add(0, new CIDRange(from, to, cid));
}
@@ -314,7 +338,7 @@ public class CMap
*
* 0 represents a horizontal and 1 represents a vertical orientation.
*
- * @return
+ * @return the wmode
*/
public int getWMode()
{
@@ -450,4 +474,14 @@ public class CMap
{
supplement = newSupplement;
}
+
+ /**
+ * Returns the mapping for the space character.
+ *
+ * @return the mapped code for the space character
+ */
+ public int getSpaceMapping()
+ {
+ return spaceMapping;
+ }
}
\ No newline at end of file
Modified:
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDFont.java
URL:
http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDFont.java?rev=1398055&r1=1398054&r2=1398055&view=diff
==============================================================================
---
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDFont.java
(original)
+++
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDFont.java
Sun Oct 14 11:26:09 2012
@@ -86,7 +86,7 @@ public abstract class PDFont implements
protected CMap cmap = null;
/**
- * The CMap holding the ToUnicode mapping
+ * The CMap holding the ToUnicode mapping.
*/
protected CMap toUnicodeCmap = null;
@@ -128,8 +128,8 @@ public abstract class PDFont implements
return metrics;
}
- protected final static String resourceRootCMAP =
"org/apache/pdfbox/resources/cmap/";
- private final static String resourceRootAFM =
"org/apache/pdfbox/resources/afm/";
+ protected static final String resourceRootCMAP =
"org/apache/pdfbox/resources/cmap/";
+ private static final String resourceRootAFM =
"org/apache/pdfbox/resources/afm/";
private static void addAdobeFontMetric(
Map<String, FontMetric> metrics, String name )
@@ -459,7 +459,7 @@ public abstract class PDFont implements
/**
* Set the encoding object from the fonts dictionary.
- * @param encoding the given encoding.
+ * @param encodingValue the given encoding.
*/
protected void setEncoding(COSBase encodingValue)
{
@@ -537,8 +537,9 @@ public abstract class PDFont implements
public int encodeToCID( byte[] c, int offset, int length ) throws
IOException
{
int code = -1;
- if (encode(c, offset, length) != null) {
- code = getCodeFromArray( c, offset, length );
+ if (encode(c, offset, length) != null)
+ {
+ code = getCodeFromArray( c, offset, length );
}
return code;
}
@@ -779,7 +780,7 @@ public abstract class PDFont implements
/**
* Set the widths of the characters code.
*
- * @param widths The widths of the character codes.
+ * @param widthsList The widths of the character codes.
*/
public void setWidths( List<Float> widthsList )
{
@@ -883,11 +884,17 @@ public abstract class PDFont implements
/**
* Sets hasToUnicode to the given value.
- * @param hasToUnicode the given value for hasToUnicode
+ * @param hasToUnicodeValue the given value for hasToUnicode
*/
protected void setHasToUnicode(boolean hasToUnicodeValue)
{
hasToUnicode = hasToUnicodeValue;
}
-
+
+ /**
+ * Determines the width of the space character.
+ * @return the width of the space character
+ */
+ public abstract float getSpaceWitdh();
+
}
Modified:
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDSimpleFont.java
URL:
http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDSimpleFont.java?rev=1398055&r1=1398054&r2=1398055&view=diff
==============================================================================
---
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDSimpleFont.java
(original)
+++
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDSimpleFont.java
Sun Oct 14 11:26:09 2012
@@ -60,7 +60,11 @@ public abstract class PDSimpleFont exten
private float avgFontWidth = 0.0f;
private float avgFontHeight = 0.0f;
-
+ private float fontWidthOfSpace = -1f;
+
+ private static final byte[] SPACE_BYTES = { (byte)32 };
+
+
/**
* Log instance.
*/
@@ -484,13 +488,62 @@ public abstract class PDSimpleFont exten
}
private boolean isFontSubstituted = false;
+
+ /**
+ * This will get the value for isFontSubstituted, which indicates
+ * if the font was substituted due to a problem with the embedded one.
+ *
+ * @return true if the font was substituted
+ */
protected boolean isFontSubstituted()
{
return isFontSubstituted;
}
+ /**
+ * This will set the value for isFontSubstituted.
+ *
+ * @param isSubstituted true if the font was substituted
+ */
protected void setIsFontSubstituted(boolean isSubstituted)
{
isFontSubstituted = isSubstituted;
}
+
+ /**
+ * {@inheritDoc}
+ */
+ public float getSpaceWitdh()
+ {
+ if (fontWidthOfSpace == -1f)
+ {
+ COSBase toUnicode = getToUnicode();
+ try
+ {
+ if (toUnicode != null)
+ {
+ int spaceMapping = toUnicodeCmap.getSpaceMapping();
+ if (spaceMapping > -1)
+ {
+ fontWidthOfSpace = getFontWidth(spaceMapping);
+ }
+ else
+ {
+ fontWidthOfSpace = getAverageFontWidth();
+ }
+ }
+ else
+ {
+ fontWidthOfSpace = getFontWidth( SPACE_BYTES, 0, 1 );
+ }
+ }
+ catch (Exception e)
+ {
+ LOG.error("Can't determine the width of the space character
using 250 as default", e);
+ fontWidthOfSpace = 250f;
+ }
+ }
+ return fontWidthOfSpace;
+ }
+
}
Modified:
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/PDFStreamEngine.java
URL:
http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/PDFStreamEngine.java?rev=1398055&r1=1398054&r2=1398055&view=diff
==============================================================================
---
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/PDFStreamEngine.java
(original)
+++
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/PDFStreamEngine.java
Sun Oct 14 11:26:09 2012
@@ -73,8 +73,6 @@ public class PDFStreamEngine
*/
private final Set<String> unsupportedOperators = new HashSet<String>();
- private static final byte[] SPACE_BYTES = { (byte)32 };
-
private PDGraphicsState graphicsState = null;
private Matrix textMatrix = null;
@@ -354,7 +352,7 @@ public class PDFStreamEngine
{
// to avoid crash as described in PDFBOX-614
// lets see what the space displacement should be
- spaceWidthText = (font.getFontWidth( SPACE_BYTES, 0, 1
)*glyphSpaceToTextSpaceFactor);
+ spaceWidthText =
(font.getSpaceWitdh()*glyphSpaceToTextSpaceFactor);
}
catch (Throwable exception)
{