Author: tilman
Date: Sun Sep 24 08:56:49 2023
New Revision: 1912513

URL: http://svn.apache.org/viewvc?rev=1912513&view=rev
Log:
PDFBOX-5684: add hash and lastModified to cache file to avoid parsing bad fonts 
twice unless they have changed

Modified:
    
pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/FileSystemFontProvider.java

Modified: 
pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/FileSystemFontProvider.java
URL: 
http://svn.apache.org/viewvc/pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/FileSystemFontProvider.java?rev=1912513&r1=1912512&r2=1912513&view=diff
==============================================================================
--- 
pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/FileSystemFontProvider.java
 (original)
+++ 
pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/FileSystemFontProvider.java
 Sun Sep 24 08:56:49 2023
@@ -26,10 +26,13 @@ import java.io.IOException;
 import java.io.InputStream;
 import java.net.URI;
 import java.security.AccessControlException;
+import java.security.MessageDigest;
+import java.security.NoSuchAlgorithmException;
 import java.util.ArrayList;
 import java.util.HashSet;
 import java.util.List;
 import java.util.Set;
+
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.fontbox.FontBoxFont;
@@ -47,6 +50,7 @@ import org.apache.fontbox.type1.Type1Fon
 import org.apache.fontbox.util.autodetect.FontFileFinder;
 import org.apache.pdfbox.io.IOUtils;
 import org.apache.pdfbox.util.Charsets;
+import org.apache.pdfbox.util.Hex;
 
 /**
  * A FontProvider which searches for fonts on the local filesystem.
@@ -73,11 +77,13 @@ final class FileSystemFontProvider exten
         private final PDPanoseClassification panose;
         private final File file;
         private final FileSystemFontProvider parent;
+        private final String hash;
+        private final long lastModified;
 
         private FSFontInfo(File file, FontFormat format, String postScriptName,
                            CIDSystemInfo cidSystemInfo, int usWeightClass, int 
sFamilyClass,
                            int ulCodePageRange1, int ulCodePageRange2, int 
macStyle, byte[] panose,
-                           FileSystemFontProvider parent)
+                           FileSystemFontProvider parent, String hash, long 
lastModified)
         {
             this.file = file;
             this.format = format;
@@ -91,6 +97,8 @@ final class FileSystemFontProvider exten
             this.panose = panose != null && panose.length >= 
PDPanoseClassification.LENGTH ?
                     new PDPanoseClassification(panose) : null;
             this.parent = parent;
+            this.hash = hash;
+            this.lastModified = lastModified;
         }
 
         @Override
@@ -184,7 +192,7 @@ final class FileSystemFontProvider exten
         @Override
         public String toString()
         {
-            return super.toString() + " " + file;
+            return super.toString() + " " + file + " " + hash + " " + 
lastModified;
         }
 
         private TrueTypeFont getTrueTypeFont(String postScriptName, File file)
@@ -309,15 +317,18 @@ final class FileSystemFontProvider exten
         }
     }
 
-    /**
-     * Represents ignored fonts (i.e. bitmap fonts).
-     */
-    private static final class FSIgnored extends FSFontInfo
+    private FSFontInfo createFSIgnored(File file, FontFormat format, String 
postScriptName)
     {
-        private FSIgnored(File file, FontFormat format, String postScriptName)
+        String hash; // hash of the file content, or "" when the file cannot be read
+        try
         {
-            super(file, format, postScriptName, null, 0, 0, 0, 0, 0, null, 
null);
+            hash = computeHash(readAllBytes(file)); // hash is taken over the whole file
         }
+        catch (IOException ex)
+        {
+            hash = ""; // unreadable file: the ignored entry is still recorded, without a hash
+        }
+        return new FSFontInfo(file, format, postScriptName, null, 0, 0, 0, 0, 
0, null, null, hash, file.lastModified()); // no CID info, no panose, no parent provider
     }
 
     /**
@@ -437,52 +448,7 @@ final class FileSystemFontProvider exten
 
             for (FSFontInfo fontInfo : fontInfoList)
             {
-                writer.write(fontInfo.postScriptName.trim());
-                writer.write("|");
-                writer.write(fontInfo.format.toString());
-                writer.write("|");
-                if (fontInfo.cidSystemInfo != null)
-                {
-                    writer.write(fontInfo.cidSystemInfo.getRegistry() + '-' +
-                                 fontInfo.cidSystemInfo.getOrdering() + '-' +
-                                 fontInfo.cidSystemInfo.getSupplement());
-                }
-                writer.write("|");
-                if (fontInfo.usWeightClass > -1)
-                {
-                    writer.write(Integer.toHexString(fontInfo.usWeightClass));
-                }
-                writer.write("|");
-                if (fontInfo.sFamilyClass > -1)
-                {
-                    writer.write(Integer.toHexString(fontInfo.sFamilyClass));
-                }
-                writer.write("|");
-                writer.write(Integer.toHexString(fontInfo.ulCodePageRange1));
-                writer.write("|");
-                writer.write(Integer.toHexString(fontInfo.ulCodePageRange2));
-                writer.write("|");
-                if (fontInfo.macStyle > -1)
-                {
-                    writer.write(Integer.toHexString(fontInfo.macStyle));
-                }
-                writer.write("|");
-                if (fontInfo.panose != null)
-                {
-                    byte[] bytes = fontInfo.panose.getBytes();
-                    for (int i = 0; i < 10; i ++)
-                    {
-                        String str = Integer.toHexString(bytes[i]);
-                        if (str.length() == 1)
-                        {
-                            writer.write('0');
-                        }
-                        writer.write(str);
-                    }
-                }
-                writer.write("|");
-                writer.write(fontInfo.file.getAbsolutePath());
-                writer.newLine();
+                writeFontInfo(writer, fontInfo);
             }
         }
         catch (IOException e)
@@ -497,6 +463,60 @@ final class FileSystemFontProvider exten
         }
     }
 
+    private void writeFontInfo(BufferedWriter writer, FSFontInfo fontInfo) 
throws IOException
+    { // writes one cache line for fontInfo: 12 fields separated by '|', then a newline
+        writer.write(fontInfo.postScriptName.trim());
+        writer.write("|");
+        writer.write(fontInfo.format.toString());
+        writer.write("|");
+        if (fontInfo.cidSystemInfo != null) // field stays empty when there is no CID info
+        {
+            writer.write(fontInfo.cidSystemInfo.getRegistry() + '-' +
+                         fontInfo.cidSystemInfo.getOrdering() + '-' +
+                         fontInfo.cidSystemInfo.getSupplement());
+        }
+        writer.write("|");
+        if (fontInfo.usWeightClass > -1) // negative values are written as an empty field
+        {
+            writer.write(Integer.toHexString(fontInfo.usWeightClass));
+        }
+        writer.write("|");
+        if (fontInfo.sFamilyClass > -1)
+        {
+            writer.write(Integer.toHexString(fontInfo.sFamilyClass));
+        }
+        writer.write("|");
+        writer.write(Integer.toHexString(fontInfo.ulCodePageRange1));
+        writer.write("|");
+        writer.write(Integer.toHexString(fontInfo.ulCodePageRange2));
+        writer.write("|");
+        if (fontInfo.macStyle > -1)
+        {
+            writer.write(Integer.toHexString(fontInfo.macStyle));
+        }
+        writer.write("|");
+        if (fontInfo.panose != null)
+        {
+            byte[] bytes = fontInfo.panose.getBytes();
+            for (int i = 0; i < 10; i ++) // exactly two hex digits per panose byte
+            {
+                String str = Integer.toHexString(bytes[i] & 0xff); // mask: no sign extension
+                if (str.length() == 1)
+                {
+                    writer.write('0');
+                }
+                writer.write(str);
+            }
+        }
+        writer.write("|");
+        writer.write(fontInfo.file.getAbsolutePath());
+        writer.write("|");
+        writer.write(fontInfo.hash);
+        writer.write("|");
+        writer.write(Long.toString(fontInfo.lastModified)); // timestamp captured with the hash
+        writer.newLine();
+    }
+
     /**
      * Loads the font metadata cache from disk.
      */
@@ -511,15 +531,16 @@ final class FileSystemFontProvider exten
         List<FSFontInfo> results = new ArrayList<FSFontInfo>();
 
         // Get the disk cache
-        File file = null;
+        File diskCacheFile = null;
         boolean fileExists = false;
         try
         {
-            file = getDiskCacheFile();
-            fileExists = file.exists();
+            diskCacheFile = getDiskCacheFile();
+            fileExists = diskCacheFile.exists();
         }
         catch (SecurityException e)
         {
+            LOG.debug("Error checking for file existence", e);
         }
         
         if (fileExists)
@@ -527,11 +548,11 @@ final class FileSystemFontProvider exten
             BufferedReader reader = null;
             try
             {
-                reader = new BufferedReader(new FileReader(file));
+                reader = new BufferedReader(new FileReader(diskCacheFile));
                 String line;
                 while ((line = reader.readLine()) != null)
                 {
-                    String[] parts = line.split("\\|", 10);
+                    String[] parts = line.split("\\|", 12);
                     if (parts.length < 10)
                     {
                         LOG.warn("Incorrect line '" + line + "' in font disk 
cache is skipped");
@@ -548,6 +569,8 @@ final class FileSystemFontProvider exten
                     int macStyle = -1;
                     byte[] panose = null;
                     File fontFile;
+                    String hash = "";
+                    long lastModified = 0;
                     
                     postScriptName = parts[0];
                     format = FontFormat.valueOf(parts[1]);
@@ -581,12 +604,42 @@ final class FileSystemFontProvider exten
                         }
                     }
                     fontFile = new File(parts[9]);
+                    if (parts.length >= 12 && !parts[10].isEmpty() && 
!parts[11].isEmpty())
+                    {
+                        hash = parts[10];
+                        lastModified = Long.parseLong(parts[11]);
+                    }
                     if (fontFile.exists())
                     {
-                        FSFontInfo info = new FSFontInfo(fontFile, format, 
postScriptName,
-                                cidSystemInfo, usWeightClass, sFamilyClass, 
ulCodePageRange1,
-                                ulCodePageRange2, macStyle, panose, this);
-                        results.add(info);
+                        boolean keep = false;
+                        // if the file exists, find out whether it's the same 
file.
+                        // first check whether time is different and if yes, 
whether hash is different
+                        if (fontFile.lastModified() != lastModified)
+                        {
+                            String newHash = 
computeHash(readAllBytes(fontFile));
+                            if (newHash.equals(hash))
+                            {
+                                keep = true;
+                                lastModified = fontFile.lastModified();
+                                hash = newHash;
+                            }
+                        }
+                        else
+                        {
+                            keep = true;
+                        }
+                        if (keep)
+                        {
+                            FSFontInfo info = new FSFontInfo(fontFile, format, 
postScriptName,
+                                    cidSystemInfo, usWeightClass, 
sFamilyClass, ulCodePageRange1,
+                                    ulCodePageRange2, macStyle, panose, this, 
hash, lastModified);
+                            results.add(info);
+                        }
+                        else
+                        {
+                            LOG.debug("Font file " + 
fontFile.getAbsolutePath() + " is different");
+                            continue; // don't remove from "pending"
+                        }
                     }
                     else
                     {
@@ -609,7 +662,7 @@ final class FileSystemFontProvider exten
         if (!pending.isEmpty())
         {
             // re-build the entire cache if we encounter un-cached fonts 
(could be optimised)
-            LOG.warn("New fonts found, font cache will be re-built");
+            LOG.warn(pending.size() + " new fonts found, font cache will be 
re-built");
             return null;
         }
         
@@ -652,16 +705,19 @@ final class FileSystemFontProvider exten
      */
     private void addTrueTypeFont(File ttfFile) throws IOException
     {
+        FontFormat fontFormat = null;
         try
         {
             if (ttfFile.getPath().toLowerCase().endsWith(".otf"))
             {
+                fontFormat = FontFormat.OTF;
                 OTFParser parser = new OTFParser(false, true);
                 OpenTypeFont otf = parser.parse(ttfFile);
                 addTrueTypeFontImpl(otf, ttfFile);
             }
             else
             {
+                fontFormat = FontFormat.TTF;
                 TTFParser parser = new TTFParser(false, true);
                 TrueTypeFont ttf = parser.parse(ttfFile);
                 addTrueTypeFontImpl(ttf, ttfFile);
@@ -670,6 +726,7 @@ final class FileSystemFontProvider exten
         catch (IOException e)
         {
             LOG.warn("Could not load font file: " + ttfFile, e);
+            fontInfoList.add(createFSIgnored(ttfFile, fontFormat, 
"*skipexception*"));
         }
     }
 
@@ -683,7 +740,7 @@ final class FileSystemFontProvider exten
             // read PostScript name, if any
             if (ttf.getName() != null && ttf.getName().contains("|"))
             {
-                fontInfoList.add(new FSIgnored(file, FontFormat.TTF, 
"*skippipeinname*"));
+                fontInfoList.add(createFSIgnored(file, FontFormat.TTF, 
"*skippipeinname*"));
                 LOG.warn("Skipping font with '|' in name " + ttf.getName() + " 
in file " + file);
             }
             else if (ttf.getName() != null)
@@ -691,7 +748,7 @@ final class FileSystemFontProvider exten
                 // ignore bitmap fonts
                 if (ttf.getHeader() == null)
                 {
-                    fontInfoList.add(new FSIgnored(file, FontFormat.TTF, 
ttf.getName()));
+                    fontInfoList.add(createFSIgnored(file, FontFormat.TTF, 
ttf.getName()));
                     return;
                 }
                 int macStyle = ttf.getHeader().getMacStyle();
@@ -712,6 +769,11 @@ final class FileSystemFontProvider exten
                     panose = os2WindowsMetricsTable.getPanose();
                 }
 
+                InputStream is = ttf.getOriginalData();
+                byte[] ba = IOUtils.toByteArray(is);
+                is.close();
+                String hash = computeHash(ba);
+
                 String format;
                 if (ttf instanceof OpenTypeFont && ((OpenTypeFont) 
ttf).isPostScript())
                 {
@@ -732,7 +794,7 @@ final class FileSystemFontProvider exten
                     }
                     fontInfoList.add(new FSFontInfo(file, FontFormat.OTF, 
ttf.getName(), ros,
                             usWeightClass, sFamilyClass, ulCodePageRange1, 
ulCodePageRange2,
-                            macStyle, panose, this));
+                            macStyle, panose, this, hash, 
file.lastModified()));
                 }
                 else
                 {
@@ -752,7 +814,7 @@ final class FileSystemFontProvider exten
                     format = "TTF";
                     fontInfoList.add(new FSFontInfo(file, FontFormat.TTF, 
ttf.getName(), ros,
                             usWeightClass, sFamilyClass, ulCodePageRange1, 
ulCodePageRange2,
-                            macStyle, panose, this));
+                            macStyle, panose, this, hash, 
file.lastModified()));
                 }
 
                 if (LOG.isTraceEnabled())
@@ -768,13 +830,13 @@ final class FileSystemFontProvider exten
             }
             else
             {
-                fontInfoList.add(new FSIgnored(file, FontFormat.TTF, 
"*skipnoname*"));
+                fontInfoList.add(createFSIgnored(file, FontFormat.TTF, 
"*skipnoname*"));
                 LOG.warn("Missing 'name' entry for PostScript name in font " + 
file);
             }
         }
         catch (IOException e)
         {
-            fontInfoList.add(new FSIgnored(file, FontFormat.TTF, 
"*skipexception*"));
+            fontInfoList.add(createFSIgnored(file, FontFormat.TTF, 
"*skipexception*"));
             LOG.warn("Could not load font file: " + file, e);
         }
         finally
@@ -794,18 +856,19 @@ final class FileSystemFontProvider exten
             Type1Font type1 = Type1Font.createWithPFB(input);
             if (type1.getName() == null)
             {
-                fontInfoList.add(new FSIgnored(pfbFile, FontFormat.PFB, 
"*skipnoname*"));
+                fontInfoList.add(createFSIgnored(pfbFile, FontFormat.PFB, 
"*skipnoname*"));
                 LOG.warn("Missing 'name' entry for PostScript name in font " + 
pfbFile);
                 return;
             }
             if (type1.getName().contains("|"))
             {
-                fontInfoList.add(new FSIgnored(pfbFile, FontFormat.PFB, 
"*skippipeinname*"));
+                fontInfoList.add(createFSIgnored(pfbFile, FontFormat.PFB, 
"*skippipeinname*"));
                 LOG.warn("Skipping font with '|' in name " + type1.getName() + 
" in file " + pfbFile);
                 return;
             }
+            String hash = computeHash(readAllBytes(pfbFile));
             fontInfoList.add(new FSFontInfo(pfbFile, FontFormat.PFB, 
type1.getName(),
-                                            null, -1, -1, 0, 0, -1, null, 
this));
+                                            null, -1, -1, 0, 0, -1, null, 
this, hash, pfbFile.lastModified()));
 
             if (LOG.isTraceEnabled())
             {
@@ -844,4 +907,34 @@ final class FileSystemFontProvider exten
     {
         return fontInfoList;
     }
+
+    private static byte[] readAllBytes(File file) throws IOException
+    { // reads the complete content of the given file into a byte array
+        InputStream input = new FileInputStream(file);
+        try
+        {
+            byte[] content = IOUtils.toByteArray(input);
+            return content;
+        }
+        finally
+        {
+            IOUtils.closeQuietly(input); // the stream is closed whether or not reading succeeded
+        }
+    }
+
+    private static String computeHash(byte[] ba)
+    { // returns Hex.getString() of the SHA-512 digest of ba, or "" if SHA-512 is unavailable
+        MessageDigest md;
+        try
+        {
+            md = MessageDigest.getInstance("SHA-512"); // standard JCA name; "SHA512" is only an alias
+            byte[] digest = md.digest(ba);
+            return Hex.getString(digest);
+        }
+        catch (NoSuchAlgorithmException ex)
+        {
+            // no installed provider offers SHA-512: best effort, continue without a hash
+            return "";
+        }
+    }
 }


Reply via email to