Author: tilman
Date: Sun Sep 24 08:56:45 2023
New Revision: 1912512

URL: http://svn.apache.org/viewvc?rev=1912512&view=rev
Log:
PDFBOX-5684: add hash and lastModified to cache file to avoid parsing bad fonts 
twice unless they have changed

Modified:
    
pdfbox/branches/3.0/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/FileSystemFontProvider.java

Modified: 
pdfbox/branches/3.0/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/FileSystemFontProvider.java
URL: 
http://svn.apache.org/viewvc/pdfbox/branches/3.0/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/FileSystemFontProvider.java?rev=1912512&r1=1912511&r2=1912512&view=diff
==============================================================================
--- 
pdfbox/branches/3.0/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/FileSystemFontProvider.java
 (original)
+++ 
pdfbox/branches/3.0/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/FileSystemFontProvider.java
 Sun Sep 24 08:56:45 2023
@@ -26,11 +26,15 @@ import java.io.IOException;
 import java.io.InputStream;
 import java.net.URI;
 import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
 import java.security.AccessControlException;
+import java.security.MessageDigest;
+import java.security.NoSuchAlgorithmException;
 import java.util.ArrayList;
 import java.util.HashSet;
 import java.util.List;
 import java.util.Set;
+
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.fontbox.FontBoxFont;
@@ -45,7 +49,9 @@ import org.apache.fontbox.ttf.TrueTypeCo
 import org.apache.fontbox.ttf.TrueTypeFont;
 import org.apache.fontbox.type1.Type1Font;
 import org.apache.fontbox.util.autodetect.FontFileFinder;
+import org.apache.pdfbox.io.IOUtils;
 import org.apache.pdfbox.io.RandomAccessReadBufferedFile;
+import org.apache.pdfbox.util.Hex;
 
 /**
  * A FontProvider which searches for fonts on the local filesystem.
@@ -72,11 +78,13 @@ final class FileSystemFontProvider exten
         private final PDPanoseClassification panose;
         private final File file;
         private final FileSystemFontProvider parent;
+        private final String hash;
+        private final long lastModified;
 
         private FSFontInfo(File file, FontFormat format, String postScriptName,
                            CIDSystemInfo cidSystemInfo, int usWeightClass, int 
sFamilyClass,
                            int ulCodePageRange1, int ulCodePageRange2, int 
macStyle, byte[] panose,
-                           FileSystemFontProvider parent)
+                           FileSystemFontProvider parent, String hash, long 
lastModified)
         {
             this.file = file;
             this.format = format;
@@ -90,6 +98,8 @@ final class FileSystemFontProvider exten
             this.panose = panose != null && panose.length >= 
PDPanoseClassification.LENGTH ?
                     new PDPanoseClassification(panose) : null;
             this.parent = parent;
+            this.hash = hash;
+            this.lastModified = lastModified;
         }
 
         @Override
@@ -183,7 +193,7 @@ final class FileSystemFontProvider exten
         @Override
         public String toString()
         {
-            return super.toString() + " " + file;
+            return super.toString() + " " + file + " " + hash + " " + 
lastModified;
         }
 
         private TrueTypeFont getTrueTypeFont(String postScriptName, File file)
@@ -302,15 +312,18 @@ final class FileSystemFontProvider exten
         }
     }
 
-    /**
-     * Represents ignored fonts (i.e. bitmap fonts).
-     */
-    private static final class FSIgnored extends FSFontInfo
+    private FSFontInfo createFSIgnored(File file, FontFormat format, String 
postScriptName)
     {
-        private FSIgnored(File file, FontFormat format, String postScriptName)
+        String hash;
+        try
         {
-            super(file, format, postScriptName, null, 0, 0, 0, 0, 0, null, 
null);
+            hash = computeHash(Files.readAllBytes(file.toPath()));
         }
+        catch (IOException ex)
+        {
+            hash = "";
+        }
+        return new FSFontInfo(file, format, postScriptName, null, 0, 0, 0, 0, 
0, null, null, hash, file.lastModified());
     }
 
     /**
@@ -451,8 +464,8 @@ final class FileSystemFontProvider exten
         if (fontInfo.cidSystemInfo != null)
         {
             writer.write(fontInfo.cidSystemInfo.getRegistry() + '-' +
-                            fontInfo.cidSystemInfo.getOrdering() + '-' +
-                            fontInfo.cidSystemInfo.getSupplement());
+                         fontInfo.cidSystemInfo.getOrdering() + '-' +
+                         fontInfo.cidSystemInfo.getSupplement());
         }
         writer.write("|");
         if (fontInfo.usWeightClass > -1)
@@ -489,6 +502,10 @@ final class FileSystemFontProvider exten
         }
         writer.write("|");
         writer.write(fontInfo.file.getAbsolutePath());
+        writer.write("|");
+        writer.write(fontInfo.hash);
+        writer.write("|");
+        writer.write(Long.toString(fontInfo.file.lastModified()));
         writer.newLine();
     }
 
@@ -506,12 +523,12 @@ final class FileSystemFontProvider exten
         List<FSFontInfo> results = new ArrayList<>();
         
         // Get the disk cache
-        File file = null;
+        File diskCacheFile = null;
         boolean fileExists = false;
         try
         {
-            file = getDiskCacheFile();
-            fileExists = file.exists();
+            diskCacheFile = getDiskCacheFile();
+            fileExists = diskCacheFile.exists();
         }
         catch (SecurityException e)
         {
@@ -520,12 +537,12 @@ final class FileSystemFontProvider exten
 
         if (fileExists)
         {
-            try (BufferedReader reader = new BufferedReader(new 
FileReader(file)))
+            try (BufferedReader reader = new BufferedReader(new 
FileReader(diskCacheFile)))
             {
                 String line;
                 while ((line = reader.readLine()) != null)
                 {
-                    String[] parts = line.split("\\|", 10);
+                    String[] parts = line.split("\\|", 12);
                     if (parts.length < 10)
                     {
                         LOG.warn("Incorrect line '" + line + "' in font disk 
cache is skipped");
@@ -542,6 +559,8 @@ final class FileSystemFontProvider exten
                     int macStyle = -1;
                     byte[] panose = null;
                     File fontFile;
+                    String hash = "";
+                    long lastModified = 0;
                     
                     postScriptName = parts[0];
                     format = FontFormat.valueOf(parts[1]);
@@ -575,12 +594,42 @@ final class FileSystemFontProvider exten
                         }
                     }
                     fontFile = new File(parts[9]);
+                    if (parts.length >= 12 && !parts[10].isEmpty() && 
!parts[11].isEmpty())
+                    {
+                        hash = parts[10];
+                        lastModified = Long.parseLong(parts[11]);
+                    }
                     if (fontFile.exists())
                     {
-                        FSFontInfo info = new FSFontInfo(fontFile, format, 
postScriptName,
-                                cidSystemInfo, usWeightClass, sFamilyClass, 
ulCodePageRange1,
-                                ulCodePageRange2, macStyle, panose, this);
-                        results.add(info);
+                        boolean keep = false;
+                        // if the file exists, find out whether it's the same 
file.
+                        // first check whether time is different and if yes, 
whether hash is different
+                        if (fontFile.lastModified() != lastModified)
+                        {
+                            String newHash = 
computeHash(Files.readAllBytes(fontFile.toPath()));
+                            if (newHash.equals(hash))
+                            {
+                                keep = true;
+                                lastModified = fontFile.lastModified();
+                                hash = newHash;
+                            }
+                        }
+                        else
+                        {
+                            keep = true;
+                        }
+                        if (keep)
+                        {
+                            FSFontInfo info = new FSFontInfo(fontFile, format, 
postScriptName,
+                                    cidSystemInfo, usWeightClass, 
sFamilyClass, ulCodePageRange1,
+                                    ulCodePageRange2, macStyle, panose, this, 
hash, lastModified);
+                            results.add(info);
+                        }
+                        else
+                        {
+                            LOG.debug("Font file " + 
fontFile.getAbsolutePath() + " is different");
+                            continue; // don't remove from "pending"
+                        }
                     }
                     else
                     {
@@ -599,7 +648,7 @@ final class FileSystemFontProvider exten
         if (!pending.isEmpty())
         {
             // re-build the entire cache if we encounter un-cached fonts 
(could be optimised)
-            LOG.warn("New fonts found, font cache will be re-built");
+            LOG.warn(pending.size() + " new fonts found, font cache will be 
re-built");
             return null;
         }
         
@@ -626,16 +675,19 @@ final class FileSystemFontProvider exten
      */
     private void addTrueTypeFont(File ttfFile) throws IOException
     {
+        FontFormat fontFormat = null;
         try
         {
             if (ttfFile.getPath().toLowerCase().endsWith(".otf"))
             {
+                fontFormat = FontFormat.OTF;
                 OTFParser parser = new OTFParser(false);
                 OpenTypeFont otf = parser.parse(new 
RandomAccessReadBufferedFile(ttfFile));
                 addTrueTypeFontImpl(otf, ttfFile);
             }
             else
             {
+                fontFormat = FontFormat.TTF;
                 TTFParser parser = new TTFParser(false);
                 TrueTypeFont ttf = parser.parse(new 
RandomAccessReadBufferedFile(ttfFile));
                 addTrueTypeFontImpl(ttf, ttfFile);
@@ -644,6 +696,7 @@ final class FileSystemFontProvider exten
         catch (IOException e)
         {
             LOG.warn("Could not load font file: " + ttfFile, e);
+            fontInfoList.add(createFSIgnored(ttfFile, fontFormat, 
"*skipexception*"));
         }
     }
 
@@ -657,7 +710,7 @@ final class FileSystemFontProvider exten
             // read PostScript name, if any
             if (ttf.getName() != null && ttf.getName().contains("|"))
             {
-                fontInfoList.add(new FSIgnored(file, FontFormat.TTF, 
"*skippipeinname*"));
+                fontInfoList.add(createFSIgnored(file, FontFormat.TTF, 
"*skippipeinname*"));
                 LOG.warn("Skipping font with '|' in name " + ttf.getName() + " 
in file " + file);
             }
             else if (ttf.getName() != null)
@@ -665,7 +718,7 @@ final class FileSystemFontProvider exten
                 // ignore bitmap fonts
                 if (ttf.getHeader() == null)
                 {
-                    fontInfoList.add(new FSIgnored(file, FontFormat.TTF, 
ttf.getName()));
+                    fontInfoList.add(createFSIgnored(file, FontFormat.TTF, 
ttf.getName()));
                     return;
                 }
                 int macStyle = ttf.getHeader().getMacStyle();
@@ -686,6 +739,12 @@ final class FileSystemFontProvider exten
                     panose = os2WindowsMetricsTable.getPanose();
                 }
 
+                byte[] ba;
+                try (InputStream is = ttf.getOriginalData())
+                {
+                    ba = IOUtils.toByteArray(is);
+                }
+                String hash = computeHash(ba);
                 String format;
                 if (ttf instanceof OpenTypeFont && ((OpenTypeFont) 
ttf).isPostScript())
                 {
@@ -706,7 +765,7 @@ final class FileSystemFontProvider exten
                     }
                     fontInfoList.add(new FSFontInfo(file, FontFormat.OTF, 
ttf.getName(), ros,
                             usWeightClass, sFamilyClass, ulCodePageRange1, 
ulCodePageRange2,
-                            macStyle, panose, this));
+                            macStyle, panose, this, hash, 
file.lastModified()));
                 }
                 else
                 {
@@ -726,7 +785,7 @@ final class FileSystemFontProvider exten
                     format = "TTF";
                     fontInfoList.add(new FSFontInfo(file, FontFormat.TTF, 
ttf.getName(), ros,
                             usWeightClass, sFamilyClass, ulCodePageRange1, 
ulCodePageRange2,
-                            macStyle, panose, this));
+                            macStyle, panose, this, hash, 
file.lastModified()));
                 }
 
                 if (LOG.isTraceEnabled())
@@ -742,13 +801,13 @@ final class FileSystemFontProvider exten
             }
             else
             {
-                fontInfoList.add(new FSIgnored(file, FontFormat.TTF, 
"*skipnoname*"));
+                fontInfoList.add(createFSIgnored(file, FontFormat.TTF, 
"*skipnoname*"));
                 LOG.warn("Missing 'name' entry for PostScript name in font " + 
file);
             }
         }
         catch (IOException e)
         {
-            fontInfoList.add(new FSIgnored(file, FontFormat.TTF, 
"*skipexception*"));
+            fontInfoList.add(createFSIgnored(file, FontFormat.TTF, 
"*skipexception*"));
             LOG.warn("Could not load font file: " + file, e);
         }
         finally
@@ -767,18 +826,19 @@ final class FileSystemFontProvider exten
             Type1Font type1 = Type1Font.createWithPFB(input);
             if (type1.getName() == null)
             {
-                fontInfoList.add(new FSIgnored(pfbFile, FontFormat.PFB, 
"*skipnoname*"));
+                fontInfoList.add(createFSIgnored(pfbFile, FontFormat.PFB, 
"*skipnoname*"));
                 LOG.warn("Missing 'name' entry for PostScript name in font " + 
pfbFile);
                 return;
             }
             if (type1.getName().contains("|"))
             {
-                fontInfoList.add(new FSIgnored(pfbFile, FontFormat.PFB, 
"*skippipeinname*"));
+                fontInfoList.add(createFSIgnored(pfbFile, FontFormat.PFB, 
"*skippipeinname*"));
                 LOG.warn("Skipping font with '|' in name " + type1.getName() + 
" in file " + pfbFile);
                 return;
             }
+            String hash = computeHash(Files.readAllBytes(pfbFile.toPath()));
             fontInfoList.add(new FSFontInfo(pfbFile, FontFormat.PFB, 
type1.getName(),
-                                            null, -1, -1, 0, 0, -1, null, 
this));
+                                            null, -1, -1, 0, 0, -1, null, 
this, hash, pfbFile.lastModified()));
 
             if (LOG.isTraceEnabled())
             {
@@ -813,4 +873,20 @@ final class FileSystemFontProvider exten
     {
         return fontInfoList;
     }
+
+    private static String computeHash(byte[] ba)
+    {
+        MessageDigest md;
+        try
+        {
+            md = MessageDigest.getInstance("SHA512");
+            byte[] md5 = md.digest(ba);
+            return Hex.getString(md5);
+        }
+        catch (NoSuchAlgorithmException ex)
+        {
+            // never happens
+            return "";
+        }
+    }
 }


Reply via email to