Author: tilman Date: Sun Sep 24 08:56:45 2023 New Revision: 1912512 URL: http://svn.apache.org/viewvc?rev=1912512&view=rev Log: PDFBOX-5684: add hash and lastModified to cache file to avoid parsing bad fonts twice unless they have changed
Modified: pdfbox/branches/3.0/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/FileSystemFontProvider.java Modified: pdfbox/branches/3.0/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/FileSystemFontProvider.java URL: http://svn.apache.org/viewvc/pdfbox/branches/3.0/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/FileSystemFontProvider.java?rev=1912512&r1=1912511&r2=1912512&view=diff ============================================================================== --- pdfbox/branches/3.0/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/FileSystemFontProvider.java (original) +++ pdfbox/branches/3.0/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/FileSystemFontProvider.java Sun Sep 24 08:56:45 2023 @@ -26,11 +26,15 @@ import java.io.IOException; import java.io.InputStream; import java.net.URI; import java.nio.charset.StandardCharsets; +import java.nio.file.Files; import java.security.AccessControlException; +import java.security.MessageDigest; +import java.security.NoSuchAlgorithmException; import java.util.ArrayList; import java.util.HashSet; import java.util.List; import java.util.Set; + import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.fontbox.FontBoxFont; @@ -45,7 +49,9 @@ import org.apache.fontbox.ttf.TrueTypeCo import org.apache.fontbox.ttf.TrueTypeFont; import org.apache.fontbox.type1.Type1Font; import org.apache.fontbox.util.autodetect.FontFileFinder; +import org.apache.pdfbox.io.IOUtils; import org.apache.pdfbox.io.RandomAccessReadBufferedFile; +import org.apache.pdfbox.util.Hex; /** * A FontProvider which searches for fonts on the local filesystem. @@ -72,11 +78,13 @@ final class FileSystemFontProvider exten private final PDPanoseClassification panose; private final File file; private final FileSystemFontProvider parent; + private final String hash; + private final long lastModified; private FSFontInfo(File file, FontFormat format, String postScriptName, CIDSystemInfo cidSystemInfo, int usWeightClass, int sFamilyClass, int ulCodePageRange1, int ulCodePageRange2, int macStyle, byte[] panose, - FileSystemFontProvider parent) + FileSystemFontProvider parent, String hash, long lastModified) { this.file = file; this.format = format; @@ -90,6 +98,8 @@ final class FileSystemFontProvider exten this.panose = panose != null && panose.length >= PDPanoseClassification.LENGTH ? new PDPanoseClassification(panose) : null; this.parent = parent; + this.hash = hash; + this.lastModified = lastModified; } @Override @@ -183,7 +193,7 @@ final class FileSystemFontProvider exten @Override public String toString() { - return super.toString() + " " + file; + return super.toString() + " " + file + " " + hash + " " + lastModified; } private TrueTypeFont getTrueTypeFont(String postScriptName, File file) @@ -302,15 +312,18 @@ final class FileSystemFontProvider exten } } - /** - * Represents ignored fonts (i.e. bitmap fonts). - */ - private static final class FSIgnored extends FSFontInfo + private FSFontInfo createFSIgnored(File file, FontFormat format, String postScriptName) { - private FSIgnored(File file, FontFormat format, String postScriptName) + String hash; + try { - super(file, format, postScriptName, null, 0, 0, 0, 0, 0, null, null); + hash = computeHash(Files.readAllBytes(file.toPath())); } + catch (IOException ex) + { + hash = ""; + } + return new FSFontInfo(file, format, postScriptName, null, 0, 0, 0, 0, 0, null, null, hash, file.lastModified()); } /** @@ -451,8 +464,8 @@ final class FileSystemFontProvider exten if (fontInfo.cidSystemInfo != null) { writer.write(fontInfo.cidSystemInfo.getRegistry() + '-' + - fontInfo.cidSystemInfo.getOrdering() + '-' + - fontInfo.cidSystemInfo.getSupplement()); + fontInfo.cidSystemInfo.getOrdering() + '-' + + fontInfo.cidSystemInfo.getSupplement()); } writer.write("|"); if (fontInfo.usWeightClass > -1) @@ -489,6 +502,10 @@ final class FileSystemFontProvider exten } writer.write("|"); writer.write(fontInfo.file.getAbsolutePath()); + writer.write("|"); + writer.write(fontInfo.hash); + writer.write("|"); + writer.write(Long.toString(fontInfo.file.lastModified())); writer.newLine(); } @@ -506,12 +523,12 @@ final class FileSystemFontProvider exten List<FSFontInfo> results = new ArrayList<>(); // Get the disk cache - File file = null; + File diskCacheFile = null; boolean fileExists = false; try { - file = getDiskCacheFile(); - fileExists = file.exists(); + diskCacheFile = getDiskCacheFile(); + fileExists = diskCacheFile.exists(); } catch (SecurityException e) { @@ -520,12 +537,12 @@ final class FileSystemFontProvider exten if (fileExists) { - try (BufferedReader reader = new BufferedReader(new FileReader(file))) + try (BufferedReader reader = new BufferedReader(new FileReader(diskCacheFile))) { String line; while ((line = reader.readLine()) != null) { - String[] parts = line.split("\\|", 10); + String[] parts = line.split("\\|", 12); if (parts.length < 10) { LOG.warn("Incorrect line '" + line + "' in font disk cache is skipped"); @@ -542,6 +559,8 @@ final class FileSystemFontProvider exten int macStyle = -1; byte[] panose = null; File fontFile; + String hash = ""; + long lastModified = 0; postScriptName = parts[0]; format = FontFormat.valueOf(parts[1]); @@ -575,12 +594,42 @@ final class FileSystemFontProvider exten } } fontFile = new File(parts[9]); + if (parts.length >= 12 && !parts[10].isEmpty() && !parts[11].isEmpty()) + { + hash = parts[10]; + lastModified = Long.parseLong(parts[11]); + } if (fontFile.exists()) { - FSFontInfo info = new FSFontInfo(fontFile, format, postScriptName, - cidSystemInfo, usWeightClass, sFamilyClass, ulCodePageRange1, - ulCodePageRange2, macStyle, panose, this); - results.add(info); + boolean keep = false; + // if the file exists, find out whether it's the same file. + // first check whether time is different and if yes, whether hash is different + if (fontFile.lastModified() != lastModified) + { + String newHash = computeHash(Files.readAllBytes(fontFile.toPath())); + if (newHash.equals(hash)) + { + keep = true; + lastModified = fontFile.lastModified(); + hash = newHash; + } + } + else + { + keep = true; + } + if (keep) + { + FSFontInfo info = new FSFontInfo(fontFile, format, postScriptName, + cidSystemInfo, usWeightClass, sFamilyClass, ulCodePageRange1, + ulCodePageRange2, macStyle, panose, this, hash, lastModified); + results.add(info); + } + else + { + LOG.debug("Font file " + fontFile.getAbsolutePath() + " is different"); + continue; // don't remove from "pending" + } } else { @@ -599,7 +648,7 @@ final class FileSystemFontProvider exten if (!pending.isEmpty()) { // re-build the entire cache if we encounter un-cached fonts (could be optimised) - LOG.warn("New fonts found, font cache will be re-built"); + LOG.warn(pending.size() + " new fonts found, font cache will be re-built"); return null; } @@ -626,16 +675,19 @@ final class FileSystemFontProvider exten */ private void addTrueTypeFont(File ttfFile) throws IOException { + FontFormat fontFormat = null; try { if (ttfFile.getPath().toLowerCase().endsWith(".otf")) { + fontFormat = FontFormat.OTF; OTFParser parser = new OTFParser(false); OpenTypeFont otf = parser.parse(new RandomAccessReadBufferedFile(ttfFile)); addTrueTypeFontImpl(otf, ttfFile); } else { + fontFormat = FontFormat.TTF; TTFParser parser = new TTFParser(false); TrueTypeFont ttf = parser.parse(new RandomAccessReadBufferedFile(ttfFile)); addTrueTypeFontImpl(ttf, ttfFile); @@ -644,6 +696,7 @@ final class FileSystemFontProvider exten catch (IOException e) { LOG.warn("Could not load font file: " + ttfFile, e); + fontInfoList.add(createFSIgnored(ttfFile, fontFormat, "*skipexception*")); } } @@ -657,7 +710,7 @@ final class FileSystemFontProvider exten // read PostScript name, if any if (ttf.getName() != null && ttf.getName().contains("|")) { - fontInfoList.add(new FSIgnored(file, FontFormat.TTF, "*skippipeinname*")); + fontInfoList.add(createFSIgnored(file, FontFormat.TTF, "*skippipeinname*")); LOG.warn("Skipping font with '|' in name " + ttf.getName() + " in file " + file); } else if (ttf.getName() != null) @@ -665,7 +718,7 @@ final class FileSystemFontProvider exten // ignore bitmap fonts if (ttf.getHeader() == null) { - fontInfoList.add(new FSIgnored(file, FontFormat.TTF, ttf.getName())); + fontInfoList.add(createFSIgnored(file, FontFormat.TTF, ttf.getName())); return; } int macStyle = ttf.getHeader().getMacStyle(); @@ -686,6 +739,12 @@ final class FileSystemFontProvider exten panose = os2WindowsMetricsTable.getPanose(); } + byte[] ba; + try (InputStream is = ttf.getOriginalData()) + { + ba = IOUtils.toByteArray(is); + } + String hash = computeHash(ba); String format; if (ttf instanceof OpenTypeFont && ((OpenTypeFont) ttf).isPostScript()) { @@ -706,7 +765,7 @@ final class FileSystemFontProvider exten } fontInfoList.add(new FSFontInfo(file, FontFormat.OTF, ttf.getName(), ros, usWeightClass, sFamilyClass, ulCodePageRange1, ulCodePageRange2, - macStyle, panose, this)); + macStyle, panose, this, hash, file.lastModified())); } else { @@ -726,7 +785,7 @@ final class FileSystemFontProvider exten format = "TTF"; fontInfoList.add(new FSFontInfo(file, FontFormat.TTF, ttf.getName(), ros, usWeightClass, sFamilyClass, ulCodePageRange1, ulCodePageRange2, - macStyle, panose, this)); + macStyle, panose, this, hash, file.lastModified())); } if (LOG.isTraceEnabled()) @@ -742,13 +801,13 @@ final class FileSystemFontProvider exten } else { - fontInfoList.add(new FSIgnored(file, FontFormat.TTF, "*skipnoname*")); + fontInfoList.add(createFSIgnored(file, FontFormat.TTF, "*skipnoname*")); LOG.warn("Missing 'name' entry for PostScript name in font " + file); } } catch (IOException e) { - fontInfoList.add(new FSIgnored(file, FontFormat.TTF, "*skipexception*")); + fontInfoList.add(createFSIgnored(file, FontFormat.TTF, "*skipexception*")); LOG.warn("Could not load font file: " + file, e); } finally @@ -767,18 +826,19 @@ final class FileSystemFontProvider exten Type1Font type1 = Type1Font.createWithPFB(input); if (type1.getName() == null) { - fontInfoList.add(new FSIgnored(pfbFile, FontFormat.PFB, "*skipnoname*")); + fontInfoList.add(createFSIgnored(pfbFile, FontFormat.PFB, "*skipnoname*")); LOG.warn("Missing 'name' entry for PostScript name in font " + pfbFile); return; } if (type1.getName().contains("|")) { - fontInfoList.add(new FSIgnored(pfbFile, FontFormat.PFB, "*skippipeinname*")); + fontInfoList.add(createFSIgnored(pfbFile, FontFormat.PFB, "*skippipeinname*")); LOG.warn("Skipping font with '|' in name " + type1.getName() + " in file " + pfbFile); return; } + String hash = computeHash(Files.readAllBytes(pfbFile.toPath())); fontInfoList.add(new FSFontInfo(pfbFile, FontFormat.PFB, type1.getName(), - null, -1, -1, 0, 0, -1, null, this)); + null, -1, -1, 0, 0, -1, null, this, hash, pfbFile.lastModified())); if (LOG.isTraceEnabled()) { @@ -813,4 +873,20 @@ final class FileSystemFontProvider exten { return fontInfoList; } + + private static String computeHash(byte[] ba) + { + MessageDigest md; + try + { + md = MessageDigest.getInstance("SHA512"); + byte[] md5 = md.digest(ba); + return Hex.getString(md5); + } + catch (NoSuchAlgorithmException ex) + { + // never happens + return ""; + } + } }