Author: tilman Date: Sun Sep 24 08:56:49 2023 New Revision: 1912513 URL: http://svn.apache.org/viewvc?rev=1912513&view=rev Log: PDFBOX-5684: add hash and lastModified to cache file to avoid parsing bad fonts twice unless they have changed
Modified: pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/FileSystemFontProvider.java Modified: pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/FileSystemFontProvider.java URL: http://svn.apache.org/viewvc/pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/FileSystemFontProvider.java?rev=1912513&r1=1912512&r2=1912513&view=diff ============================================================================== --- pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/FileSystemFontProvider.java (original) +++ pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/FileSystemFontProvider.java Sun Sep 24 08:56:49 2023 @@ -26,10 +26,13 @@ import java.io.IOException; import java.io.InputStream; import java.net.URI; import java.security.AccessControlException; +import java.security.MessageDigest; +import java.security.NoSuchAlgorithmException; import java.util.ArrayList; import java.util.HashSet; import java.util.List; import java.util.Set; + import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.fontbox.FontBoxFont; @@ -47,6 +50,7 @@ import org.apache.fontbox.type1.Type1Fon import org.apache.fontbox.util.autodetect.FontFileFinder; import org.apache.pdfbox.io.IOUtils; import org.apache.pdfbox.util.Charsets; +import org.apache.pdfbox.util.Hex; /** * A FontProvider which searches for fonts on the local filesystem. @@ -73,11 +77,13 @@ final class FileSystemFontProvider exten private final PDPanoseClassification panose; private final File file; private final FileSystemFontProvider parent; + private final String hash; + private final long lastModified; private FSFontInfo(File file, FontFormat format, String postScriptName, CIDSystemInfo cidSystemInfo, int usWeightClass, int sFamilyClass, int ulCodePageRange1, int ulCodePageRange2, int macStyle, byte[] panose, - FileSystemFontProvider parent) + FileSystemFontProvider parent, String hash, long lastModified) { this.file = file; this.format = format; @@ -91,6 +97,8 @@ final class FileSystemFontProvider exten this.panose = panose != null && panose.length >= PDPanoseClassification.LENGTH ? new PDPanoseClassification(panose) : null; this.parent = parent; + this.hash = hash; + this.lastModified = lastModified; } @Override @@ -184,7 +192,7 @@ final class FileSystemFontProvider exten @Override public String toString() { - return super.toString() + " " + file; + return super.toString() + " " + file + " " + hash + " " + lastModified; } private TrueTypeFont getTrueTypeFont(String postScriptName, File file) @@ -309,15 +317,18 @@ final class FileSystemFontProvider exten } } - /** - * Represents ignored fonts (i.e. bitmap fonts). - */ - private static final class FSIgnored extends FSFontInfo + private FSFontInfo createFSIgnored(File file, FontFormat format, String postScriptName) { - private FSIgnored(File file, FontFormat format, String postScriptName) + String hash; + try { - super(file, format, postScriptName, null, 0, 0, 0, 0, 0, null, null); + hash = computeHash(readAllBytes(file)); } + catch (IOException ex) + { + hash = ""; + } + return new FSFontInfo(file, format, postScriptName, null, 0, 0, 0, 0, 0, null, null, hash, file.lastModified()); } /** @@ -437,52 +448,7 @@ final class FileSystemFontProvider exten for (FSFontInfo fontInfo : fontInfoList) { - writer.write(fontInfo.postScriptName.trim()); - writer.write("|"); - writer.write(fontInfo.format.toString()); - writer.write("|"); - if (fontInfo.cidSystemInfo != null) - { - writer.write(fontInfo.cidSystemInfo.getRegistry() + '-' + - fontInfo.cidSystemInfo.getOrdering() + '-' + - fontInfo.cidSystemInfo.getSupplement()); - } - writer.write("|"); - if (fontInfo.usWeightClass > -1) - { - writer.write(Integer.toHexString(fontInfo.usWeightClass)); - } - writer.write("|"); - if (fontInfo.sFamilyClass > -1) - { - writer.write(Integer.toHexString(fontInfo.sFamilyClass)); - } - writer.write("|"); - writer.write(Integer.toHexString(fontInfo.ulCodePageRange1)); - writer.write("|"); - writer.write(Integer.toHexString(fontInfo.ulCodePageRange2)); - writer.write("|"); - if (fontInfo.macStyle > -1) - { - writer.write(Integer.toHexString(fontInfo.macStyle)); - } - writer.write("|"); - if (fontInfo.panose != null) - { - byte[] bytes = fontInfo.panose.getBytes(); - for (int i = 0; i < 10; i ++) - { - String str = Integer.toHexString(bytes[i]); - if (str.length() == 1) - { - writer.write('0'); - } - writer.write(str); - } - } - writer.write("|"); - writer.write(fontInfo.file.getAbsolutePath()); - writer.newLine(); + writeFontInfo(writer, fontInfo); } } catch (IOException e) @@ -497,6 +463,60 @@ final class FileSystemFontProvider exten } } + private void writeFontInfo(BufferedWriter writer, FSFontInfo fontInfo) throws IOException + { + writer.write(fontInfo.postScriptName.trim()); + writer.write("|"); + writer.write(fontInfo.format.toString()); + writer.write("|"); + if (fontInfo.cidSystemInfo != null) + { + writer.write(fontInfo.cidSystemInfo.getRegistry() + '-' + + fontInfo.cidSystemInfo.getOrdering() + '-' + + fontInfo.cidSystemInfo.getSupplement()); + } + writer.write("|"); + if (fontInfo.usWeightClass > -1) + { + writer.write(Integer.toHexString(fontInfo.usWeightClass)); + } + writer.write("|"); + if (fontInfo.sFamilyClass > -1) + { + writer.write(Integer.toHexString(fontInfo.sFamilyClass)); + } + writer.write("|"); + writer.write(Integer.toHexString(fontInfo.ulCodePageRange1)); + writer.write("|"); + writer.write(Integer.toHexString(fontInfo.ulCodePageRange2)); + writer.write("|"); + if (fontInfo.macStyle > -1) + { + writer.write(Integer.toHexString(fontInfo.macStyle)); + } + writer.write("|"); + if (fontInfo.panose != null) + { + byte[] bytes = fontInfo.panose.getBytes(); + for (int i = 0; i < 10; i ++) + { + String str = Integer.toHexString(bytes[i]); + if (str.length() == 1) + { + writer.write('0'); + } + writer.write(str); + } + } + writer.write("|"); + writer.write(fontInfo.file.getAbsolutePath()); + writer.write("|"); + writer.write(fontInfo.hash); + writer.write("|"); + writer.write(Long.toString(fontInfo.file.lastModified())); + writer.newLine(); + } + /** * Loads the font metadata cache from disk. */ @@ -511,15 +531,16 @@ final class FileSystemFontProvider exten List<FSFontInfo> results = new ArrayList<FSFontInfo>(); // Get the disk cache - File file = null; + File diskCacheFile = null; boolean fileExists = false; try { - file = getDiskCacheFile(); - fileExists = file.exists(); + diskCacheFile = getDiskCacheFile(); + fileExists = diskCacheFile.exists(); } catch (SecurityException e) { + LOG.debug("Error checking for file existence", e); } if (fileExists) @@ -527,11 +548,11 @@ final class FileSystemFontProvider exten BufferedReader reader = null; try { - reader = new BufferedReader(new FileReader(file)); + reader = new BufferedReader(new FileReader(diskCacheFile)); String line; while ((line = reader.readLine()) != null) { - String[] parts = line.split("\\|", 10); + String[] parts = line.split("\\|", 12); if (parts.length < 10) { LOG.warn("Incorrect line '" + line + "' in font disk cache is skipped"); @@ -548,6 +569,8 @@ final class FileSystemFontProvider exten int macStyle = -1; byte[] panose = null; File fontFile; + String hash = ""; + long lastModified = 0; postScriptName = parts[0]; format = FontFormat.valueOf(parts[1]); @@ -581,12 +604,42 @@ final class FileSystemFontProvider exten } } fontFile = new File(parts[9]); + if (parts.length >= 12 && !parts[10].isEmpty() && !parts[11].isEmpty()) + { + hash = parts[10]; + lastModified = Long.parseLong(parts[11]); + } if (fontFile.exists()) { - FSFontInfo info = new FSFontInfo(fontFile, format, postScriptName, - cidSystemInfo, usWeightClass, sFamilyClass, ulCodePageRange1, - ulCodePageRange2, macStyle, panose, this); - results.add(info); + boolean keep = false; + // if the file exists, find out whether it's the same file. + // first check whether time is different and if yes, whether hash is different + if (fontFile.lastModified() != lastModified) + { + String newHash = computeHash(readAllBytes(fontFile)); + if (newHash.equals(hash)) + { + keep = true; + lastModified = fontFile.lastModified(); + hash = newHash; + } + } + else + { + keep = true; + } + if (keep) + { + FSFontInfo info = new FSFontInfo(fontFile, format, postScriptName, + cidSystemInfo, usWeightClass, sFamilyClass, ulCodePageRange1, + ulCodePageRange2, macStyle, panose, this, hash, lastModified); + results.add(info); + } + else + { + LOG.debug("Font file " + fontFile.getAbsolutePath() + " is different"); + continue; // don't remove from "pending" + } } else { @@ -609,7 +662,7 @@ final class FileSystemFontProvider exten if (!pending.isEmpty()) { // re-build the entire cache if we encounter un-cached fonts (could be optimised) - LOG.warn("New fonts found, font cache will be re-built"); + LOG.warn(pending.size() + " new fonts found, font cache will be re-built"); return null; } @@ -652,16 +705,19 @@ final class FileSystemFontProvider exten */ private void addTrueTypeFont(File ttfFile) throws IOException { + FontFormat fontFormat = null; try { if (ttfFile.getPath().toLowerCase().endsWith(".otf")) { + fontFormat = FontFormat.OTF; OTFParser parser = new OTFParser(false, true); OpenTypeFont otf = parser.parse(ttfFile); addTrueTypeFontImpl(otf, ttfFile); } else { + fontFormat = FontFormat.TTF; TTFParser parser = new TTFParser(false, true); TrueTypeFont ttf = parser.parse(ttfFile); addTrueTypeFontImpl(ttf, ttfFile); @@ -670,6 +726,7 @@ final class FileSystemFontProvider exten catch (IOException e) { LOG.warn("Could not load font file: " + ttfFile, e); + fontInfoList.add(createFSIgnored(ttfFile, fontFormat, "*skipexception*")); } } @@ -683,7 +740,7 @@ final class FileSystemFontProvider exten // read PostScript name, if any if (ttf.getName() != null && ttf.getName().contains("|")) { - fontInfoList.add(new FSIgnored(file, FontFormat.TTF, "*skippipeinname*")); + fontInfoList.add(createFSIgnored(file, FontFormat.TTF, "*skippipeinname*")); LOG.warn("Skipping font with '|' in name " + ttf.getName() + " in file " + file); } else if (ttf.getName() != null) @@ -691,7 +748,7 @@ final class FileSystemFontProvider exten // ignore bitmap fonts if (ttf.getHeader() == null) { - fontInfoList.add(new FSIgnored(file, FontFormat.TTF, ttf.getName())); + fontInfoList.add(createFSIgnored(file, FontFormat.TTF, ttf.getName())); return; } int macStyle = ttf.getHeader().getMacStyle(); @@ -712,6 +769,11 @@ final class FileSystemFontProvider exten panose = os2WindowsMetricsTable.getPanose(); } + InputStream is = ttf.getOriginalData(); + byte[] ba = IOUtils.toByteArray(is); + is.close(); + String hash = computeHash(ba); + String format; if (ttf instanceof OpenTypeFont && ((OpenTypeFont) ttf).isPostScript()) { @@ -732,7 +794,7 @@ final class FileSystemFontProvider exten } fontInfoList.add(new FSFontInfo(file, FontFormat.OTF, ttf.getName(), ros, usWeightClass, sFamilyClass, ulCodePageRange1, ulCodePageRange2, - macStyle, panose, this)); + macStyle, panose, this, hash, file.lastModified())); } else { @@ -752,7 +814,7 @@ final class FileSystemFontProvider exten format = "TTF"; fontInfoList.add(new FSFontInfo(file, FontFormat.TTF, ttf.getName(), ros, usWeightClass, sFamilyClass, ulCodePageRange1, ulCodePageRange2, - macStyle, panose, this)); + macStyle, panose, this, hash, file.lastModified())); } if (LOG.isTraceEnabled()) @@ -768,13 +830,13 @@ final class FileSystemFontProvider exten } else { - fontInfoList.add(new FSIgnored(file, FontFormat.TTF, "*skipnoname*")); + fontInfoList.add(createFSIgnored(file, FontFormat.TTF, "*skipnoname*")); LOG.warn("Missing 'name' entry for PostScript name in font " + file); } } catch (IOException e) { - fontInfoList.add(new FSIgnored(file, FontFormat.TTF, "*skipexception*")); + fontInfoList.add(createFSIgnored(file, FontFormat.TTF, "*skipexception*")); LOG.warn("Could not load font file: " + file, e); } finally @@ -794,18 +856,19 @@ final class FileSystemFontProvider exten Type1Font type1 = Type1Font.createWithPFB(input); if (type1.getName() == null) { - fontInfoList.add(new FSIgnored(pfbFile, FontFormat.PFB, "*skipnoname*")); + fontInfoList.add(createFSIgnored(pfbFile, FontFormat.PFB, "*skipnoname*")); LOG.warn("Missing 'name' entry for PostScript name in font " + pfbFile); return; } if (type1.getName().contains("|")) { - fontInfoList.add(new FSIgnored(pfbFile, FontFormat.PFB, "*skippipeinname*")); + fontInfoList.add(createFSIgnored(pfbFile, FontFormat.PFB, "*skippipeinname*")); LOG.warn("Skipping font with '|' in name " + type1.getName() + " in file " + pfbFile); return; } + String hash = computeHash(readAllBytes(pfbFile)); fontInfoList.add(new FSFontInfo(pfbFile, FontFormat.PFB, type1.getName(), - null, -1, -1, 0, 0, -1, null, this)); + null, -1, -1, 0, 0, -1, null, this, hash, pfbFile.lastModified())); if (LOG.isTraceEnabled()) { @@ -844,4 +907,34 @@ final class FileSystemFontProvider exten { return fontInfoList; } + + private static byte[] readAllBytes(File file) throws IOException + { + InputStream is = null; + try + { + is = new FileInputStream(file); + return IOUtils.toByteArray(is); + } + finally + { + IOUtils.closeQuietly(is); + } + } + + private static String computeHash(byte[] ba) + { + MessageDigest md; + try + { + md = MessageDigest.getInstance("SHA512"); + byte[] md5 = md.digest(ba); + return Hex.getString(md5); + } + catch (NoSuchAlgorithmException ex) + { + // never happens + return ""; + } + } }