utils/HtmlOutputDev.cc | 50 ++++++++++++++++++++++++++++--------------------
1 file changed, 29 insertions(+), 21 deletions(-)
New commits:
commit ae13fd1f561125be152f3249ca87c8259b22ca6a
Author: Brian Rosenfield <[email protected]>
Date:   Tue May 31 21:21:16 2022 +0000

    Fix type 3 font size initialization in pdftohtml using font bounding box

diff --git a/utils/HtmlOutputDev.cc b/utils/HtmlOutputDev.cc
index 0fb83ba7..e0b25e09 100644
--- a/utils/HtmlOutputDev.cc
+++ b/utils/HtmlOutputDev.cc
@@ -177,7 +177,7 @@ HtmlString::HtmlString(GfxState *state, double fontSize, HtmlFontAccu *_fonts) :
         yMax = y - descent * fontSize;
         GfxRGB rgb;
         state->getFillRGB(&rgb);
-        HtmlFont hfont = HtmlFont(*font, static_cast<int>(fontSize), rgb, state->getFillOpacity());
+        HtmlFont hfont = HtmlFont(*font, std::lround(fontSize), rgb, state->getFillOpacity());
         if (isMatRotOrSkew(state->getTextMat())) {
             double normalizedMatrix[4];
             memcpy(normalizedMatrix, state->getTextMat(), sizeof(normalizedMatrix));
@@ -297,33 +297,41 @@ void HtmlPage::updateFont(GfxState *state)
 {
     const char *name;
     int code;
-    double w;
+    double dimLength;
 
     // adjust the font size
     fontSize = state->getTransformedFontSize();
     const GfxFont *const font = state->getFont().get();
     if (font && font->getType() == fontType3) {
-        // This is a hack which makes it possible to deal with some Type 3
-        // fonts. The problem is that it's impossible to know what the
-        // base coordinate system used in the font is without actually
-        // rendering the font. This code tries to guess by looking at the
-        // width of the character 'm' (which breaks if the font is a
-        // subset that doesn't contain 'm').
-        for (code = 0; code < 256; ++code) {
-            if ((name = ((Gfx8BitFont *)font)->getCharName(code)) && name[0] == 'm' && name[1] == '\0') {
-                break;
+        // Grab the font size from the font bounding box if possible - remember to
+        // scale from the glyph coordinate system.
+        const double *fontBBox = font->getFontBBox();
+        const double *fontMat = font->getFontMatrix();
+        dimLength = (fontBBox[3] - fontBBox[1]) * fontMat[3];
+        if (dimLength > 0) {
+            fontSize *= dimLength;
+        } else {
+            // This is a hack which makes it possible to deal with some Type 3
+            // fonts. The problem is that it's impossible to know what the
+            // base coordinate system used in the font is without actually
+            // rendering the font. This code tries to guess by looking at the
+            // width of the character 'm' (which breaks if the font is a
+            // subset that doesn't contain 'm').
+            for (code = 0; code < 256; ++code) {
+                if ((name = ((Gfx8BitFont *)font)->getCharName(code)) && name[0] == 'm' && name[1] == '\0') {
+                    break;
+                }
             }
-        }
-        if (code < 256) {
-            w = ((Gfx8BitFont *)font)->getWidth(code);
-            if (w != 0) {
-                // 600 is a generic average 'm' width -- yes, this is a hack
-                fontSize *= w / 0.6;
+            if (code < 256) {
+                dimLength = ((Gfx8BitFont *)font)->getWidth(code);
+                if (dimLength != 0) {
+                    // 600 is a generic average 'm' width -- yes, this is a hack
+                    fontSize *= dimLength / 0.6;
+                }
+            }
+            if (fontMat[0] != 0) {
+                fontSize *= fabs(fontMat[3] / fontMat[0]);
             }
-        }
-        const double *fm = font->getFontMatrix();
-        if (fm[0] != 0) {
-            fontSize *= fabs(fm[3] / fm[0]);
         }
     }
 }
