configure.ac                         |    2 
 vcl/inc/sft.hxx                      |    6 
 vcl/source/font/PhysicalFontFace.cxx |  220 ++++++++++++++++++++++++++---------
 vcl/source/fontsubset/sft.cxx        |    2 
 vcl/source/pdf/pdfwriter_impl.cxx    |   33 +++--
 5 files changed, 198 insertions(+), 65 deletions(-)

New commits:
commit 76095f9252f8e88e995415ac94ce9920df539086
Author:     Khaled Hosny <[email protected]>
AuthorDate: Tue Feb 17 21:32:47 2026 +0200
Commit:     Khaled Hosny <[email protected]>
CommitDate: Fri Feb 20 22:28:55 2026 +0100

    Use HarfBuzz subsetter to subset fonts for PDF
    
    This gives us a more up-to-date subsetter and would allow dropping most
    of our low-level, CVE-happy, font subsetting code.
    
    Update required HarfBuzz version to the version that provides all new
    APIs we are now using.
    
    Change-Id: I774ddf4f7af448a3cf17b64f89e8b1f36097775a
    Reviewed-on: https://gerrit.libreoffice.org/c/core/+/199572
    Reviewed-by: Khaled Hosny <[email protected]>
    Tested-by: Jenkins

diff --git a/configure.ac b/configure.ac
index d18caa997ba8..9d02bbce9900 100644
--- a/configure.ac
+++ b/configure.ac
@@ -11679,7 +11679,7 @@ AC_SUBST(SYSTEM_LIBORCUS)
 dnl ===================================================================
 dnl HarfBuzz
 dnl ===================================================================
-harfbuzz_required_version=5.1.0
+harfbuzz_required_version=7.3.0
 
 GRAPHITE_CFLAGS_internal="-I${WORKDIR}/UnpackedTarball/graphite/include 
-DGRAPHITE2_STATIC"
 HARFBUZZ_CFLAGS_internal="-I${WORKDIR}/UnpackedTarball/harfbuzz/src"
diff --git a/vcl/inc/sft.hxx b/vcl/inc/sft.hxx
index 324d9994e0ff..a196ff2358f0 100644
--- a/vcl/inc/sft.hxx
+++ b/vcl/inc/sft.hxx
@@ -582,6 +582,12 @@ class TrueTypeFont;
                               const sal_uInt8* pEncoding,
                               int nGlyphCount, FontSubsetInfo& rInfo);
 
+    bool CreateCFFfontSubset(const unsigned char* pFontBytes, int nByteLength,
+                              std::vector<sal_uInt8>& rOutBuffer,
+                              const sal_GlyphId* pGlyphIds,
+                              const sal_uInt8* pEncoding,
+                              int nGlyphCount, FontSubsetInfo& rInfo);
+
 /**
  * Returns global font information about the TrueType font.
  * @see TTGlobalFontInfo
diff --git a/vcl/source/font/PhysicalFontFace.cxx 
b/vcl/source/font/PhysicalFontFace.cxx
index 5a666b01322f..35205a1eab81 100644
--- a/vcl/source/font/PhysicalFontFace.cxx
+++ b/vcl/source/font/PhysicalFontFace.cxx
@@ -21,6 +21,7 @@
 
 #include <sal/types.h>
 #include <tools/fontenum.hxx>
+#include <tools/stream.hxx>
 #include <unotools/fontdefs.hxx>
 #include <osl/file.hxx>
 #include <osl/thread.h>
@@ -33,10 +34,12 @@
 #include <font/FontSelectPattern.hxx>
 #include <font/PhysicalFontFace.hxx>
 #include <o3tl/string_view.hxx>
+#include <comphelper/scopeguard.hxx>
 
 #include <string_view>
 
 #include <hb-ot.h>
+#include <hb-subset.h>
 
 namespace vcl::font
 {
@@ -294,77 +297,190 @@ bool 
PhysicalFontFace::GetFontCapabilities(vcl::FontCapabilities& rFontCapabilit
     return rFontCapabilities.oUnicodeRange || rFontCapabilities.oCodePageRange;
 }
 
-namespace
-{
-class RawFace
+// These are “private” HarfBuzz metrics tags; they are supported but not exposed
+// in the public header. They are safe to use, HarfBuzz just does not want to
+// advertise them.
+constexpr auto ASCENT_OS2 = static_cast<hb_ot_metrics_tag_t>(HB_TAG('O', 'a', 
's', 'c'));
+constexpr auto DESCENT_OS2 = static_cast<hb_ot_metrics_tag_t>(HB_TAG('O', 'd', 
's', 'c'));
+constexpr auto ASCENT_HHEA = static_cast<hb_ot_metrics_tag_t>(HB_TAG('H', 'a', 
's', 'c'));
+constexpr auto DESCENT_HHEA = static_cast<hb_ot_metrics_tag_t>(HB_TAG('H', 
'd', 's', 'c'));
+
+bool PhysicalFontFace::CreateFontSubset(std::vector<sal_uInt8>& rOutBuffer,
+                                        const sal_GlyphId* pGlyphIds, const 
sal_uInt8* pEncoding,
+                                        const int nGlyphCount, FontSubsetInfo& 
rInfo) const
 {
-public:
-    RawFace(hb_face_t* pFace)
-        : mpFace(hb_face_reference(pFace))
-    {
-    }
+    // Create subset input
+    hb_subset_input_t* pInput = hb_subset_input_create_or_fail();
+    comphelper::ScopeGuard aInputGuard([&]() { 
hb_subset_input_destroy(pInput); });
+    if (!pInput)
+        return false;
 
-    RawFace(const RawFace& rOther)
-        : mpFace(hb_face_reference(rOther.mpFace))
+    // Add the requested glyph IDs to the subset input, and set up
+    // old-to-new glyph ID mapping so that each glyph appears at the
+    // GID position matching its encoding byte.
+    hb_set_t* pGlyphSet = hb_subset_input_glyph_set(pInput);
+    hb_map_t* pGlyphMap = hb_subset_input_old_to_new_glyph_mapping(pInput);
+    for (int i = 0; i < nGlyphCount; ++i)
     {
+        hb_set_add(pGlyphSet, pGlyphIds[i]);
+        hb_map_set(pGlyphMap, pGlyphIds[i], pEncoding[i]);
     }
 
-    ~RawFace() { hb_face_destroy(mpFace); }
+    // Keep only tables needed for PDF embedding, drop everything else.
+    // By default hb-subset keeps many tables; we use the DROP_TABLE set to
+    // remove all tables we don't need.
+    static constexpr hb_tag_t aKeepTables[] = {
+        HB_TAG('h', 'e', 'a', 'd'), HB_TAG('h', 'h', 'e', 'a'), HB_TAG('h', 
'm', 't', 'x'),
+        HB_TAG('l', 'o', 'c', 'a'), HB_TAG('m', 'a', 'x', 'p'), HB_TAG('g', 
'l', 'y', 'f'),
+        HB_TAG('C', 'F', 'F', ' '), HB_TAG('p', 'o', 's', 't'), HB_TAG('n', 
'a', 'm', 'e'),
+        HB_TAG('O', 'S', '/', '2'), HB_TAG('c', 'v', 't', ' '), HB_TAG('f', 
'p', 'g', 'm'),
+        HB_TAG('p', 'r', 'e', 'p'),
+    };
+
+    hb_set_t* pDropTableSet = hb_subset_input_set(pInput, 
HB_SUBSET_SETS_DROP_TABLE_TAG);
+    // Drop all tables except the ones we need
+    hb_set_invert(pDropTableSet);
+    for (auto nKeep : aKeepTables)
+        hb_set_del(pDropTableSet, nKeep);
+
+    // Perform the subsetting
+    hb_face_t* pSubsetFace = hb_subset_or_fail(GetHbFace(), pInput);
+    comphelper::ScopeGuard aSubsetFaceGuard([&]() { 
hb_face_destroy(pSubsetFace); });
+    if (!pSubsetFace)
+        return false;
 
-    RawFontData GetTable(uint32_t nTag) const
+    // Fill FontSubsetInfo
+    rInfo.m_aPSName = GetName(NAME_ID_POSTSCRIPT_NAME);
+
+    auto nUPEM = UnitsPerEm();
+
+    hb_font_t* pSubsetFont = hb_font_create(pSubsetFace);
+    comphelper::ScopeGuard aSubsetFontGuard([&]() { 
hb_font_destroy(pSubsetFont); });
+    hb_position_t nAscent, nDescent, nCapHeight;
+    // Try hhea first, then OS/2 similar to old FillFontSubsetInfo()
+    if (hb_ot_metrics_get_position(pSubsetFont, ASCENT_HHEA, &nAscent)
+        || hb_ot_metrics_get_position(pSubsetFont, ASCENT_OS2, &nAscent))
+        rInfo.m_nAscent = XUnits(nUPEM, nAscent);
+    if (hb_ot_metrics_get_position(pSubsetFont, DESCENT_HHEA, &nDescent)
+        || hb_ot_metrics_get_position(pSubsetFont, DESCENT_OS2, &nDescent))
+        rInfo.m_nDescent = XUnits(nUPEM, -nDescent);
+    if (hb_ot_metrics_get_position(pSubsetFont, HB_OT_METRICS_TAG_CAP_HEIGHT, 
&nCapHeight))
+        rInfo.m_nCapHeight = XUnits(nUPEM, nCapHeight);
+
+    hb_blob_t* pHeadBlob = hb_face_reference_table(pSubsetFace, HB_TAG('h', 
'e', 'a', 'd'));
+    comphelper::ScopeGuard aHeadBlobGuard([&]() { hb_blob_destroy(pHeadBlob); 
});
+
+    unsigned int nHeadLen;
+    const char* pHead = hb_blob_get_data(pHeadBlob, &nHeadLen);
+    SvMemoryStream aStream(const_cast<char*>(pHead), nHeadLen, 
StreamMode::READ);
+    // Font data are big endian.
+    aStream.SetEndian(SvStreamEndian::BIG);
+    if (aStream.Seek(vcl::HEAD_yMax_offset) == vcl::HEAD_yMax_offset)
     {
-        return RawFontData(hb_face_reference_table(mpFace, nTag));
+        sal_Int16 xMin, yMin, xMax, yMax;
+        aStream.Seek(vcl::HEAD_xMin_offset);
+        aStream.ReadInt16(xMin);
+        aStream.ReadInt16(yMin);
+        aStream.ReadInt16(xMax);
+        aStream.ReadInt16(yMax);
+        rInfo.m_aFontBBox = tools::Rectangle(Point(XUnits(nUPEM, xMin), 
XUnits(nUPEM, yMin)),
+                                             Point(XUnits(nUPEM, xMax), 
XUnits(nUPEM, yMax)));
     }
 
-private:
-    hb_face_t* mpFace;
-};
+    rInfo.m_bFilled = true;
 
-class TrueTypeFace final : public AbstractTrueTypeFont
-{
-    const RawFace m_aFace;
-    mutable std::array<RawFontData, NUM_TAGS> m_aTableList;
+    hb_blob_t* pSubsetBlob = nullptr;
+    comphelper::ScopeGuard aBuilderBlobGuard([&]() { 
hb_blob_destroy(pSubsetBlob); });
 
-    const RawFontData& table(sal_uInt32 nIdx) const
+    // HarfBuzz creates a Unicode cmap, but we need a fake cmap based on 
pEncoding,
+    // so we use the face builder to construct a new face based on the subset tables,
+    // and create a new cmap table and add it to the new face.
     {
-        assert(nIdx < NUM_TAGS);
-        static const uint32_t aTags[NUM_TAGS] = {
-            T_maxp, T_glyf, T_head, T_loca, T_name, T_hhea, T_hmtx, T_cmap,
-            T_vhea, T_vmtx, T_OS2,  T_post, T_cvt,  T_prep, T_fpgm, T_CFF,
-        };
-        if (m_aTableList[nIdx].empty())
-            m_aTableList[nIdx] = std::move(m_aFace.GetTable(aTags[nIdx]));
-        return m_aTableList[nIdx];
-    }
+        hb_face_t* pBuilderFace = hb_face_builder_create();
+        comphelper::ScopeGuard aBuilderFaceGuard([&]() { 
hb_face_destroy(pBuilderFace); });
+        unsigned int nSubsetTableCount = hb_face_get_table_tags(pSubsetFace, 
0, nullptr, nullptr);
+        std::vector<hb_tag_t> aSubsetTableTags(nSubsetTableCount);
+        hb_face_get_table_tags(pSubsetFace, 0, &nSubsetTableCount, 
aSubsetTableTags.data());
+        for (unsigned int i = 0; i < nSubsetTableCount; ++i)
+        {
+            hb_blob_t* pTableBlob = hb_face_reference_table(pSubsetFace, 
aSubsetTableTags[i]);
+            hb_face_builder_add_table(pBuilderFace, aSubsetTableTags[i], 
pTableBlob);
+            hb_blob_destroy(pTableBlob);
+        }
 
-public:
-    TrueTypeFace(RawFace aFace, const FontCharMapRef rCharMap)
-        : AbstractTrueTypeFont(nullptr, rCharMap)
-        , m_aFace(std::move(aFace))
-    {
+        // Build a cmap table with a format 0 subtable
+        SvMemoryStream aCmapStream;
+        aCmapStream.SetEndian(SvStreamEndian::BIG);
+
+        // cmap header
+        aCmapStream.WriteUInt16(0); // version
+        aCmapStream.WriteUInt16(1); // numTables
+
+        // Encoding record
+        aCmapStream.WriteUInt16(1); // platformID (Mac: 1)
+        aCmapStream.WriteUInt16(0); // encodingID (Roman: 0)
+        aCmapStream.WriteUInt32(12); // subtable offset
+
+        // Format 0 subtable
+        aCmapStream.WriteUInt16(0); // format
+        aCmapStream.WriteUInt16(262); // length
+        aCmapStream.WriteUInt16(0); // language
+
+        // glyphIdArray
+        for (int i = 0; i < 256; ++i)
+        {
+            if (i < nGlyphCount)
+                aCmapStream.WriteUInt8(pEncoding[i]);
+            else
+                aCmapStream.WriteUInt8(0);
+        }
+
+        hb_blob_t* pCmapBlob
+            = hb_blob_create(static_cast<const char*>(aCmapStream.GetData()), 
aCmapStream.Tell(),
+                             HB_MEMORY_MODE_DUPLICATE, nullptr, nullptr);
+        hb_face_builder_add_table(pBuilderFace, HB_TAG('c', 'm', 'a', 'p'), 
pCmapBlob);
+        hb_blob_destroy(pCmapBlob);
+
+        pSubsetBlob = hb_face_reference_blob(pBuilderFace);
     }
 
-    bool hasTable(sal_uInt32 nIdx) const override { return 
!table(nIdx).empty(); }
-    const sal_uInt8* table(sal_uInt32 nIdx, sal_uInt32& nSize) const override
+    hb_blob_t* pCFFBlob = hb_face_reference_table(pSubsetFace, HB_TAG('C', 
'F', 'F', ' '));
+    comphelper::ScopeGuard aCFFBlobGuard([&]() { hb_blob_destroy(pCFFBlob); });
+    if (pCFFBlob != hb_blob_get_empty())
     {
-        auto& rTable = table(nIdx);
-        nSize = rTable.size();
-        return rTable.data();
+        // Ideally we should be outputting a CFF (Type1C) font here, but I 
couldn’t get it to work.
+        // So we convert it to a Type1 font instead.
+        // TODO: simplify CreateCFFfontSubset() to only do the conversion, 
since we already
+        // have the subsetted font.
+        rInfo.m_nFontType = FontType::TYPE1_PFB;
+
+        unsigned int nCffLen;
+        const unsigned char* pCffData
+            = reinterpret_cast<const unsigned 
char*>(hb_blob_get_data(pCFFBlob, &nCffLen));
+
+        std::vector<sal_GlyphId> aNewGlyphIds(nGlyphCount);
+        for (int i = 0; i < nGlyphCount; ++i)
+            aNewGlyphIds[i] = i;
+
+        if (!CreateCFFfontSubset(pCffData, nCffLen, rOutBuffer, 
aNewGlyphIds.data(), pEncoding,
+                                 nGlyphCount, rInfo))
+            return false;
     }
-};
-}
+    else
+    {
+        rInfo.m_nFontType = FontType::SFNT_TTF;
 
-bool PhysicalFontFace::CreateFontSubset(std::vector<sal_uInt8>& rOutBuffer,
-                                        const sal_GlyphId* pGlyphIds, const 
sal_uInt8* pEncoding,
-                                        const int nGlyphCount, FontSubsetInfo& 
rInfo) const
-{
-    // Prepare data for font subsetter.
-    TrueTypeFace aSftFont(RawFace(GetHbFace()), GetFontCharMap());
-    if (aSftFont.initialize() != SFErrCodes::Ok)
-        return false;
+        unsigned int nSubsetLength;
+        const char* pSubsetData = nullptr;
+        pSubsetData = hb_blob_get_data(pSubsetBlob, &nSubsetLength);
+        if (!pSubsetData || !nSubsetLength)
+            return false;
+
+        rOutBuffer.assign(reinterpret_cast<const sal_uInt8*>(pSubsetData),
+                          reinterpret_cast<const sal_uInt8*>(pSubsetData) + 
nSubsetLength);
+    }
 
-    // write subset into destination file
-    return CreateTTFfontSubset(aSftFont, rOutBuffer, pGlyphIds, pEncoding, 
nGlyphCount, rInfo);
+    return true;
 }
 
 bool PhysicalFontFace::HasColorLayers() const
diff --git a/vcl/source/fontsubset/sft.cxx b/vcl/source/fontsubset/sft.cxx
index 5e7da45adb64..dc03041dafcf 100644
--- a/vcl/source/fontsubset/sft.cxx
+++ b/vcl/source/fontsubset/sft.cxx
@@ -1436,6 +1436,7 @@ void FillFontSubsetInfo(const AbstractTrueTypeFont* ttf, 
FontSubsetInfo& rInfo)
 
     rInfo.m_bFilled = true;
 }
+}
 
 bool CreateCFFfontSubset(const unsigned char* pFontBytes, int nByteLength,
                          std::vector<sal_uInt8>& rOutBuffer, const 
sal_GlyphId* pGlyphIds,
@@ -1462,7 +1463,6 @@ bool CreateCFFfontSubset(const unsigned char* pFontBytes, 
int nByteLength,
 
     return bRet;
 }
-}
 
 bool CreateTTFfontSubset(vcl::AbstractTrueTypeFont& rTTF, 
std::vector<sal_uInt8>& rOutBuffer,
                          const sal_GlyphId* pGlyphIds, const sal_uInt8* 
pEncoding,
diff --git a/vcl/source/pdf/pdfwriter_impl.cxx 
b/vcl/source/pdf/pdfwriter_impl.cxx
index ec91e73bc07f..0305bf5c1403 100644
--- a/vcl/source/pdf/pdfwriter_impl.cxx
+++ b/vcl/source/pdf/pdfwriter_impl.cxx
@@ -2030,6 +2030,9 @@ sal_Int32 PDFWriterImpl::emitFontDescriptor( const 
vcl::font::PhysicalFontFace*
             case FontType::SFNT_TTF:
                 aLine.append( '2' );
                 break;
+            case FontType::CFF_FONT:
+                aLine.append( "3" );
+                break;
             case FontType::TYPE1_PFA:
             case FontType::TYPE1_PFB:
             case FontType::ANY_TYPE1:
@@ -2100,18 +2103,17 @@ bool PDFWriterImpl::emitFonts()
                     + OString::number( nStreamLengthObject ) );
                 if (!g_bDebugDisableCompression)
                     aLine.append( " 0 R"
-                                 "/Filter/FlateDecode"
-                                 "/Length1 " );
+                                 "/Filter/FlateDecode");
                 else
-                    aLine.append( " 0 R"
-                                 "/Length1 " );
+                    aLine.append( " 0 R");
 
                 sal_uInt64 nStartPos = 0;
                 if( aSubsetInfo.m_nFontType == FontType::SFNT_TTF )
                 {
-                    aLine.append( OString::number(aBuffer.size())
-                               + ">>
"
-                                 "stream
" );
+                    aLine.append("/Length1 "
+                        + OString::number(aBuffer.size())
+                        + ">>
"
+                          "stream
" );
                     if ( !writeBuffer( aLine ) ) return false;
                     if ( osl::File::E_None != m_aFile.getPos(nStartPos) ) 
return false;
 
@@ -2123,8 +2125,16 @@ bool PDFWriterImpl::emitFonts()
                 }
                 else if( aSubsetInfo.m_nFontType & FontType::CFF_FONT)
                 {
-                    // TODO: implement
-                    OSL_FAIL( "PDFWriterImpl does not support CFF-font subsets 
yet!" );
+                    // CFF subset is embedded as a bare CFF (Type1C) stream, not as an SFNT
+                    aLine.append("/Subtype/Type1C>>
stream
");
+                    if ( !writeBuffer( aLine ) ) return false;
+                    if ( osl::File::E_None != m_aFile.getPos(nStartPos) ) 
return false;
+
+                    // copy font file
+                    beginCompression();
+                    checkAndEnableStreamEncryption( nFontStream );
+                    if (!writeBufferBytes(aBuffer.data(), aBuffer.size()))
+                        return false;
                 }
                 else if( aSubsetInfo.m_nFontType & FontType::TYPE1_PFB) // 
TODO: also support PFA?
                 {
@@ -2133,7 +2143,8 @@ bool PDFWriterImpl::emitFonts()
                     getPfbSegmentLengths(aBuffer.data(), aBuffer.size(), 
aSegmentLengths);
                     // the lengths below are mandatory for PDF-exported Type1 
fonts
                     // because the PFB segment headers get stripped! WhyOhWhy.
-                    aLine.append( OString::number(aSegmentLengths[0] )
+                    aLine.append("/Length1 "
+                        + OString::number(aSegmentLengths[0] )
                         + "/Length2 "
                         + OString::number( aSegmentLengths[1] )
                         + "/Length3 "
@@ -2185,7 +2196,7 @@ bool PDFWriterImpl::emitFonts()
                 if ( !updateObject( nFontObject ) ) return false;
                 aLine.setLength( 0 );
                 aLine.append( OString::number(nFontObject) + " 0 obj
" );
-                aLine.append( (aSubsetInfo.m_nFontType & FontType::ANY_TYPE1) ?
+                aLine.append( (aSubsetInfo.m_nFontType & (FontType::ANY_TYPE1 
| FontType::CFF_FONT)) ?
                              "<</Type/Font/Subtype/Type1/BaseFont/" :
                              "<</Type/Font/Subtype/TrueType/BaseFont/" );
                 appendSubsetName( s_subset.m_nFontID, aSubsetInfo.m_aPSName, 
aLine );

Reply via email to