Author: tilman
Date: Tue Nov 25 08:32:09 2025
New Revision: 1929960

Log:
PDFBOX-6103: add DFLT script support in GSUB system for OpenType fonts, by 
Fabrice Calafat

Added:
   
pdfbox/branches/3.0/fontbox/src/main/java/org/apache/fontbox/ttf/gsub/GsubWorkerForDflt.java
   (contents, props changed)
   
pdfbox/branches/3.0/fontbox/src/test/java/org/apache/fontbox/ttf/gsub/GsubWorkerForDfltTest.java
   (contents, props changed)
   pdfbox/branches/3.0/fontbox/src/test/resources/ttf/JosefinSans-Italic.ttf   
(contents, props changed)
Modified:
   pdfbox/branches/3.0/fontbox/src/main/appended-resources/META-INF/LICENSE
   
pdfbox/branches/3.0/fontbox/src/main/java/org/apache/fontbox/ttf/gsub/GsubWorkerFactory.java
   
pdfbox/branches/3.0/fontbox/src/main/java/org/apache/fontbox/ttf/model/Language.java

Modified: 
pdfbox/branches/3.0/fontbox/src/main/appended-resources/META-INF/LICENSE
==============================================================================
--- pdfbox/branches/3.0/fontbox/src/main/appended-resources/META-INF/LICENSE    
Tue Nov 25 08:31:59 2025        (r1929959)
+++ pdfbox/branches/3.0/fontbox/src/main/appended-resources/META-INF/LICENSE    
Tue Nov 25 08:32:09 2025        (r1929960)
@@ -31,7 +31,7 @@ Apache FontBox is based on contributions
     OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     SUCH DAMAGE.
 
-Lohit-Bengali, Lohit-Devanagari, Lohit-Gujarati fonts 
(https://pagure.io/lohit):
+Lohit fonts (https://pagure.io/lohit):
 
     Copyright 2011-15 Lohit Fonts Project contributors
 
@@ -132,3 +132,10 @@ FoglihtenNo07 font Copyright 2011-2024 G
 https://www.glukfonts.pl/font.php?l=de&font=FoglihtenNo07
 
 SIL Open Font License, see above
+
+Josefin Sans fonts (https://fonts.google.com/specimen/Josefin+Sans)
+
+Copyright 2010 The Josefin Sans Project Authors 
(https://github.com/ThomasJockin/JosefinSansFont-master),
+with Reserved Font Name "Josefin Sans".
+
+SIL Open Font License, see above

Modified: 
pdfbox/branches/3.0/fontbox/src/main/java/org/apache/fontbox/ttf/gsub/GsubWorkerFactory.java
==============================================================================
--- 
pdfbox/branches/3.0/fontbox/src/main/java/org/apache/fontbox/ttf/gsub/GsubWorkerFactory.java
        Tue Nov 25 08:31:59 2025        (r1929959)
+++ 
pdfbox/branches/3.0/fontbox/src/main/java/org/apache/fontbox/ttf/gsub/GsubWorkerFactory.java
        Tue Nov 25 08:32:09 2025        (r1929960)
@@ -51,6 +51,8 @@ public class GsubWorkerFactory
             return new GsubWorkerForGujarati(cmapLookup, gsubData);
         case LATIN:
             return new GsubWorkerForLatin(gsubData);
+        case DFLT:
+            return new GsubWorkerForDflt(gsubData);
         default:
             return new DefaultGsubWorker();
         }

Added: 
pdfbox/branches/3.0/fontbox/src/main/java/org/apache/fontbox/ttf/gsub/GsubWorkerForDflt.java
==============================================================================
--- /dev/null   00:00:00 1970   (empty, because file is newly added)
+++ 
pdfbox/branches/3.0/fontbox/src/main/java/org/apache/fontbox/ttf/gsub/GsubWorkerForDflt.java
        Tue Nov 25 08:32:09 2025        (r1929960)
@@ -0,0 +1,131 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.fontbox.ttf.gsub;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.List;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+
+import org.apache.fontbox.ttf.model.GsubData;
+import org.apache.fontbox.ttf.model.ScriptFeature;
+
+
+/**
+ * DFLT (Default) script-specific implementation of GSUB system.
+ *
+ * <p>According to the OpenType specification, a Script table with the script tag 'DFLT'
+ * (default) is used in fonts to define features that are not script-specific. Applications
+ * should use the DFLT script table when no script table exists for the specific script of the
+ * text being processed, or when text lacks a defined script (containing only symbols or
+ * punctuation).</p>
+ *
+ * <p>This implementation applies common, script-neutral typographic features that work across
+ * writing systems. The feature order follows standard OpenType recommendations for universal
+ * glyph substitutions.</p>
+ *
+ * <p>Reference:
+ * <a href="https://learn.microsoft.com/en-us/typography/opentype/spec/chapter2#scriptlist-table">
+ * OpenType ScriptList Table Specification</a></p>
+ */
+public class GsubWorkerForDflt implements GsubWorker
+{
+    private static final Log LOG = LogFactory.getLog(GsubWorkerForDflt.class);
+
+    /**
+     * Script-neutral features in recommended processing order.
+     *
+     * <ul>
+     * <li>ccmp - Glyph Composition/Decomposition (must be first)</li>
+     * <li>liga - Standard Ligatures</li>
+     * <li>clig - Contextual Ligatures</li>
+     * <li>calt - Contextual Alternates</li>
+     * </ul>
+     *
+     * Note: This feature list focuses on common GSUB (substitution) features.
+     * GPOS features like 'kern', 'mark', 'mkmk' are handled separately.
+     *
+     * The list is wrapped as unmodifiable so the processing order cannot be altered at runtime.
+     */
+    private static final List<String> FEATURES_IN_ORDER = Collections.unmodifiableList(
+            Arrays.asList("ccmp", "liga", "clig", "calt"));
+
+    private final GsubData gsubData;
+
+    /**
+     * Creates a worker operating on the given GSUB data.
+     *
+     * @param gsubData the GSUB data that is asked, per feature, whether the feature is supported
+     * and for the corresponding {@link ScriptFeature}
+     */
+    GsubWorkerForDflt(GsubData gsubData)
+    {
+        this.gsubData = gsubData;
+    }
+
+    /**
+     * Applies every feature of {@link #FEATURES_IN_ORDER} that the GSUB data reports as
+     * supported, in list order. The output of one feature is the input of the next one.
+     *
+     * @param originalGlyphIds the glyph ids to transform
+     * @return an unmodifiable list of the resulting glyph ids; if no feature changed anything,
+     * this is an unmodifiable view of {@code originalGlyphIds} itself
+     */
+    @Override
+    public List<Integer> applyTransforms(List<Integer> originalGlyphIds)
+    {
+        List<Integer> intermediateGlyphsFromGsub = originalGlyphIds;
+
+        for (String feature : FEATURES_IN_ORDER)
+        {
+            if (!gsubData.isFeatureSupported(feature))
+            {
+                // guard: do not build the message when debug logging is switched off
+                if (LOG.isDebugEnabled())
+                {
+                    LOG.debug("the feature " + feature + " was not found");
+                }
+                continue;
+            }
+
+            if (LOG.isDebugEnabled())
+            {
+                LOG.debug("applying the feature " + feature);
+            }
+
+            ScriptFeature scriptFeature = gsubData.getFeature(feature);
+
+            intermediateGlyphsFromGsub = applyGsubFeature(scriptFeature,
+                    intermediateGlyphsFromGsub);
+        }
+
+        return Collections.unmodifiableList(intermediateGlyphsFromGsub);
+    }
+
+    /**
+     * Applies a single feature to a glyph sequence.
+     *
+     * <p>The sequence is split into chunks by a {@link GlyphArraySplitterRegexImpl} built from
+     * the glyph id sequences the feature can substitute. A chunk the feature can replace is
+     * swapped for its single replacement glyph id; every other chunk is copied unchanged.</p>
+     *
+     * @param scriptFeature the feature to apply
+     * @param originalGlyphs the glyph ids to process
+     * @return {@code originalGlyphs} itself if the feature has nothing to substitute, otherwise
+     * a new list holding the processed glyph ids
+     */
+    private List<Integer> applyGsubFeature(ScriptFeature scriptFeature,
+            List<Integer> originalGlyphs)
+    {
+        if (scriptFeature.getAllGlyphIdsForSubstitution().isEmpty())
+        {
+            if (LOG.isDebugEnabled())
+            {
+                LOG.debug("getAllGlyphIdsForSubstitution() for " + scriptFeature.getName()
+                        + " is empty");
+            }
+            return originalGlyphs;
+        }
+
+        GlyphArraySplitter glyphArraySplitter = new GlyphArraySplitterRegexImpl(
+                scriptFeature.getAllGlyphIdsForSubstitution());
+
+        List<List<Integer>> tokens = glyphArraySplitter.split(originalGlyphs);
+        List<Integer> gsubProcessedGlyphs = new ArrayList<>();
+
+        for (List<Integer> chunk : tokens)
+        {
+            if (scriptFeature.canReplaceGlyphs(chunk))
+            {
+                // gsub system kicks in, you get the glyphId directly
+                Integer replacementForGlyphs = scriptFeature.getReplacementForGlyphs(chunk);
+                gsubProcessedGlyphs.add(replacementForGlyphs);
+            }
+            else
+            {
+                gsubProcessedGlyphs.addAll(chunk);
+            }
+        }
+
+        // guard: both lists would otherwise be rendered to strings on every call
+        if (LOG.isDebugEnabled())
+        {
+            LOG.debug("originalGlyphs: " + originalGlyphs + ", gsubProcessedGlyphs: "
+                    + gsubProcessedGlyphs);
+        }
+
+        return gsubProcessedGlyphs;
+    }
+}

Modified: 
pdfbox/branches/3.0/fontbox/src/main/java/org/apache/fontbox/ttf/model/Language.java
==============================================================================
--- 
pdfbox/branches/3.0/fontbox/src/main/java/org/apache/fontbox/ttf/model/Language.java
        Tue Nov 25 08:31:59 2025        (r1929959)
+++ 
pdfbox/branches/3.0/fontbox/src/main/java/org/apache/fontbox/ttf/model/Language.java
        Tue Nov 25 08:32:09 2025        (r1929960)
@@ -36,6 +36,7 @@ public enum Language
     DEVANAGARI(new String[] { "dev2", "deva" }),
     GUJARATI(new String[] { "gjr2", "gujr" }),
     LATIN(new String[] { "latn" }),
+    DFLT(new String[] { "DFLT" }),
 
     /**
      * An entry explicitly denoting the absence of any concrete language. May 
be useful when no actual glyph

Added: 
pdfbox/branches/3.0/fontbox/src/test/java/org/apache/fontbox/ttf/gsub/GsubWorkerForDfltTest.java
==============================================================================
--- /dev/null   00:00:00 1970   (empty, because file is newly added)
+++ 
pdfbox/branches/3.0/fontbox/src/test/java/org/apache/fontbox/ttf/gsub/GsubWorkerForDfltTest.java
    Tue Nov 25 08:32:09 2025        (r1929960)
@@ -0,0 +1,121 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.fontbox.ttf.gsub;
+
+import org.apache.fontbox.ttf.CmapLookup;
+import org.apache.fontbox.ttf.TTFParser;
+import org.apache.fontbox.ttf.TrueTypeFont;
+import org.apache.pdfbox.io.RandomAccessReadBufferedFile;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertInstanceOf;
+import static org.junit.jupiter.api.Assertions.assertThrows;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+import org.junit.jupiter.api.BeforeAll;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.params.ParameterizedTest;
+import org.junit.jupiter.params.provider.Arguments;
+import org.junit.jupiter.params.provider.MethodSource;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.List;
+import java.util.stream.Stream;
+
+/**
+ * Integration test for {@link GsubWorkerForDflt}, the GSUB worker for the DFLT (default)
+ * script.
+ *
+ * <p>The DFLT script carries script-neutral typographic features that work across writing
+ * systems. It is used in particular when text has no specific script (symbols, punctuation)
+ * or when the font has no table for the script of the text.</p>
+ *
+ * <p>The test font JosefinSans-Italic.ttf (SIL Open Font License) uses the DFLT script and
+ * has the standard ligatures fi and fl, which drive the GSUB transformations checked here.
+ * Words without ligature sequences (like "font" or "code") pass through unchanged, while
+ * words containing "fi" or "fl" end up with ligature glyphs.</p>
+ */
+class GsubWorkerForDfltTest
+{
+    private static final String JOSEFIN_SANS_TTF =
+            "src/test/resources/ttf/JosefinSans-Italic.ttf";
+
+    private static CmapLookup cmapLookup;
+    private static GsubWorker gsubWorkerForDflt;
+
+    /**
+     * Parses the test font once and keeps its unicode cmap lookup and the GSUB worker that the
+     * factory selects for it.
+     *
+     * @throws IOException if the font cannot be read or parsed
+     */
+    @BeforeAll
+    static void init() throws IOException
+    {
+        try (TrueTypeFont font =
+                new TTFParser().parse(new RandomAccessReadBufferedFile(JOSEFIN_SANS_TTF)))
+        {
+            cmapLookup = font.getUnicodeCmapLookup();
+            GsubWorkerFactory factory = new GsubWorkerFactory();
+            gsubWorkerForDflt = factory.getGsubWorker(cmapLookup, font.getGsubData());
+        }
+    }
+
+    /**
+     * The factory must pick the DFLT worker for this font.
+     */
+    @Test
+    void testCorrectWorkerType()
+    {
+        assertInstanceOf(GsubWorkerForDflt.class, gsubWorkerForDflt);
+    }
+
+    /**
+     * Supplies (input word, expected glyph ids, description) triples for
+     * {@link #testApplyTransforms(String, List, String)}.
+     *
+     * @return the test cases, in declaration order
+     */
+    static Stream<Arguments> provideTransformTestCases()
+    {
+        Stream.Builder<Arguments> cases = Stream.builder();
+
+        // No ligature - text passes through unchanged
+        cases.add(Arguments.of("code", Arrays.asList(229, 293, 235, 237),
+                "no ligature sequences"));
+        // Simple ligature
+        cases.add(Arguments.of("fi", Collections.singletonList(407), "fi -> ligature"));
+        // Ligature within word
+        cases.add(Arguments.of("office", Arrays.asList(293, 257, 407, 229, 237),
+                "ffi -> f + fi-ligature"));
+        // Multi-f sequence
+        cases.add(Arguments.of("ffl", Arrays.asList(257, 408), "ffl -> f + fl-ligature"));
+
+        return cases.build();
+    }
+
+    /**
+     * Maps the input word to glyph ids, runs the worker and compares with the expected ids.
+     *
+     * @param input the word to transform
+     * @param expectedGlyphs the glyph ids expected after GSUB processing
+     * @param description human readable case label, only used in the test display name
+     */
+    @ParameterizedTest(name = "{0}: {2}")
+    @MethodSource("provideTransformTestCases")
+    void testApplyTransforms(String input, List<Integer> expectedGlyphs, String description)
+    {
+        List<Integer> inputGlyphs = getGlyphIds(input);
+        List<Integer> actualGlyphs = gsubWorkerForDflt.applyTransforms(inputGlyphs);
+        assertEquals(expectedGlyphs, actualGlyphs);
+    }
+
+    /**
+     * The list returned by the worker must reject structural modification.
+     */
+    @Test
+    void testApplyTransforms_immutableResult()
+    {
+        List<Integer> transformed = gsubWorkerForDflt.applyTransforms(getGlyphIds("abc"));
+
+        assertThrows(UnsupportedOperationException.class, () -> transformed.add(999));
+        assertThrows(UnsupportedOperationException.class, () -> transformed.remove(0));
+    }
+
+    /**
+     * Looks up the glyph id of every char of the word through the font's cmap and asserts
+     * that each lookup yields an id greater than zero.
+     *
+     * @param word the text to convert
+     * @return a new mutable list with one glyph id per char, in text order
+     */
+    private static List<Integer> getGlyphIds(String word)
+    {
+        List<Integer> glyphIds = new ArrayList<>(word.length());
+
+        for (int index = 0; index < word.length(); index++)
+        {
+            int glyphId = cmapLookup.getGlyphId(word.charAt(index));
+            assertTrue(glyphId > 0);
+            glyphIds.add(glyphId);
+        }
+
+        return glyphIds;
+    }
+}
\ No newline at end of file

Added: pdfbox/branches/3.0/fontbox/src/test/resources/ttf/JosefinSans-Italic.ttf
==============================================================================
Binary file. No diff available.

Reply via email to