Author: tilman
Date: Tue Nov 25 08:31:59 2025
New Revision: 1929959
Log:
PDFBOX-6103: add DFLT script support in GSUB system for OpenType fonts, by
Fabrice Calafat
Added:
pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/ttf/gsub/GsubWorkerForDflt.java
(contents, props changed)
pdfbox/trunk/fontbox/src/test/java/org/apache/fontbox/ttf/gsub/GsubWorkerForDfltTest.java
(contents, props changed)
pdfbox/trunk/fontbox/src/test/resources/ttf/JosefinSans-Italic.ttf
(contents, props changed)
Modified:
pdfbox/trunk/fontbox/src/main/appended-resources/META-INF/LICENSE
pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/ttf/gsub/GsubWorkerFactory.java
pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/ttf/model/Language.java
Modified: pdfbox/trunk/fontbox/src/main/appended-resources/META-INF/LICENSE
==============================================================================
--- pdfbox/trunk/fontbox/src/main/appended-resources/META-INF/LICENSE Tue Nov
25 08:30:14 2025 (r1929958)
+++ pdfbox/trunk/fontbox/src/main/appended-resources/META-INF/LICENSE Tue Nov
25 08:31:59 2025 (r1929959)
@@ -31,7 +31,7 @@ Apache FontBox is based on contributions
OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
SUCH DAMAGE.
-Lohit-Bengali, Lohit-Devanagari, Lohit-Gujarati fonts
(https://pagure.io/lohit):
+Lohit fonts (https://pagure.io/lohit):
Copyright 2011-15 Lohit Fonts Project contributors
@@ -130,4 +130,11 @@ Lohit-Bengali, Lohit-Devanagari, Lohit-G
FoglihtenNo07 font Copyright 2011-2024 Grzegorz Luk
https://www.glukfonts.pl/font.php?l=de&font=FoglihtenNo07
-SIL Open Font License, see above
\ No newline at end of file
+SIL Open Font License, see above
+
+Josefin Sans fonts (https://fonts.google.com/specimen/Josefin+Sans)
+
+Copyright 2010 The Josefin Sans Project Authors
(https://github.com/ThomasJockin/JosefinSansFont-master),
+with Reserved Font Name "Josefin Sans".
+
+SIL Open Font License, see above
Modified:
pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/ttf/gsub/GsubWorkerFactory.java
==============================================================================
---
pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/ttf/gsub/GsubWorkerFactory.java
Tue Nov 25 08:30:14 2025 (r1929958)
+++
pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/ttf/gsub/GsubWorkerFactory.java
Tue Nov 25 08:31:59 2025 (r1929959)
@@ -38,6 +38,9 @@ public class GsubWorkerFactory
{
//TODO this needs to be redesigned / improved because if a font
supports several languages,
// it will choose one of them and maybe not the one expected.
+ // See also PDFBOX-5700 and PDFBOX-5729
+ // For example, NotoSans-Regular hits Devanagari first
+ // See also GlyphSubstitutionDataExtractor.getSupportedLanguage()
which decides the language?!
LOG.debug("Language: {}", gsubData.getLanguage());
switch (gsubData.getLanguage())
{
@@ -49,6 +52,8 @@ public class GsubWorkerFactory
return new GsubWorkerForGujarati(cmapLookup, gsubData);
case LATIN:
return new GsubWorkerForLatin(gsubData);
+ case DFLT:
+ return new GsubWorkerForDflt(gsubData);
default:
return new DefaultGsubWorker();
}
Added:
pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/ttf/gsub/GsubWorkerForDflt.java
==============================================================================
--- /dev/null 00:00:00 1970 (empty, because file is newly added)
+++
pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/ttf/gsub/GsubWorkerForDflt.java
Tue Nov 25 08:31:59 2025 (r1929959)
@@ -0,0 +1,130 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.fontbox.ttf.gsub;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.List;
+
+import org.apache.fontbox.ttf.model.GsubData;
+import org.apache.fontbox.ttf.model.ScriptFeature;
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
+
+/**
+ * DFLT (Default) script-specific implementation of GSUB system.
+ *
+ * <p>According to the OpenType specification, a Script table with the script tag 'DFLT'
+ * (default) is used in fonts to define features that are not script-specific. Applications
+ * should use the DFLT script table when no script table exists for the specific script of the
+ * text being processed, or when text lacks a defined script (containing only symbols or
+ * punctuation).</p>
+ *
+ * <p>This implementation applies common, script-neutral typographic features that work across
+ * writing systems. The feature order follows standard OpenType recommendations for universal
+ * glyph substitutions.</p>
+ *
+ * <p>Reference:
+ * <a href="https://learn.microsoft.com/en-us/typography/opentype/spec/chapter2#scriptlist-table">
+ * OpenType ScriptList Table Specification</a></p>
+ */
+public class GsubWorkerForDflt implements GsubWorker
+{
+    private static final Logger LOG = LogManager.getLogger(GsubWorkerForDflt.class);
+
+    /**
+     * Script-neutral features in recommended processing order.
+     *
+     * <ul>
+     * <li>ccmp - Glyph Composition/Decomposition (must be first)</li>
+     * <li>liga - Standard Ligatures</li>
+     * <li>clig - Contextual Ligatures</li>
+     * <li>calt - Contextual Alternates</li>
+     * </ul>
+     *
+     * Note: This feature list focuses on common GSUB (substitution) features.
+     * GPOS features like 'kern', 'mark', 'mkmk' are handled separately.
+     *
+     * The list is wrapped as unmodifiable because it is shared by every instance and
+     * {@code Arrays.asList} alone would still permit {@code set()}.
+     */
+    private static final List<String> FEATURES_IN_ORDER =
+            Collections.unmodifiableList(Arrays.asList("ccmp", "liga", "clig", "calt"));
+
+    private final GsubData gsubData;
+
+    /**
+     * Creates a worker that reads its features from the given GSUB data.
+     *
+     * @param gsubData the GSUB data queried for each feature in {@link #FEATURES_IN_ORDER}
+     */
+    GsubWorkerForDflt(GsubData gsubData)
+    {
+        this.gsubData = gsubData;
+    }
+
+    /**
+     * Applies every supported feature of {@link #FEATURES_IN_ORDER}, in that order, each one
+     * working on the output of the previous one. Features the GSUB data does not support are
+     * skipped.
+     *
+     * @param originalGlyphIds the glyph ids to transform; this list is not modified here
+     * @return an unmodifiable list of glyph ids. When no feature produced a new list, this is
+     *         a read-only view over {@code originalGlyphIds} itself rather than a copy.
+     */
+    @Override
+    public List<Integer> applyTransforms(List<Integer> originalGlyphIds)
+    {
+        List<Integer> intermediateGlyphsFromGsub = originalGlyphIds;
+
+        for (String feature : FEATURES_IN_ORDER)
+        {
+            if (!gsubData.isFeatureSupported(feature))
+            {
+                // parameterized message: nothing is concatenated when debug is disabled
+                LOG.debug("the feature {} was not found", feature);
+                continue;
+            }
+
+            LOG.debug("applying the feature {}", feature);
+
+            ScriptFeature scriptFeature = gsubData.getFeature(feature);
+
+            intermediateGlyphsFromGsub = applyGsubFeature(scriptFeature,
+                    intermediateGlyphsFromGsub);
+        }
+
+        return Collections.unmodifiableList(intermediateGlyphsFromGsub);
+    }
+
+    /**
+     * Applies a single feature: splits the glyphs into chunks using the feature's set of
+     * substitutable glyph sequences, then replaces every chunk the feature can replace and
+     * copies all other chunks unchanged.
+     *
+     * @param scriptFeature the feature supplying the substitutable sequences and replacements
+     * @param originalGlyphs the glyph ids to process; not modified
+     * @return {@code originalGlyphs} itself when the feature has no substitutions, otherwise
+     *         a new list holding the processed glyph ids
+     */
+    private List<Integer> applyGsubFeature(ScriptFeature scriptFeature,
+            List<Integer> originalGlyphs)
+    {
+        if (scriptFeature.getAllGlyphIdsForSubstitution().isEmpty())
+        {
+            LOG.debug("getAllGlyphIdsForSubstitution() for {} is empty",
+                    scriptFeature.getName());
+            return originalGlyphs;
+        }
+
+        GlyphArraySplitter glyphArraySplitter = new GlyphArraySplitterRegexImpl(
+                scriptFeature.getAllGlyphIdsForSubstitution());
+
+        List<List<Integer>> tokens = glyphArraySplitter.split(originalGlyphs);
+        List<Integer> gsubProcessedGlyphs = new ArrayList<>();
+
+        for (List<Integer> chunk : tokens)
+        {
+            if (scriptFeature.canReplaceGlyphs(chunk))
+            {
+                // gsub system kicks in, you get the glyphId directly
+                List<Integer> replacementForGlyphs =
+                        scriptFeature.getReplacementForGlyphs(chunk);
+                gsubProcessedGlyphs.addAll(replacementForGlyphs);
+            }
+            else
+            {
+                gsubProcessedGlyphs.addAll(chunk);
+            }
+        }
+
+        // the two lists are only rendered to text if debug logging is enabled
+        LOG.debug("originalGlyphs: {}, gsubProcessedGlyphs: {}",
+                originalGlyphs, gsubProcessedGlyphs);
+
+        return gsubProcessedGlyphs;
+    }
+}
Modified:
pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/ttf/model/Language.java
==============================================================================
---
pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/ttf/model/Language.java
Tue Nov 25 08:30:14 2025 (r1929958)
+++
pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/ttf/model/Language.java
Tue Nov 25 08:31:59 2025 (r1929959)
@@ -36,6 +36,7 @@ public enum Language
DEVANAGARI(new String[] { "dev2", "deva" }),
GUJARATI(new String[] { "gjr2", "gujr" }),
LATIN(new String[] { "latn" }),
+ DFLT(new String[] { "DFLT" }),
/**
* An entry explicitly denoting the absence of any concrete language. May
be useful when no actual glyph
Added:
pdfbox/trunk/fontbox/src/test/java/org/apache/fontbox/ttf/gsub/GsubWorkerForDfltTest.java
==============================================================================
--- /dev/null 00:00:00 1970 (empty, because file is newly added)
+++
pdfbox/trunk/fontbox/src/test/java/org/apache/fontbox/ttf/gsub/GsubWorkerForDfltTest.java
Tue Nov 25 08:31:59 2025 (r1929959)
@@ -0,0 +1,121 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.fontbox.ttf.gsub;
+
+import org.apache.fontbox.ttf.CmapLookup;
+import org.apache.fontbox.ttf.TTFParser;
+import org.apache.fontbox.ttf.TrueTypeFont;
+import org.apache.pdfbox.io.RandomAccessReadBufferedFile;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertInstanceOf;
+import static org.junit.jupiter.api.Assertions.assertThrows;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+import org.junit.jupiter.api.BeforeAll;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.params.ParameterizedTest;
+import org.junit.jupiter.params.provider.Arguments;
+import org.junit.jupiter.params.provider.MethodSource;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.List;
+import java.util.stream.Stream;
+
+/**
+ * Integration test for {@link GsubWorkerForDflt}, the GSUB worker for the DFLT (default)
+ * script.
+ *
+ * <p>The DFLT script carries script-neutral typographic features that work across writing
+ * systems; it matters when text has no specific script (symbols, punctuation) or when the
+ * font has no script-specific table.</p>
+ *
+ * <p>JosefinSans-Italic.ttf (SIL Open Font License) uses the DFLT script and has the standard
+ * ligatures (fi, fl), which drive the GSUB transformations tested here. Words without a
+ * ligature sequence (like "font" or "code") pass through unchanged, while words containing
+ * "fi" or "fl" are transformed to use ligature glyphs.</p>
+ *
+ */
+class GsubWorkerForDfltTest
+{
+    private static final String JOSEFIN_SANS_TTF =
+            "src/test/resources/ttf/JosefinSans-Italic.ttf";
+
+    private static CmapLookup cmapLookup;
+    private static GsubWorker gsubWorkerForDflt;
+
+    /**
+     * Parses the test font once and keeps its unicode cmap lookup and the GSUB worker that
+     * the factory selects for it.
+     *
+     * @throws IOException if the font file cannot be read or parsed
+     */
+    @BeforeAll
+    static void init() throws IOException
+    {
+        TTFParser parser = new TTFParser();
+        try (TrueTypeFont font = parser.parse(new RandomAccessReadBufferedFile(JOSEFIN_SANS_TTF)))
+        {
+            cmapLookup = font.getUnicodeCmapLookup();
+            GsubWorkerFactory factory = new GsubWorkerFactory();
+            gsubWorkerForDflt = factory.getGsubWorker(cmapLookup, font.getGsubData());
+        }
+    }
+
+    /**
+     * The factory must pick the DFLT worker for this font.
+     */
+    @Test
+    void testCorrectWorkerType()
+    {
+        assertInstanceOf(GsubWorkerForDflt.class, gsubWorkerForDflt);
+    }
+
+    /**
+     * Supplies (input word, expected glyph ids, description) triples for
+     * {@link #testApplyTransforms(String, List, String)}.
+     */
+    static Stream<Arguments> provideTransformTestCases()
+    {
+        // No ligature - text passes through unchanged
+        Arguments plainWord = Arguments.of("code",
+                Arrays.asList(229, 293, 235, 237), "no ligature sequences");
+        // Simple ligature
+        Arguments singleLigature = Arguments.of("fi",
+                Collections.singletonList(407), "fi -> ligature");
+        // Ligature within word
+        Arguments ligatureInWord = Arguments.of("office",
+                Arrays.asList(293, 257, 407, 229, 237), "ffi -> f + fi-ligature");
+        // Multi-f sequence
+        Arguments multiF = Arguments.of("ffl",
+                Arrays.asList(257, 408), "ffl -> f + fl-ligature");
+
+        return Stream.of(plainWord, singleLigature, ligatureInWord, multiF);
+    }
+
+    /**
+     * Transforms the glyph ids of {@code input} and compares them with the expected ids.
+     * {@code description} is only used in the display name of the test.
+     */
+    @ParameterizedTest(name = "{0}: {2}")
+    @MethodSource("provideTransformTestCases")
+    void testApplyTransforms(String input, List<Integer> expectedGlyphs, String description)
+    {
+        List<Integer> inputGlyphs = getGlyphIds(input);
+        assertEquals(expectedGlyphs, gsubWorkerForDflt.applyTransforms(inputGlyphs));
+    }
+
+    /**
+     * The list returned by the worker must reject additions and removals.
+     */
+    @Test
+    void testApplyTransforms_immutableResult()
+    {
+        List<Integer> inputGlyphs = getGlyphIds("abc");
+        List<Integer> result = gsubWorkerForDflt.applyTransforms(inputGlyphs);
+
+        assertThrows(UnsupportedOperationException.class, () -> result.add(999));
+        assertThrows(UnsupportedOperationException.class, () -> result.remove(0));
+    }
+
+    /**
+     * Maps each char of {@code word} to its glyph id through the font's unicode cmap,
+     * asserting that every char resolves to a glyph id greater than zero.
+     */
+    private static List<Integer> getGlyphIds(String word)
+    {
+        List<Integer> glyphIds = new ArrayList<>();
+
+        for (int i = 0; i < word.length(); i++)
+        {
+            int glyphId = cmapLookup.getGlyphId(word.charAt(i));
+            assertTrue(glyphId > 0);
+            glyphIds.add(glyphId);
+        }
+
+        return glyphIds;
+    }
+}
\ No newline at end of file
Added: pdfbox/trunk/fontbox/src/test/resources/ttf/JosefinSans-Italic.ttf
==============================================================================
Binary file. No diff available.