Author: tilman Date: Tue Apr 24 17:40:41 2018 New Revision: 1830016 URL: http://svn.apache.org/viewvc?rev=1830016&view=rev Log: PDFBOX-4189: Refined GSUB system to support new languages + feature complete Bengali, by Palash Ray
Added: pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/ttf/gsub/GsubWorkerFactory.java (with props) pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/ttf/model/ pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/ttf/model/GsubData.java (with props) pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/ttf/model/Language.java (with props) pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/ttf/model/MapBackedGsubData.java (with props) pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/ttf/model/MapBackedScriptFeature.java (with props) pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/ttf/model/ScriptFeature.java (with props) pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/ttf/model/package.html (with props) Modified: pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/ttf/GlyphSubstitutionTable.java pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/ttf/TrueTypeFont.java pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/ttf/gsub/GlyphSubstitutionDataExtractor.java pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/ttf/gsub/GsubWorker.java pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/ttf/gsub/GsubWorkerForBengali.java pdfbox/trunk/fontbox/src/test/java/org/apache/fontbox/ttf/GlyphSubstitutionTableTest.java pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/PDPageContentStream.java pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType0Font.java Modified: pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/ttf/GlyphSubstitutionTable.java URL: http://svn.apache.org/viewvc/pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/ttf/GlyphSubstitutionTable.java?rev=1830016&r1=1830015&r2=1830016&view=diff ============================================================================== --- pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/ttf/GlyphSubstitutionTable.java (original) +++ pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/ttf/GlyphSubstitutionTable.java Tue Apr 24 17:40:41 2018 @@ 
-31,6 +31,7 @@ import java.util.Map; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.fontbox.ttf.gsub.GlyphSubstitutionDataExtractor; +import org.apache.fontbox.ttf.model.GsubData; import org.apache.fontbox.ttf.table.common.CoverageTable; import org.apache.fontbox.ttf.table.common.CoverageTableFormat1; import org.apache.fontbox.ttf.table.common.CoverageTableFormat2; @@ -72,7 +73,7 @@ public class GlyphSubstitutionTable exte private String lastUsedSupportedScript; - private Map<String, Map<List<Integer>, Integer>> rawGSubData; + private GsubData gsubData; GlyphSubstitutionTable(TrueTypeFont font) { @@ -103,9 +104,8 @@ public class GlyphSubstitutionTable exte GlyphSubstitutionDataExtractor glyphSubstitutionDataExtractor = new GlyphSubstitutionDataExtractor(); - rawGSubData = glyphSubstitutionDataExtractor + gsubData = glyphSubstitutionDataExtractor .getGsubData(scriptList, featureListTable, lookupListTable); - LOG.debug("rawGSubData: " + rawGSubData); } private Map<String, ScriptTable> readScriptList(TTFDataStream data, long offset) @@ -670,9 +670,9 @@ public class GlyphSubstitutionTable exte return gid; } - public Map<String, Map<List<Integer>, Integer>> getRawGSubData() + public GsubData getGsubData() { - return rawGSubData; + return gsubData; } private RangeRecord readRangeRecord(TTFDataStream data) throws IOException Modified: pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/ttf/TrueTypeFont.java URL: http://svn.apache.org/viewvc/pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/ttf/TrueTypeFont.java?rev=1830016&r1=1830015&r2=1830016&view=diff ============================================================================== --- pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/ttf/TrueTypeFont.java (original) +++ pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/ttf/TrueTypeFont.java Tue Apr 24 17:40:41 2018 @@ -27,11 +27,12 @@ import java.util.Collections; import java.util.HashMap; import 
java.util.List; import java.util.Map; -import org.apache.fontbox.FontBoxFont; -import org.apache.fontbox.util.BoundingBox; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; +import org.apache.fontbox.FontBoxFont; +import org.apache.fontbox.ttf.model.GsubData; +import org.apache.fontbox.util.BoundingBox; /** * A TrueType font file. @@ -654,15 +655,15 @@ public class TrueTypeFont implements Fon return 0; } - public Map<String, Map<List<Integer>, Integer>> getGlyphSubstitutionMap() throws IOException + public GsubData getGsubData() throws IOException { GlyphSubstitutionTable table = getGsub(); if (table == null) { - return Collections.emptyMap(); + return GsubData.NO_DATA_FOUND; } - return table.getRawGSubData(); + return table.getGsubData(); } /** Modified: pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/ttf/gsub/GlyphSubstitutionDataExtractor.java URL: http://svn.apache.org/viewvc/pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/ttf/gsub/GlyphSubstitutionDataExtractor.java?rev=1830016&r1=1830015&r2=1830016&view=diff ============================================================================== --- pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/ttf/gsub/GlyphSubstitutionDataExtractor.java (original) +++ pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/ttf/gsub/GlyphSubstitutionDataExtractor.java Tue Apr 24 17:40:41 2018 @@ -26,6 +26,9 @@ import java.util.Map; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; +import org.apache.fontbox.ttf.model.GsubData; +import org.apache.fontbox.ttf.model.Language; +import org.apache.fontbox.ttf.model.MapBackedGsubData; import org.apache.fontbox.ttf.table.common.CoverageTable; import org.apache.fontbox.ttf.table.common.FeatureListTable; import org.apache.fontbox.ttf.table.common.FeatureRecord; @@ -52,19 +55,19 @@ public class GlyphSubstitutionDataExtrac private static final Log LOG = LogFactory.getLog(GlyphSubstitutionDataExtractor.class); 
- private static final String[] SUPPORTED_LANGUAGES = { "bng2", "beng" }; - - public Map<String, Map<List<Integer>, Integer>> getGsubData(Map<String, ScriptTable> scriptList, + public GsubData getGsubData(Map<String, ScriptTable> scriptList, FeatureListTable featureListTable, LookupListTable lookupListTable) { - ScriptTable scriptTable = getSupportedLanguage(scriptList); + ScriptTableDetails scriptTableDetails = getSupportedLanguage(scriptList); - if (scriptTable == null) + if (scriptTableDetails == null) { - return Collections.emptyMap(); + return GsubData.NO_DATA_FOUND; } + ScriptTable scriptTable = scriptTableDetails.getScriptTable(); + Map<String, Map<List<Integer>, Integer>> gsubData = new LinkedHashMap<>(); // the starting point is really the scriptTags if (scriptTable.getDefaultLangSysTable() != null) @@ -76,16 +79,21 @@ public class GlyphSubstitutionDataExtrac { populateGsubData(gsubData, langSysTable, featureListTable, lookupListTable); } - return Collections.unmodifiableMap(gsubData); + + return new MapBackedGsubData(scriptTableDetails.getLanguage(), + scriptTableDetails.getFeatureName(), gsubData); } - private ScriptTable getSupportedLanguage(Map<String, ScriptTable> scriptList) + private ScriptTableDetails getSupportedLanguage(Map<String, ScriptTable> scriptList) { - for (String supportedLanguage : SUPPORTED_LANGUAGES) + for (Language lang : Language.values()) { - if (scriptList.containsKey(supportedLanguage)) + for (String scriptName : lang.getScriptNames()) { - return scriptList.get(supportedLanguage); + if (scriptList.containsKey(scriptName)) + { + return new ScriptTableDetails(lang, scriptName, scriptList.get(scriptName)); + } } } return null; @@ -232,4 +240,34 @@ public class GlyphSubstitutionDataExtrac } } + private static class ScriptTableDetails + { + private final Language language; + private final String featureName; + private final ScriptTable scriptTable; + + private ScriptTableDetails(Language language, String featureName, ScriptTable 
scriptTable) + { + this.language = language; + this.featureName = featureName; + this.scriptTable = scriptTable; + } + + public Language getLanguage() + { + return language; + } + + public String getFeatureName() + { + return featureName; + } + + public ScriptTable getScriptTable() + { + return scriptTable; + } + + } + } Modified: pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/ttf/gsub/GsubWorker.java URL: http://svn.apache.org/viewvc/pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/ttf/gsub/GsubWorker.java?rev=1830016&r1=1830015&r2=1830016&view=diff ============================================================================== --- pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/ttf/gsub/GsubWorker.java (original) +++ pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/ttf/gsub/GsubWorker.java Tue Apr 24 17:40:41 2018 @@ -28,8 +28,11 @@ import java.util.List; */ public interface GsubWorker { - List<Integer> substituteGlyphs(List<Integer> originalGlyphIds); - - List<Integer> repositionGlyphs(List<Integer> originalGlyphIds); + /** + * Applies language-specific transforms including GSUB and any other pre or post-processing necessary for displaying + * Glyphs correctly. + * + */ + List<Integer> applyTransforms(List<Integer> originalGlyphIds); } Added: pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/ttf/gsub/GsubWorkerFactory.java URL: http://svn.apache.org/viewvc/pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/ttf/gsub/GsubWorkerFactory.java?rev=1830016&view=auto ============================================================================== --- pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/ttf/gsub/GsubWorkerFactory.java (added) +++ pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/ttf/gsub/GsubWorkerFactory.java Tue Apr 24 17:40:41 2018 @@ -0,0 +1,46 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.fontbox.ttf.gsub; + +import org.apache.fontbox.ttf.CmapLookup; +import org.apache.fontbox.ttf.model.GsubData; +import org.apache.fontbox.ttf.model.Language; + +/** + * Gets a {@link Language} specific instance of a {@link GsubWorker} + * + * @author Palash Ray + * + */ +public class GsubWorkerFactory +{ + + public GsubWorker getGsubWorker(CmapLookup cmapLookup, GsubData gsubData) + { + switch (gsubData.getLanguage()) + { + case BENGALI: + return new GsubWorkerForBengali(cmapLookup, gsubData); + default: + throw new UnsupportedOperationException( + "The language " + gsubData.getLanguage() + " is not yet supported"); + } + + } + +} Propchange: pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/ttf/gsub/GsubWorkerFactory.java ------------------------------------------------------------------------------ svn:eol-style = native Modified: pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/ttf/gsub/GsubWorkerForBengali.java URL: http://svn.apache.org/viewvc/pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/ttf/gsub/GsubWorkerForBengali.java?rev=1830016&r1=1830015&r2=1830016&view=diff ============================================================================== --- pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/ttf/gsub/GsubWorkerForBengali.java 
(original) +++ pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/ttf/gsub/GsubWorkerForBengali.java Tue Apr 24 17:40:41 2018 @@ -20,12 +20,15 @@ package org.apache.fontbox.ttf.gsub; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; +import java.util.HashMap; import java.util.List; import java.util.Map; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.fontbox.ttf.CmapLookup; +import org.apache.fontbox.ttf.model.GsubData; +import org.apache.fontbox.ttf.model.ScriptFeature; /** * @@ -39,35 +42,44 @@ public class GsubWorkerForBengali implem private static final Log LOG = LogFactory.getLog(GsubWorkerForBengali.class); + private static final String INIT_FEATURE = "init"; + /** * This sequence is very important. This has been taken from <a href= * "https://docs.microsoft.com/en-us/typography/script-development/bengali">https://docs.microsoft.com/en-us/typography/script-development/bengali</a> */ private static final List<String> FEATURES_IN_ORDER = Arrays.asList("locl", "nukt", "akhn", - "rphf", "blwf", "half", "pstf", "vatu", "cjct", "init", "pres", "abvs", "blws", "psts", - "haln", "calt"); + "rphf", "blwf", "half", "pstf", "vatu", "cjct", INIT_FEATURE, "pres", "abvs", "blws", + "psts", "haln", "calt"); private static final char[] BEFORE_HALF_CHARS = new char[] { '\u09BF', '\u09C7', '\u09C8' }; + private static final BeforeAndAfterSpanComponent[] BEFORE_AND_AFTER_SPAN_CHARS = new BeforeAndAfterSpanComponent[] { + new BeforeAndAfterSpanComponent('\u09CB', '\u09C7', '\u09BE'), + new BeforeAndAfterSpanComponent('\u09CC', '\u09C7', '\u09D7') }; - private final Map<String, Map<List<Integer>, Integer>> glyphSubstitutionMap; + private final CmapLookup cmapLookup; + private final GsubData gsubData; private final List<Integer> beforeHalfGlyphIds; + private final Map<Integer, BeforeAndAfterSpanComponent> beforeAndAfterSpanGlyphIds; + - public GsubWorkerForBengali(CmapLookup cmapLookup, - 
Map<String, Map<List<Integer>, Integer>> glyphSubstitutionMap) + GsubWorkerForBengali(CmapLookup cmapLookup, GsubData gsubData) { - this.glyphSubstitutionMap = glyphSubstitutionMap; - beforeHalfGlyphIds = getBeforeHalfGlyphIds(cmapLookup); + this.cmapLookup = cmapLookup; + this.gsubData = gsubData; + beforeHalfGlyphIds = getBeforeHalfGlyphIds(); + beforeAndAfterSpanGlyphIds = getBeforeAndAfterSpanGlyphIds(); } @Override - public List<Integer> substituteGlyphs(List<Integer> originalGlyphIds) + public List<Integer> applyTransforms(List<Integer> originalGlyphIds) { List<Integer> intermediateGlyphsFromGsub = originalGlyphIds; for (String feature : FEATURES_IN_ORDER) { - if (!glyphSubstitutionMap.containsKey(feature)) + if (!gsubData.isFeatureSupported(feature)) { LOG.debug("the feature " + feature + " was not found"); continue; @@ -75,16 +87,23 @@ public class GsubWorkerForBengali implem LOG.debug("applying the feature " + feature); - Map<List<Integer>, Integer> featureMap = glyphSubstitutionMap.get(feature); + ScriptFeature scriptFeature = gsubData.getFeature(feature); - intermediateGlyphsFromGsub = applyGsubFeature(featureMap, intermediateGlyphsFromGsub); + intermediateGlyphsFromGsub = applyGsubFeature(scriptFeature, + intermediateGlyphsFromGsub); } - return intermediateGlyphsFromGsub; + return Collections.unmodifiableList(repositionGlyphs(intermediateGlyphsFromGsub)); } - @Override - public List<Integer> repositionGlyphs(List<Integer> originalGlyphIds) + private List<Integer> repositionGlyphs(List<Integer> originalGlyphIds) + { + List<Integer> glyphsRepositionedByBeforeHalf = repositionBeforeHalfGlyphIds( + originalGlyphIds); + return repositionBeforeAndAfterSpanGlyphIds(glyphsRepositionedByBeforeHalf); + } + + private List<Integer> repositionBeforeHalfGlyphIds(List<Integer> originalGlyphIds) { List<Integer> repositionedGlyphIds = new ArrayList<>(originalGlyphIds); @@ -101,12 +120,34 @@ public class GsubWorkerForBengali implem return repositionedGlyphIds; } - 
private List<Integer> applyGsubFeature(Map<List<Integer>, Integer> featureMap, + private List<Integer> repositionBeforeAndAfterSpanGlyphIds(List<Integer> originalGlyphIds) + { + List<Integer> repositionedGlyphIds = new ArrayList<>(originalGlyphIds); + + for (int index = 1; index < originalGlyphIds.size(); index++) + { + int glyphId = originalGlyphIds.get(index); + if (beforeAndAfterSpanGlyphIds.containsKey(glyphId)) + { + BeforeAndAfterSpanComponent beforeAndAfterSpanComponent = beforeAndAfterSpanGlyphIds + .get(glyphId); + int previousGlyphId = originalGlyphIds.get(index - 1); + repositionedGlyphIds.set(index, previousGlyphId); + repositionedGlyphIds.set(index - 1, + getGlyphId(beforeAndAfterSpanComponent.beforeComponentCharacter)); + repositionedGlyphIds.add(index + 1, + getGlyphId(beforeAndAfterSpanComponent.afterComponentCharacter)); + } + } + return repositionedGlyphIds; + } + + private List<Integer> applyGsubFeature(ScriptFeature scriptFeature, List<Integer> originalGlyphs) { GlyphArraySplitter glyphArraySplitter = new GlyphArraySplitterRegexImpl( - featureMap.keySet()); + scriptFeature.getAllGlyphIdsForSubstitution()); List<List<Integer>> tokens = glyphArraySplitter.split(originalGlyphs); @@ -114,10 +155,10 @@ public class GsubWorkerForBengali implem for (List<Integer> chunk : tokens) { - if (featureMap.containsKey(chunk)) + if (scriptFeature.canReplaceGlyphs(chunk)) { // gsub system kicks in, you get the glyphId directly - int glyphId = featureMap.get(chunk); + int glyphId = scriptFeature.getReplacementForGlyphs(chunk); gsubProcessedGlyphs.add(glyphId); } else @@ -132,16 +173,67 @@ public class GsubWorkerForBengali implem return gsubProcessedGlyphs; } - private static List<Integer> getBeforeHalfGlyphIds(CmapLookup cmapLookup) + private List<Integer> getBeforeHalfGlyphIds() { - List<Integer> beforeHalfGlyphIds = new ArrayList<>(); + List<Integer> glyphIds = new ArrayList<>(); - for (char beforeHalfChar : BEFORE_HALF_CHARS) + for (char character : 
BEFORE_HALF_CHARS) { - beforeHalfGlyphIds.add(cmapLookup.getGlyphId(beforeHalfChar)); + glyphIds.add(getGlyphId(character)); } - return Collections.unmodifiableList(beforeHalfGlyphIds); + if (gsubData.isFeatureSupported(INIT_FEATURE)) + { + ScriptFeature feature = gsubData.getFeature(INIT_FEATURE); + for (List<Integer> glyphCluster : feature.getAllGlyphIdsForSubstitution()) + { + glyphIds.add(feature.getReplacementForGlyphs(glyphCluster)); + } + } + + return Collections.unmodifiableList(glyphIds); + + } + + private Integer getGlyphId(char character) + { + return cmapLookup.getGlyphId(character); + } + + private Map<Integer, BeforeAndAfterSpanComponent> getBeforeAndAfterSpanGlyphIds() + { + Map<Integer, BeforeAndAfterSpanComponent> beforeAndAfterSpanGlyphIds = new HashMap<>(); + + for (BeforeAndAfterSpanComponent beforeAndAfterSpanComponent : BEFORE_AND_AFTER_SPAN_CHARS) + { + beforeAndAfterSpanGlyphIds.put( + getGlyphId(beforeAndAfterSpanComponent.originalCharacter), + beforeAndAfterSpanComponent); + } + + return Collections.unmodifiableMap(beforeAndAfterSpanGlyphIds); + } + + /** + * Models characters like O-kar (\u09CB) and OU-kar (\u09CC). Since each of these 2 characters is + * represented by 2 components, one before and one after the Vyanjan Varna on which this is + * used, this glyph has to be replaced by these 2 glyphs. For O-kar, it has to be replaced by + * E-kar (\u09C7) and AA-kar (\u09BE). For OU-kar, it has to be replaced by E-kar (\u09C7) and + * \u09D7. 
+ * + */ + private static class BeforeAndAfterSpanComponent { + private final char originalCharacter; + private final char beforeComponentCharacter; + private final char afterComponentCharacter; + + BeforeAndAfterSpanComponent(char originalCharacter, char beforeComponentCharacter, + char afterComponentCharacter) + { + this.originalCharacter = originalCharacter; + this.beforeComponentCharacter = beforeComponentCharacter; + this.afterComponentCharacter = afterComponentCharacter; + } } Added: pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/ttf/model/GsubData.java URL: http://svn.apache.org/viewvc/pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/ttf/model/GsubData.java?rev=1830016&view=auto ============================================================================== --- pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/ttf/model/GsubData.java (added) +++ pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/ttf/model/GsubData.java Tue Apr 24 17:40:41 2018 @@ -0,0 +1,83 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.fontbox.ttf.model; + +import java.util.Set; + +/** + * Model for data from the GSUB tables + * + * @author Palash Ray + * + */ +public interface GsubData +{ + /** + * To be used when there is no GSUB data available + */ + GsubData NO_DATA_FOUND = new GsubData() + { + + @Override + public boolean isFeatureSupported(String featureName) + { + throw new UnsupportedOperationException(); + } + + @Override + public Language getLanguage() + { + throw new UnsupportedOperationException(); + } + + @Override + public ScriptFeature getFeature(String featureName) + { + throw new UnsupportedOperationException(); + } + + @Override + public String getActiveScriptName() + { + throw new UnsupportedOperationException(); + } + + @Override + public Set<String> getSupportedFeatures() + { + throw new UnsupportedOperationException(); + } + }; + + Language getLanguage(); + + /** + * A {@link Language} can have more than one script that is supported. However, at any given + * point, only one of the many scripts is active. + * + * @return The name of the script that is active. 
+ */ + String getActiveScriptName(); + + boolean isFeatureSupported(String featureName); + + ScriptFeature getFeature(String featureName); + + Set<String> getSupportedFeatures(); + +} Propchange: pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/ttf/model/GsubData.java ------------------------------------------------------------------------------ svn:eol-style = native Added: pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/ttf/model/Language.java URL: http://svn.apache.org/viewvc/pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/ttf/model/Language.java?rev=1830016&view=auto ============================================================================== --- pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/ttf/model/Language.java (added) +++ pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/ttf/model/Language.java Tue Apr 24 17:40:41 2018 @@ -0,0 +1,55 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.fontbox.ttf.model; + +import org.apache.fontbox.ttf.gsub.GsubWorker; +import org.apache.fontbox.ttf.gsub.GsubWorkerFactory; +import org.apache.fontbox.ttf.table.common.ScriptRecord; + +/** + * Enumerates the languages supported for GSUB operation. 
In order to support a new language, you + * need to add it here and then implement the {@link GsubWorker} for the given language and return + * the same from the + * {@link GsubWorkerFactory#getGsubWorker(org.apache.fontbox.ttf.CmapLookup, GsubData)} + * + * @author Palash Ray + * + */ +public enum Language +{ + + BENGALI(new String[] { "bng2", "beng" }); + + private final String[] scriptNames; + + private Language(String[] scriptNames) + { + this.scriptNames = scriptNames; + } + + /** + * ScriptNames form the basis of identification of the language. This method gets the ScriptNames that the given + * Language supports, in the order of preference, Index 0 being the most preferred. These names should match the + * {@link ScriptRecord} in the GSUB system. + */ + public String[] getScriptNames() + { + return scriptNames; + } + +} Propchange: pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/ttf/model/Language.java ------------------------------------------------------------------------------ svn:eol-style = native Added: pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/ttf/model/MapBackedGsubData.java URL: http://svn.apache.org/viewvc/pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/ttf/model/MapBackedGsubData.java?rev=1830016&view=auto ============================================================================== --- pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/ttf/model/MapBackedGsubData.java (added) +++ pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/ttf/model/MapBackedGsubData.java Tue Apr 24 17:40:41 2018 @@ -0,0 +1,82 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.fontbox.ttf.model; + +import java.util.List; +import java.util.Map; +import java.util.Set; + +/** + * + * A {@link Map} based simple implementation of the {@link GsubData} + * + * @author Palash Ray + * + */ +public class MapBackedGsubData implements GsubData +{ + + private final Language language; + private final String activeScriptName; + private final Map<String, Map<List<Integer>, Integer>> glyphSubstitutionMap; + + public MapBackedGsubData(Language language, String activeScriptName, + Map<String, Map<List<Integer>, Integer>> glyphSubstitutionMap) + { + this.language = language; + this.activeScriptName = activeScriptName; + this.glyphSubstitutionMap = glyphSubstitutionMap; + } + + @Override + public Language getLanguage() + { + return language; + } + + @Override + public String getActiveScriptName() + { + return activeScriptName; + } + + @Override + public boolean isFeatureSupported(String featureName) + { + return glyphSubstitutionMap.containsKey(featureName); + } + + @Override + public ScriptFeature getFeature(String featureName) + { + if (!isFeatureSupported(featureName)) + { + throw new UnsupportedOperationException( + "The feature " + featureName + " is not supported!"); + } + + return new MapBackedScriptFeature(featureName, glyphSubstitutionMap.get(featureName)); + } + + @Override + public Set<String> getSupportedFeatures() + { + return glyphSubstitutionMap.keySet(); + } + +} Propchange: pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/ttf/model/MapBackedGsubData.java 
------------------------------------------------------------------------------ svn:eol-style = native Added: pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/ttf/model/MapBackedScriptFeature.java URL: http://svn.apache.org/viewvc/pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/ttf/model/MapBackedScriptFeature.java?rev=1830016&view=auto ============================================================================== --- pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/ttf/model/MapBackedScriptFeature.java (added) +++ pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/ttf/model/MapBackedScriptFeature.java Tue Apr 24 17:40:41 2018 @@ -0,0 +1,122 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.fontbox.ttf.model; + +import java.util.List; +import java.util.Map; +import java.util.Set; + +/** + * + * A {@link Map} based simple implementation of the {@link ScriptFeature} + * + * @author Palash Ray + * + */ +public class MapBackedScriptFeature implements ScriptFeature +{ + + private final String name; + private final Map<List<Integer>, Integer> featureMap; + + public MapBackedScriptFeature(String name, Map<List<Integer>, Integer> featureMap) + { + this.name = name; + this.featureMap = featureMap; + } + + @Override + public String getName() + { + return name; + } + + @Override + public Set<List<Integer>> getAllGlyphIdsForSubstitution() + { + return featureMap.keySet(); + } + + @Override + public boolean canReplaceGlyphs(List<Integer> glyphIds) + { + return featureMap.containsKey(glyphIds); + } + + @Override + public Integer getReplacementForGlyphs(List<Integer> glyphIds) + { + if (!canReplaceGlyphs(glyphIds)) + { + throw new UnsupportedOperationException( + "The glyphs " + glyphIds + " cannot be replaced"); + } + return featureMap.get(glyphIds); + } + + @Override + public int hashCode() + { + final int prime = 31; + int result = 1; + result = prime * result + ((featureMap == null) ? 0 : featureMap.hashCode()); + result = prime * result + ((name == null) ? 
0 : name.hashCode()); + return result; + } + + @Override + public boolean equals(Object obj) + { + if (this == obj) + { + return true; + } + if (obj == null) + { + return false; + } + if (getClass() != obj.getClass()) + { + return false; + } + MapBackedScriptFeature other = (MapBackedScriptFeature) obj; + if (featureMap == null) + { + if (other.featureMap != null) + { + return false; + } + } + else if (!featureMap.equals(other.featureMap)) + { + return false; + } + if (name == null) + { + if (other.name != null) + { + return false; + } + } + else if (!name.equals(other.name)) + { + return false; + } + return true; + } +} Propchange: pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/ttf/model/MapBackedScriptFeature.java ------------------------------------------------------------------------------ svn:eol-style = native Added: pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/ttf/model/ScriptFeature.java URL: http://svn.apache.org/viewvc/pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/ttf/model/ScriptFeature.java?rev=1830016&view=auto ============================================================================== --- pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/ttf/model/ScriptFeature.java (added) +++ pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/ttf/model/ScriptFeature.java Tue Apr 24 17:40:41 2018 @@ -0,0 +1,42 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.fontbox.ttf.model; + +import java.util.List; +import java.util.Set; + +import org.apache.fontbox.ttf.table.common.FeatureRecord; + +/** + * Models a {@link FeatureRecord} + * + * @author Palash Ray + * + */ +public interface ScriptFeature +{ + + String getName(); + + Set<List<Integer>> getAllGlyphIdsForSubstitution(); + + boolean canReplaceGlyphs(List<Integer> glyphIds); + + Integer getReplacementForGlyphs(List<Integer> glyphIds); + +} Propchange: pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/ttf/model/ScriptFeature.java ------------------------------------------------------------------------------ svn:eol-style = native Added: pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/ttf/model/package.html URL: http://svn.apache.org/viewvc/pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/ttf/model/package.html?rev=1830016&view=auto ============================================================================== --- pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/ttf/model/package.html (added) +++ pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/ttf/model/package.html Tue Apr 24 17:40:41 2018 @@ -0,0 +1,25 @@ +<!-- + ! Licensed to the Apache Software Foundation (ASF) under one or more + ! contributor license agreements. See the NOTICE file distributed with + ! this work for additional information regarding copyright ownership. + ! The ASF licenses this file to You under the Apache License, Version 2.0 + ! (the "License"); you may not use this file except in compliance with + ! the License. 
You may obtain a copy of the License at + ! + ! http://www.apache.org/licenses/LICENSE-2.0 + ! + ! Unless required by applicable law or agreed to in writing, software + ! distributed under the License is distributed on an "AS IS" BASIS, + ! WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ! See the License for the specific language governing permissions and + ! limitations under the License. + !--> +<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2 Final//EN"> +<html> +<head> + +</head> +<body> +This package contains a more logical model for the various font tables like GSUB. +</body> +</html> Propchange: pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/ttf/model/package.html ------------------------------------------------------------------------------ svn:eol-style = native Modified: pdfbox/trunk/fontbox/src/test/java/org/apache/fontbox/ttf/GlyphSubstitutionTableTest.java URL: http://svn.apache.org/viewvc/pdfbox/trunk/fontbox/src/test/java/org/apache/fontbox/ttf/GlyphSubstitutionTableTest.java?rev=1830016&r1=1830015&r2=1830016&view=diff ============================================================================== --- pdfbox/trunk/fontbox/src/test/java/org/apache/fontbox/ttf/GlyphSubstitutionTableTest.java (original) +++ pdfbox/trunk/fontbox/src/test/java/org/apache/fontbox/ttf/GlyphSubstitutionTableTest.java Tue Apr 24 17:40:41 2018 @@ -17,7 +17,7 @@ package org.apache.fontbox.ttf; import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNotEquals; import static org.junit.Assert.assertNotNull; import java.io.BufferedReader; @@ -30,8 +30,11 @@ import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Map; -import java.util.Set; +import org.apache.fontbox.ttf.model.GsubData; +import org.apache.fontbox.ttf.model.Language; +import org.apache.fontbox.ttf.model.MapBackedScriptFeature; +import org.apache.fontbox.ttf.model.ScriptFeature; 
import org.junit.Test; public class GlyphSubstitutionTableTest @@ -43,7 +46,7 @@ public class GlyphSubstitutionTableTest "blwf", "blws", "half", "haln", "init", "nukt", "pres", "pstf", "rphf", "vatu"); @Test - public void testGetRawGSubData() throws IOException + public void testGetGsubData() throws IOException { // given MemoryTTFDataStream memoryTTFDataStream = new MemoryTTFDataStream( @@ -56,12 +59,13 @@ public class GlyphSubstitutionTableTest testClass.read(null, memoryTTFDataStream); // then - Map<String, Map<List<Integer>, Integer>> rawGsubData = testClass.getRawGSubData(); - assertNotNull(rawGsubData); - assertFalse(rawGsubData.isEmpty()); + GsubData gsubData = testClass.getGsubData(); + assertNotNull(gsubData); + assertNotEquals(GsubData.NO_DATA_FOUND, gsubData); + assertEquals(Language.BENGALI, gsubData.getLanguage()); + assertEquals("bng2", gsubData.getActiveScriptName()); - Set<String> featureNames = rawGsubData.keySet(); - assertEquals(new HashSet<>(EXPECTED_FEATURE_NAMES), featureNames); + assertEquals(new HashSet<>(EXPECTED_FEATURE_NAMES), gsubData.getSupportedFeatures()); String templatePathToFile = "/gsub/lohit_bengali/bng2/%s.txt"; @@ -70,7 +74,9 @@ public class GlyphSubstitutionTableTest System.out.println("******* Testing feature: " + featureName); Map<List<Integer>, Integer> expectedGsubTableRawData = getExpectedGsubTableRawData( String.format(templatePathToFile, featureName)); - assertEquals(expectedGsubTableRawData, rawGsubData.get(featureName)); + ScriptFeature scriptFeature = new MapBackedScriptFeature(featureName, + expectedGsubTableRawData); + assertEquals(scriptFeature, gsubData.getFeature(featureName)); } } Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/PDPageContentStream.java URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/PDPageContentStream.java?rev=1830016&r1=1830015&r2=1830016&view=diff ============================================================================== 
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/PDPageContentStream.java (original) +++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/PDPageContentStream.java Tue Apr 24 17:40:41 2018 @@ -25,7 +25,6 @@ import java.io.OutputStream; import java.util.ArrayList; import java.util.HashSet; import java.util.List; -import java.util.Map; import java.util.Set; import java.util.Stack; import java.util.regex.Pattern; @@ -35,7 +34,8 @@ import org.apache.commons.logging.LogFac import org.apache.fontbox.ttf.CmapLookup; import org.apache.fontbox.ttf.gsub.CompoundCharacterTokenizer; import org.apache.fontbox.ttf.gsub.GsubWorker; -import org.apache.fontbox.ttf.gsub.GsubWorkerForBengali; +import org.apache.fontbox.ttf.gsub.GsubWorkerFactory; +import org.apache.fontbox.ttf.model.GsubData; import org.apache.pdfbox.contentstream.PDAbstractContentStream; import org.apache.pdfbox.cos.COSArray; import org.apache.pdfbox.cos.COSBase; @@ -330,12 +330,11 @@ public final class PDPageContentStream e if (font instanceof PDType0Font) { PDType0Font pdType0Font = (PDType0Font) font; - Map<String, Map<List<Integer>, Integer>> glyphSubstitutionMap = pdType0Font - .getGlyphSubstitutionMap(); - if (!glyphSubstitutionMap.isEmpty()) + GsubData gsubData = pdType0Font.getGsubData(); + if (gsubData != GsubData.NO_DATA_FOUND) { Set<Integer> glyphIds = new HashSet<>(); - encodedText = encodeForGsub(glyphSubstitutionMap, glyphIds, pdType0Font, text); + encodedText = encodeForGsub(gsubData, glyphIds, pdType0Font, text); if (pdType0Font.willBeSubset()) { pdType0Font.addGlyphsToSubset(glyphIds); @@ -1177,7 +1176,7 @@ public final class PDPageContentStream e writeOperator("Ts"); } - private byte[] encodeForGsub(Map<String, Map<List<Integer>, Integer>> glyphSubstitutionMap, + private byte[] encodeForGsub(GsubData gsubData, Set<Integer> glyphIds, PDType0Font font, String text) throws IOException { @@ -1197,7 +1196,7 @@ public final class PDPageContentStream e } else { - 
glyphIds.addAll(applyGSUBRules(out, font, glyphSubstitutionMap, word)); + glyphIds.addAll(applyGSUBRules(out, font, gsubData, word)); } } @@ -1205,8 +1204,7 @@ public final class PDPageContentStream e } private List<Integer> applyGSUBRules(ByteArrayOutputStream out, PDType0Font font, - Map<String, Map<List<Integer>, Integer>> glyphSubstitutionMap, String word) - throws IOException + GsubData gsubData, String word) throws IOException { List<Integer> originalGlyphIds = new ArrayList<>(); CmapLookup cmapLookup = font.getCmapLookup(); @@ -1223,11 +1221,11 @@ public final class PDPageContentStream e originalGlyphIds.add(glyphId); } - // TODO: figure out how to get this language-specific detail up here - GsubWorker gsubWorker = new GsubWorkerForBengali(cmapLookup, glyphSubstitutionMap); + GsubWorkerFactory gsubWorkerFactory = new GsubWorkerFactory(); - List<Integer> repositionedGlyphIds = gsubWorker.repositionGlyphs(originalGlyphIds); - List<Integer> glyphIdsAfterGsub = gsubWorker.substituteGlyphs(repositionedGlyphIds); + GsubWorker gsubWorker = gsubWorkerFactory.getGsubWorker(cmapLookup, gsubData); + + List<Integer> glyphIdsAfterGsub = gsubWorker.applyTransforms(originalGlyphIds); for (Integer glyphId : glyphIdsAfterGsub) { Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType0Font.java URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType0Font.java?rev=1830016&r1=1830015&r2=1830016&view=diff ============================================================================== --- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType0Font.java (original) +++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType0Font.java Tue Apr 24 17:40:41 2018 @@ -20,10 +20,7 @@ import java.awt.geom.GeneralPath; import java.io.File; import java.io.IOException; import java.io.InputStream; -import java.util.Collections; import java.util.HashSet; -import java.util.List; -import 
java.util.Map; import java.util.Set; import org.apache.commons.logging.Log; @@ -32,6 +29,7 @@ import org.apache.fontbox.cmap.CMap; import org.apache.fontbox.ttf.CmapLookup; import org.apache.fontbox.ttf.TTFParser; import org.apache.fontbox.ttf.TrueTypeFont; +import org.apache.fontbox.ttf.model.GsubData; import org.apache.fontbox.util.BoundingBox; import org.apache.pdfbox.cos.COSArray; import org.apache.pdfbox.cos.COSBase; @@ -52,7 +50,7 @@ public class PDType0Font extends PDFont private final PDCIDFont descendantFont; private final Set<Integer> noUnicode = new HashSet<>(); - private final Map<String, Map<List<Integer>, Integer>> glyphSubstitutionMap; + private final GsubData gsubData; private final CmapLookup cmapLookup; private CMap cMap, cMapUCS2; private boolean isCMapPredefined; @@ -70,7 +68,7 @@ public class PDType0Font extends PDFont { super(fontDictionary); - glyphSubstitutionMap = Collections.emptyMap(); + gsubData = GsubData.NO_DATA_FOUND; cmapLookup = null; COSBase base = dict.getDictionaryObject(COSName.DESCENDANT_FONTS); @@ -104,7 +102,7 @@ public class PDType0Font extends PDFont ttf.enableVerticalSubstitutions(); } - glyphSubstitutionMap = ttf.getGlyphSubstitutionMap(); + gsubData = ttf.getGsubData(); cmapLookup = ttf.getUnicodeCmapLookup(); embedder = new PDCIDFontType2Embedder(document, dict, ttf, embedSubset, this, vertical); @@ -599,9 +597,9 @@ public class PDType0Font extends PDFont return descendantFont.hasGlyph(code); } - public Map<String, Map<List<Integer>, Integer>> getGlyphSubstitutionMap() + public GsubData getGsubData() { - return glyphSubstitutionMap; + return gsubData; } public byte[] encodeGlyphId(int glyphId)