[ https://issues.apache.org/jira/browse/FOP-2918?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17158838#comment-17158838 ]
Kelly H Wilkerson commented on FOP-2918: ---------------------------------------- I'm not sure if this is the right place to make the change, but a change that does work is to add an additional check for a high surrogate in org.apache.fop.layoutmgr.inline.TextLayoutManager. I think it's sufficient to guard against a break in the middle of a surrogate pair when the bidi levels change (first diff), but I added another option (second diff) that would prevent a word break after any high surrogate char, whatever the cause. {code:java} --- a/fop-core/src/main/java/org/apache/fop/layoutmgr/inline/TextLayoutManager.java +++ b/fop-core/src/main/java/org/apache/fop/layoutmgr/inline/TextLayoutManager.java @@ -776,6 +776,7 @@ public class TextLayoutManager extends LeafNodeLayoutManager { boolean inWord = false; boolean inWhitespace = false; char ch = 0; + char prevChar = 0; int level = -1; int prevLevel = -1; boolean retainControls = false; @@ -814,7 +815,8 @@ public class TextLayoutManager extends LeafNodeLayoutManager { if (breakOpportunity || GlyphMapping.isSpace(ch) || CharUtilities.isExplicitBreak(ch) - || ((prevLevel != -1) && (level != prevLevel))) { + || ((prevLevel != -1) && (level != prevLevel) && !Character.isHighSurrogate(prevChar)) + ) { // this.foText.charAt(lastIndex) == CharUtilities.SOFT_HYPHEN prevMapping = processWord(alignment, sequence, prevMapping, ch, breakOpportunity, true, prevLevel, retainControls); @@ -870,6 +872,7 @@ public class TextLayoutManager extends LeafNodeLayoutManager { inWhitespace = ch == CharUtilities.SPACE && foText.getWhitespaceTreatment() != Constants.EN_PRESERVE; prevLevel = level; + prevChar = ch; nextStart++; } {code} {code:java} --- a/fop-core/src/main/java/org/apache/fop/layoutmgr/inline/TextLayoutManager.java +++ b/fop-core/src/main/java/org/apache/fop/layoutmgr/inline/TextLayoutManager.java @@ -776,6 +776,7 @@ public class TextLayoutManager extends LeafNodeLayoutManager { boolean inWord = false; boolean inWhitespace = false; char ch = 
0; + char prevChar = 0; int level = -1; int prevLevel = -1; boolean retainControls = false; @@ -811,10 +812,11 @@ public class TextLayoutManager extends LeafNodeLayoutManager { + "}"); } if (inWord) { - if (breakOpportunity + if ((breakOpportunity || GlyphMapping.isSpace(ch) || CharUtilities.isExplicitBreak(ch) - || ((prevLevel != -1) && (level != prevLevel))) { + || ((prevLevel != -1) && (level != prevLevel))) + && !Character.isHighSurrogate(prevChar)) { // this.foText.charAt(lastIndex) == CharUtilities.SOFT_HYPHEN prevMapping = processWord(alignment, sequence, prevMapping, ch, breakOpportunity, true, prevLevel, retainControls); @@ -870,6 +872,7 @@ public class TextLayoutManager extends LeafNodeLayoutManager { inWhitespace = ch == CharUtilities.SPACE && foText.getWhitespaceTreatment() != Constants.EN_PRESERVE; prevLevel = level; + prevChar = ch; nextStart++; } {code} > Surrogate pairs not handled in U+10800-U+1083F > ---------------------------------------------- > > Key: FOP-2918 > URL: https://issues.apache.org/jira/browse/FOP-2918 > Project: FOP > Issue Type: Bug > Components: renderer/pdf > Affects Versions: 2.4 > Environment: Windows 10 > Reporter: Jan Driesen > Priority: Major > Attachments: NotoSansCypriot-Regular.ttf, fop.xconf, input.fo > > > FOP is not properly handling surrogate pairs for characters in Unicode Block > 'Cypriot Syllabary' when rendering PDF. > It tries to resolve the individual surrogate entities. This results in errors > saying the glyphs cannot be found. > The attached test shows a font that supports characters in this range, and an > FO file holding the surrogate characters to be rendered. > Similar issues arise with fonts "MPH 2B Damase" > ([https://fedoraproject.org/wiki/MPH_2B_Damase_fonts]) and "Segoe UI > Historic" > ([https://docs.microsoft.com/en-us/typography/font-list/segoe_ui_historic]), > but the error may differ. 
(I am unsure whether licensing allows me to add > these) > Some fonts (Damase & Noto) result in a "String index out of range". Other > fonts (Segoe) deliver an "ill-formed UTF-16 sequence, contains isolated high > surrogate at end of sequence" FOPException. > We expected this to work thanks to FOP-1969 (fop 2.3). -- This message was sent by Atlassian Jira (v8.3.4#803005)