 poppler/GfxFont.cc          | 14 +++++++++++++-
 poppler/UnicodeTypeTable.cc |  9 +++++++++
 poppler/UnicodeTypeTable.h  |  3 +++
 3 files changed, 25 insertions(+), 1 deletion(-)
New commits: commit 3361564364a1799fc3d6c6df9f208c5531c407dc Author: Adrian Johnson <[email protected]> Date: Tue Feb 21 22:20:02 2012 +0100 Expand glyph name ligatures such as "ff", "ffi" etc to normal form Bug 7002 diff --git a/poppler/GfxFont.cc b/poppler/GfxFont.cc index 4e29c36..cc0f092 100644 --- a/poppler/GfxFont.cc +++ b/poppler/GfxFont.cc @@ -26,7 +26,7 @@ // Copyright (C) 2009 Peter Kerzum <[email protected]> // Copyright (C) 2009, 2010 David Benjamin <[email protected]> // Copyright (C) 2011 Axel Strübing <[email protected]> -// Copyright (C) 2011 Adrian Johnson <[email protected]> +// Copyright (C) 2011, 2012 Adrian Johnson <[email protected]> // // To see a description of the changes please see the Changelog file that // came with your tarball or type make ChangeLog if you are building from git @@ -55,6 +55,7 @@ #include "CharCodeToUnicode.h" #include "FontEncodingTables.h" #include "BuiltinFontTables.h" +#include "UnicodeTypeTable.h" #include <fofi/FoFiIdentifier.h> #include <fofi/FoFiType1.h> #include <fofi/FoFiType1C.h> @@ -1228,6 +1229,17 @@ Gfx8BitFont::Gfx8BitFont(XRef *xref, const char *tagA, Ref idA, GooString *nameA // construct the char code -> Unicode mapping object ctu = CharCodeToUnicode::make8BitToUnicode(toUnicode); + // pass 1a: Expand ligatures in the Alphabetic Presentation Form + // block (eg "fi", "ffi") to normal form + for (code = 0; code < 256; ++code) { + if (unicodeIsAlphabeticPresentationForm(toUnicode[code])) { + Unicode *normalized = unicodeNormalizeNFKC(&toUnicode[code], 1, &len, NULL); + if (len > 1) + ctu->setMapping((CharCode)code, normalized, len); + gfree(normalized); + } + } + // pass 2: try to fill in the missing chars, looking for ligatures, numeric // references and variants if (missing) { diff --git a/poppler/UnicodeTypeTable.cc b/poppler/UnicodeTypeTable.cc index d620025..721af9d 100644 --- a/poppler/UnicodeTypeTable.cc +++ b/poppler/UnicodeTypeTable.cc @@ -16,6 +16,7 @@ // Copyright (C) 2006, 2007 Ed Catmur 
<[email protected]> // Copyright (C) 2007 Jeff Muizelaar <[email protected]> // Copyright (C) 2008 Albert Astals Cid <[email protected]> +// Copyright (C) 2012 Adrian Johnson <[email protected]> // // To see a description of the changes please see the Changelog file that // came with your tarball or type make ChangeLog if you are building from git @@ -962,6 +963,14 @@ GBool unicodeTypeAlphaNum(Unicode c) { return t == 'L' || t == 'R' || t == '#'; } +#define UNICODE_ALPHABETIC_PRESENTATION_BLOCK_BEGIN 0xFB00 +#define UNICODE_ALPHABETIC_PRESENTATION_BLOCK_END 0xFB4F + +GBool unicodeIsAlphabeticPresentationForm(Unicode c) { + return c >= UNICODE_ALPHABETIC_PRESENTATION_BLOCK_BEGIN + && c <= UNICODE_ALPHABETIC_PRESENTATION_BLOCK_END; +} + Unicode unicodeToUpper(Unicode c) { int i; diff --git a/poppler/UnicodeTypeTable.h b/poppler/UnicodeTypeTable.h index 0dfbd98..869aad9 100644 --- a/poppler/UnicodeTypeTable.h +++ b/poppler/UnicodeTypeTable.h @@ -14,6 +14,7 @@ // under GPL version 2 or later // // Copyright (C) 2006 Ed Catmur <[email protected]> +// Copyright (C) 2012 Adrian Johnson <[email protected]> // // To see a description of the changes please see the Changelog file that // came with your tarball or type make ChangeLog if you are building from git @@ -33,6 +34,8 @@ extern GBool unicodeTypeNum(Unicode c); extern GBool unicodeTypeAlphaNum(Unicode c); +extern GBool unicodeIsAlphabeticPresentationForm(Unicode c); + extern Unicode unicodeToUpper(Unicode c); extern Unicode *unicodeNormalizeNFKC(Unicode *in, int len,
_______________________________________________
poppler mailing list
[email protected]
http://lists.freedesktop.org/mailman/listinfo/poppler
