utils/pdfinfo.cc | 14 ++++++++++++--
1 file changed, 12 insertions(+), 2 deletions(-)
New commits:
commit fde3bed0f400a50f31f1f6bcee44ac1b2c17ddc6
Author: Albert Astals Cid <[email protected]>
Date: Wed Feb 22 00:03:37 2012 +0100

    pdfinfo: decode utf-16 surrogate pairs

    Based on a patch by Adrian Johnson
    Bug 23075

diff --git a/utils/pdfinfo.cc b/utils/pdfinfo.cc
index e0a6f7a..cdc5375 100644
--- a/utils/pdfinfo.cc
+++ b/utils/pdfinfo.cc
@@ -14,9 +14,10 @@
 // under GPL version 2 or later
 //
 // Copyright (C) 2006 Dom Lachowicz <[email protected]>
-// Copyright (C) 2007-2010 Albert Astals Cid <[email protected]>
+// Copyright (C) 2007-2010, 2012 Albert Astals Cid <[email protected]>
 // Copyright (C) 2010 Hib Eris <[email protected]>
 // Copyright (C) 2011 Vittal Aithal <[email protected]>
+// Copyright (C) 2012 Adrian Johnson <[email protected]>
 //
 // To see a description of the changes please see the Changelog file that
 // came with your tarball or type make ChangeLog if you are building from git
@@ -379,7 +380,7 @@ static void printInfoString(Dict *infoDict, const char *key, const char *text,
   Object obj;
   GooString *s1;
   GBool isUnicode;
-  Unicode u;
+  Unicode u, u2;
   char buf[8];
   int i, n;
 
@@ -399,6 +400,15 @@ static void printInfoString(Dict *infoDict, const char *key, const char *text,
       u = ((s1->getChar(i) & 0xff) << 8) |
           (s1->getChar(i+1) & 0xff);
       i += 2;
+      if (u >= 0xd800 && u <= 0xdbff && i < obj.getString()->getLength()) {
+        // surrogate pair
+        u2 = ((s1->getChar(i) & 0xff) << 8) |
+             (s1->getChar(i+1) & 0xff);
+        i += 2;
+        if (u2 >= 0xdc00 && u2 <= 0xdfff) {
+          u = 0x10000 + ((u - 0xd800) << 10) + (u2 - 0xdc00);
+        }
+      }
     } else {
       u = pdfDocEncoding[s1->getChar(i) & 0xff];
       ++i;
_______________________________________________
poppler mailing list
[email protected]
http://lists.freedesktop.org/mailman/listinfo/poppler
