poppler/GfxFont.cc |   14 +++++++++++++-
poppler/GfxFont.h  |    2 ++
2 files changed, 15 insertions(+), 1 deletion(-)
New commits:
commit e6fb20d7b3bf8ea8aedc1bcd910e035059835b5f
Author: Axel Strübing <[email protected]>
Date:   Fri Jan 28 19:20:15 2011 +0000

    Extract text of a pdf correctly

    See "[poppler] text extraction does not work" in the mailing list for more info

diff --git a/poppler/GfxFont.cc b/poppler/GfxFont.cc
index 2d7180a..0a165a6 100644
--- a/poppler/GfxFont.cc
+++ b/poppler/GfxFont.cc
@@ -22,6 +22,7 @@
 // Copyright (C) 2008, 2010 Hib Eris <[email protected]>
 // Copyright (C) 2009 Peter Kerzum <[email protected]>
 // Copyright (C) 2009, 2010 David Benjamin <[email protected]>
+// Copyright (C) 2011 Axel Strübing <[email protected]>
 //
 // To see a description of the changes please see the Changelog file that
 // came with your tarball or type make ChangeLog if you are building from git
@@ -176,6 +177,7 @@ GfxFont::GfxFont(char *tagA, Ref idA, GooString *nameA) {
   weight = WeightNotDefined;
   refCnt = 1;
   dfp = NULL;
+  hasToUnicode = gFalse;
 }
 
 GfxFont::~GfxFont() {
@@ -436,6 +438,7 @@ CharCodeToUnicode *GfxFont::readToUnicodeCMap(Dict *fontDict, int nBits,
   } else {
     ctu = CharCodeToUnicode::parseCMap(buf, nBits);
   }
+  hasToUnicode = gTrue;
   delete buf;
   return ctu;
 }
@@ -1697,7 +1700,16 @@ int GfxCIDFont::getNextChar(char *s, int len, CharCode *code,
 
   *code = (CharCode)(cid = cMap->getCID(s, len, &n));
   if (ctu) {
-    *uLen = ctu->mapToUnicode(cid, u);
+    if (hasToUnicode) {
+      int i = 0, c = 0;
+      while (i < n) {
+        c = (c << 8 ) + (s[i] & 0xff);
+        ++i;
+      }
+      *uLen = ctu->mapToUnicode(c, u);
+    } else {
+      *uLen = ctu->mapToUnicode(cid, u);
+    }
   } else {
     *uLen = 0;
   }
diff --git a/poppler/GfxFont.h b/poppler/GfxFont.h
index 55f8692..09af062 100644
--- a/poppler/GfxFont.h
+++ b/poppler/GfxFont.h
@@ -19,6 +19,7 @@
 // Copyright (C) 2007 Julien Rebetez <[email protected]>
 // Copyright (C) 2007 Jeff Muizelaar <[email protected]>
 // Copyright (C) 2007 Koji Otani <[email protected]>
+// Copyright (C) 2011 Axel Strübing <[email protected]>
 //
 // To see a description of the changes please see the Changelog file that
 // came with your tarball or type make ChangeLog if you are building from git
@@ -253,6 +254,7 @@ protected:
   double descent;               // max depth below baseline
   int refCnt;
   GBool ok;
+  GBool hasToUnicode;
 };
 
 //------------------------------------------------------------------------
_______________________________________________
poppler mailing list
[email protected]
http://lists.freedesktop.org/mailman/listinfo/poppler
