glib/poppler-document.cc | 4 ++++ goo/GooString.h | 1 + 2 files changed, 5 insertions(+)
New commits: commit 0bb9dbc608a73df0a5579c0db3347f2d147266c4 Author: Christophe Fergeau <[email protected]> Date: Tue Sep 26 11:02:58 2017 +0200 document: Handle UTF16-LE annotations I can produce such annotations when adding annotations to a PDF attachment from the standard mail app on my iPhone (iOS 12.1). They currently all show as "ÿþÚ" rather than the actual string content. UTF16-BE vs UTF16-LE is detected by inferring the endianness from the first two bytes of the string, the Byte Order Mark (0xFE 0xFF for UTF16-BE, 0xFF 0xFE for UTF16-LE). diff --git a/glib/poppler-document.cc b/glib/poppler-document.cc index 9772c16a..78a57b10 100644 --- a/glib/poppler-document.cc +++ b/glib/poppler-document.cc @@ -844,6 +844,10 @@ char *_poppler_goo_string_to_utf8(const GooString *s) result = g_convert (s->c_str () + 2, s->getLength () - 2, "UTF-8", "UTF-16BE", nullptr, nullptr, nullptr); + } else if (s->hasUnicodeMarkerLE()) { + result = g_convert (s->c_str () + 2, + s->getLength () - 2, + "UTF-8", "UTF-16LE", nullptr, nullptr, nullptr); } else { int len; gunichar *ucs4_temp; diff --git a/goo/GooString.h b/goo/GooString.h index bae3a180..5b403e7d 100644 --- a/goo/GooString.h +++ b/goo/GooString.h @@ -172,6 +172,7 @@ public: bool endsWith(const char *suffix) const; bool hasUnicodeMarker() const { return size() >= 2 && (*this)[0] == char(0xfe) && (*this)[1] == char(0xff); } + bool hasUnicodeMarkerLE() const { return size() >= 2 && (*this)[0] == char(0xff) && (*this)[1] == char(0xfe); } bool hasJustUnicodeMarker() const { return size() == 2 && hasUnicodeMarker(); } void prependUnicodeMarker(); _______________________________________________ poppler mailing list [email protected] https://lists.freedesktop.org/mailman/listinfo/poppler
