glib/poppler-document.cc |    4 ++++
 goo/GooString.h          |    1 +
 2 files changed, 5 insertions(+)

New commits:
commit 0bb9dbc608a73df0a5579c0db3347f2d147266c4
Author: Christophe Fergeau <[email protected]>
Date:   Tue Sep 26 11:02:58 2017 +0200

    document: Handle UTF16-LE annotations
    
    I can produce such annotations when adding annotations to a PDF
    attachment from the standard mail app on my iPhone (iOS 12.1).
    They currently all show as "ÿþÚ" rather than the actual string content.
    
    UTF16-BE vs UTF16-LE is detected by inferring the endianness from the
    first two bytes of the string, the Byte Order Mark (BOM): 0xFE 0xFF
    for big-endian and 0xFF 0xFE for little-endian.

diff --git a/glib/poppler-document.cc b/glib/poppler-document.cc
index 9772c16a..78a57b10 100644
--- a/glib/poppler-document.cc
+++ b/glib/poppler-document.cc
@@ -844,6 +844,10 @@ char *_poppler_goo_string_to_utf8(const GooString *s)
     result = g_convert (s->c_str () + 2,
                        s->getLength () - 2,
                        "UTF-8", "UTF-16BE", nullptr, nullptr, nullptr);
+  } else if (s->hasUnicodeMarkerLE()) {
+    result = g_convert (s->c_str () + 2,
+                       s->getLength () - 2,
+                       "UTF-8", "UTF-16LE", nullptr, nullptr, nullptr);
   } else {
     int len;
     gunichar *ucs4_temp;
diff --git a/goo/GooString.h b/goo/GooString.h
index bae3a180..5b403e7d 100644
--- a/goo/GooString.h
+++ b/goo/GooString.h
@@ -172,6 +172,7 @@ public:
   bool endsWith(const char *suffix) const;
 
   bool hasUnicodeMarker() const { return size() >= 2 && (*this)[0] == 
char(0xfe) && (*this)[1] == char(0xff); }
+  bool hasUnicodeMarkerLE() const { return size() >= 2 && (*this)[0] == 
char(0xff) && (*this)[1] == char(0xfe); }
   bool hasJustUnicodeMarker() const { return size() == 2 && 
hasUnicodeMarker(); }
 
   void prependUnicodeMarker();
_______________________________________________
poppler mailing list
[email protected]
https://lists.freedesktop.org/mailman/listinfo/poppler

Reply via email to