Hi,

I'm not sure whether this is overkill, but I wrote a localization
patch for qt4/tests/poppler-texts.cpp using Qt4's built-in
encoding converter.

Cobra (the original requester of raw text for poppler-qt4)
told me that Big5 and GBK locales are still popular among Chinese
users, so I think localized output is preferable to
raw UTF-8 output. If I were working on poppler-utils, where
we cannot expect any localization framework, I would not do it.
But Qt4 has a good framework, so I could do it here.

Anyway, this patch does not introduce any improvement or
new feature, so there is no need to push it into the poppler-0.15.0 release.

Regards,
mpsuzuki
--- a/qt4/tests/poppler-texts.cpp
+++ b/qt4/tests/poppler-texts.cpp
@@ -1,40 +1,62 @@
 #include <QtCore/QCoreApplication>
 #include <QtCore/QDebug>
+#include <QTextCodec>
 
 #include <iostream>
 
 #include <poppler-qt4.h>
 
+// Write mbcs to std::cout one byte at a time, then a newline (std::endl flushes).
+void stdcout_qbytearray( QByteArray  mbcs )
+{
+    std::cout << std::flush;
+    for ( int pos = 0; pos < mbcs.size(); ++pos )
+        std::cout << mbcs[pos];
+    std::cout << std::endl;
+}
+
+void stdcout_qstring( QString  qstr )
+{
+    const QByteArray localBytes = qstr.toLocal8Bit();  // local 8-bit form of qstr
+    stdcout_qbytearray( localBytes );
+}
+
 int main( int argc, char **argv )
 {
     QCoreApplication a( argc, argv );               // QApplication required!
 
-    if (!( argc == 2 ))
+    QTextCodec::setCodecForCStrings(QTextCodec::codecForLocale());
+
+    if ( argc < 2 ||
+        (argc == 3 && strcmp(argv[2], "-raw") != 0 ) ||
+         argc > 3)
     {
-	qWarning() << "usage: poppler-texts filename";
+	qWarning() << QString::fromAscii(
+            "usage: poppler-texts filename [-raw]").toLocal8Bit();
 	exit(1);
     }
   
+    Poppler::Page::TextLayout layout =
+      ( argc == 3 && strcmp(argv[2], "-raw") == 0 ) ?
+        Poppler::Page::RawOrderLayout : Poppler::Page::PhysicalLayout ;
+
     Poppler::Document *doc = Poppler::Document::load(argv[1]);
     if (!doc)
     {
-	qWarning() << "doc not loaded";
+	qWarning() << QString::fromAscii("doc not loaded").toLocal8Bit();
 	exit(1);
     }
 
     for ( int i = 0; i < doc->numPages(); i++ )
     {
-      int j = 0;
-      std::cout << "*** Page " << i << std::endl;
-      std::cout << std::flush;
-
-      Poppler::Page *page = doc->page(i);
-      const QByteArray utf8str = page->text( QRectF(), Poppler::Page::RawOrderLayout ).toUtf8();
-      std::cout << std::flush;
-      for ( j = 0; j < utf8str.size(); j++ )
-        std::cout << utf8str[j];
-      std::cout << std::endl;
-      delete page;
+        stdcout_qstring( QString::fromAscii("*** Page ") );
+        stdcout_qstring( QString::number(i) );
+        Poppler::Page *page = doc->page(i);
+        // Extract the text inside the full page rectangle, then free the page.
+        QRectF rect = QRectF( 0, 0, page->pageSizeF().width(),
+                              page->pageSizeF().height() );
+        stdcout_qstring( page->text( rect, layout ) );
+        delete page;  // the caller owns the page; without this one leaks per iteration
     }
     delete doc;
 }
_______________________________________________
poppler mailing list
[email protected]
http://lists.freedesktop.org/mailman/listinfo/poppler

Reply via email to