Hi,
I'm not sure whether it's overkill, but I wrote a localization
patch for qt4/tests/poppler-texts.cpp, using Qt4's built-in
encoding converter.
Cobra (the original requester of raw text for poppler-qt4)
told me that Big5 or GBK locales are still popular among Chinese
users, so I think localized output is preferable to
raw UTF-8 output. If I were working on poppler-utils, where
we cannot expect any localization framework, I wouldn't do it.
But Qt4 has a good framework, so I could do it here.
Anyway, this patch does not introduce any improvement or
new feature, so there is no need to push it into the poppler-0.15.0 release.
Regards,
mpsuzuki
--- a/qt4/tests/poppler-texts.cpp
+++ b/qt4/tests/poppler-texts.cpp
@@ -1,40 +1,62 @@
#include <QtCore/QCoreApplication>
#include <QtCore/QDebug>
+#include <QTextCodec>
#include <iostream>
#include <poppler-qt4.h>
+void
+stdcout_qbytearray( QByteArray mbcs )
+{
+ std::cout << std::flush;
+ for ( int j = 0; j < mbcs.size(); j++ )
+ std::cout << mbcs[j];
+ std::cout << std::endl;
+}
+
+void
+stdcout_qstring( QString qstr )
+{
+ stdcout_qbytearray( qstr.toLocal8Bit() );
+}
+
int main( int argc, char **argv )
{
QCoreApplication a( argc, argv ); // QApplication required!
- if (!( argc == 2 ))
+ QTextCodec::setCodecForCStrings(QTextCodec::codecForLocale());
+
+ if ( argc < 2 ||
+ (argc == 3 && strcmp(argv[2], "-raw") != 0 ) ||
+ argc > 3)
{
- qWarning() << "usage: poppler-texts filename";
+ qWarning() << QString::fromAscii(
+ "usage: poppler-texts filename [-raw]").toLocal8Bit();
exit(1);
}
+ Poppler::Page::TextLayout layout =
+ ( argc == 3 && strcmp(argv[2], "-raw") == 0 ) ?
+ Poppler::Page::RawOrderLayout : Poppler::Page::PhysicalLayout ;
+
Poppler::Document *doc = Poppler::Document::load(argv[1]);
if (!doc)
{
- qWarning() << "doc not loaded";
+ qWarning() << QString::fromAscii("doc not loaded").toLocal8Bit();
exit(1);
}
for ( int i = 0; i < doc->numPages(); i++ )
{
- int j = 0;
- std::cout << "*** Page " << i << std::endl;
- std::cout << std::flush;
-
- Poppler::Page *page = doc->page(i);
- const QByteArray utf8str = page->text( QRectF(), Poppler::Page::RawOrderLayout ).toUtf8();
- std::cout << std::flush;
- for ( j = 0; j < utf8str.size(); j++ )
- std::cout << utf8str[j];
- std::cout << std::endl;
- delete page;
+ stdcout_qstring( QString::fromAscii("*** Page ") );
+ stdcout_qstring( QString::number(i) );
+ Poppler::Page *page = doc->page(i);
+ QRectF rect = QRectF( 0,
+ 0,
+ page->pageSizeF().width(),
+ page->pageSizeF().height() );
+ stdcout_qstring( page->text( rect, layout ) );
}
delete doc;
}
_______________________________________________
poppler mailing list
[email protected]
http://lists.freedesktop.org/mailman/listinfo/poppler