From d02a86b3c0e21adf9becf3882fb686f501c8e045 Mon Sep 17 00:00:00 2001
From: Ihar Filipau <thephilips@gmail.com>
Date: Thu, 10 May 2012 22:30:44 +0200
Subject: [PATCH] pdftotext: report page number in "no word list" warning and emit font attributes per word


diff --git a/utils/pdftotext.cc b/utils/pdftotext.cc
index a170f1b..9c55f5e 100644
--- a/utils/pdftotext.cc
+++ b/utils/pdftotext.cc
@@ -357,13 +357,23 @@ int main(int argc, char *argv[]) {
         TextWord *word;
         double xMinA, yMinA, xMaxA, yMaxA;
         if (word_length == 0)
-          fprintf(stderr, "no word list\n");
+          fprintf(stderr, "Page-%d: no word list\n", page);
 
         for (int i = 0; i < word_length; ++i) {
           word = wordlist->get(i);
           word->getBBox(&xMinA, &yMinA, &xMaxA, &yMaxA);
           const std::string myString = myXmlTokenReplace(word->getText()->getCString());
-          fprintf(f,"    <word xMin=\"%f\" yMin=\"%f\" xMax=\"%f\" yMax=\"%f\">%s</word>\n", xMinA, yMinA, xMaxA, yMaxA, myString.c_str());
+          fprintf(f,"    <word xMin=\"%.3f\" yMin=\"%.3f\" xMax=\"%.3f\" yMax=\"%.3f\""
+			  " fontname=%s"
+			  " size=%.1f italic=%d bold=%d underl=%d>"
+			  "%s</word>\n",
+			  xMinA, yMinA, xMaxA, yMaxA,
+			  word->getFontName() ? word->getFontName()->getCString() : "",
+			  word->getFontSize(),
+			  	(int)word->getFontInfo()->isItalic(), 
+			  	(int)word->getFontInfo()->isBold(), 
+				(int)word->isUnderlined(),
+			  myString.c_str());
         }
         fprintf(f, "  </page>\n");
         delete wordlist;
