On Saturday, February 16, 2013 10:57:36 AM Adrian Johnson wrote:
> On 16/02/13 09:53, Fabio D'Urso wrote:
> > Hi,
> > 
> > I'm not able to open files saved by git-master poppler in poppler 0.22:
> > Syntax Error: Couldn't find trailer dictionary
> > Syntax Error: Couldn't read xref table
> > 
> > I noticed that large file support patches changed how we write incremental
> > updates of XRef streams. In particular I'm referring to
> > XRef::XRefStreamWriter::writeEntry and XRef::writeStreamToBuffer.
> > 
> > Previously we used to write 4-bytes offsets in XRef streams, now we always
> > write 8-bytes offsets (sizeof(Goffset)).
> > If I understand correctly older poppler versions are not able to read
> > those files because they don't have large file support.
> > 
> > In other words, even if the files we create are good, all previous poppler
> > versions (including 0.22.1) are not able to open them back.
> 
> Could you open a bug to track this issue.

Sorry, I haven't opened a bug. But I've written patches :)

> > Maybe we had better keep emitting 4-bytes entries for compatibility when
> > possible (i.e. if no offset exceeds the 4GB limit), what do you think?
> 
> Yes it would be best to check if all offsets in the xref stream are less
> than 4GB and if so, write 4 byte offsets.

This is done in the first attached patch.

I've also attached two other patches that fix a printf format specifier (%i 
should now be %lli because it refers to a Goffset).
The second patch adds the printf-format attribute to OutStream::printf, so the 
compiler can spot this kind of errors. The third one is the actual fix.
This fixes the startxref value being sometimes negative on little-endian cpus 
where int is 32-bit, and zero on big-endian cpus where int is 32-bit.

Fabio
>From 49b17234917c1bf6b3143bec42c26207df116b3d Mon Sep 17 00:00:00 2001
From: Fabio D'Urso <[email protected]>
Date: Thu, 25 Apr 2013 17:58:25 +0200
Subject: [PATCH 1/3] XRef stream writing: Write 32-bit offsets when possible

For compatibility reasons: some pdf readers don't support 64-bit offsets
yet (for example, poppler 0.22 doesn't)
---
 poppler/XRef.cc | 38 ++++++++++++++++++++++++++++----------
 poppler/XRef.h  | 17 +++++++++++++++--
 2 files changed, 43 insertions(+), 12 deletions(-)

diff --git a/poppler/XRef.cc b/poppler/XRef.cc
index 85745ff..83e5ef7 100644
--- a/poppler/XRef.cc
+++ b/poppler/XRef.cc
@@ -21,7 +21,7 @@
 // Copyright (C) 2009, 2010 Ilya Gorenbein <[email protected]>
 // Copyright (C) 2010 Hib Eris <[email protected]>
 // Copyright (C) 2012, 2013 Thomas Freitag <[email protected]>
-// Copyright (C) 2012 Fabio D'Urso <[email protected]>
+// Copyright (C) 2012, 2013 Fabio D'Urso <[email protected]>
 // Copyright (C) 2013 Adrian Johnson <[email protected]>
 //
 // To see a description of the changes please see the Changelog file that
@@ -1460,9 +1460,10 @@ void XRef::writeTableToFile(OutStream* outStr, GBool writeAllEntries) {
   writeXRef(&writer, writeAllEntries);
 }
 
-XRef::XRefStreamWriter::XRefStreamWriter(Object *indexA, GooString *stmBufA) {
+XRef::XRefStreamWriter::XRefStreamWriter(Object *indexA, GooString *stmBufA, int offsetSizeA) {
   index = indexA;
   stmBuf = stmBufA;
+  offsetSize = offsetSizeA;
 }
 
 void XRef::XRefStreamWriter::startSection(int first, int count) {
@@ -1472,17 +1473,28 @@ void XRef::XRefStreamWriter::startSection(int first, int count) {
 }
 
 void XRef::XRefStreamWriter::writeEntry(Goffset offset, int gen, XRefEntryType type) {
+  const int entryTotalSize = 1 + offsetSize + 2; /* type + offset + gen */
   char data[16];
-  int i;
   data[0] = (type==xrefEntryFree) ? 0 : 1;
-  for (i = sizeof(Goffset); i > 0; i--) {
+  for (int i = offsetSize; i > 0; i--) {
     data[i] = offset & 0xff;
     offset >>= 8;
   }
-  i = sizeof(Goffset) + 1;
-  data[i] = (gen >> 8) & 0xff;
-  data[i+1] = gen & 0xff;
-  stmBuf->append(data, i+2);
+  data[offsetSize + 1] = (gen >> 8) & 0xff;
+  data[offsetSize + 2] = gen & 0xff;
+  stmBuf->append(data, entryTotalSize);
+}
+
+XRef::XRefPreScanWriter::XRefPreScanWriter() {
+  hasOffsetsBeyond4GB = gFalse;
+}
+
+void XRef::XRefPreScanWriter::startSection(int first, int count) {
+}
+
+void XRef::XRefPreScanWriter::writeEntry(Goffset offset, int gen, XRefEntryType type) {
+  if (offset >= 0x100000000ll)
+    hasOffsetsBeyond4GB = gTrue;
 }
 
 void XRef::writeStreamToBuffer(GooString *stmBuf, Dict *xrefDict, XRef *xref) {
@@ -1490,7 +1502,13 @@ void XRef::writeStreamToBuffer(GooString *stmBuf, Dict *xrefDict, XRef *xref) {
   index.initArray(xref);
   stmBuf->clear();
 
-  XRefStreamWriter writer(&index, stmBuf);
+  // First pass: determine whether all offsets fit in 4 bytes or not
+  XRefPreScanWriter prescan;
+  writeXRef(&prescan, gFalse);
+  const int offsetSize = prescan.hasOffsetsBeyond4GB ? sizeof(Goffset) : 4;
+
+  // Second pass: actually write the xref stream
+  XRefStreamWriter writer(&index, stmBuf, offsetSize);
   writeXRef(&writer, gFalse);
 
   Object obj1, obj2;
@@ -1498,7 +1516,7 @@ void XRef::writeStreamToBuffer(GooString *stmBuf, Dict *xrefDict, XRef *xref) {
   xrefDict->set("Index", &index);
   obj2.initArray(xref);
   obj2.arrayAdd( obj1.initInt(1) );
-  obj2.arrayAdd( obj1.initInt(sizeof(Goffset)) );
+  obj2.arrayAdd( obj1.initInt(offsetSize) );
   obj2.arrayAdd( obj1.initInt(2) );
   xrefDict->set("W", &obj2);
 }
diff --git a/poppler/XRef.h b/poppler/XRef.h
index c9f17c2..70065d8 100644
--- a/poppler/XRef.h
+++ b/poppler/XRef.h
@@ -20,7 +20,7 @@
 // Copyright (C) 2010 Ilya Gorenbein <[email protected]>
 // Copyright (C) 2010 Hib Eris <[email protected]>
 // Copyright (C) 2012, 2013 Thomas Freitag <[email protected]>
-// Copyright (C) 2012 Fabio D'Urso <[email protected]>
+// Copyright (C) 2012, 2013 Fabio D'Urso <[email protected]>
 // Copyright (C) 2013 Adrian Johnson <[email protected]>
 //
 // To see a description of the changes please see the Changelog file that
@@ -243,6 +243,7 @@ private:
     virtual ~XRefWriter() {};
   };
 
+  // XRefWriter subclass that writes a XRef table
   class XRefTableWriter: public XRefWriter {
   public:
     XRefTableWriter(OutStream* outStrA);
@@ -252,14 +253,26 @@ private:
     OutStream* outStr;
   };
 
+  // XRefWriter subclass that writes a XRef stream
   class XRefStreamWriter: public XRefWriter {
   public:
-    XRefStreamWriter(Object *index, GooString *stmBuf);
+    XRefStreamWriter(Object *index, GooString *stmBuf, int offsetSize);
     void startSection(int first, int count);
     void writeEntry(Goffset offset, int gen, XRefEntryType type);
   private:
     Object *index;
     GooString *stmBuf;
+    int offsetSize;
+  };
+
+  // Dummy XRefWriter subclass that only checks if all offsets fit in 4 bytes
+  class XRefPreScanWriter: public XRefWriter {
+  public:
+    XRefPreScanWriter();
+    void startSection(int first, int count);
+    void writeEntry(Goffset offset, int gen, XRefEntryType type);
+
+    GBool hasOffsetsBeyond4GB;
   };
 
   void writeXRef(XRefWriter *writer, GBool writeAllEntries);
-- 
1.8.1.4

>From db1aef5129c3d0b52d7bacd555e123b2e1a3fdd1 Mon Sep 17 00:00:00 2001
From: Fabio D'Urso <[email protected]>
Date: Thu, 25 Apr 2013 19:06:09 +0200
Subject: [PATCH 2/3] Re-enable commented-out printf-format attribute on
 OutStream::printf

The answer to the question "2,3 because the first arg is class
instance ?" is yes. It's documented behavior:
 Since non-static C++ methods have an implicit this argument, the
 arguments of such methods should be counted from two, not one.
from http://gcc.gnu.org/onlinedocs/gcc-4.3.2/gcc/Function-Attributes.html
---
 poppler/Stream.h | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/poppler/Stream.h b/poppler/Stream.h
index 0a178b4..7d932b0 100644
--- a/poppler/Stream.h
+++ b/poppler/Stream.h
@@ -21,7 +21,7 @@
 // Copyright (C) 2010 Hib Eris <[email protected]>
 // Copyright (C) 2011, 2012 William Bader <[email protected]>
 // Copyright (C) 2012, 2013 Thomas Freitag <[email protected]>
-// Copyright (C) 2012 Fabio D'Urso <[email protected]>
+// Copyright (C) 2012, 2013 Fabio D'Urso <[email protected]>
 // Copyright (C) 2013 Adrian Johnson <[email protected]>
 // Copyright (C) 2013 Peter Breitenlohner <[email protected]>
 // Copyright (C) 2013 Adam Reichold <[email protected]>
@@ -265,9 +265,7 @@ public:
   // Put a char in the stream
   virtual void put (char c) = 0;
 
-  //FIXME
-  // Printf-like function                         2,3 because the first arg is class instance ?
-  virtual void printf (const char *format, ...) = 0 ; //__attribute__((format(printf, 2,3))) = 0;
+  virtual void printf (const char *format, ...) GCC_PRINTF_FORMAT(2,3) = 0;
 
 private:
   int ref; // reference count
-- 
1.8.1.4

>From 1a604ac4fb8fa95ae989785f0037ac1037ed33d5 Mon Sep 17 00:00:00 2001
From: Fabio D'Urso <[email protected]>
Date: Thu, 25 Apr 2013 19:18:30 +0200
Subject: [PATCH 3/3] Fix printf format specifiers (Goffset is a long long, not
 a int)

---
 poppler/PDFDoc.cc | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/poppler/PDFDoc.cc b/poppler/PDFDoc.cc
index 9cfbc7a..c78d5ca 100644
--- a/poppler/PDFDoc.cc
+++ b/poppler/PDFDoc.cc
@@ -27,7 +27,7 @@
 // Copyright (C) 2010 Srinivas Adicherla <[email protected]>
 // Copyright (C) 2010 Philip Lorenz <[email protected]>
 // Copyright (C) 2011-2013 Thomas Freitag <[email protected]>
-// Copyright (C) 2012 Fabio D'Urso <[email protected]>
+// Copyright (C) 2012, 2013 Fabio D'Urso <[email protected]>
 // Copyright (C) 2013 Adrian Johnson <[email protected]>
 // Copyright (C) 2013 Adam Reichold <[email protected]>
 //
@@ -1355,7 +1355,7 @@ void PDFDoc::writeXRefTableTrailer(Dict *trailerDict, XRef *uxref, GBool writeAl
   outStr->printf( "trailer\r\n");
   writeDictionnary(trailerDict, outStr, xRef, 0, NULL, cryptRC4, 0, 0, 0);
   outStr->printf( "\r\nstartxref\r\n");
-  outStr->printf( "%i\r\n", uxrefOffset);
+  outStr->printf( "%lli\r\n", uxrefOffset);
   outStr->printf( "%%%%EOF\r\n");
 }
 
@@ -1376,7 +1376,7 @@ void PDFDoc::writeXRefStreamTrailer (Dict *trailerDict, XRef *uxref, Ref *uxrefS
   obj1.free();
 
   outStr->printf( "startxref\r\n");
-  outStr->printf( "%i\r\n", uxrefOffset);
+  outStr->printf( "%lli\r\n", uxrefOffset);
   outStr->printf( "%%%%EOF\r\n");
 }
 
-- 
1.8.1.4

_______________________________________________
poppler mailing list
[email protected]
http://lists.freedesktop.org/mailman/listinfo/poppler

Reply via email to