On Saturday, February 16, 2013 10:57:36 AM Adrian Johnson wrote: > On 16/02/13 09:53, Fabio D'Urso wrote: > > Hi, > > > > I'm not able to open files saved by git-master poppler in poppler 0.22: > > Syntax Error: Couldn't find trailer dictionary > > Syntax Error: Couldn't read xref table > > > > I noticed that large file support patches changed how we write incremental > > updates of XRef streams. In particular I'm referring to > > XRef::XRefStreamWriter::writeEntry and XRef::writeStreamToBuffer. > > > > Previously we used to write 4-bytes offsets in XRef streams, now we always > > write 8-bytes offsets (sizeof(Goffset)). > > If I understand correctly older poppler versions are not able to read > > those files because they don't have large file support. > > > > In other words, even if the files we create are good, all previous poppler > > versions (including 0.22.1) are not able to open them back. > > Could you open a bug to track this issue.
Sorry, I haven't opened a bug. But I've written patches :) > > Maybe we had better keep emitting 4-bytes entries for compatibility when > > possible (i.e. if no offset exceeds the 4GB limit), what do you think? > > Yes it would be best to check if all offsets in the xref stream are less > than 4GB and if so, write 4 byte offsets. This is done in the first attached patch. I've also attached two other patches that fix a printf format specifier (%i should now be %lli because it refers to a Goffset). The second patch adds the printf-format attribute to OutStream::printf, so the compiler can spot this kind of errors. The third one is the actual fix. This fixes the startxref value being sometimes negative on little-endian cpus where int is 32-bit, and zero on big-endian cpus where int is 32-bit. Fabio
>From 49b17234917c1bf6b3143bec42c26207df116b3d Mon Sep 17 00:00:00 2001 From: Fabio D'Urso <[email protected]> Date: Thu, 25 Apr 2013 17:58:25 +0200 Subject: [PATCH 1/3] XRef stream writing: Write 32-bit offsets when possible For compatibility reasons: some pdf readers don't support 64-bit offsets yet (for example, poppler 0.22 doesn't) --- poppler/XRef.cc | 38 ++++++++++++++++++++++++++++---------- poppler/XRef.h | 17 +++++++++++++++-- 2 files changed, 43 insertions(+), 12 deletions(-) diff --git a/poppler/XRef.cc b/poppler/XRef.cc index 85745ff..83e5ef7 100644 --- a/poppler/XRef.cc +++ b/poppler/XRef.cc @@ -21,7 +21,7 @@ // Copyright (C) 2009, 2010 Ilya Gorenbein <[email protected]> // Copyright (C) 2010 Hib Eris <[email protected]> // Copyright (C) 2012, 2013 Thomas Freitag <[email protected]> -// Copyright (C) 2012 Fabio D'Urso <[email protected]> +// Copyright (C) 2012, 2013 Fabio D'Urso <[email protected]> // Copyright (C) 2013 Adrian Johnson <[email protected]> // // To see a description of the changes please see the Changelog file that @@ -1460,9 +1460,10 @@ void XRef::writeTableToFile(OutStream* outStr, GBool writeAllEntries) { writeXRef(&writer, writeAllEntries); } -XRef::XRefStreamWriter::XRefStreamWriter(Object *indexA, GooString *stmBufA) { +XRef::XRefStreamWriter::XRefStreamWriter(Object *indexA, GooString *stmBufA, int offsetSizeA) { index = indexA; stmBuf = stmBufA; + offsetSize = offsetSizeA; } void XRef::XRefStreamWriter::startSection(int first, int count) { @@ -1472,17 +1473,28 @@ void XRef::XRefStreamWriter::startSection(int first, int count) { } void XRef::XRefStreamWriter::writeEntry(Goffset offset, int gen, XRefEntryType type) { + const int entryTotalSize = 1 + offsetSize + 2; /* type + offset + gen */ char data[16]; - int i; data[0] = (type==xrefEntryFree) ? 0 : 1; - for (i = sizeof(Goffset); i > 0; i--) { + for (int i = offsetSize; i > 0; i--) { data[i] = offset & 0xff; offset >>= 8; } - i = sizeof(Goffset) + 1; - data[i] = (gen >> 8) & 0xff; - data[i+1] = gen & 0xff; - stmBuf->append(data, i+2); + data[offsetSize + 1] = (gen >> 8) & 0xff; + data[offsetSize + 2] = gen & 0xff; + stmBuf->append(data, entryTotalSize); +} + +XRef::XRefPreScanWriter::XRefPreScanWriter() { + hasOffsetsBeyond4GB = gFalse; +} + +void XRef::XRefPreScanWriter::startSection(int first, int count) { +} + +void XRef::XRefPreScanWriter::writeEntry(Goffset offset, int gen, XRefEntryType type) { + if (offset >= 0x100000000ll) + hasOffsetsBeyond4GB = gTrue; } void XRef::writeStreamToBuffer(GooString *stmBuf, Dict *xrefDict, XRef *xref) { @@ -1490,7 +1502,13 @@ void XRef::writeStreamToBuffer(GooString *stmBuf, Dict *xrefDict, XRef *xref) { index.initArray(xref); stmBuf->clear(); - XRefStreamWriter writer(&index, stmBuf); + // First pass: determine whether all offsets fit in 4 bytes or not + XRefPreScanWriter prescan; + writeXRef(&prescan, gFalse); + const int offsetSize = prescan.hasOffsetsBeyond4GB ? sizeof(Goffset) : 4; + + // Second pass: actually write the xref stream + XRefStreamWriter writer(&index, stmBuf, offsetSize); writeXRef(&writer, gFalse); Object obj1, obj2; @@ -1498,7 +1516,7 @@ void XRef::writeStreamToBuffer(GooString *stmBuf, Dict *xrefDict, XRef *xref) { xrefDict->set("Index", &index); obj2.initArray(xref); obj2.arrayAdd( obj1.initInt(1) ); - obj2.arrayAdd( obj1.initInt(sizeof(Goffset)) ); + obj2.arrayAdd( obj1.initInt(offsetSize) ); obj2.arrayAdd( obj1.initInt(2) ); xrefDict->set("W", &obj2); } diff --git a/poppler/XRef.h b/poppler/XRef.h index c9f17c2..70065d8 100644 --- a/poppler/XRef.h +++ b/poppler/XRef.h @@ -20,7 +20,7 @@ // Copyright (C) 2010 Ilya Gorenbein <[email protected]> // Copyright (C) 2010 Hib Eris <[email protected]> // Copyright (C) 2012, 2013 Thomas Freitag <[email protected]> -// Copyright (C) 2012 Fabio D'Urso <[email protected]> +// Copyright (C) 2012, 2013 Fabio D'Urso <[email protected]> // Copyright (C) 2013 Adrian Johnson <[email protected]> // // To see a description of the changes please see the Changelog file that @@ -243,6 +243,7 @@ private: virtual ~XRefWriter() {}; }; + // XRefWriter subclass that writes a XRef table class XRefTableWriter: public XRefWriter { public: XRefTableWriter(OutStream* outStrA); @@ -252,14 +253,26 @@ private: OutStream* outStr; }; + // XRefWriter subclass that writes a XRef stream class XRefStreamWriter: public XRefWriter { public: - XRefStreamWriter(Object *index, GooString *stmBuf); + XRefStreamWriter(Object *index, GooString *stmBuf, int offsetSize); void startSection(int first, int count); void writeEntry(Goffset offset, int gen, XRefEntryType type); private: Object *index; GooString *stmBuf; + int offsetSize; + }; + + // Dummy XRefWriter subclass that only checks if all offsets fit in 4 bytes + class XRefPreScanWriter: public XRefWriter { + public: + XRefPreScanWriter(); + void startSection(int first, int count); + void writeEntry(Goffset offset, int gen, XRefEntryType type); + + GBool hasOffsetsBeyond4GB; }; void writeXRef(XRefWriter *writer, GBool writeAllEntries); -- 1.8.1.4
>From db1aef5129c3d0b52d7bacd555e123b2e1a3fdd1 Mon Sep 17 00:00:00 2001 From: Fabio D'Urso <[email protected]> Date: Thu, 25 Apr 2013 19:06:09 +0200 Subject: [PATCH 2/3] Re-enable commented-out printf-format attribute on OutStream::printf The answer to the question "2,3 because the first arg is class instance ?" is yes. It's documented behavior: Since non-static C++ methods have an implicit this argument, the arguments of such methods should be counted from two, not one. from http://gcc.gnu.org/onlinedocs/gcc-4.3.2/gcc/Function-Attributes.html --- poppler/Stream.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/poppler/Stream.h b/poppler/Stream.h index 0a178b4..7d932b0 100644 --- a/poppler/Stream.h +++ b/poppler/Stream.h @@ -21,7 +21,7 @@ // Copyright (C) 2010 Hib Eris <[email protected]> // Copyright (C) 2011, 2012 William Bader <[email protected]> // Copyright (C) 2012, 2013 Thomas Freitag <[email protected]> -// Copyright (C) 2012 Fabio D'Urso <[email protected]> +// Copyright (C) 2012, 2013 Fabio D'Urso <[email protected]> // Copyright (C) 2013 Adrian Johnson <[email protected]> // Copyright (C) 2013 Peter Breitenlohner <[email protected]> // Copyright (C) 2013 Adam Reichold <[email protected]> @@ -265,9 +265,7 @@ public: // Put a char in the stream virtual void put (char c) = 0; - //FIXME - // Printf-like function 2,3 because the first arg is class instance ? - virtual void printf (const char *format, ...) = 0 ; //__attribute__((format(printf, 2,3))) = 0; + virtual void printf (const char *format, ...) GCC_PRINTF_FORMAT(2,3) = 0; private: int ref; // reference count -- 1.8.1.4
>From 1a604ac4fb8fa95ae989785f0037ac1037ed33d5 Mon Sep 17 00:00:00 2001 From: Fabio D'Urso <[email protected]> Date: Thu, 25 Apr 2013 19:18:30 +0200 Subject: [PATCH 3/3] Fix printf format specifiers (Goffset is a long long, not a int) --- poppler/PDFDoc.cc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/poppler/PDFDoc.cc b/poppler/PDFDoc.cc index 9cfbc7a..c78d5ca 100644 --- a/poppler/PDFDoc.cc +++ b/poppler/PDFDoc.cc @@ -27,7 +27,7 @@ // Copyright (C) 2010 Srinivas Adicherla <[email protected]> // Copyright (C) 2010 Philip Lorenz <[email protected]> // Copyright (C) 2011-2013 Thomas Freitag <[email protected]> -// Copyright (C) 2012 Fabio D'Urso <[email protected]> +// Copyright (C) 2012, 2013 Fabio D'Urso <[email protected]> // Copyright (C) 2013 Adrian Johnson <[email protected]> // Copyright (C) 2013 Adam Reichold <[email protected]> // @@ -1355,7 +1355,7 @@ void PDFDoc::writeXRefTableTrailer(Dict *trailerDict, XRef *uxref, GBool writeAl outStr->printf( "trailer\r\n"); writeDictionnary(trailerDict, outStr, xRef, 0, NULL, cryptRC4, 0, 0, 0); outStr->printf( "\r\nstartxref\r\n"); - outStr->printf( "%i\r\n", uxrefOffset); + outStr->printf( "%lli\r\n", uxrefOffset); outStr->printf( "%%%%EOF\r\n"); } @@ -1376,7 +1376,7 @@ void PDFDoc::writeXRefStreamTrailer (Dict *trailerDict, XRef *uxref, Ref *uxrefS obj1.free(); outStr->printf( "startxref\r\n"); - outStr->printf( "%i\r\n", uxrefOffset); + outStr->printf( "%lli\r\n", uxrefOffset); outStr->printf( "%%%%EOF\r\n"); } -- 1.8.1.4
_______________________________________________ poppler mailing list [email protected] http://lists.freedesktop.org/mailman/listinfo/poppler
