On Tuesday, April 24, 2012 01:54:27 AM Leonard Rosenthol wrote: > On 4/23/12 11:37 PM, "Fabio D'Urso" <[email protected]> wrote: > >According to my tests, it seems to me that it's not possible to > >incrementally- > >update a document indexed by a XRef stream by appending a XRef table and > >making /Prev point to the previous XRef stream (which is what poppler > >currently does). > > As they say in Ghostbusters - "Don't cross the streams"! > > You are correct, you can't do that. If the original file uses Xref > streams, you need to append the same way. > > Leonard Many thanks :)
Attached are the patches and a semi-automated Qt4 program to test them. In patch 0002, I feel that writeStreamToBuffer should share code with writeTableToFile, but I can't think of a non-ugly way to do this (callbacks or mutually exclusive arguments both seem ugly to me). The patches fix both bug38761 and bug49080. However, bug49080's document has form problems when opened in acroread: the contents of a text field are only shown when the user clicks on it. Maybe it's like bug44506 (I haven't spent much time on it). Fabio
test programs.tar
Description: Unix tar archive
From fd247442cf24f2385defb3f2019fe32b1896bc78 Mon Sep 17 00:00:00 2001 From: Fabio D'Urso <[email protected]> Date: Tue, 24 Apr 2012 18:10:15 +0200 Subject: [PATCH 1/2] Refactoring of XRef table write support (in preparation for XRef stream write support) - Trailer dictionary creation now lives in its own function "createTrailerDict" (that will be used by XRef stream creation too) - writeXRefTableTrailer (WAS writeTrailer) now takes care of writing the XRef table too (previously it was demanded to the caller) --- poppler/PDFDoc.cc | 61 ++++++++++++++++++++++++++--------------------------- poppler/PDFDoc.h | 14 +++++++++-- poppler/XRef.cc | 2 +- poppler/XRef.h | 2 +- utils/pdfunite.cc | 10 +++++--- 5 files changed, 49 insertions(+), 40 deletions(-) diff --git a/poppler/PDFDoc.cc b/poppler/PDFDoc.cc index 898bcbb..9c977a6 100644 --- a/poppler/PDFDoc.cc +++ b/poppler/PDFDoc.cc @@ -716,12 +716,14 @@ int PDFDoc::savePageAs(GooString *name, int pageNo) page.free(); Guint uxrefOffset = outStr->getPos(); - yRef->writeToFile(outStr, gFalse /* do not write unnecessary entries */); - Ref ref; ref.num = rootNum; ref.gen = 0; - writeTrailer(uxrefOffset, objectsCount, outStr, gFalse, 0, &ref, getXRef(), name->getCString(), outStr->getPos()); + Dict *trailerDict = createTrailerDict(objectsCount, gFalse, 0, &ref, getXRef(), + name->getCString(), uxrefOffset); + writeXRefTableTrailer(trailerDict, yRef, gFalse /* do not write unnecessary entries */, + uxrefOffset, outStr, getXRef()); + delete trailerDict; outStr->close(); fclose(f); @@ -862,10 +864,8 @@ void PDFDoc::saveIncrementalUpdate (OutStream* outStr) } Guint uxrefOffset = outStr->getPos(); - uxref->writeToFile(outStr, gFalse /* do not write unnecessary entries */); - - writeTrailer(uxrefOffset, xref->getNumObjects(), outStr, gTrue); - + writeXRefTableTrailer(uxrefOffset, uxref, gFalse /* do not write unnecessary entries */, + xref->getNumObjects(), outStr, gTrue /* incremental update */); delete uxref; } @@ -902,13 +902,9 @@ 
void PDFDoc::saveCompleteRewrite (OutStream* outStr) } } Guint uxrefOffset = outStr->getPos(); - uxref->writeToFile(outStr, gTrue /* write all entries */); - - writeTrailer(uxrefOffset, uxref->getNumObjects(), outStr, gFalse); - - + writeXRefTableTrailer(uxrefOffset, uxref, gTrue /* write all entries */, + uxref->getNumObjects(), outStr, gFalse /* complete rewrite */); delete uxref; - } void PDFDoc::writeDictionnary (Dict* dict, OutStream* outStr, XRef *xRef, Guint numOffset) @@ -1109,10 +1105,8 @@ Guint PDFDoc::writeObject (Object* obj, Ref* ref, OutStream* outStr, XRef *xRef, return offset; } -void PDFDoc::writeTrailer(Guint uxrefOffset, int uxrefSize, - OutStream* outStr, GBool incrUpdate, - Guint startxRef, Ref *root, XRef *xRef, const char *fileName, - Guint fileSize) +Dict *PDFDoc::createTrailerDict(int uxrefSize, GBool incrUpdate, Guint startxRef, + Ref *root, XRef *xRef, const char *fileName, Guint fileSize) { Dict *trailerDict = new Dict(xRef); Object obj1; @@ -1120,7 +1114,6 @@ void PDFDoc::writeTrailer(Guint uxrefOffset, int uxrefSize, trailerDict->set("Size", &obj1); obj1.free(); - //build a new ID, as recommended in the reference, uses: // - current time // - file name @@ -1130,7 +1123,9 @@ void PDFDoc::writeTrailer(Guint uxrefOffset, int uxrefSize, char buffer[256]; sprintf(buffer, "%i", (int)time(NULL)); message.append(buffer); - message.append(fileName); + + if (fileName) + message.append(fileName); sprintf(buffer, "%i", fileSize); message.append(buffer); @@ -1162,7 +1157,7 @@ void PDFDoc::writeTrailer(Guint uxrefOffset, int uxrefSize, //only update the second part of the array xRef->getTrailerDict()->getDict()->lookup("ID", &obj4); if (!obj4.isArray()) { - error(errSyntaxWarning, -1, "PDFDoc::writeTrailer original file's ID entry isn't an array. Trying to continue"); + error(errSyntaxWarning, -1, "PDFDoc::createTrailerDict original file's ID entry isn't an array. 
Trying to continue"); } else { //Get the first part of the ID obj4.arrayGet(0,&obj3); @@ -1194,24 +1189,25 @@ void PDFDoc::writeTrailer(Guint uxrefOffset, int uxrefSize, trailerDict->set("Info", &obj5); } } - + + return trailerDict; +} + +void PDFDoc::writeXRefTableTrailer(Dict *trailerDict, XRef *uxref, GBool writeAllEntries, Guint uxrefOffset, OutStream* outStr, XRef *xRef) +{ + uxref->writeTableToFile( outStr, writeAllEntries ); outStr->printf( "trailer\r\n"); writeDictionnary(trailerDict, outStr, xRef, 0); outStr->printf( "\r\nstartxref\r\n"); outStr->printf( "%i\r\n", uxrefOffset); outStr->printf( "%%%%EOF\r\n"); - - delete trailerDict; } -void PDFDoc::writeTrailer(Guint uxrefOffset, int uxrefSize, OutStream* outStr, GBool incrUpdate) +void PDFDoc::writeXRefTableTrailer(Guint uxrefOffset, XRef *uxref, GBool writeAllEntries, + int uxrefSize, OutStream* outStr, GBool incrUpdate) { - const char *fileNameA; - if (fileName) - fileNameA = fileName->getCString(); - else - fileNameA = "streamwithoutfilename.pdf"; - // file size + const char *fileNameA = fileName ? 
fileName->getCString() : NULL; + // file size (doesn't include the trailer) unsigned int fileSize = 0; int c; str->reset(); @@ -1222,7 +1218,10 @@ void PDFDoc::writeTrailer(Guint uxrefOffset, int uxrefSize, OutStream* outStr, G Ref ref; ref.num = getXRef()->getRootNum(); ref.gen = getXRef()->getRootGen(); - writeTrailer(uxrefOffset, uxrefSize, outStr, incrUpdate, getStartXRef(), &ref, getXRef(), fileNameA, fileSize); + Dict * trailerDict = createTrailerDict(uxrefSize, incrUpdate, getStartXRef(), &ref, + getXRef(), fileNameA, fileSize); + writeXRefTableTrailer(trailerDict, uxref, writeAllEntries, uxrefOffset, outStr, getXRef()); + delete trailerDict; } void PDFDoc::writeHeader(OutStream *outStr, int major, int minor) diff --git a/poppler/PDFDoc.h b/poppler/PDFDoc.h index ccb1b22..468f698 100644 --- a/poppler/PDFDoc.h +++ b/poppler/PDFDoc.h @@ -23,6 +23,7 @@ // Copyright (C) 2010 Hib Eris <[email protected]> // Copyright (C) 2010 Srinivas Adicherla <[email protected]> // Copyright (C) 2011 Thomas Freitag <[email protected]> +// Copyright (C) 2012 Fabio D'Urso <[email protected]> // // To see a description of the changes please see the Changelog file that // came with your tarball or type make ChangeLog if you are building from git @@ -244,8 +245,14 @@ public: Guint writePageObjects(OutStream *outStr, XRef *xRef, Guint numOffset); static Guint writeObject (Object *obj, Ref *ref, OutStream* outStr, XRef *xref, Guint numOffset); static void writeHeader(OutStream *outStr, int major, int minor); - static void writeTrailer (Guint uxrefOffset, int uxrefSize, OutStream* outStr, GBool incrUpdate, - Guint startxRef, Ref *root, XRef *xRef, const char *fileName, Guint fileSize); + + // Ownership goes to the caller + static Dict *createTrailerDict (int uxrefSize, GBool incrUpdate, Guint startxRef, + Ref *root, XRef *xRef, const char *fileName, Guint fileSize); + static void writeXRefTableTrailer (Dict *trailerDict, XRef *uxref, GBool writeAllEntries, + Guint uxrefOffset, 
OutStream* outStr, XRef *xRef); + static void writeXRefStreamTrailer (Dict *trailerDict, XRef *uxref, Ref *uxrefStreamRef, + Guint uxrefOffset, OutStream* outStr, XRef *xRef); private: // insert referenced objects in XRef @@ -260,7 +267,8 @@ private: { writeDictionnary(dict, outStr, getXRef(), 0); } static void writeStream (Stream* str, OutStream* outStr); static void writeRawStream (Stream* str, OutStream* outStr); - void writeTrailer (Guint uxrefOffset, int uxrefSize, OutStream* outStr, GBool incrUpdate); + void writeXRefTableTrailer (Guint uxrefOffset, XRef *uxref, GBool writeAllEntries, + int uxrefSize, OutStream* outStr, GBool incrUpdate); static void writeString (GooString* s, OutStream* outStr); void saveIncrementalUpdate (OutStream* outStr); void saveCompleteRewrite (OutStream* outStr); diff --git a/poppler/XRef.cc b/poppler/XRef.cc index dfc6d73..3b0d7f8 100644 --- a/poppler/XRef.cc +++ b/poppler/XRef.cc @@ -1240,7 +1240,7 @@ void XRef::removeIndirectObject(Ref r) { e->updated = true; } -void XRef::writeToFile(OutStream* outStr, GBool writeAllEntries) { +void XRef::writeTableToFile(OutStream* outStr, GBool writeAllEntries) { //create free entries linked-list if (getEntry(0)->gen != 65535) { error(errInternal, -1, "XRef::writeToFile, entry 0 of the XRef is invalid (gen != 65535)\n"); diff --git a/poppler/XRef.h b/poppler/XRef.h index 4cba474..ab8047c 100644 --- a/poppler/XRef.h +++ b/poppler/XRef.h @@ -135,7 +135,7 @@ public: Ref addIndirectObject (Object* o); void removeIndirectObject(Ref r); void add(int num, int gen, Guint offs, GBool used); - void writeToFile(OutStream* outStr, GBool writeAllEntries); + void writeTableToFile(OutStream* outStr, GBool writeAllEntries); private: diff --git a/utils/pdfunite.cc b/utils/pdfunite.cc index 93850a4..212f89b 100644 --- a/utils/pdfunite.cc +++ b/utils/pdfunite.cc @@ -6,6 +6,7 @@ // // Copyright (C) 2011 Thomas Freitag <[email protected]> // Copyright (C) 2012 Arseny Solokha <[email protected]> +// Copyright (C) 
2012 Fabio D'Urso <[email protected]> // //======================================================================== #include <PDFDoc.h> @@ -161,13 +162,14 @@ int main (int argc, char *argv[]) objectsCount++; } Guint uxrefOffset = outStr->getPos(); - yRef->writeToFile(outStr, gFalse /* do not write unnecessary entries */ ); - Ref ref; ref.num = rootNum; ref.gen = 0; - PDFDoc::writeTrailer(uxrefOffset, objectsCount, outStr, (GBool) gFalse, 0, - &ref, yRef, fileName, outStr->getPos()); + Dict *trailerDict = PDFDoc::createTrailerDict(objectsCount, gFalse, 0, &ref, yRef, + fileName, outStr->getPos()); + PDFDoc::writeXRefTableTrailer(trailerDict, yRef, gFalse /* do not write unnecessary entries */, + uxrefOffset, outStr, yRef); + delete trailerDict; outStr->close(); fclose(f); -- 1.7.6.5
From ce5a6a15367d7588c7d246306cb1d8e3e8128fef Mon Sep 17 00:00:00 2001 From: Fabio D'Urso <[email protected]> Date: Tue, 24 Apr 2012 21:00:11 +0200 Subject: [PATCH 2/2] Output XRef stream when incrementally updating if there's already a XRef stream --- poppler/PDFDoc.cc | 45 ++++++++++++++++++++++++++++++++++++++++- poppler/XRef.cc | 56 +++++++++++++++++++++++++++++++++++++++++++++++++++++ poppler/XRef.h | 7 ++++++ 3 files changed, 106 insertions(+), 2 deletions(-) diff --git a/poppler/PDFDoc.cc b/poppler/PDFDoc.cc index 9c977a6..2dc1ac8 100644 --- a/poppler/PDFDoc.cc +++ b/poppler/PDFDoc.cc @@ -864,8 +864,30 @@ void PDFDoc::saveIncrementalUpdate (OutStream* outStr) } Guint uxrefOffset = outStr->getPos(); - writeXRefTableTrailer(uxrefOffset, uxref, gFalse /* do not write unnecessary entries */, - xref->getNumObjects(), outStr, gTrue /* incremental update */); + int numobjects = xref->getNumObjects(); + const char *fileNameA = fileName ? fileName->getCString() : NULL; + Ref rootRef, uxrefStreamRef; + rootRef.num = getXRef()->getRootNum(); + rootRef.gen = getXRef()->getRootGen(); + + // Output a xref stream if there is a xref stream already + GBool xRefStream = xref->isXRefStream(); + + if (xRefStream) { + // Append an entry for the xref stream itself + uxrefStreamRef.num = numobjects++; + uxrefStreamRef.gen = 0; + uxref->add(uxrefStreamRef.num, uxrefStreamRef.gen, uxrefOffset, gTrue); + } + + Dict *trailerDict = createTrailerDict(numobjects, gTrue, getStartXRef(), &rootRef, getXRef(), fileNameA, uxrefOffset); + if (xRefStream) { + writeXRefStreamTrailer(trailerDict, uxref, &uxrefStreamRef, uxrefOffset, outStr, getXRef()); + } else { + writeXRefTableTrailer(trailerDict, uxref, gFalse, uxrefOffset, outStr, getXRef()); + } + + delete trailerDict; delete uxref; } @@ -1203,6 +1225,25 @@ void PDFDoc::writeXRefTableTrailer(Dict *trailerDict, XRef *uxref, GBool writeAl outStr->printf( "%%%%EOF\r\n"); } +void PDFDoc::writeXRefStreamTrailer (Dict *trailerDict, XRef *uxref, 
Ref *uxrefStreamRef, Guint uxrefOffset, OutStream* outStr, XRef *xRef) +{ + GooString stmData; + + // Fill stmData and some trailerDict fields + uxref->writeStreamToBuffer(&stmData, trailerDict, xRef); + + // Create XRef stream object and write it + Object obj1; + MemStream *mStream = new MemStream( stmData.getCString(), 0, + stmData.getLength(), obj1.initDict(trailerDict) ); + writeObject(obj1.initStream(mStream), uxrefStreamRef, outStr, xRef, 0); + obj1.free(); + + outStr->printf( "startxref\r\n"); + outStr->printf( "%i\r\n", uxrefOffset); + outStr->printf( "%%%%EOF\r\n"); +} + void PDFDoc::writeXRefTableTrailer(Guint uxrefOffset, XRef *uxref, GBool writeAllEntries, int uxrefSize, OutStream* outStr, GBool incrUpdate) { diff --git a/poppler/XRef.cc b/poppler/XRef.cc index 3b0d7f8..9133287 100644 --- a/poppler/XRef.cc +++ b/poppler/XRef.cc @@ -1288,6 +1288,62 @@ void XRef::writeTableToFile(OutStream* outStr, GBool writeAllEntries) { } } +void XRef::writeStreamToBuffer(GooString *stmBuf, Dict *xrefDict, XRef *xref) { + //create free entries linked-list + if (getEntry(0)->gen != 65535) { + error(errInternal, -1, "XRef::outputXRefStm, entry 0 of the XRef is invalid (gen != 65535)\n"); + } + int lastFreeEntry = 0; + for (int i=0; i<size; i++) { + if (getEntry(i)->type == xrefEntryFree) { + getEntry(lastFreeEntry)->offset = i; + lastFreeEntry = i; + } + } + + Object index; + index.initArray(xref); + stmBuf->clear(); + int i = 0; + while (i < size) { + int j; + for(j=i; j<size; j++) { //look for consecutive entries + if ((getEntry(j)->type == xrefEntryFree) && (getEntry(j)->gen == 0)) + break; + } + if (j-i != 0) + { + Object obj; + index.arrayAdd( obj.initInt(i) ); + index.arrayAdd( obj.initInt(j-i) ); + for (int k=i; k<j; k++) { + XRefEntry *e = getEntry(k); + if(e->gen > 65535) e->gen = 65535; //cap generation number to 65535 (required by PDFReference) + char data[7]; + data[0] = (e->type==xrefEntryFree) ? 
0 : 1; + data[1] = (e->offset >> 24) & 0xff; + data[2] = (e->offset >> 16) & 0xff; + data[3] = (e->offset >> 8) & 0xff; + data[4] = e->offset & 0xff; + data[5] = (e->gen >> 8) & 0xff; + data[6] = e->gen & 0xff; + stmBuf->append(data, sizeof(data)); + } + i = j; + } + else ++i; + } + + Object obj1, obj2; + xrefDict->set("Type", obj1.initName("XRef")); + xrefDict->set("Index", &index); + obj2.initArray(xref); + obj2.arrayAdd( obj1.initInt(1) ); + obj2.arrayAdd( obj1.initInt(4) ); + obj2.arrayAdd( obj1.initInt(2) ); + xrefDict->set("W", &obj2); +} + GBool XRef::parseEntry(Guint offset, XRefEntry *entry) { GBool r; diff --git a/poppler/XRef.h b/poppler/XRef.h index ab8047c..0bd1502 100644 --- a/poppler/XRef.h +++ b/poppler/XRef.h @@ -79,6 +79,9 @@ public: // Is xref table valid? GBool isOk() { return ok; } + // Is the last XRef section a stream or a table? + GBool isXRefStream() { return xRefStream; } + // Get the error code (if isOk() returns false). int getErrorCode() { return errCode; } @@ -135,7 +138,11 @@ public: Ref addIndirectObject (Object* o); void removeIndirectObject(Ref r); void add(int num, int gen, Guint offs, GBool used); + + // Output XRef table to stream void writeTableToFile(OutStream* outStr, GBool writeAllEntries); + // Output XRef stream contents to GooString and fill trailerDict fields accordingly + void writeStreamToBuffer(GooString *stmBuf, Dict *xrefDict, XRef *xref); private: -- 1.7.6.5
_______________________________________________ poppler mailing list [email protected] http://lists.freedesktop.org/mailman/listinfo/poppler
