Hello,
The goal of the attached patch is to provide a way to dynamically update
some parts of a PDF and save the updated file. It is useful for things
like Forms and Annotations.

It basically does two things:
- It adds an XRef::setModifiedObject(Object* o, Ref r) function, which
allows the client to 'redirect' XRef::fetch for the specified reference to
the specified Object.
When you set an Object as 'modified' through this function, the next
call to XRef::fetch with the corresponding ref as parameter will return
your Object instead of re-reading it from the Stream.

- It adds two functions to PDFDoc: saveCompleteRewrite and
saveIncrementalUpdate. These functions allow the client to save the
modifications it makes (through setModifiedObject) either by rewriting the
whole document or by using an incremental update (this may be required for
digitally signed documents, for example).

There are a bunch of other internal functions added, mostly in PDFDoc,
but I think they are pretty self-explanatory.

One thing that isn't implemented at the moment is the update of direct
Objects. For example, Annotations may be direct Objects (directly
contained in the Page dict "Annots" entry). If the client updates a
direct Annotation, the whole first 'indirect-parent' Object (probably
the Page dict in our example) must be updated through setModifiedObject.
This is, at least, the only solution I see for direct Objects update,
but perhaps other people have other ideas.

I tested the saveCompleteRewrite function against some random PDF files
I have and it worked (all the rewritten files open without warnings in
both Evince and Acrobat 7.0), but I guess it has some bugs; send me an
email if you find one.


I think that's all. I am writing this email in the hope that I'll get
comments (good or bad), so feel free to answer :-)

Thanks,
Julien


Index: poppler/Dict.cc
===================================================================
RCS file: /cvs/poppler/poppler/poppler/Dict.cc,v
retrieving revision 1.4
diff -u -8 -p -B -r1.4 Dict.cc
--- poppler/Dict.cc	18 Jan 2006 22:32:13 -0000	1.4
+++ poppler/Dict.cc	18 Aug 2006 00:38:18 -0000
@@ -50,16 +50,47 @@ void Dict::add(const UGooString &key, Ob
     }
     entries = (DictEntry *)greallocn(entries, size, sizeof(DictEntry));
   }
   entries[length].key = new UGooString(key);
   entries[length].val = *val;
   ++length;
 }
 
+// Remove the entry named <key>, if present, releasing the key string and
+// the value held by the dictionary.  The last entry is moved into the
+// vacated slot, so indexes obtained before the call are invalidated.
+void Dict::remove(const UGooString &key) {
+  int i;
+
+  for (i = 0; i < length; ++i) {
+    if (!key.cmp(entries[i].key))
+      break;
+  }
+  if (i == length)  // not found (also covers an empty dictionary)
+    return;
+  // release the entry itself: dropping it without this leaks key and value
+  delete entries[i].key;
+  entries[i].val.free();
+  --length;
+  if (i != length)  // nothing to move when the last entry was removed
+    entries[i] = entries[length];
+}
+
+// Replace the value stored under <key>, or add the entry if it is missing.
+void Dict::set(const UGooString &key, Object *val) {
+  DictEntry *entry = find(key);
+  if (!entry) {
+    add(key, val);
+    return;
+  }
+  entry->val.free();
+  entry->val = *val;
+}
+
 inline DictEntry *Dict::find(const UGooString &key) {
   int i;
 
   for (i = 0; i < length; ++i) {
     if (!key.cmp(entries[i].key))
       return &entries[i];
   }
   return NULL;
Index: poppler/Dict.h
===================================================================
RCS file: /cvs/poppler/poppler/poppler/Dict.h,v
retrieving revision 1.3
diff -u -8 -p -B -r1.3 Dict.h
--- poppler/Dict.h	18 Jan 2006 22:32:13 -0000	1.3
+++ poppler/Dict.h	18 Aug 2006 00:38:18 -0000
@@ -38,16 +38,20 @@ public:
   int incRef() { return ++ref; }
   int decRef() { return --ref; }
 
   // Get number of entries.
   int getLength() { return length; }
 
   // Add an entry
   void add(const UGooString &key, Object *val);
+  // Update the value of an existing entry, otherwise create it
+  void set(const UGooString &key, Object *val);
+  // Remove an entry. This invalidates indexes
+  void remove(const UGooString &key);
 
   // Check if dictionary is of specified type.
   GBool is(char *type);
 
   // Look up an entry and return the value.  Returns a null object
   // if <key> is not in the dictionary.
   Object *lookup(const UGooString &key, Object *obj);
   Object *lookupNF(const UGooString &key, Object *obj);
Index: poppler/Object.h
===================================================================
RCS file: /cvs/poppler/poppler/poppler/Object.h,v
retrieving revision 1.2
diff -u -8 -p -B -r1.2 Object.h
--- poppler/Object.h	18 Jan 2006 22:32:13 -0000	1.2
+++ poppler/Object.h	18 Aug 2006 00:38:18 -0000
@@ -160,16 +160,17 @@ public:
   int arrayGetLength();
   void arrayAdd(Object *elem);
   Object *arrayGet(int i, Object *obj);
   Object *arrayGetNF(int i, Object *obj);
 
   // Dict accessors.
   int dictGetLength();
   void dictAdd(const UGooString &key, Object *val);
+  void dictSet(const UGooString &key, Object *val);
   GBool dictIs(char *dictType);
   Object *dictLookup(const UGooString &key, Object *obj);
   Object *dictLookupNF(const UGooString &key, Object *obj);
   UGooString *dictGetKey(int i);
   Object *dictGetVal(int i, Object *obj);
   Object *dictGetValNF(int i, Object *obj);
 
   // Stream accessors.
@@ -237,16 +238,19 @@ inline Object *Object::arrayGetNF(int i,
 #include "Dict.h"
 
 inline int Object::dictGetLength()
   { return dict->getLength(); }
 
 inline void Object::dictAdd(const UGooString &key, Object *val)
   { dict->add(key, val); }
 
+inline void Object::dictSet(const UGooString &key, Object *val)
+	{ dict->set(key, val); }  // forwards to set() on the underlying dictionary
+
 inline GBool Object::dictIs(char *dictType)
   { return dict->is(dictType); }
 
 inline GBool Object::isDict(char *dictType)
   { return type == objDict && dictIs(dictType); }
 
 inline Object *Object::dictLookup(const UGooString &key, Object *obj)
   { return dict->lookup(key, obj); }
Index: poppler/PDFDoc.cc
===================================================================
RCS file: /cvs/poppler/poppler/poppler/PDFDoc.cc,v
retrieving revision 1.10
diff -u -8 -p -B -r1.10 PDFDoc.cc
--- poppler/PDFDoc.cc	18 Jan 2006 22:32:13 -0000	1.10
+++ poppler/PDFDoc.cc	18 Aug 2006 00:38:19 -0000
@@ -456,31 +456,290 @@ GBool PDFDoc::isLinearized() {
   obj4.free();
   obj3.free();
   obj2.free();
   obj1.free();
   delete parser;
   return lin;
 }
 
-GBool PDFDoc::saveAs(GooString *name) {
+GBool PDFDoc::saveAs(GooString *name, PDFWriteMode mode) {
   FILE *f;
-  int c;
 
   if (!(f = fopen(name->getCString(), "wb"))) {
     error(-1, "Couldn't open file '%s'", name->getCString());
     return gFalse;
   }
+  // write to <f> as selected by <mode>; gFalse is returned only when the open above fails
+  if (mode == writeForceRewrite) {
+    saveCompleteRewrite(f);
+  } else if (mode == writeForceIncremental) {
+    saveIncrementalUpdate(f); 
+  } else { // let poppler decide
+    // find if we have updated objects
+    GBool updated = gFalse;
+    for(int i=0; i<xref->getNumObjects(); i++) {
+      if (xref->getEntry(i)->obj) { // non-NULL obj: replaced via XRef::setModifiedObject()
+        updated = gTrue;
+        break;
+      }
+    }
+    if(updated) { // a modified document is rewritten completely
+      saveCompleteRewrite(f);
+    } else {
+    // simply copy the original file
+      int c;
+  	  str->reset();
+      while ((c = str->getChar()) != EOF) {
+       fputc(c, f);
+      }
+      str->close();
+    }
+  }
+    
+  fclose(f);
+  return gTrue;
+}
+
+void PDFDoc::saveIncrementalUpdate (FILE *f)
+{
+  // Append the modified objects, a new xref section and a trailer to a copy of the file.
+  int c;
+  //copy the original file
   str->reset();
   while ((c = str->getChar()) != EOF) {
     fputc(c, f);
   }
   str->close();
-  fclose(f);
-  return gTrue;
+
+  XRef *uxref = new XRef();
+  uxref->add(0, 65535, 0, gFalse);
+  const int numObjects = xref->getNumObjects(); //object numbers are 0..numObjects-1
+  for(int i=0; i<numObjects; i++) {
+   if ((xref->getEntry(i)->type == xrefEntryFree) &&
+       (xref->getEntry(i)->gen == 0)) //we skip the irrelevant free objects
+           continue;
+   //only entries replaced through XRef::setModifiedObject() are written
+   if (xref->getEntry(i)->obj) { //we have an updated object
+    Object obj1;
+    Ref ref;
+    ref.num = i;
+    ref.gen = xref->getEntry(i)->gen;
+    xref->fetch(ref.num, ref.gen, &obj1);
+    Guint offset = writeObject(&obj1, &ref, f);
+    uxref->add(ref.num, ref.gen, offset, gTrue);
+    obj1.free();
+   }
+  }
+  if (uxref->getSize() <= 1) { //only the initial entry 0: we have nothing to update
+    delete uxref;
+    return;
+  }
+
+  Guint uxrefOffset = ftell(f);
+  uxref->writeToFile(f);
+  //Size covers every object number of the file, not just the entries counted here
+  writeTrailer(uxrefOffset, numObjects, f, gTrue);
+
+  delete uxref;
+}
+
+void PDFDoc::saveCompleteRewrite (FILE *f)
+{
+  // Rewrite the whole document: header, every in-use object (fetched through
+  // the xref, so replaced objects are picked up), a new xref table, a trailer.
+  fprintf(f, "%%PDF-%.1f\r\n",pdfVersion);
+  XRef *uxref = new XRef();
+  uxref->add(0, 65535, 0, gFalse);
+
+  const int numObjects = xref->getNumObjects();
+  for(int i=0; i<numObjects; i++) {
+    XRefEntry *e = xref->getEntry(i);
+    Ref ref;
+    ref.num = i;
+    if (e->type == xrefEntryFree) {
+      ref.gen = e->gen;
+      // the XRef class adds a lot of irrelevant free entries, we only want the significant ones
+      // and we don't want the one with num=0 because it has already been added (gen = 65535)
+      if (ref.gen > 0 && ref.num > 0)
+        uxref->add(ref.num, ref.gen, 0, gFalse);
+      continue;
+    }
+    if (e->type != xrefEntryUncompressed && e->type != xrefEntryCompressed)
+      continue;
+    //compressed entries have gen == 0
+    ref.gen = (e->type == xrefEntryCompressed) ? 0 : e->gen;
+    Object obj1;
+    xref->fetch(ref.num, ref.gen, &obj1);
+    Guint offset = writeObject(&obj1, &ref, f);
+    uxref->add(ref.num, ref.gen, offset, gTrue);
+    obj1.free();
+  }
+  Guint uxrefOffset = ftell(f);
+  uxref->writeToFile(f);
+
+  // /Size must exceed the highest object number written.  uxref->getSize()
+  // only counts the entries kept above, which is too small whenever free
+  // entries were skipped, so use the bound of the object numbers instead.
+  writeTrailer(uxrefOffset, numObjects, f, gFalse);
+
+  delete uxref;
+}
+
+// Write <dict> to <f> as "<< /Key value ... >>", one entry per line.
+void PDFDoc::writeDictionnary (Dict* dict, FILE *f)
+{
+  fputs("<< ", f);
+  for (int idx = 0; idx < dict->getLength(); ++idx) {
+    Object value;
+    fprintf(f,"/%s ", dict->getKey(idx)->getCString());
+    writeObject(dict->getValNF(idx, &value), NULL, f);
+    fputs("\r\n", f);
+    value.free();
+  }
+  fputs(">>\r\n", f);
+}
+
+// Write <str>, read from its start to EOF, between "stream" and "endstream".
+void PDFDoc::writeStream (Stream* str, FILE *f)
+{
+  fprintf(f,"stream\r\n");
+  str->reset();
+  for (int c=str->getChar(); c!= EOF; c=str->getChar()) {
+    fputc(c, f);  // plain byte copy, no format parsing needed
+  }
+  fprintf(f,"\r\nendstream\r\n");
+}
+
+// Write <s> as a PDF hexadecimal string "<...> ".  Bytes go through unsigned
+// char so values >= 0x80 are not sign-extended by "%02x", and the length is
+// taken from getLength() so embedded NUL bytes do not truncate the output.
+void PDFDoc::writeString (GooString* s, FILE* f)
+{
+  const char* c = s->getCString();
+  fprintf(f, "<");
+  for (int i = 0; i < s->getLength(); i++)
+    fprintf(f, "%02x", (unsigned char)c[i]);
+  fprintf(f, "> ");
+}
+
+// Serialize <obj> to <f> in PDF syntax and return the offset at which its
+// output starts.  With a non-NULL <ref> the object is wrapped as an indirect
+// object ("num gen obj" ... "endobj"); with NULL it is written inline, as
+// done for array elements and dictionary values.  Streams are written
+// decoded: /Length is recomputed and /Filter and /DecodeParms are removed
+// from the stream's dictionary, which is modified in place.
+Guint PDFDoc::writeObject (Object* obj, Ref* ref, FILE *f)
+{
+  Array *array;
+  Stream *stream;
+  Object obj1;
+  Guint offset = ftell(f);
+  int length;
+
+  if(ref)
+    fprintf(f,"%i %i obj\r\n", ref->num, ref->gen);
+
+  switch (obj->getType()) {
+    case objBool:
+      fprintf(f,"%s ", obj->getBool()?"true":"false");
+      break;
+    case objInt:
+      fprintf(f,"%i ", obj->getInt());
+      break;
+    case objReal:
+      fprintf(f,"%f ", obj->getReal());
+      break;
+    case objString:
+      writeString(obj->getString(), f);
+      break;
+    case objName:
+      fprintf(f,"/%s ", obj->getName());
+      break;
+    case objNull:
+      fprintf(f, "null\r\n");
+      break;
+    case objArray:
+      array = obj->getArray();
+      fprintf(f,"[");
+      for (int i=0; i<array->getLength(); i++) {
+        writeObject(array->getNF(i, &obj1), NULL,f);
+        obj1.free();
+      }
+      fprintf(f,"]");
+      break;
+    case objDict:
+      writeDictionnary (obj->getDict(),f);
+      break;
+    case objStream:
+      //we write the stream unencoded => TODO: write stream encoder
+      stream = obj->getStream();
+      stream->reset();
+      //recalculate stream length by counting the bytes read from the stream
+      length = 0;
+      for (int c=stream->getChar(); c!=EOF; c=stream->getChar()) {
+        length++;
+      }
+      obj1.initInt(length);
+      stream->getDict()->set("Length", &obj1);
+
+      //Remove Stream encoding
+      stream->getDict()->remove("Filter");
+      stream->getDict()->remove("DecodeParms");
+
+      writeDictionnary (stream->getDict(),f);
+      writeStream (stream,f);
+      obj1.free();
+      break;
+    case objRef:
+      fprintf(f,"%i %i R ", obj->getRef().num, obj->getRef().gen);
+      break;
+    case objCmd:
+    case objError:
+    case objEOF:
+    case objNone:
+      // nothing sensible to write: report it (not on stdout) and write null
+      error(-1, "PDFDoc::writeObject: unexpected object type %i written as null", obj->getType());
+      fprintf(f,"null\r\n");
+      break;
+    default:
+      error(-1,"Unhandled objType : %i, please report a bug with a testcase\r\n", obj->getType());
+      break;
+  }
+  if (ref)
+    fprintf(f,"endobj\r\n\r\n");
+  return offset;
+}
+
+void PDFDoc::writeTrailer (Guint uxrefOffset, int uxrefSize, FILE* f, GBool incrUpdate)
+{
+  // Build a fresh trailer (Size, ID, Root and, for incremental updates, Prev),
+  // write it to <f>, then emit "startxref", the xref offset and the EOF marker.
+  Dict *trailerDict = new Dict(xref);
+  Object obj1;
+  obj1.initInt(uxrefSize);
+  trailerDict->set("Size", &obj1);
+  // lookup() yields a null object, never NULL, for a missing key: test the type
+  if (xref->getTrailerDict()->getDict()->lookup("ID", &obj1)->isArray()) {
+    trailerDict->set("ID", &obj1); // set() copies obj1 shallowly: do not free it
+  } else {
+    obj1.free();
+  }
+  obj1.initRef(xref->getRootNum(), xref->getRootGen());
+  trailerDict->set("Root", &obj1);
+  if (incrUpdate) { // chain to the previous xref section
+    obj1.initInt(xref->getLastXRefPos());
+    trailerDict->set("Prev", &obj1);
+  }
+  fprintf(f, "trailer\r\n");
+  writeDictionnary(trailerDict, f);
+  fprintf(f, "startxref\r\n");
+  fprintf(f, "%u\r\n", uxrefOffset); // Guint is unsigned
+  fprintf(f, "%%%%EOF\r\n");
+  delete trailerDict; // the temporary dictionary was never released before
 }
 
 void PDFDoc::getLinks(Page *page) {
   Object obj;
 
   links = new Links(page->getAnnots(&obj), catalog->getBaseURI());
   obj.free();
 }
Index: poppler/PDFDoc.h
===================================================================
RCS file: /cvs/poppler/poppler/poppler/PDFDoc.h,v
retrieving revision 1.7
diff -u -8 -p -B -r1.7 PDFDoc.h
--- poppler/PDFDoc.h	18 Jan 2006 22:32:13 -0000	1.7
+++ poppler/PDFDoc.h	18 Aug 2006 00:38:19 -0000
@@ -22,16 +22,22 @@
 class GooString;
 class BaseStream;
 class OutputDev;
 class Links;
 class LinkAction;
 class LinkDest;
 class Outline;
 
+enum PDFWriteMode {
+  writeStandard,          // saveAs decides: full rewrite if objects were modified, else plain copy
+  writeForceRewrite,      // always rewrite the whole document
+  writeForceIncremental   // always append an incremental update to a copy of the file
+};
+
 //------------------------------------------------------------------------
 // PDFDoc
 //------------------------------------------------------------------------
 
 class PDFDoc {
 public:
 
   PDFDoc(GooString *fileNameA, GooString *ownerPassword = NULL,
@@ -158,23 +164,31 @@ public:
 
   // Return the document's Info dictionary (if any).
   Object *getDocInfo(Object *obj) { return xref->getDocInfo(obj); }
   Object *getDocInfoNF(Object *obj) { return xref->getDocInfoNF(obj); }
 
   // Return the PDF version specified by the file.
   double getPDFVersion() { return pdfVersion; }
 
-  // Save this file with another name.
-  GBool saveAs(GooString *name);
+  // Save this file with another name
+  GBool saveAs(GooString *name, PDFWriteMode mode=writeStandard);
 
   // Return a pointer to the GUI (XPDFCore or WinPDFCore object).
   void *getGUIData() { return guiData; }
 
 private:
+  // Add object to current file stream and return the offset of the beginning of the object
+  Guint writeObject (Object *obj, Ref *ref, FILE* f);
+  void writeDictionnary (Dict* dict, FILE* f);
+  void writeStream (Stream* str, FILE* f);
+  void writeTrailer (Guint uxrefOffset, int uxrefSize, FILE* f, GBool incrUpdate);
+  void writeString (GooString* s, FILE* f);
+  void saveIncrementalUpdate (FILE* f);
+  void saveCompleteRewrite (FILE* f);
 
   GBool setup(GooString *ownerPassword, GooString *userPassword);
   GBool checkFooter();
   void checkHeader();
   GBool checkEncryption(GooString *ownerPassword, GooString *userPassword);
   void getLinks(Page *page);
 
   GooString *fileName;
Index: poppler/XRef.cc
===================================================================
RCS file: /cvs/poppler/poppler/poppler/XRef.cc,v
retrieving revision 1.13
diff -u -8 -p -B -r1.13 XRef.cc
--- poppler/XRef.cc	1 Apr 2006 11:25:57 -0000	1.13
+++ poppler/XRef.cc	18 Aug 2006 00:38:20 -0000
@@ -196,16 +196,26 @@ Object *ObjectStream::getObject(int objI
   }
   return objs[objIdx].copy(obj);
 }
 
 //------------------------------------------------------------------------
 // XRef
 //------------------------------------------------------------------------
 
+XRef::XRef() {  // empty table for PDF writing; entries are appended with add()
+  ok = gTrue;
+  errCode = errNone;
+  str = NULL;                     // no input stream behind this table
+  start = 0;
+  entries = NULL; size = 0;
+  rootNum = rootGen = -1;         // no catalog known
+  streamEnds = NULL; streamEndsLen = 0; objStr = NULL;
+}
+
 XRef::XRef(BaseStream *strA) {
   Guint pos;
   Object obj;
 
   ok = gTrue;
   errCode = errNone;
   size = 0;
   entries = NULL;
@@ -259,17 +269,22 @@ XRef::XRef(BaseStream *strA) {
   }
 
   // now set the trailer dictionary's xref pointer so we can fetch
   // indirect objects from it
   trailerDict.getDict()->setXRef(this);
 }
 
 XRef::~XRef() {
+  // release the replacement objects installed by setModifiedObject()
+  for(int i=0; i<size; i++) {
+    Object *o = entries[i].obj;
+    if (o) { o->free(); delete o; } // free() first: delete alone keeps the contents alive
+  }
   gfree(entries);
   trailerDict.free();
   if (streamEnds) {
     gfree(streamEnds);
   }
   if (objStr) {
     delete objStr;
   }
 }
@@ -392,29 +407,31 @@ GBool XRef::readXRefTable(Parser *parser
         error(-1, "Invalid 'obj' parameters'");
         goto err1;
       }
  
       entries = (XRefEntry *)greallocn(entries, newSize, sizeof(XRefEntry));
       for (i = size; i < newSize; ++i) {
 	entries[i].offset = 0xffffffff;
 	entries[i].type = xrefEntryFree;
+	entries[i].obj = NULL;
       }
       size = newSize;
     }
     for (i = first; i < first + n; ++i) {
       if (!parser->getObj(&obj)->isInt()) {
 	goto err1;
       }
       entry.offset = (Guint)obj.getInt();
       obj.free();
       if (!parser->getObj(&obj)->isInt()) {
 	goto err1;
       }
       entry.gen = obj.getInt();
+      entry.obj = NULL;
       obj.free();
       parser->getObj(&obj);
       if (obj.isCmd("n")) {
 	entry.type = xrefEntryUncompressed;
       } else if (obj.isCmd("f")) {
 	entry.type = xrefEntryFree;
       } else {
 	goto err1;
@@ -502,16 +519,17 @@ GBool XRef::readXRefStream(Stream *xrefS
     if (newSize * (int)sizeof(XRefEntry)/sizeof(XRefEntry) != newSize) {
       error(-1, "Invalid 'size' parameter.");
       return gFalse;
     }
     entries = (XRefEntry *)greallocn(entries, newSize, sizeof(XRefEntry));
     for (i = size; i < newSize; ++i) {
       entries[i].offset = 0xffffffff;
       entries[i].type = xrefEntryFree;
+      entries[i].obj = NULL;
     }
     size = newSize;
   }
 
   if (!dict->lookupNF("W", &obj)->isArray() ||
       obj.arrayGetLength() < 3) {
     goto err1;
   }
@@ -596,16 +614,17 @@ GBool XRef::readXRefStreamSection(Stream
     if (newSize*(int)sizeof(XRefEntry)/sizeof(XRefEntry) != newSize) {
       error(-1, "Invalid 'size' inside xref table.");
       return gFalse;
     }
     entries = (XRefEntry *)greallocn(entries, newSize, sizeof(XRefEntry));
     for (i = size; i < newSize; ++i) {
       entries[i].offset = 0xffffffff;
       entries[i].type = xrefEntryFree;
+      entries[i].obj = NULL;
     }
     size = newSize;
   }
   for (i = first; i < first + n; ++i) {
     if (w[0] == 0) {
       type = 1;
     } else {
       for (type = 0, j = 0; j < w[0]; ++j) {
@@ -736,16 +755,17 @@ GBool XRef::constructXRef() {
                     error(-1, "Invalid 'obj' parameters.");
                     return gFalse;
                   }
 		  entries = (XRefEntry *)
 		      greallocn(entries, newSize, sizeof(XRefEntry));
 		  for (i = size; i < newSize; ++i) {
 		    entries[i].offset = 0xffffffff;
 		    entries[i].type = xrefEntryFree;
+		    entries[i].obj = NULL;
 		  }
 		  size = newSize;
 		}
 		if (entries[num].type == xrefEntryFree ||
 		    gen >= entries[num].gen) {
 		  entries[num].offset = pos - start;
 		  entries[num].gen = gen;
 		  entries[num].type = xrefEntryUncompressed;
@@ -845,16 +865,20 @@ Object *XRef::fetch(int num, int gen, Ob
   Object obj1, obj2, obj3;
 
   // check for bogus ref - this can happen in corrupted PDF files
   if (num < 0 || num >= size) {
     goto err;
   }
 
   e = &entries[num];
+  if(e->obj) { //check for updated object
+    obj = e->obj->copy(obj);
+    return obj;
+  }
   switch (e->type) {
 
   case xrefEntryUncompressed:
     if (e->gen != gen) {
       goto err;
     }
     obj1.initNull();
     parser = new Parser(this,
@@ -962,8 +986,71 @@ Guint XRef::strToUnsigned(char *s) {
   int i;
 
   x = 0;
   for (p = s, i = 0; *p && isdigit(*p) && i < 10; ++p, ++i) {
     x = 10 * x + (*p - '0');
   }
   return x;
 }
+
+// Append one entry for object <num>, generation <gen>, at the end of the
+// table: an in-use entry located at <offs> when <used> is true, otherwise
+// a free entry whose offset field is 0.
+void XRef::add(int num, int gen, Guint offs, GBool used) {
+  const int idx = size++;
+  // grow the array by exactly one slot
+  entries = (XRefEntry *)greallocn(entries, size, sizeof(XRefEntry));
+
+  XRefEntry &slot = entries[idx];
+  slot.num = num;
+  slot.gen = gen;
+  slot.obj = NULL;
+  // the caller's offset is ignored for free entries
+  slot.type = used ? xrefEntryUncompressed : xrefEntryFree;
+  slot.offset = used ? offs : 0;
+}
+
+void XRef::setModifiedObject (Object* o, Ref r) {
+  if (r.num < 0 || r.num >= size) {
+    error(-1,"XRef::setModifiedObject on unknown ref: %i, %i\n", r.num, r.gen);
+    return;
+  }
+  Object *prev = entries[r.num].obj;          // replacement from an earlier call, if any
+  entries[r.num].obj = o->copy(new Object()); // fetch() returns this copy from now on
+  if (prev) { prev->free(); delete prev; }    // released only after the copy; it used to leak
+}
+// qsort() callback ordering XRefEntry records by ascending object number;
+// static, so that this generic name stays local to the file.
+static int compare (const void* a, const void* b) {
+  return ((const XRefEntry*)a)->num - ((const XRefEntry*)b)->num;
+}
+
+// Write this table to <file> in "xref" table syntax.  Entries are sorted by
+// object number, the free entries are chained into the free list, and each
+// run of consecutive object numbers is emitted as one subsection.
+void XRef::writeToFile(FILE* file) {
+  if (size == 0) {  // entries[0] is read below and does not exist
+    error(-1, "XRef::writeToFile, the XRef is empty\n");
+    return;
+  }
+  qsort(entries, size, sizeof(XRefEntry), compare);
+  if (entries[0].gen != 65535)
+    error(-1, "XRef::writeToFile, entry 0 of the XRef is invalid (gen != 65535)\n");
+  //create free entries linked-list: a free entry stores the next free object number
+  int lastFreeEntry = 0;
+  for (int i=0; i<size; i++) {
+    if (entries[i].type == xrefEntryFree) {
+      entries[lastFreeEntry].offset = entries[i].num;
+      lastFreeEntry = i;
+    }
+  }
+  fprintf(file,"xref\r\n");
+  for (int i=0, j; i<size; i=j) {
+    for (j=i+1; j<size && entries[j].num == entries[j-1].num+1; j++) { } //extend over consecutive numbers
+    fprintf(file,"%i %i\r\n", entries[i].num, j-i); //subsection header: first number, count
+    for (int k=i; k<j; k++) {
+      if(entries[k].gen > 65535) entries[k].gen = 65535; //cap generation number to 65535 (required by PDFReference)
+      fprintf(file,"%010u %05i %c\r\n", entries[k].offset, entries[k].gen, (entries[k].type==xrefEntryFree)?'f':'n');
+    }
+  }
+}
+
Index: poppler/XRef.h
===================================================================
RCS file: /cvs/poppler/poppler/poppler/XRef.h,v
retrieving revision 1.6
diff -u -8 -p -B -r1.6 XRef.h
--- poppler/XRef.h	17 Jan 2006 21:35:31 -0000	1.6
+++ poppler/XRef.h	18 Aug 2006 00:38:21 -0000
@@ -29,22 +29,26 @@ enum XRefEntryType {
   xrefEntryFree,
   xrefEntryUncompressed,
   xrefEntryCompressed
 };
 
 struct XRefEntry {
   Guint offset;
   int gen;
+  int num;      // object number; filled in by XRef::add() and used as sort key by writeToFile()
   XRefEntryType type;
+  Object* obj;  // replacement set by XRef::setModifiedObject(); NULL when the entry is unmodified
 };
 
 class XRef {
 public:
 
+  // Constructor, create an empty XRef, used for PDF writing
+  XRef();
   // Constructor.  Read xref table from stream.
   XRef(BaseStream *strA);
 
   // Destructor.
   ~XRef();
 
   // Is xref table valid?
   GBool isOk() { return ok; }
@@ -97,16 +101,21 @@ public:
   // Retuns the entry that belongs to the offset
   int getNumEntry(int offset) const;
 
   // Direct access.
   int getSize() { return size; }
   XRefEntry *getEntry(int i) { return &entries[i]; }
   Object *getTrailerDict() { return &trailerDict; }
 
+  // Write access
+  void setModifiedObject(Object* o, Ref r);
+  void add(int num, int gen,  Guint offs, GBool used);
+  void writeToFile(FILE* f);
+
 private:
 
   BaseStream *str;		// input stream
   Guint start;			// offset in file (to allow for garbage
 				//   at beginning of file)
   XRefEntry *entries;		// xref entries
   int size;			// size of <entries> array
   int rootNum, rootGen;		// catalog dict
Index: test/Makefile.am
===================================================================
RCS file: /cvs/poppler/poppler/test/Makefile.am,v
retrieving revision 1.7
diff -u -8 -p -B -r1.7 Makefile.am
--- test/Makefile.am	12 Apr 2006 02:07:07 -0000	1.7
+++ test/Makefile.am	18 Aug 2006 00:38:21 -0000
@@ -21,26 +21,29 @@ pdf_inspector =				\
 cairo_includes =				\
 	$(CAIRO_CFLAGS)				\
 	$(FREETYPE_CFLAGS)
 
 endif
 
 endif
 
+pdf_modification_test =				\
+	pdf-modification-test
+
 
 INCLUDES =					\
 	-I$(top_srcdir)				\
 	-I$(top_srcdir)/poppler			\
 	-I$(top_srcdir)/glib			\
 	-I$(top_builddir)/glib			\
 	$(cairo_includes)			\
 	$(GTK_TEST_CFLAGS)			
 
-noinst_PROGRAMS = $(gtk_splash_test) $(gtk_cairo_test) $(pdf_inspector)
+noinst_PROGRAMS = $(gtk_splash_test) $(gtk_cairo_test) $(pdf_inspector) $(pdf_modification_test)
 
 gtk_splash_test_SOURCES =			\
        gtk-splash-test.cc
 
 gtk_splash_test_LDADD =				\
 	$(top_builddir)/poppler/libpoppler.la	\
 	$(GTK_TEST_LIBS)
 
@@ -58,10 +61,17 @@ pdf_inspector_SOURCES =			\
 
 pdf_inspector_LDADD =					\
 	$(top_builddir)/poppler/libpoppler.la		\
 	$(top_builddir)/poppler/libpoppler-cairo.la	\
 	$(CAIRO_LIBS)					\
 	$(FREETYPE_LIBS)				\
 	$(GTK_TEST_LIBS)
 
+pdf_modification_test_SOURCES =			\
+	pdf-modification-test.cc
+
+pdf_modification_test_LDADD =			\
+	$(top_builddir)/poppler/libpoppler.la   
+
+
 EXTRA_DIST =					\
 	pdf-operators.c
Index: test/pdf-modification-test.cc
===================================================================
RCS file: test/pdf-modification-test.cc
diff -N test/pdf-modification-test.cc
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ test/pdf-modification-test.cc	18 Aug 2006 00:38:22 -0000
@@ -0,0 +1,84 @@
+//========================================================================
+//
+// pdf-modification-test.cc
+//
+// Copyright 2006 Julien Rebetez
+//
+//========================================================================
+
+#include "config.h"
+#include <poppler-config.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include "goo/GooString.h"
+#include "goo/gmem.h"
+#include "GlobalParams.h"
+#include "Object.h"
+#include "Stream.h"
+#include "Array.h"
+#include "Dict.h"
+#include "XRef.h"
+#include "Catalog.h"
+#include "Page.h"
+#include "PDFDoc.h"
+#include "TextOutputDev.h"
+#include "CharTypes.h"
+#include "UnicodeMap.h"
+#include "Error.h"
+#include "UGooString.h"
+
+void modifyCatalogLang(PDFDoc *doc, GooString* lang);
+
+int main(int argc, char **argv)
+{
+  PDFDoc *doc;
+  GooString *inputFileName;
+  GooString *newLang;
+  GooString *outputs[3];
+
+  // all three arguments are required: argv[3] is read unconditionally below
+  if (argc != 4) {
+    fprintf(stderr, "pdf-modification-test\n");
+    fprintf(stderr, "usage: <PDF-input-file> <PDF-output-dir> <Lang>\n");
+    return 1;
+  }
+
+  inputFileName = new GooString(argv[1]);
+  newLang = new GooString(argv[3]);
+  outputs[0] = new GooString(argv[2]); outputs[0]->append("/no_update.pdf");
+  outputs[1] = new GooString(argv[2]); outputs[1]->append("/update_full_rewrite.pdf");
+  outputs[2] = new GooString(argv[2]); outputs[2]->append("/update_incremental.pdf");
+  doc = new PDFDoc(inputFileName, NULL, NULL);
+  doc->saveAs(outputs[0], writeForceRewrite);
+  modifyCatalogLang(doc, newLang);
+  doc->saveAs(outputs[1], writeForceRewrite);
+  doc->saveAs(outputs[2], writeForceIncremental);
+
+  delete newLang;
+  for(int i=0; i<3; i++) delete outputs[i];
+  delete doc;
+  return 0;
+}
+
+// Set the catalog's /Lang entry to a copy of <lang> and register the changed catalog with the xref.
+void modifyCatalogLang(PDFDoc* doc, GooString* lang)
+{
+  XRef *xref = doc->getXRef();
+  Object catalog, o, val;
+
+  if (!xref->getCatalog(&catalog)->isDict()) { // checked before any dict access
+    catalog.free();
+    return;
+  }
+  if (catalog.dictLookup("Lang", &o)->isString())
+    printf("input Lang: %s\n", o.getString()->getCString());
+  o.free();
+  val.initString(lang->copy()); // a copy: the caller deletes <lang> itself
+  printf("output Lang: %s\n", val.getString()->getCString());
+  catalog.dictSet("Lang", &val); // the dictionary now holds val's string
+  Ref newRef = { xref->getRootNum(), xref->getRootGen() };
+  xref->setModifiedObject(&catalog, newRef);
+  catalog.free(); // setModifiedObject() stored its own copy
+}
+
+
_______________________________________________
poppler mailing list
[email protected]
http://lists.freedesktop.org/mailman/listinfo/poppler

Reply via email to