pdfmerge.cc

Albert Astals Cid Mon, 29 Aug 2011 13:22:40 -0700

 poppler/PDFDoc.cc    |  389 +++++++++++++++++++++++++++++++++++++++++++++------
 poppler/PDFDoc.h     |   30 +++
 utils/CMakeLists.txt |   15 +
 utils/Makefile.am    |   15 +
 utils/pdfextract.cc  |  111 ++++++++++++++
 utils/pdfmerge.cc    |  176 +++++++++++++++++++++++
 6 files changed, 690 insertions(+), 46 deletions(-)


New commits:
commit 1431564f3363a63a8669c8dd15970db814f4969f
Author: Thomas Freitag <[email protected]>
Date:   Mon Aug 29 22:22:02 2011 +0200

    Add pdfextract and pdfmerge
    
    See "Creating PDF with poppler ?" thread for more info

diff --git a/utils/CMakeLists.txt b/utils/CMakeLists.txt
index beeedb8..90030cd 100644
--- a/utils/CMakeLists.txt
+++ b/utils/CMakeLists.txt
@@ -102,3 +102,18 @@ target_link_libraries(pdftohtml ${common_libs})
 install(TARGETS pdftohtml DESTINATION bin)
 install(FILES pdftohtml.1 DESTINATION share/man/man1)
 
+# pdfextract
+set(pdfextract_SOURCES ${common_srcs}
+  pdfextract.cc
+)
+add_executable(pdfextract ${pdfextract_SOURCES})
+target_link_libraries(pdfextract ${common_libs})
+install(TARGETS pdfextract DESTINATION bin)
+
+# pdfmerge
+set(pdfmerge_SOURCES ${common_srcs}
+  pdfmerge.cc
+)
+add_executable(pdfmerge ${pdfmerge_SOURCES})
+target_link_libraries(pdfmerge ${common_libs})
+install(TARGETS pdfmerge DESTINATION bin)
diff --git a/utils/Makefile.am b/utils/Makefile.am
index 4faddad..30328f2 100644
--- a/utils/Makefile.am
+++ b/utils/Makefile.am
@@ -50,6 +50,8 @@ bin_PROGRAMS =                                        \
        pdftops                                 \
        pdftotext                               \
        pdftohtml                               \
+       pdfextract                              \
+       pdfmerge                                \
        $(pdftoppm_binary)                      \
        $(pdftocairo_binary)
 
@@ -102,6 +104,14 @@ pdftohtml_SOURCES =                                \
        HtmlUtils.h                             \
        $(common)
 
+pdfextract_SOURCES =                           \
+       pdfextract.cc                           \
+       $(common)
+
+pdfmerge_SOURCES =                             \
+       pdfmerge.cc                             \
+       $(common)
+
 # Yay, automake!  It should be able to figure out that it has to dist
 # pdftoppm.1, but nooo.  So we just add it here.
 
diff --git a/utils/pdfextract.cc b/utils/pdfextract.cc
new file mode 100644
index 0000000..c8c4749
--- /dev/null
+++ b/utils/pdfextract.cc
@@ -0,0 +1,111 @@
+//========================================================================
+//
+// pdfextract.cc
+//
+// This file is licensed under the GPLv2 or later
+//
+// Copyright (C) 2011 Thomas Freitag <[email protected]>
+//
+//========================================================================
+#include "config.h"
+#include <poppler-config.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stddef.h>
+#include <string.h>
+#include "parseargs.h"
+#include "goo/GooString.h"
+#include "PDFDoc.h"
+#include "ErrorCodes.h"
+
+static int firstPage = 0;
+static int lastPage = 0;
+static GBool printVersion = gFalse;
+static GBool printHelp = gFalse;
+
+static const ArgDesc argDesc[] = {
+  {"-f", argInt, &firstPage, 0,
+   "first page to extract"},
+  {"-l", argInt, &lastPage, 0,
+   "last page to extract"},
+  {"-v", argFlag, &printVersion, 0,
+   "print copyright and version info"},
+  {"-h", argFlag, &printHelp, 0,
+   "print usage information"},
+  {"-help", argFlag, &printHelp, 0,
+   "print usage information"},
+  {"--help", argFlag, &printHelp, 0,
+   "print usage information"},
+  {"-?", argFlag, &printHelp, 0,
+   "print usage information"},
+  {NULL}
+};
+
+bool extractPages (const char *srcFileName, const char *destFileName) {
+  char pathName[1024];
+  GooString *gfileName = new GooString (srcFileName);
+  PDFDoc *doc = new PDFDoc (gfileName, NULL, NULL, NULL);
+
+  if (!doc->isOk()) {
+    error(-1, "Could not extract page(s) from damaged file ('%s')", srcFileName);
+    return false;
+  }
+  if (doc->isEncrypted()) {
+    error(-1, "Could not extract page(s) from encrypted file ('%s')", srcFileName);
+    return false;
+  }
+
+  if (firstPage == 0 && lastPage == 0) {
+    firstPage = 1;
+    lastPage = doc->getNumPages();
+  }
+  if (lastPage == 0)
+    lastPage = doc->getNumPages();
+  if (firstPage == 0)
+    firstPage = 1;
+  for (int pageNo = firstPage; pageNo <= lastPage; pageNo++) {
+    sprintf (pathName, destFileName, pageNo);
+    GooString *gpageName = new GooString (pathName);
+    int errCode = doc->savePageAs(gpageName, pageNo);
+    if ( errCode != errNone) {
+      delete gpageName;
+      delete gfileName;
+      return false;
+    }
+    delete gpageName;
+  }
+  delete gfileName;
+  return true;
+}
+
+int
+main (int argc, char *argv[])
+{
+  Object info;
+  GBool ok;
+  int exitCode;
+
+  exitCode = 99;
+
+  // parse args
+  ok = parseArgs (argDesc, &argc, argv);
+  if (!ok || argc != 3 || printVersion || printHelp)
+    {
+      fprintf (stderr, "pdfextract version %s\n", PACKAGE_VERSION);
+      fprintf (stderr, "%s\n", popplerCopyright);
+      fprintf (stderr, "%s\n", xpdfCopyright);
+      if (!printVersion)
+       {
+         printUsage ("pdfextract", "<PDF-sourcefile> <PDF-pattern-destfile>",
+                     argDesc);
+       }
+      if (printVersion || printHelp)
+       exitCode = 0;
+      goto err0;
+    }
+  extractPages (argv[1], argv[2]);
+
+err0:
+
+  return exitCode;
+}
diff --git a/utils/pdfmerge.cc b/utils/pdfmerge.cc
new file mode 100644
index 0000000..28f7265
--- /dev/null
+++ b/utils/pdfmerge.cc
@@ -0,0 +1,176 @@
+//========================================================================
+//
+// pdfmerge.cc
+//
+// This file is licensed under the GPLv2 or later
+//
+// Copyright (C) 2011 Thomas Freitag <[email protected]>
+//
+//========================================================================
+#include <PDFDoc.h>
+#include "parseargs.h"
+#include "config.h"
+#include <poppler-config.h>
+#include <vector>
+
+static GBool printVersion = gFalse;
+static GBool printHelp = gFalse;
+
+static const ArgDesc argDesc[] = {
+  {"-v", argFlag, &printVersion, 0,
+   "print copyright and version info"},
+  {"-h", argFlag, &printHelp, 0,
+   "print usage information"},
+  {"-help", argFlag, &printHelp, 0,
+   "print usage information"},
+  {"--help", argFlag, &printHelp, 0,
+   "print usage information"},
+  {"-?", argFlag, &printHelp, 0,
+   "print usage information"},
+  {NULL}
+};
+
+///////////////////////////////////////////////////////////////////////////
+int main (int argc, char *argv[])
+///////////////////////////////////////////////////////////////////////////
+// Merge PDF files given by arguments 1 to argc-2 and write the result
+// to the file specified by argument argc-1.
+///////////////////////////////////////////////////////////////////////////
+{
+  int objectsCount = 0;
+  Guint numOffset = 0;
+  std::vector<Object> pages;
+  std::vector<Guint> offsets;
+  XRef *yRef, *countRef;
+  FILE *f;
+  OutStream *outStr;
+  int i;
+  int j, rootNum;
+  std::vector<PDFDoc *>docs;
+  int majorVersion = 0;
+  int minorVersion = 0;
+  char *fileName = argv[argc - 1];
+  int exitCode;
+
+  exitCode = 99;
+  if (argc <= 3 || printVersion || printHelp) {
+    fprintf(stderr, "pdfmerge version %s\n", PACKAGE_VERSION);
+    fprintf(stderr, "%s\n", popplerCopyright);
+    fprintf(stderr, "%s\n", xpdfCopyright);
+    if (!printVersion) {
+      printUsage("pdfmerge", "<PDF-sourcefile-1>..<PDF-sourcefile-n> <PDF-destfile>",
+       argDesc);
+    }
+    if (printVersion || printHelp)
+      exitCode = 0;
+    return exitCode;
+  }
+  exitCode = 0;
+
+  for (i = 1; i < argc - 1; i++) {
+    GooString *gfileName = new GooString(argv[i]);
+    PDFDoc *doc = new PDFDoc(gfileName, NULL, NULL, NULL);
+    if (doc->isOk() && !doc->isEncrypted()) {
+      docs.push_back(doc);
+      if (doc->getPDFMajorVersion() > majorVersion) {
+        majorVersion = doc->getPDFMajorVersion();
+        minorVersion = doc->getPDFMinorVersion();
+      } else if (doc->getPDFMajorVersion() == majorVersion) {
+        if (doc->getPDFMinorVersion() > minorVersion) {
+          minorVersion = doc->getPDFMinorVersion();
+        }
+      }
+    } else if (doc->isOk()) {
+      error(-1, "Could not merge encrypted files ('%s')", argv[i]);
+      return -1;
+    } else {
+      error(-1, "Could not merge damaged documents ('%s')", argv[i]);
+      return -1;
+    }
+  }
+
+  if (!(f = fopen(fileName, "wb"))) {
+    error(-1, "Could not open file '%s'", fileName);
+    return -1;
+  }
+  outStr = new FileOutStream(f, 0);
+
+  yRef = new XRef();
+  countRef = new XRef();
+  yRef->add(0, 65535, 0, gFalse);
+  PDFDoc::writeHeader(outStr, majorVersion, minorVersion);
+
+  for (i = 0; i < (int) docs.size(); i++) {
+    for (j = 1; j <= docs[i]->getNumPages(); j++) {
+      PDFRectangle *cropBox = NULL;
+      if (docs[i]->getCatalog()->getPage(j)->isCropped())
+        cropBox = docs[i]->getCatalog()->getPage(j)->getCropBox();
+      docs[i]->replacePageDict(j,
+           docs[i]->getCatalog()->getPage(j)->getRotate(),
+           docs[i]->getCatalog()->getPage(j)->getMediaBox(), cropBox, NULL);
+      Ref *refPage = docs[i]->getCatalog()->getPageRef(j);
+      Object page;
+      docs[i]->getXRef()->fetch(refPage->num, refPage->gen, &page);
+      pages.push_back(page);
+      offsets.push_back(numOffset);
+      Dict *pageDict = page.getDict();
+      docs[i]->markPageObjects(pageDict, yRef, countRef, numOffset);
+    }
+    objectsCount += docs[i]->writePageObjects(outStr, yRef, numOffset);
+    numOffset = yRef->getNumObjects() + 1;
+  }
+
+  rootNum = yRef->getNumObjects() + 1;
+  yRef->add(rootNum, 0, outStr->getPos(), gTrue);
+  outStr->printf("%d 0 obj\n", rootNum);
+  outStr->printf("<< /Type /Catalog /Pages %d 0 R", rootNum + 1);
+  outStr->printf(">>\nendobj\n");
+  objectsCount++;
+
+  yRef->add(rootNum + 1, 0, outStr->getPos(), gTrue);
+  outStr->printf("%d 0 obj\n", rootNum + 1);
+  outStr->printf("<< /Type /Pages /Kids [");
+  for (j = 0; j < (int) pages.size(); j++)
+    outStr->printf(" %d 0 R", rootNum + j + 2);
+  outStr->printf(" ] /Count %d >>\nendobj\n", pages.size());
+  objectsCount++;
+
+  for (i = 0; i < (int) pages.size(); i++) {
+    yRef->add(rootNum + i + 2, 0, outStr->getPos(), gTrue);
+    outStr->printf("%d 0 obj\n", rootNum + i + 2);
+    outStr->printf("<< ");
+    Dict *pageDict = pages[i].getDict();
+    for (j = 0; j < pageDict->getLength(); j++) {
+      if (j > 0)
+       outStr->printf(" ");
+      const char *key = pageDict->getKey(j);
+      Object value;
+      pageDict->getValNF(j, &value);
+      if (strcmp(key, "Parent") == 0) {
+        outStr->printf("/Parent %d 0 R", rootNum + 1);
+      } else {
+        outStr->printf("/%s ", key);
+        PDFDoc::writeObject(&value, NULL, outStr, yRef, offsets[i]);
+      }
+      value.free();
+    }
+    outStr->printf(" >>\nendobj\n");
+    objectsCount++;
+  }
+  Guint uxrefOffset = outStr->getPos();
+  yRef->writeToFile(outStr, gFalse /* do not write unnecessary entries */ );
+
+  Ref ref;
+  ref.num = rootNum;
+  ref.gen = 0;
+  PDFDoc::writeTrailer(uxrefOffset, objectsCount, outStr, (GBool) gFalse, 0,
+       &ref, yRef, fileName, outStr->getPos());
+
+  outStr->close();
+  fclose(f);
+  delete yRef;
+  delete countRef;
+  for (j = 0; j < (int) pages.size (); j++) pages[j].free();
+  for (i = 0; i < (int) docs.size (); i++) delete docs[i];
+  return exitCode;
+}
commit 8ca2f41089bc6402baf9b24428af04314c037b54
Author: Thomas Freitag <[email protected]>
Date:   Mon Aug 29 22:20:52 2011 +0200

    Rework writing of PDF files
    
    Makes it more compatible with other PDF readers
    See "Creating PDF with poppler ?" thread in the mailing list for more info

diff --git a/poppler/PDFDoc.cc b/poppler/PDFDoc.cc
index aa52140..01d2759 100644
--- a/poppler/PDFDoc.cc
+++ b/poppler/PDFDoc.cc
@@ -26,6 +26,7 @@
 // Copyright (C) 2010 Ilya Gorenbein <[email protected]>
 // Copyright (C) 2010 Srinivas Adicherla <[email protected]>
 // Copyright (C) 2010 Philip Lorenz <[email protected]>
+// Copyright (C) 2011 Thomas Freitag <[email protected]>
 //
 // To see a description of the changes please see the Changelog file that
 // came with your tarball or type make ChangeLog if you are building from git
@@ -573,6 +574,121 @@ Hints *PDFDoc::getHints()
   return hints;
 }
 
+int PDFDoc::savePageAs(GooString *name, int pageNo) 
+{
+  FILE *f;
+  OutStream *outStr;
+  XRef *yRef, *countRef;
+  int rootNum = getXRef()->getSize() + 1;
+
+  if (pageNo < 1 || pageNo > getNumPages()) {
+    error(-1, "Illegal pageNo: %d(%d)", pageNo, getNumPages() );
+    return errOpenFile;
+  }
+  PDFRectangle *cropBox = NULL;
+  if (getCatalog()->getPage(pageNo)->isCropped()) {
+    cropBox = getCatalog()->getPage(pageNo)->getCropBox();
+  }
+  replacePageDict(pageNo, 
+    getCatalog()->getPage(pageNo)->getRotate(),
+    getCatalog()->getPage(pageNo)->getMediaBox(),
+    cropBox, NULL);
+  Ref *refPage = getCatalog()->getPageRef(pageNo);
+  Object page;
+  getXRef()->fetch(refPage->num, refPage->gen, &page);
+
+  if (!(f = fopen(name->getCString(), "wb"))) {
+    error(-1, "Couldn't open file '%s'", name->getCString());
+    return errOpenFile;
+  }
+  outStr = new FileOutStream(f,0);
+
+  yRef = new XRef();
+  countRef = new XRef();
+  yRef->add(0, 65535, 0, gFalse);
+  writeHeader(outStr, getPDFMajorVersion(), getPDFMinorVersion());
+
+  // get and mark optional content groups
+  OCGs *ocgs = getCatalog()->getOptContentConfig();
+  if (ocgs != NULL) {
+    Object catDict, optContentProps;
+    getXRef()->getCatalog(&catDict);
+    catDict.dictLookup("OCProperties", &optContentProps);
+    Dict *pageDict = optContentProps.getDict();
+    markPageObjects(pageDict, yRef, countRef, 0);
+    catDict.free();
+    optContentProps.free();
+  }
+
+  Dict *pageDict = page.getDict();
+  markPageObjects(pageDict, yRef, countRef, 0);
+  Guint objectsCount = writePageObjects(outStr, yRef, 0);
+
+  yRef->add(rootNum,0,outStr->getPos(),gTrue);
+  outStr->printf("%d 0 obj\n", rootNum);
+  outStr->printf("<< /Type /Catalog /Pages %d 0 R", rootNum + 1); 
+  if (ocgs != NULL) {
+    Object catDict, optContentProps;
+    getXRef()->getCatalog(&catDict);
+    catDict.dictLookup("OCProperties", &optContentProps);
+    outStr->printf(" /OCProperties <<");
+    Dict *pageDict = optContentProps.getDict();
+    for (int n = 0; n < pageDict->getLength(); n++) {
+      if (n > 0) outStr->printf(" ");
+      const char *key = pageDict->getKey(n);
+      Object value; pageDict->getValNF(n, &value);
+      outStr->printf("/%s ", key);
+      writeObject(&value, NULL, outStr, getXRef(), 0);
+      value.free();
+    }
+    outStr->printf(" >> ");
+    catDict.free();
+    optContentProps.free();
+  }
+  outStr->printf(">>\nendobj\n");
+  objectsCount++;
+
+  yRef->add(rootNum + 1,0,outStr->getPos(),gTrue);
+  outStr->printf("%d 0 obj\n", rootNum + 1);
+  outStr->printf("<< /Type /Pages /Kids [ %d 0 R ] /Count 1 >>\n", rootNum + 2);
+  outStr->printf("endobj\n");
+  objectsCount++;
+
+  yRef->add(rootNum + 2,0,outStr->getPos(),gTrue);
+  outStr->printf("%d 0 obj\n", rootNum + 2);
+  outStr->printf("<< ");
+  for (int n = 0; n < pageDict->getLength(); n++) {
+    if (n > 0) outStr->printf(" ");
+    const char *key = pageDict->getKey(n);
+    Object value; pageDict->getValNF(n, &value);
+    if (strcmp(key, "Parent") == 0) {
+      outStr->printf("/Parent %d 0 R", rootNum + 1);
+    } else {
+      outStr->printf("/%s ", key);
+      writeObject(&value, NULL, outStr, getXRef(), 0); 
+    }
+    value.free();
+  }
+  outStr->printf(" >>\nendobj\n");
+  objectsCount++;
+  page.free();
+
+  Guint uxrefOffset = outStr->getPos();
+  yRef->writeToFile(outStr, gFalse /* do not write unnecessary entries */);
+
+  Ref ref;
+  ref.num = rootNum;
+  ref.gen = 0;
+  writeTrailer(uxrefOffset, objectsCount, outStr, gFalse, 0, &ref, getXRef(), name->getCString(), outStr->getPos());
+
+  outStr->close();
+  fclose(f);
+  delete yRef;
+  delete countRef;
+
+  return errNone;
+}
+
 int PDFDoc::saveAs(GooString *name, PDFWriteMode mode) {
   FILE *f;
   OutStream *outStr;
@@ -740,7 +856,7 @@ void PDFDoc::saveCompleteRewrite (OutStream* outStr)
 
 }
 
-void PDFDoc::writeDictionnary (Dict* dict, OutStream* outStr)
+void PDFDoc::writeDictionnary (Dict* dict, OutStream* outStr, XRef *xRef, Guint numOffset)
 {
   Object obj1;
   outStr->printf("<<");
@@ -749,7 +865,7 @@ void PDFDoc::writeDictionnary (Dict* dict, OutStream* outStr)
     GooString *keyNameToPrint = keyName.sanitizedName(gFalse /* non ps mode */);
     outStr->printf("/%s ", keyNameToPrint->getCString());
     delete keyNameToPrint;
-    writeObject(dict->getValNF(i, &obj1), NULL, outStr);
+    writeObject(dict->getValNF(i, &obj1), NULL, outStr, xRef, numOffset);
     obj1.free();
   }
   outStr->printf(">> ");
@@ -805,18 +921,24 @@ void PDFDoc::writeString (GooString* s, OutStream* outStr)
     const char* c = s->getCString();
     outStr->printf("(");
     for(int i=0; i<s->getLength(); i++) {
-      char unescaped = (*c)&0x000000ff;
+      char unescaped = *(c+i)&0x000000ff;
       //escape if needed
-      if (unescaped == '(' || unescaped == ')' || unescaped == '\\')
-        outStr->printf("%c", '\\');
-      outStr->printf("%c", unescaped);
-      c++;
+      if (unescaped == '\r')
+        outStr->printf("\\r");
+      else if (unescaped == '\n')
+        outStr->printf("\\n");
+      else {
+        if (unescaped == '(' || unescaped == ')' || unescaped == '\\') {
+          outStr->printf("%c", '\\');
+        }
+        outStr->printf("%c", unescaped);
+      }
     }
     outStr->printf(") ");
   }
 }
 
-Guint PDFDoc::writeObject (Object* obj, Ref* ref, OutStream* outStr)
+Guint PDFDoc::writeObject (Object* obj, Ref* ref, OutStream* outStr, XRef *xRef, Guint numOffset)
 {
   Array *array;
   Object obj1;
@@ -858,13 +980,13 @@ Guint PDFDoc::writeObject (Object* obj, Ref* ref, OutStream* outStr)
       array = obj->getArray();
       outStr->printf("[");
       for (int i=0; i<array->getLength(); i++) {
-        writeObject(array->getNF(i, &obj1), NULL,outStr);
+        writeObject(array->getNF(i, &obj1), NULL,outStr, xRef, numOffset);
         obj1.free();
       }
       outStr->printf("] ");
       break;
     case objDict:
-      writeDictionnary (obj->getDict(),outStr);
+      writeDictionnary (obj->getDict(),outStr, xRef, numOffset);
       break;
     case objStream: 
       {
@@ -886,7 +1008,7 @@ Guint PDFDoc::writeObject (Object* obj, Ref* ref, OutStream* outStr)
           stream->getDict()->remove("Filter");
           stream->getDict()->remove("DecodeParms");
 
-          writeDictionnary (stream->getDict(),outStr);
+          writeDictionnary (stream->getDict(),outStr, xRef, numOffset);
           writeStream (stream,outStr);
           obj1.free();
         } else {
@@ -896,23 +1018,23 @@ Guint PDFDoc::writeObject (Object* obj, Ref* ref, OutStream* outStr)
             BaseStream *bs = fs->getBaseStream();
             if (bs) {
               Guint streamEnd;
-                if (xref->getStreamEnd(bs->getStart(), &streamEnd)) {
+                if (xRef->getStreamEnd(bs->getStart(), &streamEnd)) {
                   Object val;
                   val.initInt(streamEnd - bs->getStart());
                   stream->getDict()->set("Length", &val);
                 }
               }
           }
-          writeDictionnary (stream->getDict(), outStr);
+          writeDictionnary (stream->getDict(), outStr, xRef, numOffset);
           writeRawStream (stream, outStr);
         }
         break;
       }
     case objRef:
-      outStr->printf("%i %i R ", obj->getRef().num, obj->getRef().gen);
+      outStr->printf("%i %i R ", obj->getRef().num + numOffset, obj->getRef().gen);
       break;
     case objCmd:
-      outStr->printf("cmd\r\n");
+      outStr->printf("%s\n", obj->getCmd());
       break;
     case objError:
       outStr->printf("error\r\n");
@@ -932,9 +1054,12 @@ Guint PDFDoc::writeObject (Object* obj, Ref* ref, OutStream* outStr)
   return offset;
 }
 
-void PDFDoc::writeTrailer (Guint uxrefOffset, int uxrefSize, OutStream* outStr, GBool incrUpdate)
+void PDFDoc::writeTrailer(Guint uxrefOffset, int uxrefSize, 
+                         OutStream* outStr, GBool incrUpdate,
+                         Guint startxRef, Ref *root, XRef *xRef, const char *fileName,
+                         Guint fileSize)
 {
-  Dict *trailerDict = new Dict(xref);
+  Dict *trailerDict = new Dict(xRef);
   Object obj1;
   obj1.initInt(uxrefSize);
   trailerDict->set("Size", &obj1);
@@ -950,23 +1075,13 @@ void PDFDoc::writeTrailer (Guint uxrefOffset, int uxrefSize, OutStream* outStr,
   char buffer[256];
   sprintf(buffer, "%i", (int)time(NULL));
   message.append(buffer);
-  if (fileName)
-    message.append(fileName);
-  else
-    message.append("streamwithoutfilename.pdf");
-  // file size
-  unsigned int fileSize = 0;
-  int c;
-  str->reset();
-  while ((c = str->getChar()) != EOF) {
-    fileSize++;
-  }
-  str->close();
+  message.append(fileName);
+
   sprintf(buffer, "%i", fileSize);
   message.append(buffer);
 
   //info dict -- only use text string
-  if (xref->getDocInfo(&obj1)->isDict()) {
+  if (!xRef->getTrailerDict()->isNone() && xRef->getDocInfo(&obj1)->isDict()) {
     for(int i=0; i<obj1.getDict()->getLength(); i++) {
       Object obj2;
       obj1.getDict()->getVal(i, &obj2);  
@@ -985,12 +1100,12 @@ void PDFDoc::writeTrailer (Guint uxrefOffset, int uxrefSize, OutStream* outStr,
 
   //create ID array
   Object obj2,obj3,obj5;
-  obj2.initArray(xref);
+  obj2.initArray(xRef);
 
   if (incrUpdate) {
     Object obj4;
     //only update the second part of the array
-    xref->getTrailerDict()->getDict()->lookup("ID", &obj4);
+    xRef->getTrailerDict()->getDict()->lookup("ID", &obj4);
     if (!obj4.isArray()) {
       error(-1, "PDFDoc::writeTrailer original file's ID entry isn't an array. Trying to continue");
     } else {
@@ -1010,22 +1125,23 @@ void PDFDoc::writeTrailer (Guint uxrefOffset, int uxrefSize, OutStream* outStr,
     trailerDict->set("ID", &obj2);
   }
 
-
-  obj1.initRef(xref->getRootNum(), xref->getRootGen());
+  obj1.initRef(root->num, root->gen);
   trailerDict->set("Root", &obj1);
 
   if (incrUpdate) { 
-    obj1.initInt(getStartXRef());
+    obj1.initInt(startxRef);
     trailerDict->set("Prev", &obj1);
   }
   
-  xref->getDocInfoNF(&obj5);
-  if (!obj5.isNull()) {
-    trailerDict->set("Info", &obj5);
+  if (!xRef->getTrailerDict()->isNone()) {
+    xRef->getDocInfoNF(&obj5);
+    if (!obj5.isNull()) {
+      trailerDict->set("Info", &obj5);
+    }
   }
   
   outStr->printf( "trailer\r\n");
-  writeDictionnary(trailerDict, outStr);
+  writeDictionnary(trailerDict, outStr, xRef, 0);
   outStr->printf( "\r\nstartxref\r\n");
   outStr->printf( "%i\r\n", uxrefOffset);
   outStr->printf( "%%%%EOF\r\n");
@@ -1033,6 +1149,201 @@ void PDFDoc::writeTrailer (Guint uxrefOffset, int uxrefSize, OutStream* outStr,
   delete trailerDict;
 }
 
+void PDFDoc::writeTrailer(Guint uxrefOffset, int uxrefSize, OutStream* outStr, GBool incrUpdate)
+{
+  char *fileNameA;
+  if (fileName)
+    fileNameA = fileName->getCString();
+  else
+    fileNameA = "streamwithoutfilename.pdf";
+  // file size
+  unsigned int fileSize = 0;
+  int c;
+  str->reset();
+  while ((c = str->getChar()) != EOF) {
+    fileSize++;
+  }
+  str->close();
+  Ref ref;
+  ref.num = getXRef()->getRootNum();
+  ref.gen = getXRef()->getRootGen();
+  writeTrailer(uxrefOffset, uxrefSize, outStr, incrUpdate, getStartXRef(), &ref, getXRef(), fileNameA, fileSize);
+}
+
+void PDFDoc::writeHeader(OutStream *outStr, int major, int minor)
+{
+   outStr->printf("%%PDF-%d.%d\n", major, minor);
+   outStr->printf("%%\xE2\xE3\xCF\xD3\n");
+}
+
+void PDFDoc::markDictionnary (Dict* dict, XRef * xRef, XRef *countRef, Guint numOffset)
+{
+  Object obj1;
+  for (int i=0; i<dict->getLength(); i++) {
+    markObject(dict->getValNF(i, &obj1), xRef, countRef, numOffset);
+    obj1.free();
+  }
+}
+
+void PDFDoc::markObject (Object* obj, XRef *xRef, XRef *countRef, Guint numOffset)
+{
+  Array *array;
+  Object obj1;
+
+  switch (obj->getType()) {
+    case objArray:
+      array = obj->getArray();
+      for (int i=0; i<array->getLength(); i++) {
+        markObject(array->getNF(i, &obj1), xRef, countRef, numOffset);
+        obj1.free();
+      }
+      break;
+    case objDict:
+      markDictionnary (obj->getDict(), xRef, countRef, numOffset);
+      break;
+    case objStream: 
+      {
+        Stream *stream = obj->getStream();
+        markDictionnary (stream->getDict(), xRef, countRef, numOffset);
+      }
+      break;
+    case objRef:
+      {
+        if (obj->getRef().num + (int) numOffset >= xRef->getNumObjects() || xRef->getEntry(obj->getRef().num + numOffset)->type == xrefEntryFree) {
+          if (getXRef()->getEntry(obj->getRef().num)->type == xrefEntryFree) {
+            return;  // already marked as free => should be replaced
+          }
+          xRef->add(obj->getRef().num + numOffset, obj->getRef().gen, 0, gTrue);
+          if (getXRef()->getEntry(obj->getRef().num)->type == xrefEntryCompressed) {
+            xRef->getEntry(obj->getRef().num + numOffset)->type = xrefEntryCompressed;
+          }
+        }
+        if (obj->getRef().num + (int) numOffset >= countRef->getNumObjects() || 
+            countRef->getEntry(obj->getRef().num + numOffset)->type == xrefEntryFree)
+        {
+          countRef->add(obj->getRef().num + numOffset, 1, 0, gTrue);
+        } else {
+          XRefEntry *entry = countRef->getEntry(obj->getRef().num + numOffset);
+          entry->gen++;
+        } 
+        Object obj1;
+        getXRef()->fetch(obj->getRef().num, obj->getRef().gen, &obj1);
+        markObject(&obj1, xRef, countRef, numOffset);
+        obj1.free();
+      }
+      break;
+    default:
+      break;
+  }
+}
+
+void PDFDoc::replacePageDict(int pageNo, int rotate,
+                             PDFRectangle *mediaBox, 
+                             PDFRectangle *cropBox, Object *pageCTM)
+{
+  Ref *refPage = getCatalog()->getPageRef(pageNo);
+  Object page;
+  getXRef()->fetch(refPage->num, refPage->gen, &page);
+  Dict *pageDict = page.getDict();
+  pageDict->remove("MediaBox");
+  pageDict->remove("CropBox");
+  pageDict->remove("ArtBox");
+  pageDict->remove("BleedBox");
+  pageDict->remove("TrimBox");
+  pageDict->remove("Rotate");
+  Object *mediaBoxObj = new Object();
+  mediaBoxObj->initArray(getXRef());
+  Object *murx = new Object();
+  murx->initReal(mediaBox->x1);
+  Object *mury = new Object();
+  mury->initReal(mediaBox->y1);
+  Object *mllx = new Object();
+  mllx->initReal(mediaBox->x2);
+  Object *mlly = new Object();
+  mlly->initReal(mediaBox->y2);
+  mediaBoxObj->arrayAdd(murx);
+  mediaBoxObj->arrayAdd(mury);
+  mediaBoxObj->arrayAdd(mllx);
+  mediaBoxObj->arrayAdd(mlly);
+  pageDict->add(copyString("MediaBox"), mediaBoxObj);
+  if (cropBox != NULL) {
+    Object *cropBoxObj = new Object();
+    cropBoxObj->initArray(getXRef());
+    Object *curx = new Object();
+    curx->initReal(cropBox->x1);
+    Object *cury = new Object();
+    cury->initReal(cropBox->y1);
+    Object *cllx = new Object();
+    cllx->initReal(cropBox->x2);
+    Object *clly = new Object();
+    clly->initReal(cropBox->y2);
+    cropBoxObj->arrayAdd(curx);
+    cropBoxObj->arrayAdd(cury);
+    cropBoxObj->arrayAdd(cllx);
+    cropBoxObj->arrayAdd(clly);
+    pageDict->add(copyString("CropBox"), cropBoxObj);
+  }
+  Object *rotateObj = new Object();
+  rotateObj->initInt(rotate);
+  pageDict->add(copyString("Rotate"), rotateObj);
+  if (pageCTM != NULL) {
+    Object *contents = new Object();
+    Ref cmRef = getXRef()->addIndirectObject(pageCTM);
+    Object *ref = new Object();
+    ref->initRef(cmRef.num, cmRef.gen);
+    pageDict->lookupNF("Contents", contents);
+    Object *newContents = new Object();
+    newContents->initArray(getXRef());
+    if (contents->getType() == objRef) {
+      newContents->arrayAdd(ref);
+      newContents->arrayAdd(contents);
+    } else {
+      newContents->arrayAdd(ref);
+      for (int i = 0; i < contents->arrayGetLength(); i++) {
+        Object *contentEle = new Object();
+        contents->arrayGetNF(i, contentEle);
+        newContents->arrayAdd(contentEle);
+      }
+    }
+    pageDict->remove("Contents");
+    pageDict->add(copyString("Contents"), newContents);
+  }
+  getXRef()->setModifiedObject(&page, *refPage);
+  page.free();
+}
+
+void PDFDoc::markPageObjects(Dict *pageDict, XRef *xRef, XRef *countRef, Guint numOffset) 
+{
+  for (int n = 0; n < pageDict->getLength(); n++) {
+    const char *key = pageDict->getKey(n);
+    Object value; pageDict->getValNF(n, &value);
+    if (strcmp(key, "Parent") != 0) {
+      markObject(&value, xRef, countRef, numOffset);
+    }
+    value.free();
+  }
+}
+
+Guint PDFDoc::writePageObjects(OutStream *outStr, XRef *xRef, Guint numOffset) 
+{
+  Guint objectsCount = 0; //count the number of objects in the XRef(s)
+
+  for (int n = numOffset; n < xRef->getNumObjects(); n++) {
+    if (xRef->getEntry(n)->type != xrefEntryFree) {
+      Object obj;
+      Ref ref;
+      ref.num = n;
+      ref.gen = xRef->getEntry(n)->gen;
+      objectsCount++;
+      getXRef()->fetch(ref.num - numOffset, ref.gen, &obj);
+      Guint offset = writeObject(&obj, &ref, outStr, xRef, numOffset);
+      xRef->add(ref.num, ref.gen, offset, gTrue);
+      obj.free();
+    }
+  }
+  return objectsCount;
+}
+
 #ifndef DISABLE_OUTLINE
 Outline *PDFDoc::getOutline()
 {
diff --git a/poppler/PDFDoc.h b/poppler/PDFDoc.h
index a7113c8..92cee78 100644
--- a/poppler/PDFDoc.h
+++ b/poppler/PDFDoc.h
@@ -22,6 +22,7 @@
 // Copyright (C) 2009 Kovid Goyal <[email protected]>
 // Copyright (C) 2010 Hib Eris <[email protected]>
 // Copyright (C) 2010 Srinivas Adicherla <[email protected]>
+// Copyright (C) 2011 Thomas Freitag <[email protected]>
 //
 // To see a description of the changes please see the Changelog file that
 // came with your tarball or type make ChangeLog if you are building from git
@@ -219,6 +220,8 @@ public:
   //Return the PDF ID in the trailer dictionary (if any).
   GBool getID(GooString *permanent_id, GooString *update_id);
 
+  // Save one page with another name.
+  int savePageAs(GooString *name, int pageNo);
   // Save this file with another name.
   int saveAs(GooString *name, PDFWriteMode mode=writeStandard);
   // Save this file in the given output stream.
@@ -231,14 +234,31 @@ public:
   // Return a pointer to the GUI (XPDFCore or WinPDFCore object).
   void *getGUIData() { return guiData; }
 
+  // rewrite pageDict with MediaBox, CropBox and new page CTM
+  void replacePageDict(int pageNo, int rotate, PDFRectangle *mediaBox, PDFRectangle *cropBox, Object *pageCTM);
+  void markPageObjects(Dict *pageDict, XRef *xRef, XRef *countRef, Guint numOffset);
+  // write all objects used by pageDict to outStr
+  Guint writePageObjects(OutStream *outStr, XRef *xRef, Guint numOffset);
+  static Guint writeObject (Object *obj, Ref *ref, OutStream* outStr, XRef *xref, Guint numOffset);
+  static void writeHeader(OutStream *outStr, int major, int minor);
+  static void writeTrailer (Guint uxrefOffset, int uxrefSize, OutStream* outStr, GBool incrUpdate,
+                            Guint startxRef, Ref *root, XRef *xRef, const char *fileName, Guint fileSize);
+
 private:
+  // insert referenced objects in XRef
+  void markDictionnary (Dict* dict, XRef *xRef, XRef *countRef, Guint numOffset);
+  void markObject (Object *obj, XRef *xRef, XRef *countRef, Guint numOffset);
+  static void writeDictionnary (Dict* dict, OutStream* outStr, XRef *xRef, Guint numOffset);
+
   // Add object to current file stream and return the offset of the beginning of the object
-  Guint writeObject (Object *obj, Ref *ref, OutStream* outStr);
-  void writeDictionnary (Dict* dict, OutStream* outStr);
-  void writeStream (Stream* str, OutStream* outStr);
-  void writeRawStream (Stream* str, OutStream* outStr);
+  Guint writeObject (Object *obj, Ref *ref, OutStream* outStr)
+  { return writeObject(obj, ref, outStr, getXRef(), 0); }
+  void writeDictionnary (Dict* dict, OutStream* outStr)
+  { writeDictionnary(dict, outStr, getXRef(), 0); }
+  static void writeStream (Stream* str, OutStream* outStr);
+  static void writeRawStream (Stream* str, OutStream* outStr);
   void writeTrailer (Guint uxrefOffset, int uxrefSize, OutStream* outStr, GBool incrUpdate);
-  void writeString (GooString* s, OutStream* outStr);
+  static void writeString (GooString* s, OutStream* outStr);
   void saveIncrementalUpdate (OutStream* outStr);
   void saveCompleteRewrite (OutStream* outStr);
 
commit 33da7e270431e8e4c500e7573b3ca0dddd9f237e
Author: suzuki toshiya <[email protected]>
Date:   Sun Aug 28 22:07:38 2011 +0200

    Fix building static-linked pdftocairo

diff --git a/utils/Makefile.am b/utils/Makefile.am
index 373d049..4faddad 100644
--- a/utils/Makefile.am
+++ b/utils/Makefile.am
@@ -30,8 +30,9 @@ pdftocairo_SOURCES =                          \
        pdftocairo.cc                           \
        $(common)
 
-pdftocairo_LDADD = $(LDADD) $(CAIRO_LIBS) \
-       $(top_builddir)/poppler/libpoppler-cairo.la
+pdftocairo_LDADD =                             \
+       $(top_builddir)/poppler/libpoppler-cairo.la \
+       $(LDADD) $(CAIRO_LIBS)
 
 
 pdftocairo_binary = pdftocairo
_______________________________________________
poppler mailing list
[email protected]
http://lists.freedesktop.org/mailman/listinfo/poppler

[poppler] 3 commits - poppler/PDFDoc.cc poppler/PDFDoc.h utils/CMakeLists.txt utils/Makefile.am utils/pdfextract.cc utils/pdfmerge.cc

Reply via email to