poppler/PDFDoc.cc    |  389 +++++++++++++++++++++++++++++++++++++++++++++------
poppler/PDFDoc.h     |   30 +++
utils/CMakeLists.txt |   15 +
utils/Makefile.am    |   15 +
utils/pdfextract.cc  |  111 ++++++++++++++
utils/pdfmerge.cc    |  176 +++++++++++++++++++++++
6 files changed, 690 insertions(+), 46 deletions(-)
New commits: commit 1431564f3363a63a8669c8dd15970db814f4969f Author: Thomas Freitag <[email protected]> Date: Mon Aug 29 22:22:02 2011 +0200 Add pdfextract and pdfmerge See "Creating PDF with poppler ?" thread for more info diff --git a/utils/CMakeLists.txt b/utils/CMakeLists.txt index beeedb8..90030cd 100644 --- a/utils/CMakeLists.txt +++ b/utils/CMakeLists.txt @@ -102,3 +102,18 @@ target_link_libraries(pdftohtml ${common_libs}) install(TARGETS pdftohtml DESTINATION bin) install(FILES pdftohtml.1 DESTINATION share/man/man1) +# pdfextract +set(pdfextract_SOURCES ${common_srcs} + pdfextract.cc +) +add_executable(pdfextract ${pdfextract_SOURCES}) +target_link_libraries(pdfextract ${common_libs}) +install(TARGETS pdfextract DESTINATION bin) + +# pdfmerge +set(pdfmerge_SOURCES ${common_srcs} + pdfmerge.cc +) +add_executable(pdfmerge ${pdfmerge_SOURCES}) +target_link_libraries(pdfmerge ${common_libs}) +install(TARGETS pdfmerge DESTINATION bin) diff --git a/utils/Makefile.am b/utils/Makefile.am index 4faddad..30328f2 100644 --- a/utils/Makefile.am +++ b/utils/Makefile.am @@ -50,6 +50,8 @@ bin_PROGRAMS = \ pdftops \ pdftotext \ pdftohtml \ + pdfextract \ + pdfmerge \ $(pdftoppm_binary) \ $(pdftocairo_binary) @@ -102,6 +104,14 @@ pdftohtml_SOURCES = \ HtmlUtils.h \ $(common) +pdfextract_SOURCES = \ + pdfextract.cc \ + $(common) + +pdfmerge_SOURCES = \ + pdfmerge.cc \ + $(common) + # Yay, automake! It should be able to figure out that it has to dist # pdftoppm.1, but nooo. So we just add it here. 
diff --git a/utils/pdfextract.cc b/utils/pdfextract.cc new file mode 100644 index 0000000..c8c4749 --- /dev/null +++ b/utils/pdfextract.cc @@ -0,0 +1,111 @@ +//======================================================================== +// +// pdfextract.cc +// +// This file is licensed under the GPLv2 or later +// +// Copyright (C) 2011 Thomas Freitag <[email protected]> +// +//======================================================================== +#include "config.h" +#include <poppler-config.h> +#include <stdio.h> +#include <stdlib.h> +#include <stddef.h> +#include <string.h> +#include "parseargs.h" +#include "goo/GooString.h" +#include "PDFDoc.h" +#include "ErrorCodes.h" + +static int firstPage = 0; +static int lastPage = 0; +static GBool printVersion = gFalse; +static GBool printHelp = gFalse; + +static const ArgDesc argDesc[] = { + {"-f", argInt, &firstPage, 0, + "first page to extract"}, + {"-l", argInt, &lastPage, 0, + "last page to extract"}, + {"-v", argFlag, &printVersion, 0, + "print copyright and version info"}, + {"-h", argFlag, &printHelp, 0, + "print usage information"}, + {"-help", argFlag, &printHelp, 0, + "print usage information"}, + {"--help", argFlag, &printHelp, 0, + "print usage information"}, + {"-?", argFlag, &printHelp, 0, + "print usage information"}, + {NULL} +}; + +bool extractPages (const char *srcFileName, const char *destFileName) { + char pathName[1024]; + GooString *gfileName = new GooString (srcFileName); + PDFDoc *doc = new PDFDoc (gfileName, NULL, NULL, NULL); + + if (!doc->isOk()) { + error(-1, "Could not extract page(s) from damaged file ('%s')", srcFileName); + return false; + } + if (doc->isEncrypted()) { + error(-1, "Could not extract page(s) from encrypted file ('%s')", srcFileName); + return false; + } + + if (firstPage == 0 && lastPage == 0) { + firstPage = 1; + lastPage = doc->getNumPages(); + } + if (lastPage == 0) + lastPage = doc->getNumPages(); + if (firstPage == 0) + firstPage = 1; + for (int pageNo = firstPage; 
pageNo <= lastPage; pageNo++) { + sprintf (pathName, destFileName, pageNo); + GooString *gpageName = new GooString (pathName); + int errCode = doc->savePageAs(gpageName, pageNo); + if ( errCode != errNone) { + delete gpageName; + delete gfileName; + return false; + } + delete gpageName; + } + delete gfileName; + return true; +} + +int +main (int argc, char *argv[]) +{ + Object info; + GBool ok; + int exitCode; + + exitCode = 99; + + // parse args + ok = parseArgs (argDesc, &argc, argv); + if (!ok || argc != 3 || printVersion || printHelp) + { + fprintf (stderr, "pdfextract version %s\n", PACKAGE_VERSION); + fprintf (stderr, "%s\n", popplerCopyright); + fprintf (stderr, "%s\n", xpdfCopyright); + if (!printVersion) + { + printUsage ("pdfextract", "<PDF-sourcefile> <PDF-pattern-destfile>", + argDesc); + } + if (printVersion || printHelp) + exitCode = 0; + goto err0; + } + extractPages (argv[1], argv[2]); + +err0: + + return exitCode; +} diff --git a/utils/pdfmerge.cc b/utils/pdfmerge.cc new file mode 100644 index 0000000..28f7265 --- /dev/null +++ b/utils/pdfmerge.cc @@ -0,0 +1,176 @@ +//======================================================================== +// +// pdfmerge.cc +// +// This file is licensed under the GPLv2 or later +// +// Copyright (C) 2011 Thomas Freitag <[email protected]> +// +//======================================================================== +#include <PDFDoc.h> +#include "parseargs.h" +#include "config.h" +#include <poppler-config.h> +#include <vector> + +static GBool printVersion = gFalse; +static GBool printHelp = gFalse; + +static const ArgDesc argDesc[] = { + {"-v", argFlag, &printVersion, 0, + "print copyright and version info"}, + {"-h", argFlag, &printHelp, 0, + "print usage information"}, + {"-help", argFlag, &printHelp, 0, + "print usage information"}, + {"--help", argFlag, &printHelp, 0, + "print usage information"}, + {"-?", argFlag, &printHelp, 0, + "print usage information"}, + {NULL} +}; + 
+/////////////////////////////////////////////////////////////////////////// +int main (int argc, char *argv[]) +/////////////////////////////////////////////////////////////////////////// +// Merge PDF files given by arguments 1 to argc-2 and write the result +// to the file specified by argument argc-1. +/////////////////////////////////////////////////////////////////////////// +{ + int objectsCount = 0; + Guint numOffset = 0; + std::vector<Object> pages; + std::vector<Guint> offsets; + XRef *yRef, *countRef; + FILE *f; + OutStream *outStr; + int i; + int j, rootNum; + std::vector<PDFDoc *>docs; + int majorVersion = 0; + int minorVersion = 0; + char *fileName = argv[argc - 1]; + int exitCode; + + exitCode = 99; + if (argc <= 3 || printVersion || printHelp) { + fprintf(stderr, "pdfmerge version %s\n", PACKAGE_VERSION); + fprintf(stderr, "%s\n", popplerCopyright); + fprintf(stderr, "%s\n", xpdfCopyright); + if (!printVersion) { + printUsage("pdfmerge", "<PDF-sourcefile-1>..<PDF-sourcefile-n> <PDF-destfile>", + argDesc); + } + if (printVersion || printHelp) + exitCode = 0; + return exitCode; + } + exitCode = 0; + + for (i = 1; i < argc - 1; i++) { + GooString *gfileName = new GooString(argv[i]); + PDFDoc *doc = new PDFDoc(gfileName, NULL, NULL, NULL); + if (doc->isOk() && !doc->isEncrypted()) { + docs.push_back(doc); + if (doc->getPDFMajorVersion() > majorVersion) { + majorVersion = doc->getPDFMajorVersion(); + minorVersion = doc->getPDFMinorVersion(); + } else if (doc->getPDFMajorVersion() == majorVersion) { + if (doc->getPDFMinorVersion() > minorVersion) { + minorVersion = doc->getPDFMinorVersion(); + } + } + } else if (doc->isOk()) { + error(-1, "Could not merge encrypted files ('%s')", argv[i]); + return -1; + } else { + error(-1, "Could not merge damaged documents ('%s')", argv[i]); + return -1; + } + } + + if (!(f = fopen(fileName, "wb"))) { + error(-1, "Could not open file '%s'", fileName); + return -1; + } + outStr = new FileOutStream(f, 0); + + yRef = new 
XRef(); + countRef = new XRef(); + yRef->add(0, 65535, 0, gFalse); + PDFDoc::writeHeader(outStr, majorVersion, minorVersion); + + for (i = 0; i < (int) docs.size(); i++) { + for (j = 1; j <= docs[i]->getNumPages(); j++) { + PDFRectangle *cropBox = NULL; + if (docs[i]->getCatalog()->getPage(j)->isCropped()) + cropBox = docs[i]->getCatalog()->getPage(j)->getCropBox(); + docs[i]->replacePageDict(j, + docs[i]->getCatalog()->getPage(j)->getRotate(), + docs[i]->getCatalog()->getPage(j)->getMediaBox(), cropBox, NULL); + Ref *refPage = docs[i]->getCatalog()->getPageRef(j); + Object page; + docs[i]->getXRef()->fetch(refPage->num, refPage->gen, &page); + pages.push_back(page); + offsets.push_back(numOffset); + Dict *pageDict = page.getDict(); + docs[i]->markPageObjects(pageDict, yRef, countRef, numOffset); + } + objectsCount += docs[i]->writePageObjects(outStr, yRef, numOffset); + numOffset = yRef->getNumObjects() + 1; + } + + rootNum = yRef->getNumObjects() + 1; + yRef->add(rootNum, 0, outStr->getPos(), gTrue); + outStr->printf("%d 0 obj\n", rootNum); + outStr->printf("<< /Type /Catalog /Pages %d 0 R", rootNum + 1); + outStr->printf(">>\nendobj\n"); + objectsCount++; + + yRef->add(rootNum + 1, 0, outStr->getPos(), gTrue); + outStr->printf("%d 0 obj\n", rootNum + 1); + outStr->printf("<< /Type /Pages /Kids ["); + for (j = 0; j < (int) pages.size(); j++) + outStr->printf(" %d 0 R", rootNum + j + 2); + outStr->printf(" ] /Count %d >>\nendobj\n", pages.size()); + objectsCount++; + + for (i = 0; i < (int) pages.size(); i++) { + yRef->add(rootNum + i + 2, 0, outStr->getPos(), gTrue); + outStr->printf("%d 0 obj\n", rootNum + i + 2); + outStr->printf("<< "); + Dict *pageDict = pages[i].getDict(); + for (j = 0; j < pageDict->getLength(); j++) { + if (j > 0) + outStr->printf(" "); + const char *key = pageDict->getKey(j); + Object value; + pageDict->getValNF(j, &value); + if (strcmp(key, "Parent") == 0) { + outStr->printf("/Parent %d 0 R", rootNum + 1); + } else { + 
outStr->printf("/%s ", key); + PDFDoc::writeObject(&value, NULL, outStr, yRef, offsets[i]); + } + value.free(); + } + outStr->printf(" >>\nendobj\n"); + objectsCount++; + } + Guint uxrefOffset = outStr->getPos(); + yRef->writeToFile(outStr, gFalse /* do not write unnecessary entries */ ); + + Ref ref; + ref.num = rootNum; + ref.gen = 0; + PDFDoc::writeTrailer(uxrefOffset, objectsCount, outStr, (GBool) gFalse, 0, + &ref, yRef, fileName, outStr->getPos()); + + outStr->close(); + fclose(f); + delete yRef; + delete countRef; + for (j = 0; j < (int) pages.size (); j++) pages[j].free(); + for (i = 0; i < (int) docs.size (); i++) delete docs[i]; + return exitCode; +} commit 8ca2f41089bc6402baf9b24428af04314c037b54 Author: Thomas Freitag <[email protected]> Date: Mon Aug 29 22:20:52 2011 +0200 Rework writing of PDF files Makes it more compatible with other PDF readers See "Creating PDF with poppler ?" thread in the mailing list for more info diff --git a/poppler/PDFDoc.cc b/poppler/PDFDoc.cc index aa52140..01d2759 100644 --- a/poppler/PDFDoc.cc +++ b/poppler/PDFDoc.cc @@ -26,6 +26,7 @@ // Copyright (C) 2010 Ilya Gorenbein <[email protected]> // Copyright (C) 2010 Srinivas Adicherla <[email protected]> // Copyright (C) 2010 Philip Lorenz <[email protected]> +// Copyright (C) 2011 Thomas Freitag <[email protected]> // // To see a description of the changes please see the Changelog file that // came with your tarball or type make ChangeLog if you are building from git @@ -573,6 +574,121 @@ Hints *PDFDoc::getHints() return hints; } +int PDFDoc::savePageAs(GooString *name, int pageNo) +{ + FILE *f; + OutStream *outStr; + XRef *yRef, *countRef; + int rootNum = getXRef()->getSize() + 1; + + if (pageNo < 1 || pageNo > getNumPages()) { + error(-1, "Illegal pageNo: %d(%d)", pageNo, getNumPages() ); + return errOpenFile; + } + PDFRectangle *cropBox = NULL; + if (getCatalog()->getPage(pageNo)->isCropped()) { + cropBox = getCatalog()->getPage(pageNo)->getCropBox(); + } + 
replacePageDict(pageNo, + getCatalog()->getPage(pageNo)->getRotate(), + getCatalog()->getPage(pageNo)->getMediaBox(), + cropBox, NULL); + Ref *refPage = getCatalog()->getPageRef(pageNo); + Object page; + getXRef()->fetch(refPage->num, refPage->gen, &page); + + if (!(f = fopen(name->getCString(), "wb"))) { + error(-1, "Couldn't open file '%s'", name->getCString()); + return errOpenFile; + } + outStr = new FileOutStream(f,0); + + yRef = new XRef(); + countRef = new XRef(); + yRef->add(0, 65535, 0, gFalse); + writeHeader(outStr, getPDFMajorVersion(), getPDFMinorVersion()); + + // get and mark optional content groups + OCGs *ocgs = getCatalog()->getOptContentConfig(); + if (ocgs != NULL) { + Object catDict, optContentProps; + getXRef()->getCatalog(&catDict); + catDict.dictLookup("OCProperties", &optContentProps); + Dict *pageDict = optContentProps.getDict(); + markPageObjects(pageDict, yRef, countRef, 0); + catDict.free(); + optContentProps.free(); + } + + Dict *pageDict = page.getDict(); + markPageObjects(pageDict, yRef, countRef, 0); + Guint objectsCount = writePageObjects(outStr, yRef, 0); + + yRef->add(rootNum,0,outStr->getPos(),gTrue); + outStr->printf("%d 0 obj\n", rootNum); + outStr->printf("<< /Type /Catalog /Pages %d 0 R", rootNum + 1); + if (ocgs != NULL) { + Object catDict, optContentProps; + getXRef()->getCatalog(&catDict); + catDict.dictLookup("OCProperties", &optContentProps); + outStr->printf(" /OCProperties <<"); + Dict *pageDict = optContentProps.getDict(); + for (int n = 0; n < pageDict->getLength(); n++) { + if (n > 0) outStr->printf(" "); + const char *key = pageDict->getKey(n); + Object value; pageDict->getValNF(n, &value); + outStr->printf("/%s ", key); + writeObject(&value, NULL, outStr, getXRef(), 0); + value.free(); + } + outStr->printf(" >> "); + catDict.free(); + optContentProps.free(); + } + outStr->printf(">>\nendobj\n"); + objectsCount++; + + yRef->add(rootNum + 1,0,outStr->getPos(),gTrue); + outStr->printf("%d 0 obj\n", rootNum + 1); + 
outStr->printf("<< /Type /Pages /Kids [ %d 0 R ] /Count 1 >>\n", rootNum + 2); + outStr->printf("endobj\n"); + objectsCount++; + + yRef->add(rootNum + 2,0,outStr->getPos(),gTrue); + outStr->printf("%d 0 obj\n", rootNum + 2); + outStr->printf("<< "); + for (int n = 0; n < pageDict->getLength(); n++) { + if (n > 0) outStr->printf(" "); + const char *key = pageDict->getKey(n); + Object value; pageDict->getValNF(n, &value); + if (strcmp(key, "Parent") == 0) { + outStr->printf("/Parent %d 0 R", rootNum + 1); + } else { + outStr->printf("/%s ", key); + writeObject(&value, NULL, outStr, getXRef(), 0); + } + value.free(); + } + outStr->printf(" >>\nendobj\n"); + objectsCount++; + page.free(); + + Guint uxrefOffset = outStr->getPos(); + yRef->writeToFile(outStr, gFalse /* do not write unnecessary entries */); + + Ref ref; + ref.num = rootNum; + ref.gen = 0; + writeTrailer(uxrefOffset, objectsCount, outStr, gFalse, 0, &ref, getXRef(), name->getCString(), outStr->getPos()); + + outStr->close(); + fclose(f); + delete yRef; + delete countRef; + + return errNone; +} + int PDFDoc::saveAs(GooString *name, PDFWriteMode mode) { FILE *f; OutStream *outStr; @@ -740,7 +856,7 @@ void PDFDoc::saveCompleteRewrite (OutStream* outStr) } -void PDFDoc::writeDictionnary (Dict* dict, OutStream* outStr) +void PDFDoc::writeDictionnary (Dict* dict, OutStream* outStr, XRef *xRef, Guint numOffset) { Object obj1; outStr->printf("<<"); @@ -749,7 +865,7 @@ void PDFDoc::writeDictionnary (Dict* dict, OutStream* outStr) GooString *keyNameToPrint = keyName.sanitizedName(gFalse /* non ps mode */); outStr->printf("/%s ", keyNameToPrint->getCString()); delete keyNameToPrint; - writeObject(dict->getValNF(i, &obj1), NULL, outStr); + writeObject(dict->getValNF(i, &obj1), NULL, outStr, xRef, numOffset); obj1.free(); } outStr->printf(">> "); @@ -805,18 +921,24 @@ void PDFDoc::writeString (GooString* s, OutStream* outStr) const char* c = s->getCString(); outStr->printf("("); for(int i=0; i<s->getLength(); i++) { - 
char unescaped = (*c)&0x000000ff; + char unescaped = *(c+i)&0x000000ff; //escape if needed - if (unescaped == '(' || unescaped == ')' || unescaped == '\\') - outStr->printf("%c", '\\'); - outStr->printf("%c", unescaped); - c++; + if (unescaped == '\r') + outStr->printf("\\r"); + else if (unescaped == '\n') + outStr->printf("\\n"); + else { + if (unescaped == '(' || unescaped == ')' || unescaped == '\\') { + outStr->printf("%c", '\\'); + } + outStr->printf("%c", unescaped); + } } outStr->printf(") "); } } -Guint PDFDoc::writeObject (Object* obj, Ref* ref, OutStream* outStr) +Guint PDFDoc::writeObject (Object* obj, Ref* ref, OutStream* outStr, XRef *xRef, Guint numOffset) { Array *array; Object obj1; @@ -858,13 +980,13 @@ Guint PDFDoc::writeObject (Object* obj, Ref* ref, OutStream* outStr) array = obj->getArray(); outStr->printf("["); for (int i=0; i<array->getLength(); i++) { - writeObject(array->getNF(i, &obj1), NULL,outStr); + writeObject(array->getNF(i, &obj1), NULL,outStr, xRef, numOffset); obj1.free(); } outStr->printf("] "); break; case objDict: - writeDictionnary (obj->getDict(),outStr); + writeDictionnary (obj->getDict(),outStr, xRef, numOffset); break; case objStream: { @@ -886,7 +1008,7 @@ Guint PDFDoc::writeObject (Object* obj, Ref* ref, OutStream* outStr) stream->getDict()->remove("Filter"); stream->getDict()->remove("DecodeParms"); - writeDictionnary (stream->getDict(),outStr); + writeDictionnary (stream->getDict(),outStr, xRef, numOffset); writeStream (stream,outStr); obj1.free(); } else { @@ -896,23 +1018,23 @@ Guint PDFDoc::writeObject (Object* obj, Ref* ref, OutStream* outStr) BaseStream *bs = fs->getBaseStream(); if (bs) { Guint streamEnd; - if (xref->getStreamEnd(bs->getStart(), &streamEnd)) { + if (xRef->getStreamEnd(bs->getStart(), &streamEnd)) { Object val; val.initInt(streamEnd - bs->getStart()); stream->getDict()->set("Length", &val); } } } - writeDictionnary (stream->getDict(), outStr); + writeDictionnary (stream->getDict(), outStr, xRef, 
numOffset); writeRawStream (stream, outStr); } break; } case objRef: - outStr->printf("%i %i R ", obj->getRef().num, obj->getRef().gen); + outStr->printf("%i %i R ", obj->getRef().num + numOffset, obj->getRef().gen); break; case objCmd: - outStr->printf("cmd\r\n"); + outStr->printf("%s\n", obj->getCmd()); break; case objError: outStr->printf("error\r\n"); @@ -932,9 +1054,12 @@ Guint PDFDoc::writeObject (Object* obj, Ref* ref, OutStream* outStr) return offset; } -void PDFDoc::writeTrailer (Guint uxrefOffset, int uxrefSize, OutStream* outStr, GBool incrUpdate) +void PDFDoc::writeTrailer(Guint uxrefOffset, int uxrefSize, + OutStream* outStr, GBool incrUpdate, + Guint startxRef, Ref *root, XRef *xRef, const char *fileName, + Guint fileSize) { - Dict *trailerDict = new Dict(xref); + Dict *trailerDict = new Dict(xRef); Object obj1; obj1.initInt(uxrefSize); trailerDict->set("Size", &obj1); @@ -950,23 +1075,13 @@ void PDFDoc::writeTrailer (Guint uxrefOffset, int uxrefSize, OutStream* outStr, char buffer[256]; sprintf(buffer, "%i", (int)time(NULL)); message.append(buffer); - if (fileName) - message.append(fileName); - else - message.append("streamwithoutfilename.pdf"); - // file size - unsigned int fileSize = 0; - int c; - str->reset(); - while ((c = str->getChar()) != EOF) { - fileSize++; - } - str->close(); + message.append(fileName); + sprintf(buffer, "%i", fileSize); message.append(buffer); //info dict -- only use text string - if (xref->getDocInfo(&obj1)->isDict()) { + if (!xRef->getTrailerDict()->isNone() && xRef->getDocInfo(&obj1)->isDict()) { for(int i=0; i<obj1.getDict()->getLength(); i++) { Object obj2; obj1.getDict()->getVal(i, &obj2); @@ -985,12 +1100,12 @@ void PDFDoc::writeTrailer (Guint uxrefOffset, int uxrefSize, OutStream* outStr, //create ID array Object obj2,obj3,obj5; - obj2.initArray(xref); + obj2.initArray(xRef); if (incrUpdate) { Object obj4; //only update the second part of the array - xref->getTrailerDict()->getDict()->lookup("ID", &obj4); + 
xRef->getTrailerDict()->getDict()->lookup("ID", &obj4); if (!obj4.isArray()) { error(-1, "PDFDoc::writeTrailer original file's ID entry isn't an array. Trying to continue"); } else { @@ -1010,22 +1125,23 @@ void PDFDoc::writeTrailer (Guint uxrefOffset, int uxrefSize, OutStream* outStr, trailerDict->set("ID", &obj2); } - - obj1.initRef(xref->getRootNum(), xref->getRootGen()); + obj1.initRef(root->num, root->gen); trailerDict->set("Root", &obj1); if (incrUpdate) { - obj1.initInt(getStartXRef()); + obj1.initInt(startxRef); trailerDict->set("Prev", &obj1); } - xref->getDocInfoNF(&obj5); - if (!obj5.isNull()) { - trailerDict->set("Info", &obj5); + if (!xRef->getTrailerDict()->isNone()) { + xRef->getDocInfoNF(&obj5); + if (!obj5.isNull()) { + trailerDict->set("Info", &obj5); + } } outStr->printf( "trailer\r\n"); - writeDictionnary(trailerDict, outStr); + writeDictionnary(trailerDict, outStr, xRef, 0); outStr->printf( "\r\nstartxref\r\n"); outStr->printf( "%i\r\n", uxrefOffset); outStr->printf( "%%%%EOF\r\n"); @@ -1033,6 +1149,201 @@ void PDFDoc::writeTrailer (Guint uxrefOffset, int uxrefSize, OutStream* outStr, delete trailerDict; } +void PDFDoc::writeTrailer(Guint uxrefOffset, int uxrefSize, OutStream* outStr, GBool incrUpdate) +{ + char *fileNameA; + if (fileName) + fileNameA = fileName->getCString(); + else + fileNameA = "streamwithoutfilename.pdf"; + // file size + unsigned int fileSize = 0; + int c; + str->reset(); + while ((c = str->getChar()) != EOF) { + fileSize++; + } + str->close(); + Ref ref; + ref.num = getXRef()->getRootNum(); + ref.gen = getXRef()->getRootGen(); + writeTrailer(uxrefOffset, uxrefSize, outStr, incrUpdate, getStartXRef(), &ref, getXRef(), fileNameA, fileSize); +} + +void PDFDoc::writeHeader(OutStream *outStr, int major, int minor) +{ + outStr->printf("%%PDF-%d.%d\n", major, minor); + outStr->printf("%%\xE2\xE3\xCF\xD3\n"); +} + +void PDFDoc::markDictionnary (Dict* dict, XRef * xRef, XRef *countRef, Guint numOffset) +{ + Object obj1; + for (int 
i=0; i<dict->getLength(); i++) { + markObject(dict->getValNF(i, &obj1), xRef, countRef, numOffset); + obj1.free(); + } +} + +void PDFDoc::markObject (Object* obj, XRef *xRef, XRef *countRef, Guint numOffset) +{ + Array *array; + Object obj1; + + switch (obj->getType()) { + case objArray: + array = obj->getArray(); + for (int i=0; i<array->getLength(); i++) { + markObject(array->getNF(i, &obj1), xRef, countRef, numOffset); + obj1.free(); + } + break; + case objDict: + markDictionnary (obj->getDict(), xRef, countRef, numOffset); + break; + case objStream: + { + Stream *stream = obj->getStream(); + markDictionnary (stream->getDict(), xRef, countRef, numOffset); + } + break; + case objRef: + { + if (obj->getRef().num + (int) numOffset >= xRef->getNumObjects() || xRef->getEntry(obj->getRef().num + numOffset)->type == xrefEntryFree) { + if (getXRef()->getEntry(obj->getRef().num)->type == xrefEntryFree) { + return; // already marked as free => should be replaced + } + xRef->add(obj->getRef().num + numOffset, obj->getRef().gen, 0, gTrue); + if (getXRef()->getEntry(obj->getRef().num)->type == xrefEntryCompressed) { + xRef->getEntry(obj->getRef().num + numOffset)->type = xrefEntryCompressed; + } + } + if (obj->getRef().num + (int) numOffset >= countRef->getNumObjects() || + countRef->getEntry(obj->getRef().num + numOffset)->type == xrefEntryFree) + { + countRef->add(obj->getRef().num + numOffset, 1, 0, gTrue); + } else { + XRefEntry *entry = countRef->getEntry(obj->getRef().num + numOffset); + entry->gen++; + } + Object obj1; + getXRef()->fetch(obj->getRef().num, obj->getRef().gen, &obj1); + markObject(&obj1, xRef, countRef, numOffset); + obj1.free(); + } + break; + default: + break; + } +} + +void PDFDoc::replacePageDict(int pageNo, int rotate, + PDFRectangle *mediaBox, + PDFRectangle *cropBox, Object *pageCTM) +{ + Ref *refPage = getCatalog()->getPageRef(pageNo); + Object page; + getXRef()->fetch(refPage->num, refPage->gen, &page); + Dict *pageDict = page.getDict(); + 
pageDict->remove("MediaBox"); + pageDict->remove("CropBox"); + pageDict->remove("ArtBox"); + pageDict->remove("BleedBox"); + pageDict->remove("TrimBox"); + pageDict->remove("Rotate"); + Object *mediaBoxObj = new Object(); + mediaBoxObj->initArray(getXRef()); + Object *murx = new Object(); + murx->initReal(mediaBox->x1); + Object *mury = new Object(); + mury->initReal(mediaBox->y1); + Object *mllx = new Object(); + mllx->initReal(mediaBox->x2); + Object *mlly = new Object(); + mlly->initReal(mediaBox->y2); + mediaBoxObj->arrayAdd(murx); + mediaBoxObj->arrayAdd(mury); + mediaBoxObj->arrayAdd(mllx); + mediaBoxObj->arrayAdd(mlly); + pageDict->add(copyString("MediaBox"), mediaBoxObj); + if (cropBox != NULL) { + Object *cropBoxObj = new Object(); + cropBoxObj->initArray(getXRef()); + Object *curx = new Object(); + curx->initReal(cropBox->x1); + Object *cury = new Object(); + cury->initReal(cropBox->y1); + Object *cllx = new Object(); + cllx->initReal(cropBox->x2); + Object *clly = new Object(); + clly->initReal(cropBox->y2); + cropBoxObj->arrayAdd(curx); + cropBoxObj->arrayAdd(cury); + cropBoxObj->arrayAdd(cllx); + cropBoxObj->arrayAdd(clly); + pageDict->add(copyString("CropBox"), cropBoxObj); + } + Object *rotateObj = new Object(); + rotateObj->initInt(rotate); + pageDict->add(copyString("Rotate"), rotateObj); + if (pageCTM != NULL) { + Object *contents = new Object(); + Ref cmRef = getXRef()->addIndirectObject(pageCTM); + Object *ref = new Object(); + ref->initRef(cmRef.num, cmRef.gen); + pageDict->lookupNF("Contents", contents); + Object *newContents = new Object(); + newContents->initArray(getXRef()); + if (contents->getType() == objRef) { + newContents->arrayAdd(ref); + newContents->arrayAdd(contents); + } else { + newContents->arrayAdd(ref); + for (int i = 0; i < contents->arrayGetLength(); i++) { + Object *contentEle = new Object(); + contents->arrayGetNF(i, contentEle); + newContents->arrayAdd(contentEle); + } + } + pageDict->remove("Contents"); + 
pageDict->add(copyString("Contents"), newContents); + } + getXRef()->setModifiedObject(&page, *refPage); + page.free(); +} + +void PDFDoc::markPageObjects(Dict *pageDict, XRef *xRef, XRef *countRef, Guint numOffset) +{ + for (int n = 0; n < pageDict->getLength(); n++) { + const char *key = pageDict->getKey(n); + Object value; pageDict->getValNF(n, &value); + if (strcmp(key, "Parent") != 0) { + markObject(&value, xRef, countRef, numOffset); + } + value.free(); + } +} + +Guint PDFDoc::writePageObjects(OutStream *outStr, XRef *xRef, Guint numOffset) +{ + Guint objectsCount = 0; //count the number of objects in the XRef(s) + + for (int n = numOffset; n < xRef->getNumObjects(); n++) { + if (xRef->getEntry(n)->type != xrefEntryFree) { + Object obj; + Ref ref; + ref.num = n; + ref.gen = xRef->getEntry(n)->gen; + objectsCount++; + getXRef()->fetch(ref.num - numOffset, ref.gen, &obj); + Guint offset = writeObject(&obj, &ref, outStr, xRef, numOffset); + xRef->add(ref.num, ref.gen, offset, gTrue); + obj.free(); + } + } + return objectsCount; +} + #ifndef DISABLE_OUTLINE Outline *PDFDoc::getOutline() { diff --git a/poppler/PDFDoc.h b/poppler/PDFDoc.h index a7113c8..92cee78 100644 --- a/poppler/PDFDoc.h +++ b/poppler/PDFDoc.h @@ -22,6 +22,7 @@ // Copyright (C) 2009 Kovid Goyal <[email protected]> // Copyright (C) 2010 Hib Eris <[email protected]> // Copyright (C) 2010 Srinivas Adicherla <[email protected]> +// Copyright (C) 2011 Thomas Freitag <[email protected]> // // To see a description of the changes please see the Changelog file that // came with your tarball or type make ChangeLog if you are building from git @@ -219,6 +220,8 @@ public: //Return the PDF ID in the trailer dictionary (if any). GBool getID(GooString *permanent_id, GooString *update_id); + // Save one page with another name. + int savePageAs(GooString *name, int pageNo); // Save this file with another name. 
int saveAs(GooString *name, PDFWriteMode mode=writeStandard); // Save this file in the given output stream. @@ -231,14 +234,31 @@ public: // Return a pointer to the GUI (XPDFCore or WinPDFCore object). void *getGUIData() { return guiData; } + // rewrite pageDict with MediaBox, CropBox and new page CTM + void replacePageDict(int pageNo, int rotate, PDFRectangle *mediaBox, PDFRectangle *cropBox, Object *pageCTM); + void markPageObjects(Dict *pageDict, XRef *xRef, XRef *countRef, Guint numOffset); + // write all objects used by pageDict to outStr + Guint writePageObjects(OutStream *outStr, XRef *xRef, Guint numOffset); + static Guint writeObject (Object *obj, Ref *ref, OutStream* outStr, XRef *xref, Guint numOffset); + static void writeHeader(OutStream *outStr, int major, int minor); + static void writeTrailer (Guint uxrefOffset, int uxrefSize, OutStream* outStr, GBool incrUpdate, + Guint startxRef, Ref *root, XRef *xRef, const char *fileName, Guint fileSize); + private: + // insert referenced objects in XRef + void markDictionnary (Dict* dict, XRef *xRef, XRef *countRef, Guint numOffset); + void markObject (Object *obj, XRef *xRef, XRef *countRef, Guint numOffset); + static void writeDictionnary (Dict* dict, OutStream* outStr, XRef *xRef, Guint numOffset); + // Add object to current file stream and return the offset of the beginning of the object - Guint writeObject (Object *obj, Ref *ref, OutStream* outStr); - void writeDictionnary (Dict* dict, OutStream* outStr); - void writeStream (Stream* str, OutStream* outStr); - void writeRawStream (Stream* str, OutStream* outStr); + Guint writeObject (Object *obj, Ref *ref, OutStream* outStr) + { return writeObject(obj, ref, outStr, getXRef(), 0); } + void writeDictionnary (Dict* dict, OutStream* outStr) + { writeDictionnary(dict, outStr, getXRef(), 0); } + static void writeStream (Stream* str, OutStream* outStr); + static void writeRawStream (Stream* str, OutStream* outStr); void writeTrailer (Guint uxrefOffset, int 
uxrefSize, OutStream* outStr, GBool incrUpdate); - void writeString (GooString* s, OutStream* outStr); + static void writeString (GooString* s, OutStream* outStr); void saveIncrementalUpdate (OutStream* outStr); void saveCompleteRewrite (OutStream* outStr); commit 33da7e270431e8e4c500e7573b3ca0dddd9f237e Author: suzuki toshiya <[email protected]> Date: Sun Aug 28 22:07:38 2011 +0200 Fix building static-linked pdftocairo diff --git a/utils/Makefile.am b/utils/Makefile.am index 373d049..4faddad 100644 --- a/utils/Makefile.am +++ b/utils/Makefile.am @@ -30,8 +30,9 @@ pdftocairo_SOURCES = \ pdftocairo.cc \ $(common) -pdftocairo_LDADD = $(LDADD) $(CAIRO_LIBS) \ - $(top_builddir)/poppler/libpoppler-cairo.la +pdftocairo_LDADD = \ + $(top_builddir)/poppler/libpoppler-cairo.la \ + $(LDADD) $(CAIRO_LIBS) pdftocairo_binary = pdftocairo _______________________________________________ poppler mailing list [email protected] http://lists.freedesktop.org/mailman/listinfo/poppler
