poppler/Catalog.cc | 81 ++++++++++++++++++++++++++++++++++++ poppler/Catalog.h | 8 +++ poppler/Dict.cc | 13 +++++ poppler/Dict.h | 1 poppler/FileSpec.cc | 28 ++++++++++++ poppler/FileSpec.h | 2 poppler/PDFDoc.cc | 6 ++ poppler/Stream.cc | 1 poppler/Stream.h | 5 ++ utils/CMakeLists.txt | 9 ++++ utils/pdfattach.1 | 60 +++++++++++++++++++++++++++ utils/pdfattach.cc | 112 +++++++++++++++++++++++++++++++++++++++++++++++++++ 12 files changed, 326 insertions(+)
New commits: commit 4402e335d6a907c3eb73708a6cd50061625d431f Author: Albert Astals Cid <[email protected]> Date: Sun Feb 10 10:00:32 2019 +0100 Add new util: pdfattach diff --git a/poppler/Catalog.cc b/poppler/Catalog.cc index e958ad12..4955088a 100644 --- a/poppler/Catalog.cc +++ b/poppler/Catalog.cc @@ -432,6 +432,87 @@ FileSpec *Catalog::embeddedFile(int i) return embeddedFile; } +bool Catalog::hasEmbeddedFile(const std::string &fileName) +{ + NameTree *ef = getEmbeddedFileNameTree(); + for (int i = 0; i < ef->numEntries(); ++i) { + if (fileName == ef->getName(i)->toStr()) + return true; + } + return false; +} + +void Catalog::addEmbeddedFile(GooFile *file, const std::string &fileName) +{ + catalogLocker(); + + Object fileSpecObj = FileSpec::newFileSpecObject(xref, file, fileName); + const Ref fileSpecRef = xref->addIndirectObject(&fileSpecObj); + + Object catDict = xref->getCatalog(); + Ref namesObjRef; + Object namesObj = catDict.getDict()->lookup("Names", &namesObjRef); + if (!namesObj.isDict()) { + // Need to create the names Dict + catDict.dictSet("Names", Object(new Dict(xref))); + namesObj = catDict.getDict()->lookup("Names"); + + // Trigger getting the names dict again when needed + names = Object(); + } + + Dict *namesDict = namesObj.getDict(); + + // We create a new EmbeddedFiles nametree, this replaces the existing one (if any), but it's not a problem + Object embeddedFilesObj = Object(new Dict(xref)); + const Ref embeddedFilesRef = xref->addIndirectObject(&embeddedFilesObj); + + Array *embeddedFilesNamesArray = new Array(xref); + + // This flattens out the existing EmbeddedFiles nametree (if any), should not be a problem + NameTree *ef = getEmbeddedFileNameTree(); + bool fileAlreadyAdded = false; + for (int i = 0; i < ef->numEntries(); ++i) { + GooString *efNameI = ef->getName(i); + + // we need to add the file if it has not been added yet and the name is smaller or equal lexicographically + // than the current item + const bool sameFileName = 
fileName == efNameI->toStr(); + const bool addFile = !fileAlreadyAdded && (sameFileName || fileName < efNameI->toStr()); + if (addFile) { + // If the new name is smaller lexicographically than an existing file add it in its correct position + embeddedFilesNamesArray->add(Object(new GooString(fileName))); + embeddedFilesNamesArray->add(Object(fileSpecRef)); + fileAlreadyAdded = true; + } + if (sameFileName) { + // If the new name is the same lexicographically than an existing file then don't add the existing file (i.e. replace) + continue; + } + embeddedFilesNamesArray->add(Object(efNameI->copy())); + embeddedFilesNamesArray->add(ef->getValue(i)->copy()); + } + + if (!fileAlreadyAdded) { + // The new file is bigger lexicographically than the existing ones + embeddedFilesNamesArray->add(Object(new GooString(fileName))); + embeddedFilesNamesArray->add(Object(fileSpecRef)); + } + + embeddedFilesObj.dictSet("Names", Object(embeddedFilesNamesArray)); + namesDict->set("EmbeddedFiles", Object(embeddedFilesRef)); + + if (namesObjRef.num != 0) { + xref->setModifiedObject(&namesObj, namesObjRef); + } else { + xref->setModifiedObject(&catDict, { xref->getRootNum(), xref->getRootGen() }); + } + + // recreate Nametree on next call that uses it + delete embeddedFileNameTree; + embeddedFileNameTree = nullptr; +} + GooString *Catalog::getJS(int i) { Object obj; diff --git a/poppler/Catalog.h b/poppler/Catalog.h index 97ac35a0..f4c3fe6c 100644 --- a/poppler/Catalog.h +++ b/poppler/Catalog.h @@ -178,6 +178,14 @@ public: // Get the i'th file embedded (at the Document level) in the document FileSpec *embeddedFile(int i); + // Is there an embedded file with the given name? 
+ bool hasEmbeddedFile(const std::string &fileName); + + // Adds an embedded file + // If there is already an existing embedded file with the given fileName + // it gets replaced; if that's not what you want, check hasEmbeddedFile first + void addEmbeddedFile(GooFile *file, const std::string &fileName); + // Get the number of javascript scripts int numJS() { return getJSNameTree()->numEntries(); } GooString *getJSName(int i) { return getJSNameTree()->getName(i); } diff --git a/poppler/Dict.cc b/poppler/Dict.cc index 383f7fd4..f1d6c6e1 100644 --- a/poppler/Dict.cc +++ b/poppler/Dict.cc @@ -168,6 +168,19 @@ Object Dict::lookup(const char *key, int recursion) const { return Object(objNull); } +Object Dict::lookup(const char *key, Ref *returnRef, int recursion) const { + if (const auto *entry = find(key)) { + if (entry->second.getType() == objRef) { + *returnRef = entry->second.getRef(); + } else { + *returnRef = { 0, 0 }; + } + return entry->second.fetch(xref, recursion); + } + *returnRef = { 0, 0 }; + return Object(objNull); +} + const Object &Dict::lookupNF(const char *key) const { if (const auto *entry = find(key)) { return entry->second; diff --git a/poppler/Dict.h b/poppler/Dict.h index db381310..13da4822 100644 --- a/poppler/Dict.h +++ b/poppler/Dict.h @@ -76,6 +76,7 @@ public: // Look up an entry and return the value. Returns a null object // if <key> is not in the dictionary. 
Object lookup(const char *key, int recursion = 0) const; + Object lookup(const char *key, Ref *returnRef, int recursion = 0) const; const Object &lookupNF(const char *key) const; bool lookupInt(const char *key, const char *alt_key, int *value) const; diff --git a/poppler/FileSpec.cc b/poppler/FileSpec.cc index 463e5514..be262ad4 100644 --- a/poppler/FileSpec.cc +++ b/poppler/FileSpec.cc @@ -29,6 +29,7 @@ #include <config.h> #include "FileSpec.h" +#include "XRef.h" #include "goo/gfile.h" EmbFile::EmbFile(Object &&efStream) @@ -165,6 +166,33 @@ EmbFile *FileSpec::getEmbeddedFile() return embFile; } +Object FileSpec::newFileSpecObject(XRef *xref, GooFile *file, const std::string &fileName) +{ + Object paramsDict = Object(new Dict(xref)); + paramsDict.dictSet("Size", Object(file->size())); + + // No Subtype in the embedded file stream dictionary for now + Object streamDict = Object(new Dict(xref)); + streamDict.dictSet("Length", Object(file->size())); + streamDict.dictSet("Params", std::move(paramsDict)); + + FileStream *fStream = new FileStream(file, 0, false, file->size(), std::move(streamDict)); + fStream->setNeedsEncryptionOnSave(true); + Stream *stream = fStream; + Object streamObj = Object(stream); + const Ref streamRef = xref->addIndirectObject(&streamObj); + + Dict *efDict = new Dict(xref); + efDict->set("F", Object(streamRef)); + + Dict *fsDict = new Dict(xref); + fsDict->set("Type", Object(objName, "Filespec")); + fsDict->set("UF", Object(new GooString(fileName))); + fsDict->set("EF", Object(efDict)); + + return Object(fsDict); +} + GooString *FileSpec::getFileNameForPlatform() { if (platformFileName) diff --git a/poppler/FileSpec.h b/poppler/FileSpec.h index a4fcc725..f205ac32 100644 --- a/poppler/FileSpec.h +++ b/poppler/FileSpec.h @@ -62,6 +62,8 @@ public: const GooString *getDescription() const { return desc; } EmbFile *getEmbeddedFile(); + static Object newFileSpecObject(XRef *xref, GooFile *file, const std::string &fileName); + private: bool ok; diff 
--git a/poppler/PDFDoc.cc b/poppler/PDFDoc.cc index 75f169e8..26842f84 100644 --- a/poppler/PDFDoc.cc +++ b/poppler/PDFDoc.cc @@ -1469,6 +1469,12 @@ void PDFDoc::writeObject (Object* obj, OutStream* outStr, XRef *xRef, unsigned i writeDictionnary (stream->getDict(),outStr, xRef, numOffset, fileKey, encAlgorithm, keyLength, objNum, objGen, alreadyWrittenDicts); writeStream (stream,outStr); delete encStream; + } else if (fileKey != nullptr && stream->getKind() == strFile && static_cast<FileStream*>(stream)->getNeedsEncryptionOnSave()) { + EncryptStream *encStream = new EncryptStream(stream, fileKey, encAlgorithm, keyLength, objNum, objGen); + encStream->setAutoDelete(false); + writeDictionnary (encStream->getDict(), outStr, xRef, numOffset, fileKey, encAlgorithm, keyLength, objNum, objGen, alreadyWrittenDicts); + writeStream (encStream, outStr); + delete encStream; } else { //raw stream copy FilterStream *fs = dynamic_cast<FilterStream*>(stream); diff --git a/poppler/Stream.cc b/poppler/Stream.cc index a41435ab..99ccbd22 100644 --- a/poppler/Stream.cc +++ b/poppler/Stream.cc @@ -759,6 +759,7 @@ FileStream::FileStream(GooFile* fileA, Goffset startA, bool limitedA, bufPos = start; savePos = 0; saved = false; + needsEncryptionOnSave = false; } FileStream::~FileStream() { diff --git a/poppler/Stream.h b/poppler/Stream.h index f6aa41cd..851d2fe3 100644 --- a/poppler/Stream.h +++ b/poppler/Stream.h @@ -470,6 +470,9 @@ public: int getUnfilteredChar () override { return getChar(); } void unfilteredReset () override { reset(); } + bool getNeedsEncryptionOnSave() const { return needsEncryptionOnSave; } + void setNeedsEncryptionOnSave(bool needsEncryptionOnSaveA) { needsEncryptionOnSave = needsEncryptionOnSaveA; } + private: bool fillBuf(); @@ -508,6 +511,8 @@ private: Goffset bufPos; Goffset savePos; bool saved; + bool needsEncryptionOnSave; // Needed for FileStreams that point to "external" files + // and thus when saving we can't do a raw copy }; 
//------------------------------------------------------------------------ diff --git a/utils/CMakeLists.txt b/utils/CMakeLists.txt index 3516479e..3a1e0d1a 100644 --- a/utils/CMakeLists.txt +++ b/utils/CMakeLists.txt @@ -57,6 +57,15 @@ target_link_libraries(pdfdetach ${common_libs}) install(TARGETS pdfdetach DESTINATION bin) install(FILES pdfdetach.1 DESTINATION ${CMAKE_INSTALL_MANDIR}/man1) +# pdfattach +set(pdfattach_SOURCES ${common_srcs} + pdfattach.cc +) +add_executable(pdfattach ${pdfattach_SOURCES}) +target_link_libraries(pdfattach ${common_libs}) +install(TARGETS pdfattach DESTINATION bin) +install(FILES pdfattach.1 DESTINATION ${CMAKE_INSTALL_MANDIR}/man1) + # pdffonts set(pdffonts_SOURCES ${common_srcs} pdffonts.cc diff --git a/utils/pdfattach.1 b/utils/pdfattach.1 new file mode 100644 index 00000000..c9589a78 --- /dev/null +++ b/utils/pdfattach.1 @@ -0,0 +1,60 @@ +.\" Copyright 2019 Albert Astals Cid +.TH pdfattach 1 "10 February 2019" +.SH NAME +pdfattach \- Portable Document Format (PDF) document embedded file +creator (version 3.03) +.SH SYNOPSIS +.B pdfattach +[options] +.I input-PDF-file file-to-attach output-PDF-file +.SH DESCRIPTION +.B Pdfattach +adds a new embedded file (attachment) to an existing Portable +Document Format (PDF) file. +.SH OPTIONS +.TP +.B \-replace +Replace embedded file with same name (if it exists). +.TP +.B \-v +Print copyright and version information. +.TP +.B \-h +Print usage information. +.RB ( \-help +and +.B \-\-help +are equivalent.) +.SH EXIT CODES +.TP +0 +No error. +.TP +1 +Error opening input PDF file. +.TP +2 +Error opening file to attach. +.TP +3 +Output file already exists. +.TP +4 +There is already an attached file with that name. +.TP +5 +Error saving the output file. 
+.SH AUTHOR +The pdfattach software and documentation are copyright 2019 The Poppler developers +.SH "SEE ALSO" +.BR pdfdetach (1), +.BR pdfimages (1), +.BR pdfinfo (1), +.BR pdftocairo (1), +.BR pdftohtml (1), +.BR pdftoppm (1), +.BR pdftops (1), +.BR pdftotext (1) +.BR pdfseparate (1), +.BR pdfsig (1), +.BR pdfunite (1) diff --git a/utils/pdfattach.cc b/utils/pdfattach.cc new file mode 100644 index 00000000..e4f40a32 --- /dev/null +++ b/utils/pdfattach.cc @@ -0,0 +1,112 @@ +//======================================================================== +// +// pdfattach.cc +// +// This file is licensed under the GPLv2 or later +// +// Copyright (C) 2019 Albert Astals Cid <[email protected]> +// +// To see a description of the changes please see the Changelog file that +// came with your tarball or type make ChangeLog if you are building from git +// +//======================================================================== + +#include "config.h" +#include <poppler-config.h> +#include "gbasename.h" +#include "parseargs.h" +#include "GlobalParams.h" +#include "PDFDoc.h" +#include "PDFDocFactory.h" +#include "Error.h" +#include "ErrorCodes.h" +#include "Win32Console.h" + +static bool doReplace = false; +static bool printVersion = false; +static bool printHelp = false; + +static ArgDesc argDesc[] = { + {"-replace", argFlag, &doReplace, 0, + "replace embedded file with same name (if it exists)"}, + {"-v", argFlag, &printVersion, 0, + "print copyright and version info"}, + {"-h", argFlag, &printHelp, 0, + "print usage information"}, + {"-help", argFlag, &printHelp, 0, + "print usage information"}, + {"--help", argFlag, &printHelp, 0, + "print usage information"}, + {"-?", argFlag, &printHelp, 0, + "print usage information"}, + { } +}; + +static bool fileExists(const char *filePath) +{ + FILE *f = openFile(filePath, "r"); + if (f != nullptr) { + fclose(f); + return true; + } + return false; +} + +int main(int argc, char *argv[]) { + Win32Console win32Console(&argc, &argv); 
+ + // parse args + const bool ok = parseArgs(argDesc, &argc, argv); + if (!ok || argc != 4 || printVersion || printHelp) { + fprintf(stderr, "pdfattach version %s\n", PACKAGE_VERSION); + fprintf(stderr, "%s\n", popplerCopyright); + fprintf(stderr, "%s\n", xpdfCopyright); + if (!printVersion) { + printUsage("pdfattach", "<input-PDF-file> <file-to-attach> <output-PDF-file>", argDesc); + } + return 99; + } + const GooString pdfFileName(argv[1]); + const GooString attachFilePath(argv[2]); + + // init GlobalParams + auto gp = std::make_unique<GlobalParams>(); + globalParams = gp.get(); + + // open PDF file + std::unique_ptr<PDFDoc> doc(PDFDocFactory().createPDFDoc(pdfFileName, nullptr, nullptr)); + + if (!doc->isOk()) { + fprintf(stderr, "Couldn't open %s\n", pdfFileName.c_str()); + return 1; + } + + std::unique_ptr<GooFile> attachFile(GooFile::open(&attachFilePath)); + if (!attachFile) { + fprintf(stderr, "Couldn't open %s\n", attachFilePath.c_str()); + return 2; + } + + if (fileExists(argv[3])) { + fprintf(stderr, "File %s already exists.\n", argv[3]); + return 3; + } + + const std::string attachFileName = gbasename(attachFilePath.c_str()); + + if (!doReplace && doc->getCatalog()->hasEmbeddedFile(attachFileName)) { + fprintf(stderr, "There is already an embedded file named %s.\n", attachFileName.c_str()); + return 4; + } + + doc->getCatalog()->addEmbeddedFile(attachFile.get(), attachFileName); + + const GooString outputPdfFilePath(argv[3]); + const int saveResult = doc->saveAs(&outputPdfFilePath); + if (saveResult != errNone) { + fprintf(stderr, "Couldn't save the file properly.\n"); + return 5; + } + + return 0; +} _______________________________________________ poppler mailing list [email protected] https://lists.freedesktop.org/mailman/listinfo/poppler
