poppler/PDFDoc.cc | 132 +++++++++++++++++++++++++++++ poppler/PDFDoc.h | 46 ++++++++++ utils/pdfinfo.cc | 238 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 416 insertions(+)
New commits: commit 45f0f6d21d51c0408fe1d876f18ef05489e69bc0 Author: Evangelos Rigas <[email protected]> Date: Mon Aug 6 10:57:47 2018 +0100 [utils] Add PDF subtype to pdfinfo If the document is compliant with PDF A, E, VT, UA or X standard print PDF subtype version, title, subtitle and explain the part and conformance levels. diff --git a/utils/pdfinfo.cc b/utils/pdfinfo.cc index 50042393..91423ebd 100644 --- a/utils/pdfinfo.cc +++ b/utils/pdfinfo.cc @@ -24,6 +24,7 @@ // Copyright (C) 2013 Suzuki Toshiya <[email protected]> // Copyright (C) 2018 Klarälvdalens Datakonsult AB, a KDAB Group company, <[email protected]>. Work sponsored by the LiMux project of the city of Munich // Copyright (C) 2018 Adam Reichold <[email protected]> +// Copyright (C) 2018 Evangelos Rigas <[email protected]> // // To see a description of the changes please see the Changelog file that // came with your tarball or type make ChangeLog if you are building from git @@ -424,6 +425,241 @@ static void printDestinations(PDFDoc *doc, UnicodeMap *uMap) { } } +static void printPdfSubtype(PDFDoc *doc, UnicodeMap *uMap) { + const Object info = doc->getDocInfo(); + if (info.isDict()) { + const PDFSubtype pdftype = doc->getPDFSubtype(); + + if ((pdftype == subtypeNull) | (pdftype == subtypeNone)) { + return; + } + + std::unique_ptr<GooString> part; + std::unique_ptr<GooString> abbr; + std::unique_ptr<GooString> standard; + std::unique_ptr<GooString> typeExp; + std::unique_ptr<GooString> confExp; + + // Form title from PDFSubtype + switch (pdftype) + { + case subtypePDFA: + printInfoString(info.getDict(), "GTS_PDFA1Version", "PDF subtype: ", uMap); + typeExp.reset( new GooString("ISO 19005 - Electronic document file format for long-term preservation (PDF/A)") ); + standard.reset( new GooString("ISO 19005") ); + abbr.reset( new GooString("PDF/A") ); + break; + case subtypePDFE: + printInfoString(info.getDict(), "GTS_PDFEVersion", "PDF subtype: ", uMap); + typeExp.reset( new GooString("ISO 24517 - 
Engineering document format using PDF (PDF/E)") ); + standard.reset( new GooString("ISO 24517") ); + abbr.reset( new GooString("PDF/E") ); + break; + case subtypePDFUA: + printInfoString(info.getDict(), "GTS_PDFUAVersion", "PDF subtype: ", uMap); + typeExp.reset( new GooString("ISO 14289 - Electronic document file format enhancement for accessibility (PDF/UA)") ); + standard.reset( new GooString("ISO 14289") ); + abbr.reset( new GooString("PDF/UA") ); + break; + case subtypePDFVT: + printInfoString(info.getDict(), "GTS_PDFVTVersion", "PDF subtype: ", uMap); + typeExp.reset( new GooString("ISO 16612 - Electronic document file format for variable data exchange (PDF/VT)") ); + standard.reset( new GooString("ISO 16612") ); + abbr.reset( new GooString("PDF/VT") ); + break; + case subtypePDFX: + printInfoString(info.getDict(), "GTS_PDFXVersion", "PDF subtype: ", uMap); + typeExp.reset( new GooString("ISO 15930 - Electronic document file format for prepress digital data exchange (PDF/X)") ); + standard.reset( new GooString("ISO 15930") ); + abbr.reset( new GooString("PDF/X") ); + break; + case subtypeNone: + case subtypeNull: + default: + return; + } + + // Form the abbreviation from PDFSubtypePart and PDFSubtype + const PDFSubtypePart subpart = doc->getPDFSubtypePart(); + switch (pdftype) { + case subtypePDFX: + switch (subpart) { + case subtypePart1: + abbr->append("-1:2001"); + break; + case subtypePart2: + abbr->append("-2"); + break; + case subtypePart3: + abbr->append("-3:2002"); + break; + case subtypePart4: + abbr->append("-1:2003"); + break; + case subtypePart5: + abbr->append("-2"); + break; + case subtypePart6: + abbr->append("-3:2003"); + break; + case subtypePart7: + abbr->append("-4"); + break; + case subtypePart8: + abbr->append("-5"); + break; + default: + break; + } + break; + case subtypeNone: + case subtypeNull: + break; + default: + abbr->appendf("-{0:d}", subpart); + break; + } + + // Form standard from PDFSubtypePart + switch (subpart) { + case 
subtypePartNone: + case subtypePartNull: + break; + default: + standard->appendf("-{0:d}", subpart); + break; + } + + // Form the subtitle from PDFSubtypePart and PDFSubtype + switch (pdftype) { + case subtypePDFA: + switch (subpart) { + case subtypePart1: + part.reset( new GooString("Use of PDF 1.4") ); + break; + case subtypePart2: + part.reset( new GooString("Use of ISO 32000-1") ); + break; + case subtypePart3: + part.reset( new GooString("Use of ISO 32000-1 with support for embedded files") ); + break; + default: + break; + } + break; + case subtypePDFE: + switch (subpart) { + case subtypePart1: + part.reset( new GooString("Use of PDF 1.6") ); + break; + default: + break; + } + break; + case subtypePDFUA: + switch (subpart) { + case subtypePart1: + part.reset( new GooString("Use of ISO 32000-1") ); + break; + case subtypePart2: + part.reset( new GooString("Use of ISO 32000-2") ); + break; + case subtypePart3: + part.reset( new GooString("Use of ISO 32000-1 with support for embedded files") ); + break; + default: + break; + } + break; + case subtypePDFVT: + switch (subpart) { + case subtypePart1: + part.reset( new GooString("Using PPML 2.1 and PDF 1.4") ); + break; + case subtypePart2: + part.reset( new GooString("Using PDF/X-4 and PDF/X-5 (PDF/VT-1 and PDF/VT-2)") ); + break; + case subtypePart3: + part.reset( new GooString("Using PDF/X-6 (PDF/VT-3)") ); + break; + default: + break; + } + break; + case subtypePDFX: + switch (subpart) { + case subtypePart1: + part.reset( new GooString("Complete exchange using CMYK data (PDF/X-1 and PDF/X-1a)") ); + break; + case subtypePart3: + part.reset( new GooString("Complete exchange suitable for colour-managed workflows (PDF/X-3)") ); + break; + case subtypePart4: + part.reset( new GooString("Complete exchange of CMYK and spot colour printing data using PDF 1.4 (PDF/X-1a)") ); + break; + case subtypePart5: + part.reset( new GooString("Partial exchange of printing data using PDF 1.4 (PDF/X-2) [Withdrawn]") ); + break; + 
case subtypePart6: + part.reset( new GooString("Complete exchange of printing data suitable for colour-managed workflows using PDF 1.4 (PDF/X-3)") ); + break; + case subtypePart7: + part.reset( new GooString("Complete exchange of printing data (PDF/X-4) and partial exchange of printing data with external profile reference (PDF/X-4p) using PDF 1.6") ); + break; + case subtypePart8: + part.reset( new GooString("Partial exchange of printing data using PDF 1.6 (PDF/X-5)") ); + break; + default: + break; + } + break; + default: + break; + } + + // Form Conformance explanation from PDFSubtypeConformance + switch (doc->getPDFSubtypeConformance()) + { + case subtypeConfA: + confExp.reset( new GooString("Level A, Accessible") ); + break; + case subtypeConfB: + confExp.reset( new GooString("Level B, Basic") ); + break; + case subtypeConfG: + confExp.reset( new GooString("Level G, External graphical content") ); + break; + case subtypeConfN: + confExp.reset( new GooString("Level N, External ICC profile") ); + break; + case subtypeConfP: + confExp.reset( new GooString("Level P, Embedded ICC profile") ); + break; + case subtypeConfPG: + confExp.reset( new GooString("Level PG, Embedded ICC profile and external graphical content") ); + break; + case subtypeConfU: + confExp.reset( new GooString("Level U, Unicode support") ); + break; + case subtypeConfNone: + case subtypeConfNull: + default: + confExp.reset(); + break; + } + + printf(" Title: %s\n",typeExp->getCString()); + printf(" Abbreviation: %s\n", abbr->getCString()); + if (part.get()) + printf(" Subtitle: Part %d: %s\n", subpart, part->getCString()); + else + printf(" Subtitle: Part %d\n", subpart); + printf(" Standard: %s-%d\n", typeExp->toStr().substr(0,9).c_str(), subpart); + if (confExp.get()) + printf(" Conformance: %s\n", confExp->getCString()); + } +} + static void printInfo(PDFDoc *doc, UnicodeMap *uMap, long long filesize, GBool multiPage) { Page *page; char buf[256]; @@ -596,6 +832,8 @@ static void 
printInfo(PDFDoc *doc, UnicodeMap *uMap, long long filesize, GBool m // print PDF version printf("PDF version: %d.%d\n", doc->getPDFMajorVersion(), doc->getPDFMinorVersion()); + + printPdfSubtype(doc, uMap); } int main(int argc, char *argv[]) { commit 98d1b3dcc2c0530c12fb4422067c529ab375c680 Author: Evangelos Rigas <[email protected]> Date: Wed Aug 22 10:51:12 2018 +0300 [core] Add support for PDF subtype property Parse /GTS_PDF(A,E,UA,VT,X)Version from the PDF Information Dictionary into three enums: PDFSubtype, PDFSubtypePart, and PDFSubtypeConformance. diff --git a/poppler/PDFDoc.cc b/poppler/PDFDoc.cc index 0ee0b50e..cb8fd0d7 100644 --- a/poppler/PDFDoc.cc +++ b/poppler/PDFDoc.cc @@ -40,6 +40,7 @@ // Copyright (C) 2018 Ben Timby <[email protected]> // Copyright (C) 2018 Evangelos Foutras <[email protected]> // Copyright (C) 2018 Klarälvdalens Datakonsult AB, a KDAB Group company, <[email protected]>. Work sponsored by the LiMux project of the city of Munich +// Copyright (C) 2018 Evangelos Rigas <[email protected]> // // To see a description of the changes please see the Changelog file that // came with your tarball or type make ChangeLog if you are building from git @@ -62,6 +63,7 @@ #include <stddef.h> #include <string.h> #include <time.h> +#include <regex> #include <sys/stat.h> #include "goo/glibc.h" #include "goo/gstrtod.h" @@ -318,6 +320,9 @@ GBool PDFDoc::setup(GooString *ownerPassword, GooString *userPassword) { } } + // Extract PDF Subtype information + extractPDFSubtype(); + // done return gTrue; } @@ -482,6 +487,133 @@ GBool PDFDoc::checkEncryption(GooString *ownerPassword, GooString *userPassword) return ret; } +static PDFSubtypePart pdfPartFromString(PDFSubtype subtype, GooString *pdfSubtypeVersion) { + const std::regex regex("PDF/(?:A|X|VT|E|UA)-([[:digit:]])(?:[[:alpha:]]{1,2})?:?([[:digit:]]{4})?"); + std::smatch match; + std::string pdfsubver = pdfSubtypeVersion->toStr(); + PDFSubtypePart subtypePart = subtypePartNone; + + if 
(std::regex_search(pdfsubver, match, regex)) { + int date = 0; + const int part = std::stoi(match.str(1)); + + if (match[2].matched) { + date = std::stoi(match.str(2)); + } + + switch (subtype) { + case subtypePDFX: + switch (part) { + case 1: + switch (date) { + case 2001: + default: + subtypePart = subtypePart1; + break; + case 2003: + subtypePart = subtypePart4; + break; + } + break; + case 2: + subtypePart = subtypePart5; + break; + case 3: + switch (date) { + case 2002: + default: + subtypePart = subtypePart3; + break; + case 2003: + subtypePart = subtypePart6; + break; + } + break; + case 4: + subtypePart = subtypePart7; + break; + case 5: + subtypePart = subtypePart8; + break; + } + break; + default: + subtypePart = (PDFSubtypePart)part; + break; + + } + } + + return subtypePart; +} + +static PDFSubtypeConformance pdfConformanceFromString(GooString *pdfSubtypeVersion) { + const std::regex regex("PDF/(?:A|X|VT|E|UA)-[[:digit:]]([[:alpha:]]+)"); + std::smatch match; + const std::string pdfsubver = pdfSubtypeVersion->toStr(); + PDFSubtypeConformance pdfConf = subtypeConfNone; + + // match contains the PDF conformance (A, B, G, N, P, PG or U) + if (std::regex_search(pdfsubver, match, regex)) { + GooString *conf = new GooString(match.str(1)); + // Convert to lowercase as the conformance may appear in both cases + conf->lowerCase(); + if (conf->cmp("a")==0) { + pdfConf = subtypeConfA; + } else if (conf->cmp("b")==0) { + pdfConf = subtypeConfB; + } else if (conf->cmp("g")==0) { + pdfConf = subtypeConfG; + } else if (conf->cmp("n")==0) { + pdfConf = subtypeConfN; + } else if (conf->cmp("p")==0) { + pdfConf = subtypeConfP; + } else if (conf->cmp("pg")==0) { + pdfConf = subtypeConfPG; + } else if (conf->cmp("u")==0) { + pdfConf = subtypeConfU; + } else { + pdfConf = subtypeConfNone; + } + delete conf; + } + + return pdfConf; +} + +void PDFDoc::extractPDFSubtype() { + pdfSubtype = subtypeNull; + pdfPart = subtypePartNull; + pdfConformance = subtypeConfNull; + + 
GooString *pdfSubtypeVersion = nullptr; + // Find PDF InfoDict subtype key if any + if ((pdfSubtypeVersion = getDocInfoStringEntry("GTS_PDFA1Version"))) { + pdfSubtype = subtypePDFA; + } else if ((pdfSubtypeVersion = getDocInfoStringEntry("GTS_PDFEVersion"))) { + pdfSubtype = subtypePDFE; + } else if ((pdfSubtypeVersion = getDocInfoStringEntry("GTS_PDFUAVersion"))) { + pdfSubtype = subtypePDFUA; + } else if ((pdfSubtypeVersion = getDocInfoStringEntry("GTS_PDFVTVersion"))) { + pdfSubtype = subtypePDFVT; + } else if ((pdfSubtypeVersion = getDocInfoStringEntry("GTS_PDFXVersion"))) { + pdfSubtype = subtypePDFX; + } else { + pdfSubtype = subtypeNone; + pdfPart = subtypePartNone; + pdfConformance = subtypeConfNone; + return; + } + + // Extract part from version string + pdfPart = pdfPartFromString(pdfSubtype, pdfSubtypeVersion); + + // Extract conformance from version string + pdfConformance = pdfConformanceFromString(pdfSubtypeVersion); + + delete pdfSubtypeVersion; +} + std::vector<FormWidgetSignature*> PDFDoc::getSignatureWidgets() { int num_pages = getNumPages(); diff --git a/poppler/PDFDoc.h b/poppler/PDFDoc.h index 1678d167..3353db74 100644 --- a/poppler/PDFDoc.h +++ b/poppler/PDFDoc.h @@ -31,6 +31,7 @@ // Copyright (C) 2015 André Esser <[email protected]> // Copyright (C) 2016 Jakub Alba <[email protected]> // Copyright (C) 2018 Klarälvdalens Datakonsult AB, a KDAB Group company, <[email protected]>. 
// ---------------------------------------------------------------------------
// Patch context preserved verbatim from the original e-mail (not code):
//
//  Work sponsored by the LiMux project of the city of Munich
// +// Copyright (C) 2018 Evangelos Rigas <[email protected]>
//  //
//  // To see a description of the changes please see the Changelog file that
//  // came with your tarball or type make ChangeLog if you are building from git
// @@ -74,6 +75,41 @@ enum PDFWriteMode {
//    writeForceIncremental
//  };
// ---------------------------------------------------------------------------

// PDF standard family a document declares through a GTS_PDF*Version entry
// in its Info dictionary.
//   subtypeNull - initial value assigned before the Info dictionary is probed.
//   subtypeNone - the Info dictionary was probed and no subtype key was found.
enum PDFSubtype {
  subtypeNull  = 0,
  subtypePDFA  = 1,
  subtypePDFE  = 2,
  subtypePDFUA = 3,
  subtypePDFVT = 4,
  subtypePDFX  = 5,
  subtypeNone  = 6
};

// Part of the ISO standard the document conforms to.
//
// The numeric values are significant and therefore spelled out: the part
// digit parsed from the version string is converted directly to this enum,
// and pdfinfo prints the enumerator value as the ISO part number. Do not
// reorder or renumber.
//   subtypePartNull - initial value assigned before extraction.
//   subtypePartNone - no part could be determined.
enum PDFSubtypePart {
  subtypePartNull = 0,
  subtypePart1    = 1,
  subtypePart2    = 2,
  subtypePart3    = 3,
  subtypePart4    = 4,
  subtypePart5    = 5,
  subtypePart6    = 6,
  subtypePart7    = 7,
  subtypePart8    = 8,
  subtypePartNone = 9
};

// Conformance level letters that may follow the part digit (e.g. the "b"
// in "PDF/A-1b").
//   subtypeConfNull - initial value assigned before extraction.
//   subtypeConfNone - no (recognised) conformance level present.
enum PDFSubtypeConformance {
  subtypeConfNull = 0,
  subtypeConfA    = 1,
  subtypeConfB    = 2,
  subtypeConfG    = 3,
  subtypeConfN    = 4,
  subtypeConfP    = 5,
  subtypeConfPG   = 6,
  subtypeConfU    = 7,
  subtypeConfNone = 8
};

// ---------------------------------------------------------------------------
// Patch context preserved verbatim from the original e-mail (not code).
// These hunks modify the interior of class PDFDoc, whose declaration is not
// part of this excerpt:
//
//  //------------------------------------------------------------------------
//  // PDFDoc
//  //------------------------------------------------------------------------
// @@ -273,6 +309,11 @@ public:
//    GooString *getDocInfoCreatDate() { return getDocInfoStringEntry("CreationDate"); }
//    GooString *getDocInfoModDate() { return getDocInfoStringEntry("ModDate"); }
//
// +  // Return the PDF subtype, part, and conformance
// +  PDFSubtype getPDFSubtype() const { return pdfSubtype; }
// +  PDFSubtypePart getPDFSubtypePart() const { return pdfPart; }
// +  PDFSubtypeConformance getPDFSubtypeConformance() const { return pdfConformance; }
// +
//    // Return the PDF version specified by the file.
//    int getPDFMajorVersion() { return pdfMajorVersion; }
//    int getPDFMinorVersion() { return pdfMinorVersion; }
// @@ -346,6 +387,8 @@ private:
//    GBool checkFooter();
//    void checkHeader();
//    GBool checkEncryption(GooString *ownerPassword, GooString *userPassword);
// +  void extractPDFSubtype();
// +
//    // Get the offset of the start xref table.
// ---------------------------------------------------------------------------
Goffset getStartXRef(GBool tryingToReconstruct = gFalse); // Get the offset of the entries in the main XRef table of a @@ -365,6 +408,9 @@ private: void *guiData; int pdfMajorVersion; int pdfMinorVersion; + PDFSubtype pdfSubtype; + PDFSubtypePart pdfPart; + PDFSubtypeConformance pdfConformance; Linearization *linearization; // linearizationState = 0: unchecked // linearizationState = 1: checked and valid _______________________________________________ poppler mailing list [email protected] https://lists.freedesktop.org/mailman/listinfo/poppler
