poppler/Dict.cc | 4 ++-- poppler/Dict.h | 2 +- poppler/Object.cc | 4 ++-- poppler/Object.h | 8 ++++---- poppler/Parser.cc | 32 +++++++++++++++----------------- poppler/Parser.h | 10 +++------- poppler/XRef.cc | 33 +++++++-------------------------- poppler/XRef.h | 2 +- 8 files changed, 35 insertions(+), 60 deletions(-)
New commits: commit 743f70f594bf3c9a58d1ff0738b9a2bc3ea03382 Author: Albert Astals Cid <[email protected]> Date: Tue Dec 6 20:27:03 2011 +0100 xpdf303: Use xpdf method against recursion while parsing. Our previous approach detected loops correctly, but it did not cope with "valid" files containing lots of arrays nested one inside the other ([[[[[[[[[[[[[[[[[[[), so switch to the cruder "fix" used in xpdf. diff --git a/poppler/Dict.cc b/poppler/Dict.cc index d0d4b85..2615fde 100644 --- a/poppler/Dict.cc +++ b/poppler/Dict.cc @@ -193,10 +193,10 @@ GBool Dict::is(const char *type) { return (e = find("Type")) && e->val.isName(type); } -Object *Dict::lookup(const char *key, Object *obj, std::set<int> *fetchOriginatorNums) { +Object *Dict::lookup(const char *key, Object *obj, int recursion) { DictEntry *e; - return (e = find(key)) ? e->val.fetch(xref, obj, fetchOriginatorNums) : obj->initNull(); + return (e = find(key)) ? e->val.fetch(xref, obj, recursion) : obj->initNull(); } Object *Dict::lookupNF(const char *key, Object *obj) { diff --git a/poppler/Dict.h b/poppler/Dict.h index a589377..897f221 100644 --- a/poppler/Dict.h +++ b/poppler/Dict.h @@ -72,7 +72,7 @@ public: // Look up an entry and return the value. Returns a null object // if <key> is not in the dictionary. - Object *lookup(const char *key, Object *obj, std::set<int> *fetchOriginatorNums = NULL); + Object *lookup(const char *key, Object *obj, int recursion = 0); Object *lookupNF(const char *key, Object *obj); GBool lookupInt(const char *key, const char *alt_key, int *value); diff --git a/poppler/Object.cc b/poppler/Object.cc index 84b5583..1cedba4 100644 --- a/poppler/Object.cc +++ b/poppler/Object.cc @@ -115,9 +115,9 @@ Object *Object::copy(Object *obj) { return obj; } -Object *Object::fetch(XRef *xref, Object *obj, std::set<int> *fetchOriginatorNums) { +Object *Object::fetch(XRef *xref, Object *obj, int recursion) { return (type == objRef && xref) ? 
- xref->fetch(ref.num, ref.gen, obj, fetchOriginatorNums) : copy(obj); + xref->fetch(ref.num, ref.gen, obj, recursion) : copy(obj); } void Object::free() { diff --git a/poppler/Object.h b/poppler/Object.h index 1b58037..a67b403 100644 --- a/poppler/Object.h +++ b/poppler/Object.h @@ -154,7 +154,7 @@ public: // If object is a Ref, fetch and return the referenced object. // Otherwise, return a copy of the object. - Object *fetch(XRef *xref, Object *obj, std::set<int> *fetchOriginatorNums = NULL); + Object *fetch(XRef *xref, Object *obj, int recursion = 0); // Free object contents. void free(); @@ -213,7 +213,7 @@ public: void dictAdd(char *key, Object *val); void dictSet(const char *key, Object *val); GBool dictIs(const char *dictType); - Object *dictLookup(const char *key, Object *obj, std::set<int> *fetchOriginatorNums = NULL); + Object *dictLookup(const char *key, Object *obj, int recursion = 0); Object *dictLookupNF(const char *key, Object *obj); char *dictGetKey(int i); Object *dictGetVal(int i, Object *obj); @@ -300,8 +300,8 @@ inline GBool Object::dictIs(const char *dictType) inline GBool Object::isDict(const char *dictType) { return type == objDict && dictIs(dictType); } -inline Object *Object::dictLookup(const char *key, Object *obj, std::set<int> *fetchOriginatorNums) - { OBJECT_TYPE_CHECK(objDict); return dict->lookup(key, obj, fetchOriginatorNums); } +inline Object *Object::dictLookup(const char *key, Object *obj, int recursion) + { OBJECT_TYPE_CHECK(objDict); return dict->lookup(key, obj, recursion); } inline Object *Object::dictLookupNF(const char *key, Object *obj) { OBJECT_TYPE_CHECK(objDict); return dict->lookupNF(key, obj); } diff --git a/poppler/Parser.cc b/poppler/Parser.cc index 790ec3c..03b836e 100644 --- a/poppler/Parser.cc +++ b/poppler/Parser.cc @@ -37,6 +37,11 @@ #include "XRef.h" #include "Error.h" +// Max number of nested objects. This is used to catch infinite loops +// in the object structure. 
And also technically valid files with +// lots of nested arrays that made us consume all the stack +#define recursionLimit 500 + Parser::Parser(XRef *xrefA, Lexer *lexerA, GBool allowStreamsA) { xref = xrefA; lexer = lexerA; @@ -52,21 +57,14 @@ Parser::~Parser() { delete lexer; } -Object *Parser::getObj(Object *obj, Guchar *fileKey, - CryptAlgorithm encAlgorithm, int keyLength, - int objNum, int objGen) { - std::set<int> fetchOriginatorNums; - return getObj(obj, fileKey, encAlgorithm, keyLength, objNum, objGen, &fetchOriginatorNums); -} - -Object *Parser::getObj(Object *obj, std::set<int> *fetchOriginatorNums) +Object *Parser::getObj(Object *obj, int recursion) { - return getObj(obj, NULL, cryptRC4, 0, 0, 0, fetchOriginatorNums); + return getObj(obj, NULL, cryptRC4, 0, 0, 0, recursion); } Object *Parser::getObj(Object *obj, Guchar *fileKey, CryptAlgorithm encAlgorithm, int keyLength, - int objNum, int objGen, std::set<int> *fetchOriginatorNums) { + int objNum, int objGen, int recursion) { char *key; Stream *str; Object obj2; @@ -85,18 +83,18 @@ Object *Parser::getObj(Object *obj, Guchar *fileKey, } // array - if (buf1.isCmd("[")) { + if (likely(recursion < recursionLimit) && buf1.isCmd("[")) { shift(); obj->initArray(xref); while (!buf1.isCmd("]") && !buf1.isEOF()) obj->arrayAdd(getObj(&obj2, fileKey, encAlgorithm, keyLength, - objNum, objGen, fetchOriginatorNums)); + objNum, objGen, recursion + 1)); if (buf1.isEOF()) error(errSyntaxError, getPos(), "End of file inside array"); shift(); // dictionary or stream - } else if (buf1.isCmd("<<")) { + } else if (likely(recursion < recursionLimit) && buf1.isCmd("<<")) { shift(objNum); obj->initDict(xref); while (!buf1.isCmd(">>") && !buf1.isEOF()) { @@ -111,7 +109,7 @@ Object *Parser::getObj(Object *obj, Guchar *fileKey, gfree(key); break; } - obj->dictAdd(key, getObj(&obj2, fileKey, encAlgorithm, keyLength, objNum, objGen, fetchOriginatorNums)); + obj->dictAdd(key, getObj(&obj2, fileKey, encAlgorithm, keyLength, objNum, 
objGen, recursion + 1)); } } if (buf1.isEOF()) @@ -120,7 +118,7 @@ Object *Parser::getObj(Object *obj, Guchar *fileKey, // object streams if (allowStreams && buf2.isCmd("stream")) { if ((str = makeStream(obj, fileKey, encAlgorithm, keyLength, - objNum, objGen, fetchOriginatorNums))) { + objNum, objGen, recursion + 1))) { obj->initStream(str); } else { obj->free(); @@ -174,7 +172,7 @@ Object *Parser::getObj(Object *obj, Guchar *fileKey, Stream *Parser::makeStream(Object *dict, Guchar *fileKey, CryptAlgorithm encAlgorithm, int keyLength, - int objNum, int objGen, std::set<int> *fetchOriginatorNums) { + int objNum, int objGen, int recursion) { Object obj; BaseStream *baseStr; Stream *str; @@ -188,7 +186,7 @@ Stream *Parser::makeStream(Object *dict, Guchar *fileKey, pos = str->getPos(); // get length - dict->dictLookup("Length", &obj, fetchOriginatorNums); + dict->dictLookup("Length", &obj, recursion); if (obj.isInt()) { length = (Guint)obj.getInt(); obj.free(); diff --git a/poppler/Parser.h b/poppler/Parser.h index 3d8a831..f1fa765 100644 --- a/poppler/Parser.h +++ b/poppler/Parser.h @@ -45,13 +45,9 @@ public: // Get the next object from the input stream. Object *getObj(Object *obj, Guchar *fileKey = NULL, CryptAlgorithm encAlgorithm = cryptRC4, int keyLength = 0, - int objNum = 0, int objGen = 0); + int objNum = 0, int objGen = 0, int recursion = 0); - Object *getObj(Object *obj, Guchar *fileKey, - CryptAlgorithm encAlgorithm, int keyLength, - int objNum, int objGen, std::set<int> *fetchOriginatorNums); - - Object *getObj(Object *obj, std::set<int> *fetchOriginatorNums); + Object *getObj(Object *obj, int recursion); // Get stream. 
Stream *getStream() { return lexer->getStream(); } @@ -69,7 +65,7 @@ private: Stream *makeStream(Object *dict, Guchar *fileKey, CryptAlgorithm encAlgorithm, int keyLength, - int objNum, int objGen, std::set<int> *fetchOriginatorNums); + int objNum, int objGen, int recursion); void shift(int objNum = -1); }; diff --git a/poppler/XRef.cc b/poppler/XRef.cc index e1115c0..bacd540 100644 --- a/poppler/XRef.cc +++ b/poppler/XRef.cc @@ -967,16 +967,14 @@ GBool XRef::okToAssemble(GBool ignoreOwnerPW) { return (!ignoreOwnerPW && ownerPasswordOk) || (permFlags & permAssemble); } -Object *XRef::fetch(int num, int gen, Object *obj, std::set<int> *fetchOriginatorNums) { +Object *XRef::fetch(int num, int gen, Object *obj, int recursion) { XRefEntry *e; Parser *parser; Object obj1, obj2, obj3; - bool deleteFetchOriginatorNums = false; - std::pair<std::set<int>::iterator, bool> fetchInsertResult; // check for bogus ref - this can happen in corrupted PDF files - if (num < 0 || num >= size || (fetchOriginatorNums != NULL && fetchOriginatorNums->find(num) != fetchOriginatorNums->end())) { - goto err2; + if (num < 0 || num >= size) { + goto err; } e = getEntry(num); @@ -985,12 +983,6 @@ Object *XRef::fetch(int num, int gen, Object *obj, std::set<int> *fetchOriginato return obj; } - if (fetchOriginatorNums == NULL) { - fetchOriginatorNums = new std::set<int>(); - deleteFetchOriginatorNums = true; - } - fetchInsertResult = fetchOriginatorNums->insert(num); - switch (e->type) { case xrefEntryUncompressed: @@ -1002,9 +994,9 @@ Object *XRef::fetch(int num, int gen, Object *obj, std::set<int> *fetchOriginato new Lexer(this, str->makeSubStream(start + e->offset, gFalse, 0, &obj1)), gTrue); - parser->getObj(&obj1, fetchOriginatorNums); - parser->getObj(&obj2, fetchOriginatorNums); - parser->getObj(&obj3, fetchOriginatorNums); + parser->getObj(&obj1, recursion); + parser->getObj(&obj2, recursion); + parser->getObj(&obj3, recursion); if (!obj1.isInt() || obj1.getInt() != num || !obj2.isInt() || 
obj2.getInt() != gen || !obj3.isCmd("obj")) { @@ -1039,7 +1031,7 @@ Object *XRef::fetch(int num, int gen, Object *obj, std::set<int> *fetchOriginato goto err; } parser->getObj(obj, encrypted ? fileKey : (Guchar *)NULL, - encAlgorithm, keyLength, num, gen, fetchOriginatorNums); + encAlgorithm, keyLength, num, gen, recursion); obj1.free(); obj2.free(); obj3.free(); @@ -1087,20 +1079,9 @@ Object *XRef::fetch(int num, int gen, Object *obj, std::set<int> *fetchOriginato goto err; } - if (deleteFetchOriginatorNums) { - delete fetchOriginatorNums; - } else { - fetchOriginatorNums->erase(fetchInsertResult.first); - } return obj; err: - if (deleteFetchOriginatorNums) { - delete fetchOriginatorNums; - } else { - fetchOriginatorNums->erase(fetchInsertResult.first); - } - err2: return obj->initNull(); } diff --git a/poppler/XRef.h b/poppler/XRef.h index ecb1706..adfdc1a 100644 --- a/poppler/XRef.h +++ b/poppler/XRef.h @@ -102,7 +102,7 @@ public: Object *getCatalog(Object *obj) { return fetch(rootNum, rootGen, obj); } // Fetch an indirect reference. - Object *fetch(int num, int gen, Object *obj, std::set<int> *fetchOriginatorNums = NULL); + Object *fetch(int num, int gen, Object *obj, int recursion = 0); // Return the document's Info dictionary (if any). Object *getDocInfo(Object *obj); _______________________________________________ poppler mailing list [email protected] http://lists.freedesktop.org/mailman/listinfo/poppler
