poppler/Gfx.cc                    |  2 ++
poppler/MarkedContentOutputDev.cc | 25 +++++++++++++++++++++++--
poppler/MarkedContentOutputDev.h  |  8 +++++++-
poppler/OutputDev.h               |  2 ++
poppler/StructElement.cc          | 15 ++++++++++++---
poppler/StructElement.h           |  3 ++-
6 files changed, 48 insertions(+), 7 deletions(-)
New commits: commit 32f27b888d0e89cd40c086bd8d70381ee474078c Author: Adrian Johnson <[email protected]> Date: Mon Oct 4 10:53:54 2021 +1030 StructElement: support MCID in XObjects diff --git a/poppler/Gfx.cc b/poppler/Gfx.cc index d389f90d..c596b147 100644 --- a/poppler/Gfx.cc +++ b/poppler/Gfx.cc @@ -4120,7 +4120,9 @@ void Gfx::opXObject(Object args[], int numArgs) if (out->useDrawForm() && refObj.isRef()) { out->drawForm(refObj.getRef()); } else { + out->beginForm(refObj.getRef()); doForm(&obj1); + out->endForm(refObj.getRef()); } } if (refObj.isRef() && shouldDoForm) { diff --git a/poppler/MarkedContentOutputDev.cc b/poppler/MarkedContentOutputDev.cc index 164c5cea..558af589 100644 --- a/poppler/MarkedContentOutputDev.cc +++ b/poppler/MarkedContentOutputDev.cc @@ -17,8 +17,9 @@ #include "Annot.h" #include <vector> -MarkedContentOutputDev::MarkedContentOutputDev(int mcidA) : currentFont(nullptr), currentText(nullptr), mcid(mcidA), pageWidth(0.0), pageHeight(0.0), unicodeMap(nullptr) +MarkedContentOutputDev::MarkedContentOutputDev(int mcidA, const Object &stmObj) : currentFont(nullptr), currentText(nullptr), mcid(mcidA), pageWidth(0.0), pageHeight(0.0), unicodeMap(nullptr) { + stmRef = stmObj.copy(); currentColor.r = currentColor.g = currentColor.b = 0; } @@ -54,6 +55,26 @@ void MarkedContentOutputDev::endPage() pageWidth = pageHeight = 0.0; } +void MarkedContentOutputDev::beginForm(Ref id) +{ + formStack.push_back(id); +} + +void MarkedContentOutputDev::endForm(Ref id) +{ + formStack.pop_back(); +} + +bool MarkedContentOutputDev::contentStreamMatch() +{ + if (stmRef.isRef()) { + if (formStack.empty()) + return false; + return formStack.back() == stmRef.getRef(); + } + return formStack.empty(); +} + void MarkedContentOutputDev::beginMarkedContent(const char *name, Dict *properties) { int id = -1; @@ -64,7 +85,7 @@ void MarkedContentOutputDev::beginMarkedContent(const char *name, Dict *properti return; // The stack keep track of MCIDs of nested marked content. 
- if (inMarkedContent() || id == mcid) + if (inMarkedContent() || (id == mcid && contentStreamMatch())) mcidStack.push_back(id); } diff --git a/poppler/MarkedContentOutputDev.h b/poppler/MarkedContentOutputDev.h index 43351ae4..ace886d0 100644 --- a/poppler/MarkedContentOutputDev.h +++ b/poppler/MarkedContentOutputDev.h @@ -88,7 +88,7 @@ typedef std::vector<TextSpan> TextSpanArray; class POPPLER_PRIVATE_EXPORT MarkedContentOutputDev : public OutputDev { public: - explicit MarkedContentOutputDev(int mcidA); + explicit MarkedContentOutputDev(int mcidA, const Object &stmObj); ~MarkedContentOutputDev() override; virtual bool isOk() { return true; } @@ -101,6 +101,9 @@ public: void startPage(int pageNum, GfxState *state, XRef *xref) override; void endPage() override; + void beginForm(Ref id) override; + void endForm(Ref id) override; + void drawChar(GfxState *state, double xx, double yy, double dx, double dy, double ox, double oy, CharCode c, int nBytes, const Unicode *u, int uLen) override; void beginMarkedContent(const char *name, Dict *properties) override; @@ -111,6 +114,7 @@ public: private: void endSpan(); bool inMarkedContent() const { return mcidStack.size() > 0; } + bool contentStreamMatch(); bool needFontChange(const GfxFont *font) const; GfxFont *currentFont; @@ -119,9 +123,11 @@ private: TextSpanArray textSpans; int mcid; std::vector<int> mcidStack; + std::vector<Ref> formStack; double pageWidth; double pageHeight; const UnicodeMap *unicodeMap; + Object stmRef; }; #endif /* !MARKEDCONTENTOUTPUTDEV_H */ diff --git a/poppler/OutputDev.h b/poppler/OutputDev.h index 3a8430c8..00b42e47 100644 --- a/poppler/OutputDev.h +++ b/poppler/OutputDev.h @@ -321,6 +321,8 @@ public: //----- form XObjects virtual void drawForm(Ref /*id*/) { } + virtual void beginForm(Ref /*id*/) { } + virtual void endForm(Ref /*id*/) { } //----- PostScript XObjects virtual void psXObject(Stream * /*psStream*/, Stream * /*level1Stream*/) { } diff --git a/poppler/StructElement.cc 
b/poppler/StructElement.cc index 0a0f744f..2c703668 100644 --- a/poppler/StructElement.cc +++ b/poppler/StructElement.cc @@ -813,7 +813,7 @@ const Attribute *StructElement::findAttribute(Attribute::Type attributeType, boo GooString *StructElement::appendSubTreeText(GooString *string, bool recursive) const { if (isContent() && !isObjectRef()) { - MarkedContentOutputDev mcdev(getMCID()); + MarkedContentOutputDev mcdev(getMCID(), stmRef); const TextSpanArray &spans(getTextSpansInternal(mcdev)); if (!string) @@ -1031,9 +1031,9 @@ StructElement *StructElement::parseChild(const Object *ref, Object *childObj, st child = new StructElement(childObj->getInt(), treeRoot, this); } else if (childObj->isDict("MCR")) { /* - * TODO: The optional Stm/StwOwn attributes are not handled, so all the - * page will be always scanned when calling StructElement::getText(). + * TODO: The optional StmOwn attribute is not handled. */ + Object mcidObj = childObj->dictLookup("MCID"); if (!mcidObj.isInt()) { error(errSyntaxError, -1, "MCID object is wrong type ({0:s})", mcidObj.getTypeName()); @@ -1046,6 +1046,15 @@ StructElement *StructElement::parseChild(const Object *ref, Object *childObj, st if (pageRefObj.isRef()) { child->pageRef = std::move(pageRefObj); } + + const Object &stmObj = childObj->dictLookupNF("Stm"); + if (stmObj.isRef()) { + child->stmRef = stmObj.copy(); + } else if (!stmObj.isNull()) { + error(errSyntaxError, -1, "Stm object is wrong type ({0:s})", stmObj.getTypeName()); + return nullptr; + } + } else if (childObj->isDict("OBJR")) { const Object &refObj = childObj->dictLookupNF("Obj"); if (refObj.isRef()) { diff --git a/poppler/StructElement.h b/poppler/StructElement.h index 0d53e4ef..933be643 100644 --- a/poppler/StructElement.h +++ b/poppler/StructElement.h @@ -329,7 +329,7 @@ public: { if (!isContent()) return TextSpanArray(); - MarkedContentOutputDev mcdev(getMCID()); + MarkedContentOutputDev mcdev(getMCID(), stmRef); return getTextSpansInternal(mcdev); } @@ -379,6 
+379,7 @@ private: StructTreeRoot *treeRoot; StructElement *parent; mutable Object pageRef; + Object stmRef; union { StructData *s;
