 poppler/TextOutputDev.cc | 5 +++++
 poppler/TextOutputDev.h  | 5 +++++
 utils/pdftotext.cc       | 6 ++----
 3 files changed, 12 insertions(+), 4 deletions(-)
New commits: commit e8e95d2ca4f1c108cc69cab72c7c5ab31f80a597 Author: Sanchit Anand <[email protected]> Date: Tue Aug 28 02:58:39 2018 -0400 pdftotext: Fix only outputs first page content with -bbox-layout option Issue #88 diff --git a/poppler/TextOutputDev.cc b/poppler/TextOutputDev.cc index 4a3070af..7a92ff3b 100644 --- a/poppler/TextOutputDev.cc +++ b/poppler/TextOutputDev.cc @@ -37,6 +37,7 @@ // Copyright (C) 2013 Ed Catmur <[email protected]> // Copyright (C) 2016 Khaled Hosny <[email protected]> // Copyright (C) 2018 Klarälvdalens Datakonsult AB, a KDAB Group company, <[email protected]>. Work sponsored by the LiMux project of the city of Munich +// Copyright (C) 2018 Sanchit Anand <[email protected]> // // To see a description of the changes please see the Changelog file that // came with your tarball or type make ChangeLog if you are building from git @@ -5878,3 +5879,7 @@ TextPage *TextOutputDev::takeText() { text = new TextPage(rawOrder); return ret; } + +TextFlow *TextOutputDev::getFlows() { + return text->getFlows(); +} diff --git a/poppler/TextOutputDev.h b/poppler/TextOutputDev.h index 092acd67..f2435545 100644 --- a/poppler/TextOutputDev.h +++ b/poppler/TextOutputDev.h @@ -22,6 +22,7 @@ // Copyright (C) 2012, 2013, 2015, 2016 Jason Crain <[email protected]> // Copyright (C) 2013 Thomas Freitag <[email protected]> // Copyright (C) 2018 Klarälvdalens Datakonsult AB, a KDAB Group company, <[email protected]>. Work sponsored by the LiMux project of the city of Munich +// Copyright (C) 2018 Sanchit Anand <[email protected]> // // To see a description of the changes please see the Changelog file that // came with your tarball or type make ChangeLog if you are building from git @@ -890,6 +891,10 @@ public: // Turn extra processing for HTML conversion on or off. void enableHTMLExtras(GBool doHTMLA) { doHTML = doHTMLA; } + // Get the head of the linked list of TextFlows for the + // last rasterized page. 
+ TextFlow *getFlows(); + private: TextOutputFunc outputFunc; // output function diff --git a/utils/pdftotext.cc b/utils/pdftotext.cc index 3d8dc95f..9844ba00 100644 --- a/utils/pdftotext.cc +++ b/utils/pdftotext.cc @@ -28,6 +28,7 @@ // Copyright (C) 2017 Adrian Johnson <[email protected]> // Copyright (C) 2018 Klarälvdalens Datakonsult AB, a KDAB Group company, <[email protected]>. Work sponsored by the LiMux project of the city of Munich // Copyright (C) 2018 Adam Reichold <[email protected]> +// Copyright (C) 2018 Sanchit Anand <[email protected]> // // To see a description of the changes please see the Changelog file that // came with your tarball or type make ChangeLog if you are building from git @@ -511,7 +512,6 @@ static void printLine(FILE *f, TextLine *line) { void printDocBBox(FILE *f, PDFDoc *doc, TextOutputDev *textOut, int first, int last) { double xMin, yMin, xMax, yMax; - TextPage *textPage; TextFlow *flow; TextBlock *blk; TextLine *line; @@ -520,8 +520,7 @@ void printDocBBox(FILE *f, PDFDoc *doc, TextOutputDev *textOut, int first, int l for (int page = first; page <= last; ++page) { fprintf(f, " <page width=\"%f\" height=\"%f\">\n",doc->getPageMediaWidth(page), doc->getPageMediaHeight(page)); doc->displayPage(textOut, page, resolution, resolution, 0, gTrue, gFalse, gFalse); - textPage = textOut->takeText(); - for (flow = textPage->getFlows(); flow; flow = flow->getNext()) { + for (flow = textOut->getFlows(); flow; flow = flow->getNext()) { fprintf(f, " <flow>\n"); for (blk = flow->getBlocks(); blk; blk = blk->getNext()) { blk->getBBox(&xMin, &yMin, &xMax, &yMax); @@ -534,7 +533,6 @@ void printDocBBox(FILE *f, PDFDoc *doc, TextOutputDev *textOut, int first, int l fprintf(f, " </flow>\n"); } fprintf(f, " </page>\n"); - textPage->decRefCnt(); } fprintf(f, "</doc>\n"); } _______________________________________________ poppler mailing list [email protected] https://lists.freedesktop.org/mailman/listinfo/poppler
