qt4/src/poppler-page-private.h | 5 + qt4/src/poppler-page.cc | 114 +++++++++++++++++++++++++------------ qt4/src/poppler-qt4.h | 44 +++++++++++++- qt4/tests/check_search.cpp | 112 +++++++++++++++++++++++++++++++----- qt5/src/poppler-page-private.h | 5 + qt5/src/poppler-page.cc | 116 +++++++++++++++++++++++++------------ qt5/src/poppler-qt5.h | 46 +++++++++++++- qt5/tests/check_search.cpp | 126 ++++++++++++++++++++++++++++++++--------- 8 files changed, 447 insertions(+), 121 deletions(-)
New commits: commit 027eac4e565576ca2e7042e21426e28abd775d98 Author: Adam Reichold <[email protected]> Date: Tue Jan 20 00:09:09 2015 +0100 Expose whole-words search option in Qt frontends diff --git a/qt4/src/poppler-page-private.h b/qt4/src/poppler-page-private.h index 91955e0..1cb63e9 100644 --- a/qt4/src/poppler-page-private.h +++ b/qt4/src/poppler-page-private.h @@ -2,6 +2,7 @@ * Copyright (C) 2005, Net Integration Technologies, Inc. * Copyright (C) 2007, 2012, Albert Astals Cid <[email protected]> * Copyright (C) 2008, Pino Toscano <[email protected]> + * Copyright (C) 2015 Adam Reichold <[email protected]> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -46,7 +47,9 @@ public: static Link* convertLinkActionToLink(::LinkAction * a, DocumentData *parentDoc, const QRectF &linkArea); - TextPage *prepareTextSearch(const QString &text, Page::SearchMode caseSensitive, Page::Rotation rotate, GBool *sCase, QVector<Unicode> *u); + TextPage *prepareTextSearch(const QString &text, Page::Rotation rotate, QVector<Unicode> *u); + GBool performSingleTextSearch(TextPage* textPage, QVector<Unicode> &u, double &sLeft, double &sTop, double &sRight, double &sBottom, Page::SearchDirection direction, GBool sCase, GBool sWords); + QList<QRectF> performMultipleTextSearch(TextPage* textPage, QVector<Unicode> &u, GBool sCase, GBool sWords); }; } diff --git a/qt4/src/poppler-page.cc b/qt4/src/poppler-page.cc index 9c99cda..a4536af 100644 --- a/qt4/src/poppler-page.cc +++ b/qt4/src/poppler-page.cc @@ -12,7 +12,7 @@ * Copyright (C) 2010 Hib Eris <[email protected]> * Copyright (C) 2012 Tobias Koenig <[email protected]> * Copyright (C) 2012 Fabio D'Urso <[email protected]> - * Copyright (C) 2012 Adam Reichold <[email protected]> + * Copyright (C) 2012, 2015 Adam Reichold <[email protected]> * Copyright (C) 2012, 2013 Thomas Freitag <[email protected]> * Copyright (C) 2015 William Bader 
<[email protected]> * @@ -216,16 +216,13 @@ Link* PageData::convertLinkActionToLink(::LinkAction * a, DocumentData *parentDo return popplerLink; } -TextPage *PageData::prepareTextSearch(const QString &text, Page::SearchMode caseSensitive, Page::Rotation rotate, GBool *sCase, QVector<Unicode> *u) +inline TextPage *PageData::prepareTextSearch(const QString &text, Page::Rotation rotate, QVector<Unicode> *u) { const QChar * str = text.unicode(); const int len = text.length(); u->resize(len); for (int i = 0; i < len; ++i) (*u)[i] = str[i].unicode(); - if (caseSensitive == Page::CaseSensitive) *sCase = gTrue; - else *sCase = gFalse; - const int rotation = (int)rotate * 90; // fetch ourselves a textpage @@ -235,7 +232,43 @@ TextPage *PageData::prepareTextSearch(const QString &text, Page::SearchMode case TextPage *textPage=td.takeText(); return textPage; -} +} + +inline GBool PageData::performSingleTextSearch(TextPage* textPage, QVector<Unicode> &u, double &sLeft, double &sTop, double &sRight, double &sBottom, Page::SearchDirection direction, GBool sCase, GBool sWords) +{ + if (direction == Page::FromTop) + return textPage->findText( u.data(), u.size(), + gTrue, gTrue, gFalse, gFalse, sCase, gFalse, sWords, &sLeft, &sTop, &sRight, &sBottom ); + else if ( direction == Page::NextResult ) + return textPage->findText( u.data(), u.size(), + gFalse, gTrue, gTrue, gFalse, sCase, gFalse, sWords, &sLeft, &sTop, &sRight, &sBottom ); + else if ( direction == Page::PreviousResult ) + return textPage->findText( u.data(), u.size(), + gFalse, gTrue, gTrue, gFalse, sCase, gTrue, sWords, &sLeft, &sTop, &sRight, &sBottom ); + + return gFalse; +} + +inline QList<QRectF> PageData::performMultipleTextSearch(TextPage* textPage, QVector<Unicode> &u, GBool sCase, GBool sWords) +{ + QList<QRectF> results; + double sLeft = 0.0, sTop = 0.0, sRight = 0.0, sBottom = 0.0; + + while(textPage->findText( u.data(), u.size(), + gFalse, gTrue, gTrue, gFalse, sCase, gFalse, sWords, &sLeft, &sTop, &sRight, 
&sBottom )) + { + QRectF result; + + result.setLeft(sLeft); + result.setTop(sTop); + result.setRight(sRight); + result.setBottom(sBottom); + + results.append(result); + } + + return results; +} Page::Page(DocumentData *doc, int index) { m_page = new PageData(); @@ -460,20 +493,27 @@ QString Page::text(const QRectF &r) const bool Page::search(const QString &text, double &sLeft, double &sTop, double &sRight, double &sBottom, SearchDirection direction, SearchMode caseSensitive, Rotation rotate) const { - GBool sCase; + const GBool sCase = caseSensitive == Page::CaseSensitive ? gTrue : gFalse; + QVector<Unicode> u; - TextPage *textPage = m_page->prepareTextSearch(text, caseSensitive, rotate, &sCase, &u); - - bool found = false; - if (direction == FromTop) - found = textPage->findText( u.data(), u.size(), - gTrue, gTrue, gFalse, gFalse, sCase, gFalse, gFalse, &sLeft, &sTop, &sRight, &sBottom ); - else if ( direction == NextResult ) - found = textPage->findText( u.data(), u.size(), - gFalse, gTrue, gTrue, gFalse, sCase, gFalse, gFalse, &sLeft, &sTop, &sRight, &sBottom ); - else if ( direction == PreviousResult ) - found = textPage->findText( u.data(), u.size(), - gFalse, gTrue, gTrue, gFalse, sCase, gTrue, gFalse, &sLeft, &sTop, &sRight, &sBottom ); + TextPage *textPage = m_page->prepareTextSearch(text, rotate, &u); + + const bool found = m_page->performSingleTextSearch(textPage, u, sLeft, sTop, sRight, sBottom, direction, sCase, gFalse); + + textPage->decRefCnt(); + + return found; +} + +bool Page::search(const QString &text, double &sLeft, double &sTop, double &sRight, double &sBottom, SearchDirection direction, SearchFlags flags, Rotation rotate) const +{ + const GBool sCase = flags.testFlag(IgnoreCase) ? gFalse : gTrue; + const GBool sWords = flags.testFlag(WholeWorlds) ? 
gTrue : gFalse; + + QVector<Unicode> u; + TextPage *textPage = m_page->prepareTextSearch(text, rotate, &u); + + const bool found = m_page->performSingleTextSearch(textPage, u, sLeft, sTop, sRight, sBottom, direction, sCase, sWords); textPage->decRefCnt(); @@ -500,31 +540,33 @@ bool Page::search(const QString &text, QRectF &rect, SearchDirection direction, QList<QRectF> Page::search(const QString &text, SearchMode caseSensitive, Rotation rotate) const { - GBool sCase; + const GBool sCase = caseSensitive == Page::CaseSensitive ? gTrue : gFalse; + QVector<Unicode> u; - TextPage *textPage = m_page->prepareTextSearch(text, caseSensitive, rotate, &sCase, &u); + TextPage *textPage = m_page->prepareTextSearch(text, rotate, &u); - QList<QRectF> results; - double sLeft = 0.0, sTop = 0.0, sRight = 0.0, sBottom = 0.0; - - while(textPage->findText( u.data(), u.size(), - gFalse, gTrue, gTrue, gFalse, sCase, gFalse, gFalse, &sLeft, &sTop, &sRight, &sBottom )) - { - QRectF result; - - result.setLeft(sLeft); - result.setTop(sTop); - result.setRight(sRight); - result.setBottom(sBottom); - - results.append(result); - } + const QList<QRectF> results = m_page->performMultipleTextSearch(textPage, u, sCase, gFalse); textPage->decRefCnt(); return results; } +QList<QRectF> Page::search(const QString &text, SearchFlags flags, Rotation rotate) const +{ + const GBool sCase = flags.testFlag(IgnoreCase) ? gFalse : gTrue; + const GBool sWords = flags.testFlag(WholeWorlds) ? 
gTrue : gFalse; + + QVector<Unicode> u; + TextPage *textPage = m_page->prepareTextSearch(text, rotate, &u); + + const QList<QRectF> results = m_page->performMultipleTextSearch(textPage, u, sCase, sWords); + + textPage->decRefCnt(); + + return results; +} + QList<TextBox*> Page::textList(Rotation rotate) const { TextOutputDev *output_dev; diff --git a/qt4/src/poppler-qt4.h b/qt4/src/poppler-qt4.h index ee7558e..48c517a 100644 --- a/qt4/src/poppler-qt4.h +++ b/qt4/src/poppler-qt4.h @@ -12,7 +12,7 @@ * Copyright (C) 2012, Guillermo A. Amaral B. <[email protected]> * Copyright (C) 2012, Fabio D'Urso <[email protected]> * Copyright (C) 2012, Tobias Koenig <[email protected]> - * Copyright (C) 2012, 2014 Adam Reichold <[email protected]> + * Copyright (C) 2012, 2014, 2015 Adam Reichold <[email protected]> * Copyright (C) 2012, 2013 Thomas Freitag <[email protected]> * * This program is free software; you can redistribute it and/or modify @@ -579,6 +579,16 @@ delete it; enum SearchMode { CaseSensitive, ///< Case differences cause no match in searching CaseInsensitive ///< Case differences are ignored in matching }; + + /** + Flags to modify the search behaviour \since 0.31 + */ + enum SearchFlag + { + IgnoreCase = 0x00000001, ///< Case differences are ignored + WholeWorlds = 0x00000002 ///< Only whole words are matched + }; + Q_DECLARE_FLAGS( SearchFlags, SearchFlag ) /** Returns true if the specified text was found. 
@@ -603,7 +613,21 @@ delete it; \param rotate the rotation to apply for the search order \since 0.14 **/ - bool search(const QString &text, double &rectLeft, double &rectTop, double &rectRight, double &rectBottom, SearchDirection direction, SearchMode caseSensitive, Rotation rotate = Rotate0) const; + Q_DECL_DEPRECATED bool search(const QString &text, double &rectLeft, double &rectTop, double &rectRight, double &rectBottom, SearchDirection direction, SearchMode caseSensitive, Rotation rotate = Rotate0) const; + + /** + Returns true if the specified text was found. + + \param text the text to search + \param rectXXX in all directions is used to return where the text was found, for NextResult and PreviousResult + indicates where to continue searching for + \param direction in which direction to do the search + \param flags the flags to consider during matching + \param rotate the rotation to apply for the search order + + \since 0.31 + **/ + bool search(const QString &text, double &rectLeft, double &rectTop, double &rectRight, double &rectBottom, SearchDirection direction, SearchFlags flags = 0, Rotation rotate = Rotate0) const; /** Returns a list of all occurrences of the specified text on the page. @@ -616,7 +640,20 @@ delete it; \since 0.22 **/ - QList<QRectF> search(const QString &text, SearchMode caseSensitive, Rotation rotate = Rotate0) const; + Q_DECL_DEPRECATED QList<QRectF> search(const QString &text, SearchMode caseSensitive, Rotation rotate = Rotate0) const; + + /** + Returns a list of all occurrences of the specified text on the page. + + \param text the text to search + \param flags the flags to consider during matching + \param rotate the rotation to apply for the search order + + \warning Do not use the returned QRectF as arguments of another search call because of truncation issues if qreal is defined as float. 
+ + \since 0.31 + **/ + QList<QRectF> search(const QString &text, SearchFlags flags = 0, Rotation rotate = Rotate0) const; /** Returns a list of text of the page @@ -1826,6 +1863,7 @@ height = dummy.height(); } Q_DECLARE_OPERATORS_FOR_FLAGS(Poppler::Page::PainterFlags) +Q_DECLARE_OPERATORS_FOR_FLAGS(Poppler::Page::SearchFlags) Q_DECLARE_OPERATORS_FOR_FLAGS(Poppler::Document::RenderHints) Q_DECLARE_OPERATORS_FOR_FLAGS(Poppler::PDFConverter::PDFOptions) Q_DECLARE_OPERATORS_FOR_FLAGS(Poppler::PSConverter::PSOptions) diff --git a/qt4/tests/check_search.cpp b/qt4/tests/check_search.cpp index cabf82d..77e62e3 100644 --- a/qt4/tests/check_search.cpp +++ b/qt4/tests/check_search.cpp @@ -8,17 +8,18 @@ class TestSearch: public QObject private slots: void bug7063(); void testNextAndPrevious(); + void testWholeWordsOnly(); }; void TestSearch::bug7063() { - Poppler::Document *doc; - doc = Poppler::Document::load(TESTDATADIR "/unittestcases/bug7063.pdf"); - QVERIFY( doc ); - - Poppler::Page *page = doc->page(0); - QRectF pageRegion( QPointF(0,0), page->pageSize() ); + QScopedPointer< Poppler::Document > document(Poppler::Document::load(TESTDATADIR "/unittestcases/bug7063.pdf")); + QVERIFY( document ); + + QScopedPointer< Poppler::Page > page(document->page(0)); + QVERIFY( page ); + QRectF pageRegion( QPointF(0,0), page->pageSize() ); QCOMPARE( page->search(QString("non-ascii:"), pageRegion, Poppler::Page::FromTop, Poppler::Page::CaseSensitive), true ); QCOMPARE( page->search(QString("Ascii"), pageRegion, Poppler::Page::FromTop, Poppler::Page::CaseSensitive), false ); @@ -32,17 +33,32 @@ void TestSearch::bug7063() QCOMPARE( page->search(QString::fromUtf8("search \"é\", \"à \" or \"ç\""), pageRegion, Poppler::Page::FromTop, Poppler::Page::CaseSensitive), true ); QCOMPARE( page->search(QString::fromUtf8("¥µ©"), pageRegion, Poppler::Page::FromTop, Poppler::Page::CaseSensitive), true ); QCOMPARE( page->search(QString::fromUtf8("¥©"), pageRegion, Poppler::Page::FromTop, 
Poppler::Page::CaseSensitive), false ); - - delete doc; + + double rectLeft = 0.0, rectTop = 0.0, rectRight = page->pageSizeF().width(), rectBottom = page->pageSizeF().height(); + + QCOMPARE( page->search(QString("non-ascii:"), rectLeft, rectTop, rectRight, rectBottom, Poppler::Page::FromTop), true ); + + QCOMPARE( page->search(QString("Ascii"), rectLeft, rectTop, rectRight, rectBottom, Poppler::Page::FromTop), false ); + QCOMPARE( page->search(QString("Ascii"), rectLeft, rectTop, rectRight, rectBottom, Poppler::Page::FromTop, Poppler::Page::IgnoreCase), true ); + + QCOMPARE( page->search(QString("latin1:"), rectLeft, rectTop, rectRight, rectBottom, Poppler::Page::FromTop), false ); + + QCOMPARE( page->search(QString::fromUtf8("é"), rectLeft, rectTop, rectRight, rectBottom, Poppler::Page::FromTop), true ); + QCOMPARE( page->search(QString::fromUtf8("à "), rectLeft, rectTop, rectRight, rectBottom, Poppler::Page::FromTop), true ); + QCOMPARE( page->search(QString::fromUtf8("ç"), rectLeft, rectTop, rectRight, rectBottom, Poppler::Page::FromTop), true ); + QCOMPARE( page->search(QString::fromUtf8("search \"é\", \"à \" or \"ç\""), rectLeft, rectTop, rectRight, rectBottom, Poppler::Page::FromTop), true ); + QCOMPARE( page->search(QString::fromUtf8("¥µ©"), rectLeft, rectTop, rectRight, rectBottom, Poppler::Page::FromTop), true ); + QCOMPARE( page->search(QString::fromUtf8("¥©"), rectLeft, rectTop, rectRight, rectBottom, Poppler::Page::FromTop), false ); } void TestSearch::testNextAndPrevious() { - Poppler::Document *doc; - doc = Poppler::Document::load(TESTDATADIR "/unittestcases/xr01.pdf"); - QVERIFY( doc ); - - Poppler::Page *page = doc->page(0); + QScopedPointer< Poppler::Document > document(Poppler::Document::load(TESTDATADIR "/unittestcases/xr01.pdf")); + QVERIFY( document ); + + QScopedPointer< Poppler::Page > page(document->page(0)); + QVERIFY( page ); + QRectF region( QPointF(0,0), page->pageSize() ); QCOMPARE( page->search(QString("is"), region, 
Poppler::Page::FromTop, Poppler::Page::CaseSensitive), true ); @@ -83,7 +99,75 @@ void TestSearch::testNextAndPrevious() QVERIFY( qAbs(region.height() - 8.85) < 0.01 ); QCOMPARE( page->search(QString("is"), region, Poppler::Page::PreviousResult, Poppler::Page::CaseSensitive), false ); - delete doc; + double rectLeft = 0.0, rectTop = 0.0, rectRight = page->pageSizeF().width(), rectBottom = page->pageSizeF().height(); + + QCOMPARE( page->search(QString("is"), rectLeft, rectTop, rectRight, rectBottom, Poppler::Page::FromTop), true ); + QVERIFY( qAbs(rectLeft - 161.44) < 0.01 ); + QVERIFY( qAbs(rectTop - 127.85) < 0.01 ); + QVERIFY( qAbs(rectRight - rectLeft - 6.70) < 0.01 ); + QVERIFY( qAbs(rectBottom - rectTop - 8.85) < 0.01 ); + QCOMPARE( page->search(QString("is"), rectLeft, rectTop, rectRight, rectBottom, Poppler::Page::NextResult), true ); + QVERIFY( qAbs(rectLeft - 171.46) < 0.01 ); + QVERIFY( qAbs(rectTop - 127.85) < 0.01 ); + QVERIFY( qAbs(rectRight - rectLeft - 6.70) < 0.01 ); + QVERIFY( qAbs(rectBottom - rectTop - 8.85) < 0.01 ); + QCOMPARE( page->search(QString("is"), rectLeft, rectTop, rectRight, rectBottom, Poppler::Page::NextResult), true ); + QVERIFY( qAbs(rectLeft - 161.44) < 0.01 ); + QVERIFY( qAbs(rectTop - 139.81) < 0.01 ); + QVERIFY( qAbs(rectRight - rectLeft - 6.70) < 0.01 ); + QVERIFY( qAbs(rectBottom - rectTop - 8.85) < 0.01 ); + QCOMPARE( page->search(QString("is"), rectLeft, rectTop, rectRight, rectBottom, Poppler::Page::NextResult), true ); + QVERIFY( qAbs(rectLeft - 171.46) < 0.01 ); + QVERIFY( qAbs(rectTop - 139.81) < 0.01 ); + QVERIFY( qAbs(rectRight - rectLeft - 6.70) < 0.01 ); + QVERIFY( qAbs(rectBottom - rectTop - 8.85) < 0.01 ); + QCOMPARE( page->search(QString("is"), rectLeft, rectTop, rectRight, rectBottom, Poppler::Page::NextResult), false ); + QCOMPARE( page->search(QString("is"), rectLeft, rectTop, rectRight, rectBottom, Poppler::Page::PreviousResult), true ); + QVERIFY( qAbs(rectLeft - 161.44) < 0.01 ); + QVERIFY( qAbs(rectTop - 
139.81) < 0.01 ); + QVERIFY( qAbs(rectRight - rectLeft - 6.70) < 0.01 ); + QVERIFY( qAbs(rectBottom - rectTop - 8.85) < 0.01 ); + QCOMPARE( page->search(QString("is"), rectLeft, rectTop, rectRight, rectBottom, Poppler::Page::PreviousResult), true ); + QVERIFY( qAbs(rectLeft - 171.46) < 0.01 ); + QVERIFY( qAbs(rectTop - 127.85) < 0.01 ); + QVERIFY( qAbs(rectRight - rectLeft - 6.70) < 0.01 ); + QVERIFY( qAbs(rectBottom - rectTop - 8.85) < 0.01 ); + QCOMPARE( page->search(QString("is"), rectLeft, rectTop, rectRight, rectBottom, Poppler::Page::PreviousResult), true ); + QVERIFY( qAbs(rectLeft - 161.44) < 0.01 ); + QVERIFY( qAbs(rectTop - 127.85) < 0.01 ); + QVERIFY( qAbs(rectRight - rectLeft - 6.70) < 0.01 ); + QVERIFY( qAbs(rectBottom - rectTop - 8.85) < 0.01 ); + QCOMPARE( page->search(QString("is"), rectLeft, rectTop, rectRight, rectBottom, Poppler::Page::PreviousResult), false ); +} + +void TestSearch::testWholeWordsOnly() +{ + QScopedPointer< Poppler::Document > document(Poppler::Document::load(TESTDATADIR "/unittestcases/WithActualText.pdf")); + QVERIFY( document ); + + QScopedPointer< Poppler::Page > page(document->page(0)); + QVERIFY( page ); + + const Poppler::Page::SearchDirection direction = Poppler::Page::FromTop; + + const Poppler::Page::SearchFlags mode0 = 0; + const Poppler::Page::SearchFlags mode1 = Poppler::Page::IgnoreCase; + const Poppler::Page::SearchFlags mode2 = Poppler::Page::WholeWorlds; + const Poppler::Page::SearchFlags mode3 = Poppler::Page::IgnoreCase | Poppler::Page::WholeWorlds; + + double left, top, right, bottom; + + QCOMPARE( page->search(QLatin1String("brown"), left, top, right, bottom, direction, mode0), true ); + QCOMPARE( page->search(QLatin1String("brOwn"), left, top, right, bottom, direction, mode0), false ); + + QCOMPARE( page->search(QLatin1String("brOwn"), left, top, right, bottom, direction, mode1), true ); + QCOMPARE( page->search(QLatin1String("brawn"), left, top, right, bottom, direction, mode1), false ); + + QCOMPARE( 
page->search(QLatin1String("brown"), left, top, right, bottom, direction, mode2), true ); + QCOMPARE( page->search(QLatin1String("own"), left, top, right, bottom, direction, mode2), false ); + + QCOMPARE( page->search(QLatin1String("brOwn"), left, top, right, bottom, direction, mode3), true ); + QCOMPARE( page->search(QLatin1String("Own"), left, top, right, bottom, direction, mode3), false ); } QTEST_MAIN(TestSearch) diff --git a/qt5/src/poppler-page-private.h b/qt5/src/poppler-page-private.h index 91955e0..1cb63e9 100644 --- a/qt5/src/poppler-page-private.h +++ b/qt5/src/poppler-page-private.h @@ -2,6 +2,7 @@ * Copyright (C) 2005, Net Integration Technologies, Inc. * Copyright (C) 2007, 2012, Albert Astals Cid <[email protected]> * Copyright (C) 2008, Pino Toscano <[email protected]> + * Copyright (C) 2015 Adam Reichold <[email protected]> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -46,7 +47,9 @@ public: static Link* convertLinkActionToLink(::LinkAction * a, DocumentData *parentDoc, const QRectF &linkArea); - TextPage *prepareTextSearch(const QString &text, Page::SearchMode caseSensitive, Page::Rotation rotate, GBool *sCase, QVector<Unicode> *u); + TextPage *prepareTextSearch(const QString &text, Page::Rotation rotate, QVector<Unicode> *u); + GBool performSingleTextSearch(TextPage* textPage, QVector<Unicode> &u, double &sLeft, double &sTop, double &sRight, double &sBottom, Page::SearchDirection direction, GBool sCase, GBool sWords); + QList<QRectF> performMultipleTextSearch(TextPage* textPage, QVector<Unicode> &u, GBool sCase, GBool sWords); }; } diff --git a/qt5/src/poppler-page.cc b/qt5/src/poppler-page.cc index eec211e..88f2e73 100644 --- a/qt5/src/poppler-page.cc +++ b/qt5/src/poppler-page.cc @@ -12,7 +12,7 @@ * Copyright (C) 2010 Hib Eris <[email protected]> * Copyright (C) 2012 Tobias Koenig <[email protected]> * Copyright (C) 2012 Fabio D'Urso <[email 
protected]> - * Copyright (C) 2012 Adam Reichold <[email protected]> + * Copyright (C) 2012, 2015 Adam Reichold <[email protected]> * Copyright (C) 2012, 2013 Thomas Freitag <[email protected]> * Copyright (C) 2015 William Bader <[email protected]> * @@ -216,16 +216,13 @@ Link* PageData::convertLinkActionToLink(::LinkAction * a, DocumentData *parentDo return popplerLink; } -TextPage *PageData::prepareTextSearch(const QString &text, Page::SearchMode caseSensitive, Page::Rotation rotate, GBool *sCase, QVector<Unicode> *u) +inline TextPage *PageData::prepareTextSearch(const QString &text, Page::Rotation rotate, QVector<Unicode> *u) { const QChar * str = text.unicode(); const int len = text.length(); u->resize(len); for (int i = 0; i < len; ++i) (*u)[i] = str[i].unicode(); - if (caseSensitive == Page::CaseSensitive) *sCase = gTrue; - else *sCase = gFalse; - const int rotation = (int)rotate * 90; // fetch ourselves a textpage @@ -233,9 +230,45 @@ TextPage *PageData::prepareTextSearch(const QString &text, Page::SearchMode case parentDoc->doc->displayPage( &td, index + 1, 72, 72, rotation, false, true, false, NULL, NULL, NULL, NULL, gTrue); TextPage *textPage=td.takeText(); - + return textPage; -} +} + +inline GBool PageData::performSingleTextSearch(TextPage* textPage, QVector<Unicode> &u, double &sLeft, double &sTop, double &sRight, double &sBottom, Page::SearchDirection direction, GBool sCase, GBool sWords) +{ + if (direction == Page::FromTop) + return textPage->findText( u.data(), u.size(), + gTrue, gTrue, gFalse, gFalse, sCase, gFalse, sWords, &sLeft, &sTop, &sRight, &sBottom ); + else if ( direction == Page::NextResult ) + return textPage->findText( u.data(), u.size(), + gFalse, gTrue, gTrue, gFalse, sCase, gFalse, sWords, &sLeft, &sTop, &sRight, &sBottom ); + else if ( direction == Page::PreviousResult ) + return textPage->findText( u.data(), u.size(), + gFalse, gTrue, gTrue, gFalse, sCase, gTrue, sWords, &sLeft, &sTop, &sRight, &sBottom ); + + return gFalse; +} + 
+inline QList<QRectF> PageData::performMultipleTextSearch(TextPage* textPage, QVector<Unicode> &u, GBool sCase, GBool sWords) +{ + QList<QRectF> results; + double sLeft = 0.0, sTop = 0.0, sRight = 0.0, sBottom = 0.0; + + while(textPage->findText( u.data(), u.size(), + gFalse, gTrue, gTrue, gFalse, sCase, gFalse, sWords, &sLeft, &sTop, &sRight, &sBottom )) + { + QRectF result; + + result.setLeft(sLeft); + result.setTop(sTop); + result.setRight(sRight); + result.setBottom(sBottom); + + results.append(result); + } + + return results; +} Page::Page(DocumentData *doc, int index) { m_page = new PageData(); @@ -460,20 +493,27 @@ QString Page::text(const QRectF &r) const bool Page::search(const QString &text, double &sLeft, double &sTop, double &sRight, double &sBottom, SearchDirection direction, SearchMode caseSensitive, Rotation rotate) const { - GBool sCase; + const GBool sCase = caseSensitive == Page::CaseSensitive ? gTrue : gFalse; + QVector<Unicode> u; - TextPage *textPage = m_page->prepareTextSearch(text, caseSensitive, rotate, &sCase, &u); - - bool found = false; - if (direction == FromTop) - found = textPage->findText( u.data(), u.size(), - gTrue, gTrue, gFalse, gFalse, sCase, gFalse, gFalse, &sLeft, &sTop, &sRight, &sBottom ); - else if ( direction == NextResult ) - found = textPage->findText( u.data(), u.size(), - gFalse, gTrue, gTrue, gFalse, sCase, gFalse, gFalse, &sLeft, &sTop, &sRight, &sBottom ); - else if ( direction == PreviousResult ) - found = textPage->findText( u.data(), u.size(), - gFalse, gTrue, gTrue, gFalse, sCase, gTrue, gFalse, &sLeft, &sTop, &sRight, &sBottom ); + TextPage *textPage = m_page->prepareTextSearch(text, rotate, &u); + + const bool found = m_page->performSingleTextSearch(textPage, u, sLeft, sTop, sRight, sBottom, direction, sCase, gFalse); + + textPage->decRefCnt(); + + return found; +} + +bool Page::search(const QString &text, double &sLeft, double &sTop, double &sRight, double &sBottom, SearchDirection direction, SearchFlags 
flags, Rotation rotate) const +{ + const GBool sCase = flags.testFlag(IgnoreCase) ? gFalse : gTrue; + const GBool sWords = flags.testFlag(WholeWorlds) ? gTrue : gFalse; + + QVector<Unicode> u; + TextPage *textPage = m_page->prepareTextSearch(text, rotate, &u); + + const bool found = m_page->performSingleTextSearch(textPage, u, sLeft, sTop, sRight, sBottom, direction, sCase, sWords); textPage->decRefCnt(); @@ -482,31 +522,33 @@ bool Page::search(const QString &text, double &sLeft, double &sTop, double &sRig QList<QRectF> Page::search(const QString &text, SearchMode caseSensitive, Rotation rotate) const { - GBool sCase; + const GBool sCase = caseSensitive == Page::CaseSensitive ? gTrue : gFalse; + QVector<Unicode> u; - TextPage *textPage = m_page->prepareTextSearch(text, caseSensitive, rotate, &sCase, &u); + TextPage *textPage = m_page->prepareTextSearch(text, rotate, &u); - QList<QRectF> results; - double sLeft = 0.0, sTop = 0.0, sRight = 0.0, sBottom = 0.0; - - while(textPage->findText( u.data(), u.size(), - gFalse, gTrue, gTrue, gFalse, sCase, gFalse, gFalse, &sLeft, &sTop, &sRight, &sBottom )) - { - QRectF result; - - result.setLeft(sLeft); - result.setTop(sTop); - result.setRight(sRight); - result.setBottom(sBottom); - - results.append(result); - } + const QList<QRectF> results = m_page->performMultipleTextSearch(textPage, u, sCase, gFalse); textPage->decRefCnt(); return results; } +QList<QRectF> Page::search(const QString &text, SearchFlags flags, Rotation rotate) const +{ + const GBool sCase = flags.testFlag(IgnoreCase) ? gFalse : gTrue; + const GBool sWords = flags.testFlag(WholeWorlds) ? 
gTrue : gFalse; + + QVector<Unicode> u; + TextPage *textPage = m_page->prepareTextSearch(text, rotate, &u); + + const QList<QRectF> results = m_page->performMultipleTextSearch(textPage, u, sCase, sWords); + + textPage->decRefCnt(); + + return results; +} + QList<TextBox*> Page::textList(Rotation rotate) const { TextOutputDev *output_dev; diff --git a/qt5/src/poppler-qt5.h b/qt5/src/poppler-qt5.h index dee0b19..9ffecff 100644 --- a/qt5/src/poppler-qt5.h +++ b/qt5/src/poppler-qt5.h @@ -12,7 +12,7 @@ * Copyright (C) 2012, Guillermo A. Amaral B. <[email protected]> * Copyright (C) 2012, Fabio D'Urso <[email protected]> * Copyright (C) 2012, Tobias Koenig <[email protected]> - * Copyright (C) 2012, 2014 Adam Reichold <[email protected]> + * Copyright (C) 2012, 2014, 2015 Adam Reichold <[email protected]> * Copyright (C) 2012, 2013 Thomas Freitag <[email protected]> * Copyright (C) 2013 Anthony Granger <[email protected]> * @@ -580,6 +580,16 @@ delete it; enum SearchMode { CaseSensitive, ///< Case differences cause no match in searching CaseInsensitive ///< Case differences are ignored in matching }; + + /** + Flags to modify the search behaviour \since 0.31 + */ + enum SearchFlag + { + IgnoreCase = 0x00000001, ///< Case differences are ignored + WholeWorlds = 0x00000002 ///< Only whole words are matched + }; + Q_DECLARE_FLAGS( SearchFlags, SearchFlag ) /** Returns true if the specified text was found. @@ -592,8 +602,22 @@ delete it; \param rotate the rotation to apply for the search order \since 0.14 **/ - bool search(const QString &text, double &rectLeft, double &rectTop, double &rectRight, double &rectBottom, SearchDirection direction, SearchMode caseSensitive, Rotation rotate = Rotate0) const; - + Q_DECL_DEPRECATED bool search(const QString &text, double &rectLeft, double &rectTop, double &rectRight, double &rectBottom, SearchDirection direction, SearchMode caseSensitive, Rotation rotate = Rotate0) const; + + /** + Returns true if the specified text was found. 
+ + \param text the text to search + \param rectXXX in all directions is used to return where the text was found, for NextResult and PreviousResult + indicates where to continue searching for + \param direction in which direction to do the search + \param flags the flags to consider during matching + \param rotate the rotation to apply for the search order + + \since 0.31 + **/ + bool search(const QString &text, double &rectLeft, double &rectTop, double &rectRight, double &rectBottom, SearchDirection direction, SearchFlags flags = 0, Rotation rotate = Rotate0) const; + /** Returns a list of all occurrences of the specified text on the page. @@ -605,7 +629,20 @@ delete it; \since 0.22 **/ - QList<QRectF> search(const QString &text, SearchMode caseSensitive, Rotation rotate = Rotate0) const; + Q_DECL_DEPRECATED QList<QRectF> search(const QString &text, SearchMode caseSensitive, Rotation rotate = Rotate0) const; + + /** + Returns a list of all occurrences of the specified text on the page. + + \param text the text to search + \param flags the flags to consider during matching + \param rotate the rotation to apply for the search order + + \warning Do not use the returned QRectF as arguments of another search call because of truncation issues if qreal is defined as float. 
+ + \since 0.31 + **/ + QList<QRectF> search(const QString &text, SearchFlags flags = 0, Rotation rotate = Rotate0) const; /** Returns a list of text of the page @@ -1787,6 +1824,7 @@ height = dummy.height(); } Q_DECLARE_OPERATORS_FOR_FLAGS(Poppler::Page::PainterFlags) +Q_DECLARE_OPERATORS_FOR_FLAGS(Poppler::Page::SearchFlags) Q_DECLARE_OPERATORS_FOR_FLAGS(Poppler::Document::RenderHints) Q_DECLARE_OPERATORS_FOR_FLAGS(Poppler::PDFConverter::PDFOptions) Q_DECLARE_OPERATORS_FOR_FLAGS(Poppler::PSConverter::PSOptions) diff --git a/qt5/tests/check_search.cpp b/qt5/tests/check_search.cpp index efb5556..2f6f022 100644 --- a/qt5/tests/check_search.cpp +++ b/qt5/tests/check_search.cpp @@ -8,20 +8,19 @@ class TestSearch: public QObject private slots: void bug7063(); void testNextAndPrevious(); + void testWholeWordsOnly(); }; void TestSearch::bug7063() { - Poppler::Document *doc; - doc = Poppler::Document::load(TESTDATADIR "/unittestcases/bug7063.pdf"); - QVERIFY( doc ); - - Poppler::Page *page = doc->page(0); - double rectLeft, rectTop, rectRight, rectBottom; - rectLeft = 0; - rectTop = 0; - rectRight = page->pageSize().width(); - rectBottom = page->pageSize().height(); + QScopedPointer< Poppler::Document > document(Poppler::Document::load(TESTDATADIR "/unittestcases/bug7063.pdf")); + QVERIFY( document ); + + QScopedPointer< Poppler::Page > page(document->page(0)); + QVERIFY( page ); + + double rectLeft = 0.0, rectTop = 0.0, rectRight = page->pageSizeF().width(), rectBottom = page->pageSizeF().height(); + QCOMPARE( page->search(QString("non-ascii:"), rectLeft, rectTop, rectRight, rectBottom, Poppler::Page::FromTop, Poppler::Page::CaseSensitive), true ); QCOMPARE( page->search(QString("Ascii"), rectLeft, rectTop, rectRight, rectBottom, Poppler::Page::FromTop, Poppler::Page::CaseSensitive), false ); @@ -35,23 +34,32 @@ void TestSearch::bug7063() QCOMPARE( page->search(QString::fromUtf8("search \"é\", \"à \" or \"ç\""), rectLeft, rectTop, rectRight, rectBottom, 
Poppler::Page::FromTop, Poppler::Page::CaseSensitive), true ); QCOMPARE( page->search(QString::fromUtf8("¥µ©"), rectLeft, rectTop, rectRight, rectBottom, Poppler::Page::FromTop, Poppler::Page::CaseSensitive), true ); QCOMPARE( page->search(QString::fromUtf8("¥©"), rectLeft, rectTop, rectRight, rectBottom, Poppler::Page::FromTop, Poppler::Page::CaseSensitive), false ); - - delete doc; + + QCOMPARE( page->search(QString("non-ascii:"), rectLeft, rectTop, rectRight, rectBottom, Poppler::Page::FromTop), true ); + + QCOMPARE( page->search(QString("Ascii"), rectLeft, rectTop, rectRight, rectBottom, Poppler::Page::FromTop), false ); + QCOMPARE( page->search(QString("Ascii"), rectLeft, rectTop, rectRight, rectBottom, Poppler::Page::FromTop, Poppler::Page::IgnoreCase), true ); + + QCOMPARE( page->search(QString("latin1:"), rectLeft, rectTop, rectRight, rectBottom, Poppler::Page::FromTop), false ); + + QCOMPARE( page->search(QString::fromUtf8("é"), rectLeft, rectTop, rectRight, rectBottom, Poppler::Page::FromTop), true ); + QCOMPARE( page->search(QString::fromUtf8("à "), rectLeft, rectTop, rectRight, rectBottom, Poppler::Page::FromTop), true ); + QCOMPARE( page->search(QString::fromUtf8("ç"), rectLeft, rectTop, rectRight, rectBottom, Poppler::Page::FromTop), true ); + QCOMPARE( page->search(QString::fromUtf8("search \"é\", \"à \" or \"ç\""), rectLeft, rectTop, rectRight, rectBottom, Poppler::Page::FromTop), true ); + QCOMPARE( page->search(QString::fromUtf8("¥µ©"), rectLeft, rectTop, rectRight, rectBottom, Poppler::Page::FromTop), true ); + QCOMPARE( page->search(QString::fromUtf8("¥©"), rectLeft, rectTop, rectRight, rectBottom, Poppler::Page::FromTop), false ); } void TestSearch::testNextAndPrevious() { - Poppler::Document *doc; - doc = Poppler::Document::load(TESTDATADIR "/unittestcases/xr01.pdf"); - QVERIFY( doc ); - - Poppler::Page *page = doc->page(0); - double rectLeft, rectTop, rectRight, rectBottom; - rectLeft = 0; - rectTop = 0; - rectRight = 
page->pageSize().width(); - rectBottom = page->pageSize().height(); - + QScopedPointer< Poppler::Document > document(Poppler::Document::load(TESTDATADIR "/unittestcases/xr01.pdf")); + QVERIFY( document ); + + QScopedPointer< Poppler::Page > page(document->page(0)); + QVERIFY( page ); + + double rectLeft = 0.0, rectTop = 0.0, rectRight = page->pageSizeF().width(), rectBottom = page->pageSizeF().height(); + QCOMPARE( page->search(QString("is"), rectLeft, rectTop, rectRight, rectBottom, Poppler::Page::FromTop, Poppler::Page::CaseSensitive), true ); QVERIFY( qAbs(rectLeft - 161.44) < 0.01 ); QVERIFY( qAbs(rectTop - 127.85) < 0.01 ); @@ -89,8 +97,76 @@ void TestSearch::testNextAndPrevious() QVERIFY( qAbs(rectRight - rectLeft - 6.70) < 0.01 ); QVERIFY( qAbs(rectBottom - rectTop - 8.85) < 0.01 ); QCOMPARE( page->search(QString("is"), rectLeft, rectTop, rectRight, rectBottom, Poppler::Page::PreviousResult, Poppler::Page::CaseSensitive), false ); - - delete doc; + + rectLeft = 0.0, rectTop = 0.0, rectRight = page->pageSizeF().width(), rectBottom = page->pageSizeF().height(); + + QCOMPARE( page->search(QString("is"), rectLeft, rectTop, rectRight, rectBottom, Poppler::Page::FromTop), true ); + QVERIFY( qAbs(rectLeft - 161.44) < 0.01 ); + QVERIFY( qAbs(rectTop - 127.85) < 0.01 ); + QVERIFY( qAbs(rectRight - rectLeft - 6.70) < 0.01 ); + QVERIFY( qAbs(rectBottom - rectTop - 8.85) < 0.01 ); + QCOMPARE( page->search(QString("is"), rectLeft, rectTop, rectRight, rectBottom, Poppler::Page::NextResult), true ); + QVERIFY( qAbs(rectLeft - 171.46) < 0.01 ); + QVERIFY( qAbs(rectTop - 127.85) < 0.01 ); + QVERIFY( qAbs(rectRight - rectLeft - 6.70) < 0.01 ); + QVERIFY( qAbs(rectBottom - rectTop - 8.85) < 0.01 ); + QCOMPARE( page->search(QString("is"), rectLeft, rectTop, rectRight, rectBottom, Poppler::Page::NextResult), true ); + QVERIFY( qAbs(rectLeft - 161.44) < 0.01 ); + QVERIFY( qAbs(rectTop - 139.81) < 0.01 ); + QVERIFY( qAbs(rectRight - rectLeft - 6.70) < 0.01 ); + QVERIFY( 
qAbs(rectBottom - rectTop - 8.85) < 0.01 ); + QCOMPARE( page->search(QString("is"), rectLeft, rectTop, rectRight, rectBottom, Poppler::Page::NextResult), true ); + QVERIFY( qAbs(rectLeft - 171.46) < 0.01 ); + QVERIFY( qAbs(rectTop - 139.81) < 0.01 ); + QVERIFY( qAbs(rectRight - rectLeft - 6.70) < 0.01 ); + QVERIFY( qAbs(rectBottom - rectTop - 8.85) < 0.01 ); + QCOMPARE( page->search(QString("is"), rectLeft, rectTop, rectRight, rectBottom, Poppler::Page::NextResult), false ); + QCOMPARE( page->search(QString("is"), rectLeft, rectTop, rectRight, rectBottom, Poppler::Page::PreviousResult), true ); + QVERIFY( qAbs(rectLeft - 161.44) < 0.01 ); + QVERIFY( qAbs(rectTop - 139.81) < 0.01 ); + QVERIFY( qAbs(rectRight - rectLeft - 6.70) < 0.01 ); + QVERIFY( qAbs(rectBottom - rectTop - 8.85) < 0.01 ); + QCOMPARE( page->search(QString("is"), rectLeft, rectTop, rectRight, rectBottom, Poppler::Page::PreviousResult), true ); + QVERIFY( qAbs(rectLeft - 171.46) < 0.01 ); + QVERIFY( qAbs(rectTop - 127.85) < 0.01 ); + QVERIFY( qAbs(rectRight - rectLeft - 6.70) < 0.01 ); + QVERIFY( qAbs(rectBottom - rectTop - 8.85) < 0.01 ); + QCOMPARE( page->search(QString("is"), rectLeft, rectTop, rectRight, rectBottom, Poppler::Page::PreviousResult), true ); + QVERIFY( qAbs(rectLeft - 161.44) < 0.01 ); + QVERIFY( qAbs(rectTop - 127.85) < 0.01 ); + QVERIFY( qAbs(rectRight - rectLeft - 6.70) < 0.01 ); + QVERIFY( qAbs(rectBottom - rectTop - 8.85) < 0.01 ); + QCOMPARE( page->search(QString("is"), rectLeft, rectTop, rectRight, rectBottom, Poppler::Page::PreviousResult), false ); +} + +void TestSearch::testWholeWordsOnly() +{ + QScopedPointer< Poppler::Document > document(Poppler::Document::load(TESTDATADIR "/unittestcases/WithActualText.pdf")); + QVERIFY( document ); + + QScopedPointer< Poppler::Page > page(document->page(0)); + QVERIFY( page ); + + const Poppler::Page::SearchDirection direction = Poppler::Page::FromTop; + + const Poppler::Page::SearchFlags mode0 = 0; + const Poppler::Page::SearchFlags 
mode1 = Poppler::Page::IgnoreCase; + const Poppler::Page::SearchFlags mode2 = Poppler::Page::WholeWorlds; + const Poppler::Page::SearchFlags mode3 = Poppler::Page::IgnoreCase | Poppler::Page::WholeWorlds; + + double left, top, right, bottom; + + QCOMPARE( page->search(QLatin1String("brown"), left, top, right, bottom, direction, mode0), true ); + QCOMPARE( page->search(QLatin1String("brOwn"), left, top, right, bottom, direction, mode0), false ); + + QCOMPARE( page->search(QLatin1String("brOwn"), left, top, right, bottom, direction, mode1), true ); + QCOMPARE( page->search(QLatin1String("brawn"), left, top, right, bottom, direction, mode1), false ); + + QCOMPARE( page->search(QLatin1String("brown"), left, top, right, bottom, direction, mode2), true ); + QCOMPARE( page->search(QLatin1String("own"), left, top, right, bottom, direction, mode2), false ); + + QCOMPARE( page->search(QLatin1String("brOwn"), left, top, right, bottom, direction, mode3), true ); + QCOMPARE( page->search(QLatin1String("Own"), left, top, right, bottom, direction, mode3), false ); } QTEST_MAIN(TestSearch)
_______________________________________________ poppler mailing list poppler@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/poppler
