glib/demo/find.c                    |   83 +++++++++++-----
 glib/poppler-page.cc                |  144 +++++++++++++++++++++++++---
 glib/poppler-page.h                 |    4 
 glib/poppler-private.h              |   19 +++
 glib/poppler.h                      |    7 +
 glib/reference/poppler-sections.txt |    2 
 poppler/TextOutputDev.cc            |  182 +++++++++++++++++++++++++++++-------
 poppler/TextOutputDev.h             |   15 ++
 qt5/src/poppler-page-private.h      |    4 
 qt5/src/poppler-page.cc             |   39 +++++--
 qt5/src/poppler-qt5.h               |   12 +-
 qt5/tests/check_search.cpp          |  128 +++++++++++++++++++++----
 qt6/src/poppler-page-private.h      |    4 
 qt6/src/poppler-page.cc             |   35 +++++-
 qt6/src/poppler-qt6.h               |   12 +-
 qt6/tests/check_search.cpp          |   85 ++++++++++++++++
 16 files changed, 652 insertions(+), 123 deletions(-)

New commits:
commit e3fed321f230a4a91df873e6d9a213ba8dad6694
Author: Nelson Benítez León <nbenit...@gmail.com>
Date:   Sun Apr 25 22:24:50 2021 +0000

    find, glib: Enhance find to support multi-line matching
    
    On the backend side, adds 3 new parameters to TextPage::findText(),
    one bool to enable the feature, one out PDFRectangle to store
    the part of the match that falls on the next line, and one out
    bool to inform whether hyphen was present and ignored at end of
    the previous match part.
    
    For the glib binding, this extends the public PopplerRectangle
    struct by new members to hold additional information about
    whether the rectangle belongs to a group of rectangles for the
    same match, and whether a hyphen was ignored at the end of the
    line. Since PopplerRectangle is public ABI, this is done by making
    the public PopplerRectangle API return the enlarged struct, and
    internally casting to the new struct when required, the new
    members are accessible only via accessor functions.
    
    For the Qt5 and Qt6 bindings, this commit only implements the new
    flag Poppler::Page::AcrossLines (but no new function and no new
    return data type). If this flag is passed, the returned
    list of rectangles will also include rectangles for the
    second part of across-line matches.
    
    This minimal Qt binding still allows for the creation of
    tests for this feature (using the Qt test framework), which
    this commit *does include*. But a more complete binding (with
    a new return type that includes 'matchContinued' and 'ignoredHyphen'
    boolean fields) is left to the Qt backend maintainers
    if they want to use this feature in e.g. Okular.
    
    So, as mentioned, this commit incorporates tests for the
    implemented across-line matching feature, and the tests do
    also check for two included aspects of this feature, which are:
    
     - Ignoring a hyphen character while matching when 1) it's the
       last character of the line and 2) its corresponding matching
       character in the search term is not also a hyphen.
    
     - Any whitespace character in the search term will be allowed
       to match at the logical position where the lines split (i.e. what
       would normally be the newline character in a text file, but
       PDF text does not include newline characters between lines).
    
    Regarding the enhancement to findText() function which implements
    matching across lines, just two more notes:
    
     - It won't match on text spanning more than two lines, i.e. it
       only matches text spanning from end of one line to start of
       next line.
    
     - It does not support searching backwards: if findText() receives
       both <backward> and <matchAcrossLines> parameters as true, it
       will ignore the <matchAcrossLines> parameter. Implementing
       <matchAcrossLines> with backwards direction is possible, but
       it would make an already complex function like findText()
       even more complex, for little gain as e.g. Evince does not even
       use the <backward> parameter of findText().
    
    Fixes poppler issues #744 and #755
    Related Evince issue https://gitlab.gnome.org/GNOME/evince/issues/333

diff --git a/glib/demo/find.c b/glib/demo/find.c
index b3ae9101..080bc196 100644
--- a/glib/demo/find.c
+++ b/glib/demo/find.c
@@ -85,6 +85,23 @@ static void pgd_find_update_progress(PgdFindDemo *demo, gint 
scanned)
     g_free(str);
 }
 
+/*
+ * Fill the tree row at @iter_child with one search-result rectangle.
+ *
+ * The row title is "Match N" with N = @match_id + 1, the four coordinate
+ * columns hold @rect's values printed with two decimals, and the row also
+ * stores the current page index and the @rect pointer itself.
+ * A weak reference is registered on @model with poppler_rectangle_free as
+ * the notify callback and @rect as its data, so the rectangle's lifetime
+ * is tied to the model.
+ */
+static void pgd_find_append_match(PgdFindDemo *demo, GtkTreeModel *model, GtkTreeIter *iter_child, PopplerRectangle *rect, int match_id)
+{
+    GtkTreeStore *store = GTK_TREE_STORE(model);
+    gchar *title = g_strdup_printf("Match %d", match_id + 1);
+    gchar *left = g_strdup_printf("%.2f", rect->x1);
+    gchar *top = g_strdup_printf("%.2f", rect->y1);
+    gchar *right = g_strdup_printf("%.2f", rect->x2);
+    gchar *bottom = g_strdup_printf("%.2f", rect->y2);
+
+    gtk_tree_store_set(store, iter_child,
+                       TITLE_COLUMN, title,
+                       X1_COLUMN, left,
+                       Y1_COLUMN, top,
+                       X2_COLUMN, right,
+                       Y2_COLUMN, bottom,
+                       VISIBLE_COLUMN, TRUE,
+                       PAGE_COLUMN, demo->page_index,
+                       PAGE_RECT, rect,
+                       -1);
+
+    /* The temporary strings are released right after the store has been set. */
+    g_free(title);
+    g_free(left);
+    g_free(top);
+    g_free(right);
+    g_free(bottom);
+
+    g_object_weak_ref(G_OBJECT(model), (GWeakNotify)poppler_rectangle_free, rect);
+}
+
 static gboolean pgd_find_find_text(PgdFindDemo *demo)
 {
     PopplerPage *page;
@@ -103,46 +120,31 @@ static gboolean pgd_find_find_text(PgdFindDemo *demo)
     matches = poppler_page_find_text_with_options(page, 
gtk_entry_get_text(GTK_ENTRY(demo->entry)), demo->options);
     g_timer_stop(timer);
     if (matches) {
-        GtkTreeIter iter;
+        GtkTreeIter iter, iter_child;
         gchar *str;
         GList *l;
         gdouble height;
         gint n_match = 0;
 
-        str = g_strdup_printf("%d matches found on page %d in %.4f seconds", 
g_list_length(matches), demo->page_index + 1, g_timer_elapsed(timer, NULL));
-
         gtk_tree_store_append(GTK_TREE_STORE(model), &iter, NULL);
-        gtk_tree_store_set(GTK_TREE_STORE(model), &iter, TITLE_COLUMN, str, 
VISIBLE_COLUMN, FALSE, PAGE_COLUMN, demo->page_index, -1);
-        g_free(str);
-
         poppler_page_get_size(page, NULL, &height);
-
         for (l = matches; l && l->data; l = g_list_next(l)) {
             PopplerRectangle *rect = (PopplerRectangle *)l->data;
-            GtkTreeIter iter_child;
-            gchar *x1, *y1, *x2, *y2;
             gdouble tmp;
-
-            str = g_strdup_printf("Match %d", ++n_match);
-            x1 = g_strdup_printf("%.2f", rect->x1);
-            y1 = g_strdup_printf("%.2f", rect->y1);
-            x2 = g_strdup_printf("%.2f", rect->x2);
-            y2 = g_strdup_printf("%.2f", rect->y2);
-
             tmp = rect->y1;
             rect->y1 = height - rect->y2;
             rect->y2 = height - tmp;
-
             gtk_tree_store_append(GTK_TREE_STORE(model), &iter_child, &iter);
-            gtk_tree_store_set(GTK_TREE_STORE(model), &iter_child, 
TITLE_COLUMN, str, X1_COLUMN, x1, Y1_COLUMN, y1, X2_COLUMN, x2, Y2_COLUMN, y2, 
VISIBLE_COLUMN, TRUE, PAGE_COLUMN, demo->page_index, PAGE_RECT, rect, -1);
-            g_free(str);
-            g_free(x1);
-            g_free(y1);
-            g_free(x2);
-            g_free(y2);
-            g_object_weak_ref(G_OBJECT(model), 
(GWeakNotify)poppler_rectangle_free, rect);
+            pgd_find_append_match(demo, model, &iter_child, rect, n_match);
+            if (!poppler_rectangle_find_get_match_continued(rect))
+                ++n_match;
         }
         g_list_free(matches);
+
+        str = g_strdup_printf("%d matches found on page %d in %.4f seconds", 
n_match, demo->page_index + 1, g_timer_elapsed(timer, NULL));
+
+        gtk_tree_store_set(GTK_TREE_STORE(model), &iter, TITLE_COLUMN, str, 
VISIBLE_COLUMN, FALSE, PAGE_COLUMN, demo->page_index, -1);
+        g_free(str);
     }
 
     g_timer_destroy(timer);
@@ -154,6 +156,11 @@ static gboolean pgd_find_find_text(PgdFindDemo *demo)
     return demo->page_index < demo->n_pages;
 }
 
+static void find_text_idle_finish(PgdFindDemo *demo) /* destroy-notify passed to g_idle_add_full() for the find idle source */
+{
+    demo->idle_id = 0; /* forget the source id; pgd_find_button_clicked() only calls g_source_remove() when idle_id > 0 */
+}
+
 static cairo_surface_t *pgd_find_render_page(PgdFindDemo *demo)
 {
     cairo_t *cr;
@@ -252,7 +259,7 @@ static void pgd_find_button_clicked(GtkButton *button, 
PgdFindDemo *demo)
     pgd_find_update_progress(demo, demo->page_index);
     if (demo->idle_id > 0)
         g_source_remove(demo->idle_id);
-    demo->idle_id = g_idle_add((GSourceFunc)pgd_find_find_text, demo);
+    demo->idle_id = g_idle_add_full(G_PRIORITY_DEFAULT_IDLE, 
(GSourceFunc)pgd_find_find_text, demo, (GDestroyNotify)find_text_idle_finish);
 }
 
 static void pgd_find_button_sensitivity_cb(GtkWidget *button, GtkEntry *entry)
@@ -309,6 +316,22 @@ static void pgd_find_backwards_toggled(GtkToggleButton 
*togglebutton, PgdFindDem
         demo->options &= ~POPPLER_FIND_BACKWARDS;
 }
 
+/*
+ * "toggled" handler of the "Multi-line" check button: mirrors the button
+ * state into the POPPLER_FIND_MULTILINE bit of demo->options.
+ */
+static void pgd_find_multiline_toggled(GtkToggleButton *togglebutton, PgdFindDemo *demo)
+{
+    const gboolean want_multiline = gtk_toggle_button_get_active(togglebutton);
+
+    if (!want_multiline) {
+        demo->options &= ~POPPLER_FIND_MULTILINE;
+        return;
+    }
+
+    demo->options |= POPPLER_FIND_MULTILINE;
+}
+
+/*
+ * "toggled" handler of the "Ignore diacritics" check button: mirrors the
+ * button state into the POPPLER_FIND_IGNORE_DIACRITICS bit of demo->options.
+ */
+static void pgd_find_ignore_diacritics_toggled(GtkToggleButton *togglebutton, PgdFindDemo *demo)
+{
+    const gboolean want_ignore = gtk_toggle_button_get_active(togglebutton);
+
+    if (!want_ignore) {
+        demo->options &= ~POPPLER_FIND_IGNORE_DIACRITICS;
+        return;
+    }
+
+    demo->options |= POPPLER_FIND_IGNORE_DIACRITICS;
+}
+
 static void pgd_find_whole_words_toggled(GtkToggleButton *togglebutton, 
PgdFindDemo *demo)
 {
     if (gtk_toggle_button_get_active(togglebutton))
@@ -345,6 +368,16 @@ GtkWidget *pgd_find_create_widget(PopplerDocument 
*document)
 
     hbox = gtk_box_new(GTK_ORIENTATION_HORIZONTAL, 6);
 
+    checkbutton = gtk_check_button_new_with_label("Multi-line");
+    g_signal_connect(checkbutton, "toggled", 
G_CALLBACK(pgd_find_multiline_toggled), demo);
+    gtk_box_pack_start(GTK_BOX(hbox), checkbutton, FALSE, FALSE, 0);
+    gtk_widget_show(checkbutton);
+
+    checkbutton = gtk_check_button_new_with_label("Ignore diacritics");
+    g_signal_connect(checkbutton, "toggled", 
G_CALLBACK(pgd_find_ignore_diacritics_toggled), demo);
+    gtk_box_pack_start(GTK_BOX(hbox), checkbutton, FALSE, FALSE, 0);
+    gtk_widget_show(checkbutton);
+
     demo->entry = gtk_entry_new();
     gtk_box_pack_start(GTK_BOX(hbox), demo->entry, FALSE, TRUE, 0);
     gtk_widget_show(demo->entry);
diff --git a/glib/poppler-page.cc b/glib/poppler-page.cc
index 3332a9eb..7536cfcb 100644
--- a/glib/poppler-page.cc
+++ b/glib/poppler-page.cc
@@ -47,6 +47,8 @@ enum
     PROP_LABEL
 };
 
+static PopplerRectangleExtended *poppler_rectangle_extended_new();
+
 typedef struct _PopplerPageClass PopplerPageClass;
 struct _PopplerPageClass
 {
@@ -615,12 +617,7 @@ GList *poppler_page_get_selection_region(PopplerPage 
*page, gdouble scale, Poppl
     for (const PDFRectangle *selection_rect : *list) {
         PopplerRectangle *rect;
 
-        rect = poppler_rectangle_new();
-
-        rect->x1 = selection_rect->x1;
-        rect->y1 = selection_rect->y1;
-        rect->x2 = selection_rect->x2;
-        rect->y2 = selection_rect->y2;
+        rect = poppler_rectangle_new_from_pdf_rectangle(selection_rect);
 
         region = g_list_prepend(region, rect);
 
@@ -811,15 +808,33 @@ char *poppler_page_get_text_for_area(PopplerPage *page, 
PopplerRectangle *area)
  * returns a #GList of rectangles for each occurrence of the text on the page.
  * The coordinates are in PDF points.
  *
- * Return value: (element-type PopplerRectangle) (transfer full): a #GList of 
#PopplerRectangle,
+ * When %POPPLER_FIND_MULTILINE is passed in @options, matches may span more 
than
+ * one line. In this case, the returned list will contain one #PopplerRectangle
+ * for each part of a match. The function 
poppler_rectangle_find_get_match_continued()
+ * will return %TRUE for all rectangles belonging to the same match, except for
+ * the last one. If a hyphen was ignored at the end of the part of the match,
+ * poppler_rectangle_find_get_ignored_hyphen() will return %TRUE for that
+ * rectangle.
+ *
+ * Note that currently matches spanning more than two lines are not found.
+ * (This limitation may be lifted in a future version.)
+ *
+ * Note also that currently finding multi-line matches backwards is not
+ * implemented; if you pass %POPPLER_FIND_BACKWARDS and %POPPLER_FIND_MULTILINE
+ * together, %POPPLER_FIND_MULTILINE will be ignored.
+ *
+ * Return value: (element-type PopplerRectangle) (transfer full): a newly 
allocated list
+ * of newly allocated #PopplerRectangle. Free with g_list_free_full() using 
poppler_rectangle_free().
  *
  * Since: 0.22
  **/
 GList *poppler_page_find_text_with_options(PopplerPage *page, const char 
*text, PopplerFindFlags options)
 {
-    PopplerRectangle *match;
+    PopplerRectangleExtended *match;
     GList *matches;
     double xMin, yMin, xMax, yMax;
+    PDFRectangle continueMatch;
+    bool ignoredHyphen;
     gunichar *ucs4;
     glong ucs4_len;
     double height;
@@ -835,22 +850,46 @@ GList *poppler_page_find_text_with_options(PopplerPage 
*page, const char *text,
     ucs4 = g_utf8_to_ucs4_fast(text, -1, &ucs4_len);
     poppler_page_get_size(page, nullptr, &height);
 
+    const bool multiline = (options & POPPLER_FIND_MULTILINE);
     backwards = options & POPPLER_FIND_BACKWARDS;
     matches = nullptr;
     xMin = 0;
     yMin = backwards ? height : 0;
 
+    continueMatch.x1 = G_MAXDOUBLE; // we use this to detect valid returned 
values
+
     while (text_dev->findText(ucs4, ucs4_len, false, true, // startAtTop, 
stopAtBottom
                               start_at_last,
                               false, // stopAtLast
-                              options & POPPLER_FIND_CASE_SENSITIVE, options & 
POPPLER_FIND_IGNORE_DIACRITICS, backwards, options & 
POPPLER_FIND_WHOLE_WORDS_ONLY, &xMin, &yMin, &xMax, &yMax)) {
-        match = poppler_rectangle_new();
+                              options & POPPLER_FIND_CASE_SENSITIVE, options & 
POPPLER_FIND_IGNORE_DIACRITICS, options & POPPLER_FIND_MULTILINE, backwards, 
options & POPPLER_FIND_WHOLE_WORDS_ONLY, &xMin, &yMin, &xMax, &yMax, 
&continueMatch,
+                              &ignoredHyphen)) {
+        match = poppler_rectangle_extended_new();
         match->x1 = xMin;
         match->y1 = height - yMax;
         match->x2 = xMax;
         match->y2 = height - yMin;
+        match->match_continued = false;
+        match->ignored_hyphen = false;
         matches = g_list_prepend(matches, match);
         start_at_last = TRUE;
+
+        if (continueMatch.x1 != G_MAXDOUBLE) {
+            // received rect for next-line part of a multi-line match, add it.
+            if (multiline) {
+                match->match_continued = true;
+                match->ignored_hyphen = ignoredHyphen;
+                match = poppler_rectangle_extended_new();
+                match->x1 = continueMatch.x1;
+                match->y1 = height - continueMatch.y1;
+                match->x2 = continueMatch.x2;
+                match->y2 = height - continueMatch.y2;
+                match->match_continued = false;
+                match->ignored_hyphen = false;
+                matches = g_list_prepend(matches, match);
+            }
+
+            continueMatch.x1 = G_MAXDOUBLE;
+        }
     }
 
     g_free(ucs4);
@@ -1565,6 +1604,22 @@ void poppler_page_remove_annot(PopplerPage *page, 
PopplerAnnot *annot)
 
 G_DEFINE_BOXED_TYPE(PopplerRectangle, poppler_rectangle, 
poppler_rectangle_copy, poppler_rectangle_free)
 
+static PopplerRectangleExtended *poppler_rectangle_extended_new() /* internal allocator used by poppler_rectangle_new() and poppler_rectangle_new_from_pdf_rectangle() */
+{
+    return g_slice_new0(PopplerRectangleExtended); /* zero-filled slice: coordinates start at 0 and both flags at false */
+}
+
+/*
+ * Allocates a new rectangle and copies the four coordinates of @rect into
+ * it unchanged. The allocation is a PopplerRectangleExtended, returned
+ * through the public PopplerRectangle pointer type; the extra flag members
+ * keep the zero value the allocator gave them.
+ */
+PopplerRectangle *poppler_rectangle_new_from_pdf_rectangle(const PDFRectangle *rect)
+{
+    PopplerRectangleExtended *ext = poppler_rectangle_extended_new();
+
+    ext->x1 = rect->x1;
+    ext->x2 = rect->x2;
+    ext->y1 = rect->y1;
+    ext->y2 = rect->y2;
+
+    return reinterpret_cast<PopplerRectangle *>(ext);
+}
+
 /**
  * poppler_rectangle_new:
  *
@@ -1574,36 +1629,95 @@ G_DEFINE_BOXED_TYPE(PopplerRectangle, 
poppler_rectangle, poppler_rectangle_copy,
  */
 PopplerRectangle *poppler_rectangle_new(void)
 {
-    return g_slice_new0(PopplerRectangle);
+    return reinterpret_cast<PopplerRectangle 
*>(poppler_rectangle_extended_new());
 }
 
 /**
  * poppler_rectangle_copy:
  * @rectangle: a #PopplerRectangle to copy
  *
- * Creates a copy of @rectangle
+ * Creates a copy of @rectangle.
  *
+ * Note that you must only use this function on an allocated PopplerRectangle, 
as
+ * returned by poppler_rectangle_new(), poppler_rectangle_copy(), or the list 
elements
+ * returned from poppler_page_find_text() or 
poppler_page_find_text_with_options().
  * Returns: a new allocated copy of @rectangle
  */
 PopplerRectangle *poppler_rectangle_copy(PopplerRectangle *rectangle)
 {
     g_return_val_if_fail(rectangle != nullptr, NULL);
 
-    return g_slice_dup(PopplerRectangle, rectangle);
+    auto ext_rectangle = reinterpret_cast<PopplerRectangleExtended 
*>(rectangle);
+    return reinterpret_cast<PopplerRectangle 
*>(g_slice_dup(PopplerRectangleExtended, ext_rectangle));
 }
 
 /**
  * poppler_rectangle_free:
  * @rectangle: a #PopplerRectangle
  *
- * Frees the given #PopplerRectangle
+ * Frees the given #PopplerRectangle.
+ *
+ * Note that you must only use this function on an allocated PopplerRectangle, 
as
+ * returned by poppler_rectangle_new(), poppler_rectangle_copy(), or the list 
elements
+ * returned from poppler_page_find_text() or 
poppler_page_find_text_with_options().
  */
 void poppler_rectangle_free(PopplerRectangle *rectangle)
 {
     /* Rectangles created by poppler_rectangle_new(), poppler_rectangle_copy()
      * and the find functions are allocated as PopplerRectangleExtended.
      * The slice allocator must be told the same size on free as on
      * allocation, so release the block with the extended type; freeing it
      * as a plain PopplerRectangle would hand back a block of the wrong size. */
     auto ext_rectangle = reinterpret_cast<PopplerRectangleExtended *>(rectangle);
     g_slice_free(PopplerRectangleExtended, ext_rectangle);
 }
 
-/* PopplerPoint type */
+/**
+ * poppler_rectangle_find_get_match_continued:
+ * @rectangle: a #PopplerRectangle
+ *
+ * When using poppler_page_find_text_with_options() with the
+ * %POPPLER_FIND_MULTILINE flag, a match may span more than one line
+ * and thus consist of more than one rectangle. Every rectangle belonging
+ * to the same match will return %TRUE from this function, except for
+ * the last rectangle, where this function will return %FALSE.
+ *
+ * Note that you must only call this function on a #PopplerRectangle
+ * returned in the list from poppler_page_find_text() or
+ * poppler_page_find_text_with_options().
+ *
+ * Returns: whether there are more rectangles belonging to the same match
+ *
+ * Since: 21.05.0
+ */
+gboolean poppler_rectangle_find_get_match_continued(const PopplerRectangle *rectangle)
+{
+    g_return_val_if_fail(rectangle != nullptr, false);
+
+    // The flag lives in the private extended layout behind the public struct.
+    return reinterpret_cast<const PopplerRectangleExtended *>(rectangle)->match_continued;
+}
+
+/**
+ * poppler_rectangle_find_get_ignored_hyphen:
+ * @rectangle: a #PopplerRectangle
+ *
+ * When using poppler_page_find_text_with_options() with the
+ * %POPPLER_FIND_MULTILINE flag, a match may span more than one line,
+ * and may have been formed by ignoring a hyphen at the end of the line.
+ * When this happens at the end of the line corresponding to @rectangle,
+ * this function returns %TRUE (and then 
poppler_rectangle_find_get_match_continued()
+ * will also return %TRUE); otherwise it returns %FALSE.
+ *
+ * Note that you must only call this function on a #PopplerRectangle
+ * returned in the list from poppler_page_find_text() or
+ * poppler_page_find_text_with_options().
+ *
+ * Returns: whether a hyphen was ignored at the end of the line corresponding 
to @rectangle.
+ *
+ * Since: 21.05.0
+ */
+gboolean poppler_rectangle_find_get_ignored_hyphen(const PopplerRectangle *rectangle)
+{
+    g_return_val_if_fail(rectangle != nullptr, false);
+
+    // The flag lives in the private extended layout behind the public struct.
+    return reinterpret_cast<const PopplerRectangleExtended *>(rectangle)->ignored_hyphen;
+}
 
 G_DEFINE_BOXED_TYPE(PopplerPoint, poppler_point, poppler_point_copy, 
poppler_point_free)
 
diff --git a/glib/poppler-page.h b/glib/poppler-page.h
index 95b0cf9c..2d037d8e 100644
--- a/glib/poppler-page.h
+++ b/glib/poppler-page.h
@@ -140,6 +140,10 @@ POPPLER_PUBLIC
 PopplerRectangle *poppler_rectangle_copy(PopplerRectangle *rectangle);
 POPPLER_PUBLIC
 void poppler_rectangle_free(PopplerRectangle *rectangle);
+POPPLER_PUBLIC
+gboolean poppler_rectangle_find_get_match_continued(const PopplerRectangle 
*rectangle);
+POPPLER_PUBLIC
+gboolean poppler_rectangle_find_get_ignored_hyphen(const PopplerRectangle 
*rectangle);
 
 /* A point on a page, with coordinates in PDF points. */
 #define POPPLER_TYPE_POINT (poppler_point_get_type())
diff --git a/glib/poppler-private.h b/glib/poppler-private.h
index 10272716..02967fbf 100644
--- a/glib/poppler-private.h
+++ b/glib/poppler-private.h
@@ -112,6 +112,25 @@ struct _PopplerStructureElement
     const StructElement *elem;
 };
 
+/*
+ * PopplerRectangleExtended:
+ *
+ * The real type behind the public PopplerRectangle.
+ * Must be ABI compatible to it!
+ *
+ * Pointers to this struct are reinterpreted as PopplerRectangle pointers
+ * and back, so the four coordinates have to stay first and in this order;
+ * additional members may only be appended after them.
+ * NOTE(review): this assumes the public struct consists of exactly
+ * x1, y1, x2, y2 as doubles -- confirm against poppler-page.h.
+ */
+typedef struct
+{
+    /*< private >*/
+    double x1;
+    double y1;
+    double x2;
+    double y2;
+    bool match_continued; /* Described in poppler_rectangle_find_get_match_continued() */
+    bool ignored_hyphen; /* Described in poppler_rectangle_find_get_ignored_hyphen() */
+} PopplerRectangleExtended;
+
+PopplerRectangle *poppler_rectangle_new_from_pdf_rectangle(const PDFRectangle 
*rect);
+
 GList *_poppler_document_get_layers(PopplerDocument *document);
 GList *_poppler_document_get_layer_rbgroup(PopplerDocument *document, Layer 
*layer);
 PopplerPage *_poppler_page_new(PopplerDocument *document, Page *page, int 
index);
diff --git a/glib/poppler.h b/glib/poppler.h
index 5692c28d..35a3bfd4 100644
--- a/glib/poppler.h
+++ b/glib/poppler.h
@@ -157,6 +157,10 @@ typedef enum /*< flags >*/
  * @POPPLER_FIND_IGNORE_DIACRITICS: do diacritics insensitive search,
  * i.e. ignore accents, umlauts, diaeresis,etc. while matching. This
  * option will be ignored if the search term is not pure ascii. Since 0.73.
+ * @POPPLER_FIND_MULTILINE: allows to match on text spanning from
+ * end of a line to the next line. (Currently it won't match on text spanning
+ * more than two lines.) Automatically ignores hyphen at end of line, and
+ * allows whitespace in search term to match on newline char. Since: 21.05.0.
  *
  * Flags using while searching text in a page
  *
@@ -168,7 +172,8 @@ typedef enum /*< flags >*/
     POPPLER_FIND_CASE_SENSITIVE = 1 << 0,
     POPPLER_FIND_BACKWARDS = 1 << 1,
     POPPLER_FIND_WHOLE_WORDS_ONLY = 1 << 2,
-    POPPLER_FIND_IGNORE_DIACRITICS = 1 << 3
+    POPPLER_FIND_IGNORE_DIACRITICS = 1 << 3,
+    POPPLER_FIND_MULTILINE = 1 << 4
 } PopplerFindFlags;
 
 typedef struct _PopplerDocument PopplerDocument;
diff --git a/glib/reference/poppler-sections.txt 
b/glib/reference/poppler-sections.txt
index b6f8ecc9..8ad07b09 100644
--- a/glib/reference/poppler-sections.txt
+++ b/glib/reference/poppler-sections.txt
@@ -92,6 +92,8 @@ poppler_quadrilateral_copy
 poppler_quadrilateral_free
 poppler_quadrilateral_new
 poppler_rectangle_copy
+poppler_rectangle_find_get_match_continued
+poppler_rectangle_find_get_ignored_hyphen
 poppler_rectangle_free
 poppler_rectangle_new
 poppler_text_attributes_copy
diff --git a/poppler/TextOutputDev.cc b/poppler/TextOutputDev.cc
index 956c1328..7bc1b920 100644
--- a/poppler/TextOutputDev.cc
+++ b/poppler/TextOutputDev.cc
@@ -3798,22 +3798,62 @@ void TextPage::coalesce(bool physLayout, double 
fixedPitch, bool doHTML)
 #endif
 }
 
+// Computes the bounding box of the span between edge indices <start> and
+// <end> of <line>, taking the line's rotation into account.
+// For rotations 0 and 2 the x range comes from line->edge[] and the y range
+// from the line's own yMin/yMax; for rotations 1 and 3 it is the other way
+// round. For rotations 2 and 3 the two edge values are swapped, so that the
+// edge at <end> becomes the minimum. Any other rotation value leaves the
+// four out-parameters untouched.
+void TextPage::adjustRotation(TextLine *line, int start, int end, double *xMin, double *xMax, double *yMin, double *yMax)
+{
+    const int rot = line->rot;
+    if (rot < 0 || rot > 3) {
+        return;
+    }
+
+    const bool swapped = rot >= 2;
+    const double edgeMin = line->edge[swapped ? end : start];
+    const double edgeMax = line->edge[swapped ? start : end];
+
+    if ((rot & 1) == 0) {
+        // rotations 0 and 2: the edges run along the x axis
+        *xMin = edgeMin;
+        *xMax = edgeMax;
+        *yMin = line->yMin;
+        *yMax = line->yMax;
+    } else {
+        // rotations 1 and 3: the edges run along the y axis
+        *xMin = line->xMin;
+        *xMax = line->xMax;
+        *yMin = edgeMin;
+        *yMax = edgeMax;
+    }
+}
+
 bool TextPage::findText(const Unicode *s, int len, bool startAtTop, bool 
stopAtBottom, bool startAtLast, bool stopAtLast, bool caseSensitive, bool 
backward, bool wholeWord, double *xMin, double *yMin, double *xMax, double 
*yMax)
 {
-    return findText(s, len, startAtTop, stopAtBottom, startAtLast, stopAtLast, 
caseSensitive, false, backward, wholeWord, xMin, yMin, xMax, yMax);
+    return findText(s, len, startAtTop, stopAtBottom, startAtLast, stopAtLast, 
caseSensitive, false, false, backward, wholeWord, xMin, yMin, xMax, yMax, 
nullptr, nullptr);
 }
 
 bool TextPage::findText(const Unicode *s, int len, bool startAtTop, bool 
stopAtBottom, bool startAtLast, bool stopAtLast, bool caseSensitive, bool 
ignoreDiacritics, bool backward, bool wholeWord, double *xMin, double *yMin, 
double *xMax,
                         double *yMax)
+{
+    return findText(s, len, startAtTop, stopAtBottom, startAtLast, stopAtLast, 
caseSensitive, ignoreDiacritics, false, backward, wholeWord, xMin, yMin, xMax, 
yMax, nullptr, nullptr);
+}
+
+bool TextPage::findText(const Unicode *s, int len, bool startAtTop, bool 
stopAtBottom, bool startAtLast, bool stopAtLast, bool caseSensitive, bool 
ignoreDiacritics, bool matchAcrossLines, bool backward, bool wholeWord, double 
*xMin,
+                        double *yMin, double *xMax, double *yMax, PDFRectangle 
*continueMatch, bool *ignoredHyphen)
 {
     TextBlock *blk;
     TextLine *line;
     Unicode *s2, *txt, *reordered;
     Unicode *p;
+    Unicode *nextline;
+    int nextline_len;
+    bool nextlineAfterHyphen = false;
     int txtSize, m, i, j, k;
     double xStart, yStart, xStop, yStop;
     double xMin0, yMin0, xMax0, yMax0;
     double xMin1, yMin1, xMax1, yMax1;
+    double xMin2, yMin2, xMax2, yMax2;
     bool found;
 
     if (len == 0) {
@@ -3824,6 +3864,11 @@ bool TextPage::findText(const Unicode *s, int len, bool 
startAtTop, bool stopAtB
         return false;
     }
 
+    if (matchAcrossLines && backward) {
+        // matchAcrossLines is unimplemented for backward search
+        matchAcrossLines = false;
+    }
+
     // handle right-to-left text
     reordered = (Unicode *)gmallocn(len, sizeof(Unicode));
     reorderText(s, len, nullptr, primaryLR, nullptr, reordered);
@@ -3907,6 +3952,12 @@ bool TextPage::findText(const Unicode *s, int len, bool 
startAtTop, bool stopAtB
 
             if (!line->normalized)
                 line->normalized = unicodeNormalizeNFKC(line->text, line->len, 
&line->normalized_len, &line->normalized_idx, true);
+
+            if (matchAcrossLines && line->next && !line->next->normalized)
+                line->next->normalized = 
unicodeNormalizeNFKC(line->next->text, line->next->len, 
&line->next->normalized_len, &line->next->normalized_idx, true);
+            nextline = nullptr;
+            nextline_len = 0;
+
             // convert the line to uppercase
             m = line->normalized_len;
 
@@ -3917,6 +3968,9 @@ bool TextPage::findText(const Unicode *s, int len, bool 
startAtTop, bool stopAtB
                     m = line->ascii_len;
                 else
                     ignoreDiacritics = false;
+
+                if (matchAcrossLines && line->next && 
!line->next->ascii_translation)
+                    unicodeToAscii7(line->next->normalized, 
line->next->normalized_len, &line->next->ascii_translation, 
&line->next->ascii_len, line->next->normalized_idx, &line->next->ascii_idx);
             }
             if (!caseSensitive) {
                 if (m > txtSize) {
@@ -3929,65 +3983,111 @@ bool TextPage::findText(const Unicode *s, int len, 
bool startAtTop, bool stopAtB
                     else
                         txt[k] = unicodeToUpper(line->normalized[k]);
                 }
+                if (matchAcrossLines && line->next) {
+                    nextline_len = ignoreDiacritics ? line->next->ascii_len : 
line->next->normalized_len;
+                    nextline = (Unicode *)gmallocn(nextline_len, 
sizeof(Unicode));
+                    for (k = 0; k < nextline_len; ++k) {
+                        nextline[k] = ignoreDiacritics ? 
unicodeToUpper(line->next->ascii_translation[k]) : 
unicodeToUpper(line->next->normalized[k]);
+                    }
+                }
             } else {
                 if (ignoreDiacritics)
                     txt = line->ascii_translation;
                 else
                     txt = line->normalized;
+
+                if (matchAcrossLines && line->next) {
+                    nextline_len = ignoreDiacritics ? line->next->ascii_len : 
line->next->normalized_len;
+                    nextline = ignoreDiacritics ? 
line->next->ascii_translation : line->next->normalized;
+                }
             }
 
             // search each position in this line
             j = backward ? m - len : 0;
             p = txt + j;
-            while (backward ? j >= 0 : j <= m - len) {
-                if (!wholeWord || ((j == 0 || !unicodeTypeAlphaNum(txt[j - 
1])) && (j + len == m || !unicodeTypeAlphaNum(txt[j + len])))) {
+            while (backward ? j >= 0 : j <= m - (nextline ? 1 : len)) {
+                bool wholeWordStartIsOk, wholeWordEndIsOk;
+                if (wholeWord) {
+                    wholeWordStartIsOk = j == 0 || !unicodeTypeAlphaNum(txt[j 
- 1]);
+                    if (nextline)
+                        wholeWordEndIsOk = true; // word end may be in next 
line, so we'll check it later
+                    else
+                        wholeWordEndIsOk = j + len == m || 
!unicodeTypeAlphaNum(txt[j + len]);
+                }
+                if (!wholeWord || (wholeWordStartIsOk && wholeWordEndIsOk)) {
+                    int n = 0;
+                    bool spaceConsumedByNewline = false;
+                    bool found_it;
 
                     // compare the strings
                     for (k = 0; k < len; ++k) {
-                        if (p[k] != s2[k]) {
+                        bool last_char_of_line = j + k == m - 1;
+                        bool last_char_of_search_term = k == len - 1;
+
+                        if (p[k] != s2[k] || (nextline && last_char_of_line && 
!last_char_of_search_term)) {
+                            // now check if the comparison failed at the 
end-of-line hyphen,
+                            // and if so, keep on comparing at the next line
+                            nextlineAfterHyphen = false;
+
+                            if (s2[k] == p[k]) {
+                                if (p[k] != (Unicode)'-' && 
!UnicodeIsWhitespace(s2[k + 1])) {
+                                    break;
+                                }
+                                k++;
+                            } else if (p[k] != (Unicode)'-' || 
UnicodeIsWhitespace(s2[k]))
+                                break;
+                            else
+                                nextlineAfterHyphen = true;
+
+                            for (; n < nextline_len && k < len; ++k, ++n) {
+                                if (nextline[n] != s2[k]) {
+                                    if (!spaceConsumedByNewline && !n && 
UnicodeIsWhitespace(s2[k])) {
+                                        n = -1;
+                                        spaceConsumedByNewline = true;
+                                        continue;
+                                    }
+                                    break;
+                                }
+                            }
                             break;
                         }
                     }
 
+                    found_it = k == len;
+                    if (found_it && nextline && wholeWord) { // check word end 
for nextline case
+                        if (n) // Match ended at next line
+                            wholeWordEndIsOk = n == nextline_len || 
!unicodeTypeAlphaNum(nextline[n]);
+                        else // Match ended on same line
+                            wholeWordEndIsOk = j + len == m || 
!unicodeTypeAlphaNum(txt[j + len]);
+
+                        if (!wholeWordEndIsOk)
+                            found_it = false;
+                    }
                     // found it
-                    if (k == len) {
+                    if (found_it) {
+                        bool nextLineMatch = (bool)n;
+                        if (spaceConsumedByNewline)
+                            k--;
                         // where s2 matches a subsequence of a compatibility 
equivalence
                         // decomposition, highlight the entire glyph, since we 
don't know
                         // the internal layout of subglyph components
                         int normStart, normAfterEnd;
                         if (ignoreDiacritics) {
                             normStart = line->ascii_idx[j];
-                            normAfterEnd = line->ascii_idx[j + len - 1] + 1;
+                            if (nextline)
+                                normAfterEnd = line->ascii_idx[j + k - n];
+                            else
+                                normAfterEnd = line->ascii_idx[j + len - 1] + 
1;
                         } else {
                             normStart = line->normalized_idx[j];
-                            normAfterEnd = line->normalized_idx[j + len - 1] + 
1;
-                        }
-                        switch (line->rot) {
-                        case 0:
-                            xMin1 = line->edge[normStart];
-                            xMax1 = line->edge[normAfterEnd];
-                            yMin1 = line->yMin;
-                            yMax1 = line->yMax;
-                            break;
-                        case 1:
-                            xMin1 = line->xMin;
-                            xMax1 = line->xMax;
-                            yMin1 = line->edge[normStart];
-                            yMax1 = line->edge[normAfterEnd];
-                            break;
-                        case 2:
-                            xMin1 = line->edge[normAfterEnd];
-                            xMax1 = line->edge[normStart];
-                            yMin1 = line->yMin;
-                            yMax1 = line->yMax;
-                            break;
-                        case 3:
-                            xMin1 = line->xMin;
-                            xMax1 = line->xMax;
-                            yMin1 = line->edge[normAfterEnd];
-                            yMax1 = line->edge[normStart];
-                            break;
+                            if (nextline)
+                                normAfterEnd = line->normalized_idx[j + k - n];
+                            else
+                                normAfterEnd = line->normalized_idx[j + len - 
1] + 1;
                         }
+
+                        adjustRotation(line, normStart, normAfterEnd, &xMin1, 
&xMax1, &yMin1, &yMax1);
+
                         if (backward) {
                             if ((startAtTop || yMin1 < yStart || (yMin1 == 
yStart && xMin1 < xStart)) && (stopAtBottom || yMin1 > yStop || (yMin1 == yStop 
&& xMin1 > xStop))) {
                                 if (!found || yMin1 > yMin0 || (yMin1 == yMin0 
&& xMin1 > xMin0)) {
@@ -4006,6 +4106,18 @@ bool TextPage::findText(const Unicode *s, int len, bool 
startAtTop, bool stopAtB
                                     yMin0 = yMin1;
                                     yMax0 = yMax1;
                                     found = true;
+                                    if (nextLineMatch) { // set the out 
parameters
+                                        if (ignoredHyphen)
+                                            *ignoredHyphen = 
nextlineAfterHyphen;
+
+                                        if (continueMatch) {
+                                            adjustRotation(line->next, 0, n, 
&xMin2, &xMax2, &yMin2, &yMax2);
+                                            continueMatch->x1 = xMin2;
+                                            continueMatch->y1 = yMax2;
+                                            continueMatch->x2 = xMax2;
+                                            continueMatch->y2 = yMin2;
+                                        }
+                                    }
                                 }
                             }
                         }
@@ -4019,6 +4131,10 @@ bool TextPage::findText(const Unicode *s, int len, bool 
startAtTop, bool stopAtB
                     ++p;
                 }
             }
+
+            if (nextline && nextline != line->next->ascii_translation && 
nextline != line->next->normalized) {
+                gfree(nextline);
+            }
         }
     }
 
diff --git a/poppler/TextOutputDev.h b/poppler/TextOutputDev.h
index 2c39c67d..9e2d8275 100644
--- a/poppler/TextOutputDev.h
+++ b/poppler/TextOutputDev.h
@@ -613,6 +613,20 @@ public:
     bool findText(const Unicode *s, int len, bool startAtTop, bool 
stopAtBottom, bool startAtLast, bool stopAtLast, bool caseSensitive, bool 
ignoreDiacritics, bool backward, bool wholeWord, double *xMin, double *yMin, 
double *xMax,
                   double *yMax);
 
+    // Variant of findText() with the extra parameter <matchAcrossLines>, which
+    // allows <s> to match text spanning from the end of a line to the next
+    // line. In that case, the rect for the part of the match that falls on the
+    // next line is stored in <continueMatch>, and <ignoredHyphen> is set to
+    // true if a hyphen at the end of the first line was ignored while
+    // matching, or to false otherwise.
+    // Only matches across two lines are supported, i.e. it won't match where
+    // <s> spans more than two lines.
+    //
+    // <matchAcrossLines> will be ignored if <backward> is true (as that
+    // combination has not been implemented yet).
+    bool findText(const Unicode *s, int len, bool startAtTop, bool 
stopAtBottom, bool startAtLast, bool stopAtLast, bool caseSensitive, bool 
ignoreDiacritics, bool matchAcrossLines, bool backward, bool wholeWord, double 
*xMin, double *yMin,
+                  double *xMax, double *yMax, PDFRectangle *continueMatch, 
bool *ignoredHyphen);
+
     // Get the text which is inside the specified rectangle.
     GooString *getText(double xMin, double yMin, double xMax, double yMax, 
EndOfLineKind textEOL) const;
 
@@ -656,6 +670,7 @@ private:
     void clear();
     void assignColumns(TextLineFrag *frags, int nFrags, bool rot) const;
     int dumpFragment(const Unicode *text, int len, const UnicodeMap *uMap, 
GooString *s) const;
+    void adjustRotation(TextLine *line, int start, int end, double *xMin, 
double *xMax, double *yMin, double *yMax);
 
     bool rawOrder; // keep text in content stream order
     bool discardDiag; // discard diagonal text
diff --git a/qt5/src/poppler-page-private.h b/qt5/src/poppler-page-private.h
index e1312d44..6f1e668e 100644
--- a/qt5/src/poppler-page-private.h
+++ b/qt5/src/poppler-page-private.h
@@ -49,8 +49,8 @@ public:
     static Link *convertLinkActionToLink(::LinkAction *a, DocumentData 
*parentDoc, const QRectF &linkArea);
 
     TextPage *prepareTextSearch(const QString &text, Page::Rotation rotate, 
QVector<Unicode> *u);
-    bool performSingleTextSearch(TextPage *textPage, QVector<Unicode> &u, 
double &sLeft, double &sTop, double &sRight, double &sBottom, 
Page::SearchDirection direction, bool sCase, bool sWords, bool sDiacritics);
-    QList<QRectF> performMultipleTextSearch(TextPage *textPage, 
QVector<Unicode> &u, bool sCase, bool sWords, bool sDiacritics);
+    bool performSingleTextSearch(TextPage *textPage, QVector<Unicode> &u, 
double &sLeft, double &sTop, double &sRight, double &sBottom, 
Page::SearchDirection direction, bool sCase, bool sWords, bool sDiacritics, 
bool sAcrossLines);
+    QList<QRectF> performMultipleTextSearch(TextPage *textPage, 
QVector<Unicode> &u, bool sCase, bool sWords, bool sDiacritics, bool 
sAcrossLines);
 };
 
 }
diff --git a/qt5/src/poppler-page.cc b/qt5/src/poppler-page.cc
index c433ada4..bae438b7 100644
--- a/qt5/src/poppler-page.cc
+++ b/qt5/src/poppler-page.cc
@@ -50,6 +50,7 @@
 #include <QtGui/QPainter>
 
 #include <config.h>
+#include <cfloat>
 #include <poppler-config.h>
 #include <PDFDoc.h>
 #include <Catalog.h>
@@ -359,24 +360,28 @@ inline TextPage *PageData::prepareTextSearch(const 
QString &text, Page::Rotation
     return textPage;
 }
 
-inline bool PageData::performSingleTextSearch(TextPage *textPage, 
QVector<Unicode> &u, double &sLeft, double &sTop, double &sRight, double 
&sBottom, Page::SearchDirection direction, bool sCase, bool sWords, bool 
sDiacritics = false)
+inline bool PageData::performSingleTextSearch(TextPage *textPage, 
QVector<Unicode> &u, double &sLeft, double &sTop, double &sRight, double 
&sBottom, Page::SearchDirection direction, bool sCase, bool sWords, bool 
sDiacritics,
+                                              bool sAcrossLines)
 {
     if (direction == Page::FromTop)
-        return textPage->findText(u.data(), u.size(), true, true, false, 
false, sCase, sDiacritics, false, sWords, &sLeft, &sTop, &sRight, &sBottom);
+        return textPage->findText(u.data(), u.size(), true, true, false, 
false, sCase, sDiacritics, sAcrossLines, false, sWords, &sLeft, &sTop, &sRight, 
&sBottom, nullptr, nullptr);
     else if (direction == Page::NextResult)
-        return textPage->findText(u.data(), u.size(), false, true, true, 
false, sCase, sDiacritics, false, sWords, &sLeft, &sTop, &sRight, &sBottom);
+        return textPage->findText(u.data(), u.size(), false, true, true, 
false, sCase, sDiacritics, sAcrossLines, false, sWords, &sLeft, &sTop, &sRight, 
&sBottom, nullptr, nullptr);
     else if (direction == Page::PreviousResult)
-        return textPage->findText(u.data(), u.size(), false, true, true, 
false, sCase, sDiacritics, true, sWords, &sLeft, &sTop, &sRight, &sBottom);
+        return textPage->findText(u.data(), u.size(), false, true, true, 
false, sCase, sDiacritics, sAcrossLines, true, sWords, &sLeft, &sTop, &sRight, 
&sBottom, nullptr, nullptr);
 
     return false;
 }
 
-inline QList<QRectF> PageData::performMultipleTextSearch(TextPage *textPage, 
QVector<Unicode> &u, bool sCase, bool sWords, bool sDiacritics = false)
+inline QList<QRectF> PageData::performMultipleTextSearch(TextPage *textPage, 
QVector<Unicode> &u, bool sCase, bool sWords, bool sDiacritics, bool 
sAcrossLines)
 {
     QList<QRectF> results;
     double sLeft = 0.0, sTop = 0.0, sRight = 0.0, sBottom = 0.0;
+    bool sIgnoredHyphen = false;
+    PDFRectangle continueMatch;
+    continueMatch.x1 = DBL_MAX; // we use this to detect valid return values
 
-    while (textPage->findText(u.data(), u.size(), false, true, true, false, 
sCase, sDiacritics, false, sWords, &sLeft, &sTop, &sRight, &sBottom)) {
+    while (textPage->findText(u.data(), u.size(), false, true, true, false, 
sCase, sDiacritics, sAcrossLines, false, sWords, &sLeft, &sTop, &sRight, 
&sBottom, &continueMatch, &sIgnoredHyphen)) {
         QRectF result;
 
         result.setLeft(sLeft);
@@ -385,6 +390,18 @@ inline QList<QRectF> 
PageData::performMultipleTextSearch(TextPage *textPage, QVe
         result.setBottom(sBottom);
 
         results.append(result);
+
+        if (sAcrossLines && continueMatch.x1 != DBL_MAX) {
+            QRectF resultN;
+
+            resultN.setLeft(continueMatch.x1);
+            resultN.setTop(continueMatch.y1);
+            resultN.setRight(continueMatch.x2);
+            resultN.setBottom(continueMatch.y1);
+
+            results.append(resultN);
+            continueMatch.x1 = DBL_MAX;
+        }
     }
 
     return results;
@@ -647,7 +664,7 @@ bool Page::search(const QString &text, double &sLeft, 
double &sTop, double &sRig
     QVector<Unicode> u;
     TextPage *textPage = m_page->prepareTextSearch(text, rotate, &u);
 
-    const bool found = m_page->performSingleTextSearch(textPage, u, sLeft, 
sTop, sRight, sBottom, direction, sCase, false);
+    const bool found = m_page->performSingleTextSearch(textPage, u, sLeft, 
sTop, sRight, sBottom, direction, sCase, false, false, false);
 
     textPage->decRefCnt();
 
@@ -659,11 +676,12 @@ bool Page::search(const QString &text, double &sLeft, 
double &sTop, double &sRig
     const bool sCase = flags.testFlag(IgnoreCase) ? false : true;
     const bool sWords = flags.testFlag(WholeWords) ? true : false;
     const bool sDiacritics = flags.testFlag(IgnoreDiacritics) ? true : false;
+    const bool sAcrossLines = flags.testFlag(AcrossLines) ? true : false;
 
     QVector<Unicode> u;
     TextPage *textPage = m_page->prepareTextSearch(text, rotate, &u);
 
-    const bool found = m_page->performSingleTextSearch(textPage, u, sLeft, 
sTop, sRight, sBottom, direction, sCase, sWords, sDiacritics);
+    const bool found = m_page->performSingleTextSearch(textPage, u, sLeft, 
sTop, sRight, sBottom, direction, sCase, sWords, sDiacritics, sAcrossLines);
 
     textPage->decRefCnt();
 
@@ -677,7 +695,7 @@ QList<QRectF> Page::search(const QString &text, SearchMode 
caseSensitive, Rotati
     QVector<Unicode> u;
     TextPage *textPage = m_page->prepareTextSearch(text, rotate, &u);
 
-    const QList<QRectF> results = m_page->performMultipleTextSearch(textPage, 
u, sCase, false);
+    const QList<QRectF> results = m_page->performMultipleTextSearch(textPage, 
u, sCase, false, false, false);
 
     textPage->decRefCnt();
 
@@ -689,11 +707,12 @@ QList<QRectF> Page::search(const QString &text, 
SearchFlags flags, Rotation rota
     const bool sCase = flags.testFlag(IgnoreCase) ? false : true;
     const bool sWords = flags.testFlag(WholeWords) ? true : false;
     const bool sDiacritics = flags.testFlag(IgnoreDiacritics) ? true : false;
+    const bool sAcrossLines = flags.testFlag(AcrossLines) ? true : false;
 
     QVector<Unicode> u;
     TextPage *textPage = m_page->prepareTextSearch(text, rotate, &u);
 
-    const QList<QRectF> results = m_page->performMultipleTextSearch(textPage, 
u, sCase, sWords, sDiacritics);
+    const QList<QRectF> results = m_page->performMultipleTextSearch(textPage, 
u, sCase, sWords, sDiacritics, sAcrossLines);
 
     textPage->decRefCnt();
 
diff --git a/qt5/src/poppler-qt5.h b/qt5/src/poppler-qt5.h
index 70d2566a..d2c55b3d 100644
--- a/qt5/src/poppler-qt5.h
+++ b/qt5/src/poppler-qt5.h
@@ -763,9 +763,12 @@ rather unexpected results.
         NoSearchFlags = 0x00000000, ///< since 0.63
         IgnoreCase = 0x00000001, ///< Case differences are ignored
         WholeWords = 0x00000002, ///< Only whole words are matched
-        IgnoreDiacritics = 0x00000004 ///< Diacritic differences (eg. accents, 
umlauts, diaeresis) are ignored. \since 0.73
-                                      ///< This option will have no effect if 
the search term contains characters which
-                                      ///< are not pure ascii.
+        IgnoreDiacritics = 0x00000004, ///< Diacritic differences (eg. 
accents, umlauts, diaeresis) are ignored. \since 0.73
+                                       ///< This option will have no effect if 
the search term contains characters which
+                                       ///< are not pure ascii.
+        AcrossLines = 0x00000008 ///< Allows matching text that spans from the end of a line to the next line.
+                                 ///< Text spanning more than two lines is not matched. A hyphen at the end of a line
+                                 ///< is ignored automatically, and whitespace in the search term may match a newline. \since 21.05.0
     };
     Q_DECLARE_FLAGS(SearchFlags, SearchFlag)
 
@@ -812,6 +815,9 @@ rather unexpected results.
     /**
        Returns a list of all occurrences of the specified text on the page.
 
+       If SearchFlags::AcrossLines is set in \p flags, a returned rect may
+       cover only part of a match when the match is split across two lines.
+
        \param text the text to search
        \param flags the flags to consider during matching
        \param rotate the rotation to apply for the search order
diff --git a/qt5/tests/check_search.cpp b/qt5/tests/check_search.cpp
index 56cb53fc..7b379ad0 100644
--- a/qt5/tests/check_search.cpp
+++ b/qt5/tests/check_search.cpp
@@ -2,12 +2,15 @@
 
 #include <poppler-qt5.h>
 
+// clazy:excludeall=qstring-allocations
+
 class TestSearch : public QObject
 {
     Q_OBJECT
 public:
     TestSearch(QObject *parent = nullptr) : QObject(parent) { }
 private slots:
+    void testAcrossLinesSearch(); // leave it first
     void bug7063();
     void testNextAndPrevious();
     void testWholeWordsOnly();
@@ -33,12 +36,12 @@ void TestSearch::bug7063()
 
     QCOMPARE(page->search(QStringLiteral(u"latin1:"), rectLeft, rectTop, 
rectRight, rectBottom, Poppler::Page::FromTop), false);
 
-    QCOMPARE(page->search(QString::fromUtf8("é"), rectLeft, rectTop, 
rectRight, rectBottom, Poppler::Page::FromTop), true); // 
clazy:exclude=qstring-allocations
-    QCOMPARE(page->search(QString::fromUtf8("à"), rectLeft, rectTop, 
rectRight, rectBottom, Poppler::Page::FromTop), true); // 
clazy:exclude=qstring-allocations
-    QCOMPARE(page->search(QString::fromUtf8("ç"), rectLeft, rectTop, 
rectRight, rectBottom, Poppler::Page::FromTop), true); // 
clazy:exclude=qstring-allocations
-    QCOMPARE(page->search(QString::fromUtf8("search \"é\", \"à\" or \"ç\""), 
rectLeft, rectTop, rectRight, rectBottom, Poppler::Page::FromTop), true); // 
clazy:exclude=qstring-allocations
-    QCOMPARE(page->search(QString::fromUtf8("¥µ©"), rectLeft, rectTop, 
rectRight, rectBottom, Poppler::Page::FromTop), true); // 
clazy:exclude=qstring-allocations
-    QCOMPARE(page->search(QString::fromUtf8("¥©"), rectLeft, rectTop, 
rectRight, rectBottom, Poppler::Page::FromTop), false); // 
clazy:exclude=qstring-allocations
+    QCOMPARE(page->search(QString::fromUtf8("é"), rectLeft, rectTop, 
rectRight, rectBottom, Poppler::Page::FromTop), true);
+    QCOMPARE(page->search(QString::fromUtf8("à"), rectLeft, rectTop, 
rectRight, rectBottom, Poppler::Page::FromTop), true);
+    QCOMPARE(page->search(QString::fromUtf8("ç"), rectLeft, rectTop, 
rectRight, rectBottom, Poppler::Page::FromTop), true);
+    QCOMPARE(page->search(QString::fromUtf8("search \"é\", \"à\" or \"ç\""), 
rectLeft, rectTop, rectRight, rectBottom, Poppler::Page::FromTop), true);
+    QCOMPARE(page->search(QString::fromUtf8("¥µ©"), rectLeft, rectTop, 
rectRight, rectBottom, Poppler::Page::FromTop), true);
+    QCOMPARE(page->search(QString::fromUtf8("¥©"), rectLeft, rectTop, 
rectRight, rectBottom, Poppler::Page::FromTop), false);
 
     QCOMPARE(page->search(QStringLiteral(u"non-ascii:"), rectLeft, rectTop, 
rectRight, rectBottom, Poppler::Page::FromTop), true);
 
@@ -47,12 +50,12 @@ void TestSearch::bug7063()
 
     QCOMPARE(page->search(QStringLiteral(u"latin1:"), rectLeft, rectTop, 
rectRight, rectBottom, Poppler::Page::FromTop), false);
 
-    QCOMPARE(page->search(QString::fromUtf8("é"), rectLeft, rectTop, 
rectRight, rectBottom, Poppler::Page::FromTop), true); // 
clazy:exclude=qstring-allocations
-    QCOMPARE(page->search(QString::fromUtf8("à"), rectLeft, rectTop, 
rectRight, rectBottom, Poppler::Page::FromTop), true); // 
clazy:exclude=qstring-allocations
-    QCOMPARE(page->search(QString::fromUtf8("ç"), rectLeft, rectTop, 
rectRight, rectBottom, Poppler::Page::FromTop), true); // 
clazy:exclude=qstring-allocations
-    QCOMPARE(page->search(QString::fromUtf8("search \"é\", \"à\" or \"ç\""), 
rectLeft, rectTop, rectRight, rectBottom, Poppler::Page::FromTop), true); // 
clazy:exclude=qstring-allocations
-    QCOMPARE(page->search(QString::fromUtf8("¥µ©"), rectLeft, rectTop, 
rectRight, rectBottom, Poppler::Page::FromTop), true); // 
clazy:exclude=qstring-allocations
-    QCOMPARE(page->search(QString::fromUtf8("¥©"), rectLeft, rectTop, 
rectRight, rectBottom, Poppler::Page::FromTop), false); // 
clazy:exclude=qstring-allocations
+    QCOMPARE(page->search(QString::fromUtf8("é"), rectLeft, rectTop, 
rectRight, rectBottom, Poppler::Page::FromTop), true);
+    QCOMPARE(page->search(QString::fromUtf8("à"), rectLeft, rectTop, 
rectRight, rectBottom, Poppler::Page::FromTop), true);
+    QCOMPARE(page->search(QString::fromUtf8("ç"), rectLeft, rectTop, 
rectRight, rectBottom, Poppler::Page::FromTop), true);
+    QCOMPARE(page->search(QString::fromUtf8("search \"é\", \"à\" or \"ç\""), 
rectLeft, rectTop, rectRight, rectBottom, Poppler::Page::FromTop), true);
+    QCOMPARE(page->search(QString::fromUtf8("¥µ©"), rectLeft, rectTop, 
rectRight, rectBottom, Poppler::Page::FromTop), true);
+    QCOMPARE(page->search(QString::fromUtf8("¥©"), rectLeft, rectTop, 
rectRight, rectBottom, Poppler::Page::FromTop), false);
 }
 
 void TestSearch::testNextAndPrevious()
@@ -201,17 +204,17 @@ void TestSearch::testIgnoreDiacritics()
     QCOMPARE(page->search(QStringLiteral("ciguena"), left, top, right, bottom, 
direction, mode0), false);
     QCOMPARE(page->search(QStringLiteral("Ciguena"), left, top, right, bottom, 
direction, mode1), false);
     QCOMPARE(page->search(QStringLiteral("ciguena"), left, top, right, bottom, 
direction, mode1), true);
-    QCOMPARE(page->search(QString::fromUtf8("cigüeña"), left, top, right, 
bottom, direction, mode1), true); // clazy:exclude=qstring-allocations
-    QCOMPARE(page->search(QString::fromUtf8("cigüena"), left, top, right, 
bottom, direction, mode1), false); // clazy:exclude=qstring-allocations
-    QCOMPARE(page->search(QString::fromUtf8("Cigüeña"), left, top, right, 
bottom, direction, mode1), false); // clazy:exclude=qstring-allocations
+    QCOMPARE(page->search(QString::fromUtf8("cigüeña"), left, top, right, 
bottom, direction, mode1), true);
+    QCOMPARE(page->search(QString::fromUtf8("cigüena"), left, top, right, 
bottom, direction, mode1), false);
+    QCOMPARE(page->search(QString::fromUtf8("Cigüeña"), left, top, right, 
bottom, direction, mode1), false);
     QCOMPARE(page->search(QStringLiteral("Ciguena"), left, top, right, bottom, 
direction, mode2), true);
     QCOMPARE(page->search(QStringLiteral("ciguena"), left, top, right, bottom, 
direction, mode2), true);
     QCOMPARE(page->search(QStringLiteral("Ciguena"), left, top, right, bottom, 
direction, mode3), true);
     QCOMPARE(page->search(QStringLiteral("ciguena"), left, top, right, bottom, 
direction, mode3), true);
 
-    QCOMPARE(page->search(QString::fromUtf8("cigüeña"), left, top, right, 
bottom, direction, mode4), true); // clazy:exclude=qstring-allocations
-    QCOMPARE(page->search(QString::fromUtf8("Cigüeña"), left, top, right, 
bottom, direction, mode4), true); // clazy:exclude=qstring-allocations
-    QCOMPARE(page->search(QString::fromUtf8("cigüena"), left, top, right, 
bottom, direction, mode4), false); // clazy:exclude=qstring-allocations
+    QCOMPARE(page->search(QString::fromUtf8("cigüeña"), left, top, right, 
bottom, direction, mode4), true);
+    QCOMPARE(page->search(QString::fromUtf8("Cigüeña"), left, top, right, 
bottom, direction, mode4), true);
+    QCOMPARE(page->search(QString::fromUtf8("cigüena"), left, top, right, 
bottom, direction, mode4), false);
     QCOMPARE(page->search(QStringLiteral("Ciguena"), left, top, right, bottom, 
direction, mode4), false);
 
     QCOMPARE(page->search(QStringLiteral("kopfe"), left, top, right, bottom, 
direction, mode2), true);
@@ -253,7 +256,7 @@ void TestSearch::testRussianSearch()
     double l, t, r, b; // left, top, right, bottom
 
     // In the searched page 5, these two words do exist: простой and Простой
-    const QString str = QString::fromUtf8("простой"); // 
clazy:exclude=qstring-allocations
+    const QString str = QString::fromUtf8("простой");
     QCOMPARE(page->search(str, l, t, r, b, direction, mode0), true);
     QCOMPARE(page->search(str, l, t, r, b, direction, mode1), true);
     QCOMPARE(page->search(str, l, t, r, b, direction, mode2), true);
@@ -272,12 +275,95 @@ void TestSearch::testDeseretSearch()
 
     double l, t, r, b; // left, top, right, bottom
 
-    const QString str = QString::fromUtf8("𐐐𐐯𐑊𐐬"); // 
clazy:exclude=qstring-allocations
+    const QString str = QString::fromUtf8("𐐐𐐯𐑊𐐬");
     QCOMPARE(page->search(str, l, t, r, b, Poppler::Page::FromTop, 
Poppler::Page::NoSearchFlags), true);
 
-    const QString str2 = QString::fromUtf8("𐐸𐐯𐑊𐐬"); // 
clazy:exclude=qstring-allocations
+    const QString str2 = QString::fromUtf8("𐐸𐐯𐑊𐐬");
     QCOMPARE(page->search(str2, l, t, r, b, Poppler::Page::FromTop, 
Poppler::Page::IgnoreCase), true);
 }
 
+void TestSearch::testAcrossLinesSearch()
+{
+    // Test for searching across lines with new flag Poppler::Page::AcrossLines
+    // and its automatic features like ignoring hyphen at end of line or 
allowing
+    // whitespace in the search term to match on newline character.
+    QScopedPointer<Poppler::Document> 
document(Poppler::Document::load(TESTDATADIR 
"/unittestcases/searchAcrossLines.pdf"));
+    QVERIFY(document);
+
+    QScopedPointer<Poppler::Page> page(document->page(1));
+    QVERIFY(page);
+
+    const Poppler::Page::SearchDirection direction = Poppler::Page::FromTop;
+
+    const Poppler::Page::SearchFlags empty = Poppler::Page::NoSearchFlags;
+    const Poppler::Page::SearchFlags mode0 = Poppler::Page::AcrossLines;
+    const Poppler::Page::SearchFlags mode1 = Poppler::Page::AcrossLines | 
Poppler::Page::IgnoreDiacritics;
+    const Poppler::Page::SearchFlags mode2 = Poppler::Page::AcrossLines | 
Poppler::Page::IgnoreDiacritics | Poppler::Page::IgnoreCase;
+    const Poppler::Page::SearchFlags mode2W = mode2 | 
Poppler::Page::WholeWords;
+
+    double l, t, r, b; // left, top, right, bottom
+
+    // In the searched page, "re-conocimiento", "PRUE-BA" and "imáge-nes" are each split across lines
+    const QString str1 = QString::fromUtf8("reconocimiento");
+    const QString str2 = QString::fromUtf8("IMagenes");
+    // Test it cannot be found with empty search flags
+    QCOMPARE(page->search(str1, l, t, r, b, direction, empty), false);
+    // Test it is found with AcrossLines option
+    QCOMPARE(page->search(str1, l, t, r, b, direction, mode0), true);
+    // Test AcrossLines with IgnoreDiacritics and IgnoreCase options
+    QCOMPARE(page->search(str2, l, t, r, b, direction, mode0), false);
+    QCOMPARE(page->search(str2, l, t, r, b, direction, mode1), false);
+    QCOMPARE(page->search(str2, l, t, r, b, direction, mode2), true);
+    // Test with WholeWords too
+    QCOMPARE(page->search(str2, l, t, r, b, direction, mode2W), true);
+
+    // Now test that AcrossLines also allows whitespace in the search term to 
match on newline char.
+    // In the searched page, "podrá" ends a line and "acordar" starts the next 
line, so we
+    // now test we match it with "podrá acordar"
+    const QString str3 = QString::fromUtf8("podrá acordar,");
+    QCOMPARE(page->search(str3, l, t, r, b, direction, mode0), true);
+    QCOMPARE(page->search(str3, l, t, r, b, direction, mode1), true);
+    QCOMPARE(page->search(str3, l, t, r, b, direction, mode2), true);
+    QCOMPARE(page->search(str3, l, t, r, b, direction, mode2W), true);
+    // now test it also works with IgnoreDiacritics and IgnoreCase
+    const QString str4 = QString::fromUtf8("PODRA acordar");
+    QCOMPARE(page->search(str4, l, t, r, b, direction, mode0), false);
+    QCOMPARE(page->search(str4, l, t, r, b, direction, mode1), false);
+    QCOMPARE(page->search(str4, l, t, r, b, direction, mode2), true);
+    QCOMPARE(page->search(str4, l, t, r, b, direction, mode2W), false); // 
false as it lacks ending comma
+
+    // Now test that when a hyphen char in the search term matches a hyphen at 
end of line,
+    // then we don't automatically ignore it, but treat it as a normal char.
+    // In the searched page, "CC BY-NC-SA 4.0" is split across two lines on 
the second hyphen
+    const QString str5 = QString::fromUtf8("CC BY-NC-SA 4.0");
+    QScopedPointer<Poppler::Page> page0(document->page(0));
+    QVERIFY(page0);
+    QCOMPARE(page0->search(str5, l, t, r, b, direction, mode0), true);
+    QCOMPARE(page0->search(str5, l, t, r, b, direction, mode1), true);
+    QCOMPARE(page0->search(str5, l, t, r, b, direction, mode2), true);
+    QCOMPARE(page0->search(str5, l, t, r, b, direction, mode2W), true);
+    QCOMPARE(page0->search(QString::fromUtf8("NC-SA"), l, t, r, b, direction, 
mode2W), false);
+    // Searching for "CC BY-NCSA 4.0" should also match, because hyphen is now 
ignored at end of line
+    const QString str6 = QString::fromUtf8("CC BY-NCSA 4.0");
+    QCOMPARE(page0->search(str6, l, t, r, b, direction, mode0), true);
+    QCOMPARE(page0->search(str6, l, t, r, b, direction, mode1), true);
+    QCOMPARE(page0->search(str6, l, t, r, b, direction, mode2), true);
+    QCOMPARE(page0->search(str6, l, t, r, b, direction, mode2W), true);
+
+    // Now for completeness, we will match the full text of two lines
+    const QString full2lines = QString::fromUtf8("Las pruebas se practicarán 
en vista pública, si bien, excepcionalmente, el Tribunal podrá acordar, 
mediante providencia, que determinadas pruebas se celebren fuera del acto de 
juicio");
+    QCOMPARE(page->search(full2lines, l, t, r, b, direction, mode0), true);
+    QCOMPARE(page->search(full2lines, l, t, r, b, direction, mode1), true);
+    QCOMPARE(page->search(full2lines, l, t, r, b, direction, mode2), true);
+    QCOMPARE(page->search(full2lines, l, t, r, b, direction, mode2W), true);
+    // And now the full text of two lines split by a hyphenated word
+    const QString full2linesHyphenated = QString::fromUtf8("Consiste 
básicamente en información digitalizada, codificados y alojados en un elemento 
contenedor digital (equipos, dispositivos periféricos, unidades de memoria, 
unidades "
+                                                           "virtualizadas, 
tramas");
+    QCOMPARE(page->search(full2linesHyphenated, l, t, r, b, direction, mode0), 
true);
+    QCOMPARE(page->search(full2linesHyphenated, l, t, r, b, direction, mode1), 
true);
+    QCOMPARE(page->search(full2linesHyphenated, l, t, r, b, direction, mode2), 
true);
+    QCOMPARE(page->search(full2linesHyphenated, l, t, r, b, direction, 
mode2W), true);
+}
+
 QTEST_GUILESS_MAIN(TestSearch)
 #include "check_search.moc"
diff --git a/qt6/src/poppler-page-private.h b/qt6/src/poppler-page-private.h
index e1312d44..6f1e668e 100644
--- a/qt6/src/poppler-page-private.h
+++ b/qt6/src/poppler-page-private.h
@@ -49,8 +49,8 @@ public:
     static Link *convertLinkActionToLink(::LinkAction *a, DocumentData 
*parentDoc, const QRectF &linkArea);
 
     TextPage *prepareTextSearch(const QString &text, Page::Rotation rotate, 
QVector<Unicode> *u);
-    bool performSingleTextSearch(TextPage *textPage, QVector<Unicode> &u, 
double &sLeft, double &sTop, double &sRight, double &sBottom, 
Page::SearchDirection direction, bool sCase, bool sWords, bool sDiacritics);
-    QList<QRectF> performMultipleTextSearch(TextPage *textPage, 
QVector<Unicode> &u, bool sCase, bool sWords, bool sDiacritics);
+    bool performSingleTextSearch(TextPage *textPage, QVector<Unicode> &u, 
double &sLeft, double &sTop, double &sRight, double &sBottom, 
Page::SearchDirection direction, bool sCase, bool sWords, bool sDiacritics, 
bool sAcrossLines);
+    QList<QRectF> performMultipleTextSearch(TextPage *textPage, 
QVector<Unicode> &u, bool sCase, bool sWords, bool sDiacritics, bool 
sAcrossLines);
 };
 
 }
diff --git a/qt6/src/poppler-page.cc b/qt6/src/poppler-page.cc
index f1f4116c..c561377d 100644
--- a/qt6/src/poppler-page.cc
+++ b/qt6/src/poppler-page.cc
@@ -50,6 +50,7 @@
 #include <QtGui/QPainter>
 
 #include <config.h>
+#include <cfloat>
 #include <poppler-config.h>
 #include <PDFDoc.h>
 #include <Catalog.h>
@@ -359,24 +360,28 @@ inline TextPage *PageData::prepareTextSearch(const 
QString &text, Page::Rotation
     return textPage;
 }
 
-inline bool PageData::performSingleTextSearch(TextPage *textPage, 
QVector<Unicode> &u, double &sLeft, double &sTop, double &sRight, double 
&sBottom, Page::SearchDirection direction, bool sCase, bool sWords, bool 
sDiacritics = false)
+inline bool PageData::performSingleTextSearch(TextPage *textPage, 
QVector<Unicode> &u, double &sLeft, double &sTop, double &sRight, double 
&sBottom, Page::SearchDirection direction, bool sCase, bool sWords, bool 
sDiacritics,
+                                              bool sAcrossLines)
 {
     if (direction == Page::FromTop)
-        return textPage->findText(u.data(), u.size(), true, true, false, 
false, sCase, sDiacritics, false, sWords, &sLeft, &sTop, &sRight, &sBottom);
+        return textPage->findText(u.data(), u.size(), true, true, false, 
false, sCase, sDiacritics, sAcrossLines, false, sWords, &sLeft, &sTop, &sRight, 
&sBottom, nullptr, nullptr);
     else if (direction == Page::NextResult)
-        return textPage->findText(u.data(), u.size(), false, true, true, 
false, sCase, sDiacritics, false, sWords, &sLeft, &sTop, &sRight, &sBottom);
+        return textPage->findText(u.data(), u.size(), false, true, true, 
false, sCase, sDiacritics, sAcrossLines, false, sWords, &sLeft, &sTop, &sRight, 
&sBottom, nullptr, nullptr);
     else if (direction == Page::PreviousResult)
-        return textPage->findText(u.data(), u.size(), false, true, true, 
false, sCase, sDiacritics, true, sWords, &sLeft, &sTop, &sRight, &sBottom);
+        return textPage->findText(u.data(), u.size(), false, true, true, 
false, sCase, sDiacritics, sAcrossLines, true, sWords, &sLeft, &sTop, &sRight, 
&sBottom, nullptr, nullptr);
 
     return false;
 }
 
-inline QList<QRectF> PageData::performMultipleTextSearch(TextPage *textPage, 
QVector<Unicode> &u, bool sCase, bool sWords, bool sDiacritics = false)
+inline QList<QRectF> PageData::performMultipleTextSearch(TextPage *textPage, 
QVector<Unicode> &u, bool sCase, bool sWords, bool sDiacritics, bool 
sAcrossLines)
 {
     QList<QRectF> results;
     double sLeft = 0.0, sTop = 0.0, sRight = 0.0, sBottom = 0.0;
+    bool sIgnoredHyphen = false;
+    PDFRectangle continueMatch;
+    continueMatch.x1 = DBL_MAX; // we use this to detect valid return values
 
-    while (textPage->findText(u.data(), u.size(), false, true, true, false, 
sCase, sDiacritics, false, sWords, &sLeft, &sTop, &sRight, &sBottom)) {
+    while (textPage->findText(u.data(), u.size(), false, true, true, false, 
sCase, sDiacritics, sAcrossLines, false, sWords, &sLeft, &sTop, &sRight, 
&sBottom, &continueMatch, &sIgnoredHyphen)) {
         QRectF result;
 
         result.setLeft(sLeft);
@@ -385,6 +390,18 @@ inline QList<QRectF> 
PageData::performMultipleTextSearch(TextPage *textPage, QVe
         result.setBottom(sBottom);
 
         results.append(result);
+
+        if (sAcrossLines && continueMatch.x1 != DBL_MAX) {
+            QRectF resultN;
+
+            resultN.setLeft(continueMatch.x1);
+            resultN.setTop(continueMatch.y1);
+            resultN.setRight(continueMatch.x2);
+            resultN.setBottom(continueMatch.y1);
+
+            results.append(resultN);
+            continueMatch.x1 = DBL_MAX;
+        }
     }
 
     return results;
@@ -645,11 +662,12 @@ bool Page::search(const QString &text, double &sLeft, 
double &sTop, double &sRig
     const bool sCase = flags.testFlag(IgnoreCase) ? false : true;
     const bool sWords = flags.testFlag(WholeWords) ? true : false;
     const bool sDiacritics = flags.testFlag(IgnoreDiacritics) ? true : false;
+    const bool sAcrossLines = flags.testFlag(AcrossLines) ? true : false;
 
     QVector<Unicode> u;
     TextPage *textPage = m_page->prepareTextSearch(text, rotate, &u);
 
-    const bool found = m_page->performSingleTextSearch(textPage, u, sLeft, 
sTop, sRight, sBottom, direction, sCase, sWords, sDiacritics);
+    const bool found = m_page->performSingleTextSearch(textPage, u, sLeft, 
sTop, sRight, sBottom, direction, sCase, sWords, sDiacritics, sAcrossLines);
 
     textPage->decRefCnt();
 
@@ -661,11 +679,12 @@ QList<QRectF> Page::search(const QString &text, 
SearchFlags flags, Rotation rota
     const bool sCase = flags.testFlag(IgnoreCase) ? false : true;
     const bool sWords = flags.testFlag(WholeWords) ? true : false;
     const bool sDiacritics = flags.testFlag(IgnoreDiacritics) ? true : false;
+    const bool sAcrossLines = flags.testFlag(AcrossLines) ? true : false;
 
     QVector<Unicode> u;
     TextPage *textPage = m_page->prepareTextSearch(text, rotate, &u);
 
-    const QList<QRectF> results = m_page->performMultipleTextSearch(textPage, 
u, sCase, sWords, sDiacritics);
+    const QList<QRectF> results = m_page->performMultipleTextSearch(textPage, 
u, sCase, sWords, sDiacritics, sAcrossLines);
 
     textPage->decRefCnt();
 
diff --git a/qt6/src/poppler-qt6.h b/qt6/src/poppler-qt6.h
index 5d38fbdd..c069e393 100644
--- a/qt6/src/poppler-qt6.h
+++ b/qt6/src/poppler-qt6.h
@@ -730,9 +730,12 @@ rather unexpected results.
         NoSearchFlags = 0x00000000,
         IgnoreCase = 0x00000001, ///< Case differences are ignored
         WholeWords = 0x00000002, ///< Only whole words are matched
-        IgnoreDiacritics = 0x00000004 ///< Diacritic differences (eg. accents, 
umlauts, diaeresis) are ignored.
-                                      ///< This option will have no effect if 
the search term contains characters which
-                                      ///< are not pure ascii.
+        IgnoreDiacritics = 0x00000004, ///< Diacritic differences (eg. 
accents, umlauts, diaeresis) are ignored.
+                                       ///< This option will have no effect if 
the search term contains characters which
+                                       ///< are not pure ascii.
+        AcrossLines = 0x00000008 ///< Allows matching text that spans from the 
end of a line to the next line.
+                                 ///< It won't match text spanning more 
than two lines. Automatically ignores a hyphen
+                                 ///< at the end of a line, and allows whitespace in the 
search term to match a newline. \since 21.05.0
     };
     Q_DECLARE_FLAGS(SearchFlags, SearchFlag)
 
@@ -751,6 +754,9 @@ rather unexpected results.
     /**
        Returns a list of all occurrences of the specified text on the page.
 
+       If SearchFlags::AcrossLines is set in \p flags, a rect may cover only
+       part of a match when that match is split across lines.
+
        \param text the text to search
        \param flags the flags to consider during matching
        \param rotate the rotation to apply for the search order
diff --git a/qt6/tests/check_search.cpp b/qt6/tests/check_search.cpp
index c2ba3993..d8534975 100644
--- a/qt6/tests/check_search.cpp
+++ b/qt6/tests/check_search.cpp
@@ -8,6 +8,7 @@ class TestSearch : public QObject
 public:
     TestSearch(QObject *parent = nullptr) : QObject(parent) { }
 private slots:
+    void testAcrossLinesSearch(); // leave it first
     void bug7063();
     void testNextAndPrevious();
     void testWholeWordsOnly();
@@ -279,5 +280,89 @@ void TestSearch::testDeseretSearch()
     QCOMPARE(page->search(str2, l, t, r, b, Poppler::Page::FromTop, 
Poppler::Page::IgnoreCase), true);
 }
 
+void TestSearch::testAcrossLinesSearch()
+{
+    // Test for searching across lines with new flag Poppler::Page::AcrossLines
+    // and its automatic features like ignoring hyphen at end of line or 
allowing
+    // whitespace in the search term to match on newline character.
+    QScopedPointer<Poppler::Document> 
document(Poppler::Document::load(TESTDATADIR 
"/unittestcases/searchAcrossLines.pdf"));
+    QVERIFY(document);
+
+    QScopedPointer<Poppler::Page> page(document->page(1));
+    QVERIFY(page);
+
+    const Poppler::Page::SearchDirection direction = Poppler::Page::FromTop;
+
+    const Poppler::Page::SearchFlags empty = Poppler::Page::NoSearchFlags;
+    const Poppler::Page::SearchFlags mode0 = Poppler::Page::AcrossLines;
+    const Poppler::Page::SearchFlags mode1 = Poppler::Page::AcrossLines | 
Poppler::Page::IgnoreDiacritics;
+    const Poppler::Page::SearchFlags mode2 = Poppler::Page::AcrossLines | 
Poppler::Page::IgnoreDiacritics | Poppler::Page::IgnoreCase;
+    const Poppler::Page::SearchFlags mode2W = mode2 | 
Poppler::Page::WholeWords;
+
+    double l, t, r, b; // left, top, right, bottom
+
+    // In the searched page, each of "re-conocimiento" "PRUE-BA" "imáge-nes" 
happen split across lines
+    const QString str1 = QString::fromUtf8("reconocimiento"); // 
clazy:exclude=qstring-allocations
+    const QString str2 = QString::fromUtf8("IMagenes"); // 
clazy:exclude=qstring-allocations
+    // Test it cannot be found with empty search flags
+    QCOMPARE(page->search(str1, l, t, r, b, direction, empty), false);
+    // Test it is found with AcrossLines option
+    QCOMPARE(page->search(str1, l, t, r, b, direction, mode0), true);
+    // Test AcrossLines with IgnoreDiacritics and IgnoreCase options
+    QCOMPARE(page->search(str2, l, t, r, b, direction, mode0), false);
+    QCOMPARE(page->search(str2, l, t, r, b, direction, mode1), false);
+    QCOMPARE(page->search(str2, l, t, r, b, direction, mode2), true);
+    // Test with WholeWords too
+    QCOMPARE(page->search(str2, l, t, r, b, direction, mode2W), true);
+
+    // Now test that AcrossLines also allows whitespace in the search term to 
match on newline char.
+    // In the searched page, "podrá" ends a line and "acordar" starts the next 
line, so we
+    // now test we match it with "podrá acordar"
+    const QString str3 = QString::fromUtf8("podrá acordar,"); // 
clazy:exclude=qstring-allocations
+    QCOMPARE(page->search(str3, l, t, r, b, direction, mode0), true);
+    QCOMPARE(page->search(str3, l, t, r, b, direction, mode1), true);
+    QCOMPARE(page->search(str3, l, t, r, b, direction, mode2), true);
+    QCOMPARE(page->search(str3, l, t, r, b, direction, mode2W), true);
+    // now test it also works with IgnoreDiacritics and IgnoreCase
+    const QString str4 = QString::fromUtf8("PODRA acordar"); // 
clazy:exclude=qstring-allocations
+    QCOMPARE(page->search(str4, l, t, r, b, direction, mode0), false);
+    QCOMPARE(page->search(str4, l, t, r, b, direction, mode1), false);
+    QCOMPARE(page->search(str4, l, t, r, b, direction, mode2), true);
+    QCOMPARE(page->search(str4, l, t, r, b, direction, mode2W), false); // 
false as it lacks ending comma
+
+    // Now test that when a hyphen char in the search term matches a hyphen at 
end of line,
+    // then we don't automatically ignore it, but treat it as a normal char.
+    // In the searched page, "CC BY-NC-SA 4.0" is split across two lines on 
the second hyphen
+    const QString str5 = QString::fromUtf8("CC BY-NC-SA 4.0"); // 
clazy:exclude=qstring-allocations
+    QScopedPointer<Poppler::Page> page0(document->page(0));
+    QVERIFY(page0);
+    QCOMPARE(page0->search(str5, l, t, r, b, direction, mode0), true);
+    QCOMPARE(page0->search(str5, l, t, r, b, direction, mode1), true);
+    QCOMPARE(page0->search(str5, l, t, r, b, direction, mode2), true);
+    QCOMPARE(page0->search(str5, l, t, r, b, direction, mode2W), true);
+    QCOMPARE(page0->search(QString::fromUtf8("NC-SA"), l, t, r, b, direction, 
mode2W), false); // clazy:exclude=qstring-allocations
+    // Searching for "CC BY-NCSA 4.0" should also match, because hyphen is now 
ignored at end of line
+    const QString str6 = QString::fromUtf8("CC BY-NCSA 4.0"); // 
clazy:exclude=qstring-allocations
+    QCOMPARE(page0->search(str6, l, t, r, b, direction, mode0), true);
+    QCOMPARE(page0->search(str6, l, t, r, b, direction, mode1), true);
+    QCOMPARE(page0->search(str6, l, t, r, b, direction, mode2), true);
+    QCOMPARE(page0->search(str6, l, t, r, b, direction, mode2W), true);
+
+    // Now for completeness, we will match the full text of two lines
+    const QString full2lines = QString::fromUtf8(
+            "Las pruebas se practicarán en vista pública, si bien, 
excepcionalmente, el Tribunal podrá acordar, mediante providencia, que 
determinadas pruebas se celebren fuera del acto de juicio"); // 
clazy:exclude=qstring-allocations
+    QCOMPARE(page->search(full2lines, l, t, r, b, direction, mode0), true);
+    QCOMPARE(page->search(full2lines, l, t, r, b, direction, mode1), true);
+    QCOMPARE(page->search(full2lines, l, t, r, b, direction, mode2), true);
+    QCOMPARE(page->search(full2lines, l, t, r, b, direction, mode2W), true);
+    // And now the full text of two lines split by a hyphenated word
+    const QString full2linesHyphenated = QString::fromUtf8("Consiste 
básicamente en información digitalizada, codificados y alojados en un elemento 
contenedor digital (equipos, dispositivos periféricos, unidades de memoria, 
unidades "
+                                                           "virtualizadas, 
tramas"); // clazy:exclude=qstring-allocations
+    QCOMPARE(page->search(full2linesHyphenated, l, t, r, b, direction, mode0), 
true);
+    QCOMPARE(page->search(full2linesHyphenated, l, t, r, b, direction, mode1), 
true);
+    QCOMPARE(page->search(full2linesHyphenated, l, t, r, b, direction, mode2), 
true);
+    QCOMPARE(page->search(full2linesHyphenated, l, t, r, b, direction, 
mode2W), true);
+}
+
 QTEST_GUILESS_MAIN(TestSearch)
 #include "check_search.moc"
_______________________________________________
poppler mailing list
poppler@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/poppler

Reply via email to