From: Радомир Хаџић <[email protected]>
Date: Fri, 30 Nov 2018 21:56:12 +0100
Subject: Searching for text in PDF files is wrong
To: gtk-app-devel-list <[email protected]>
Hi.
I use poppler_page_find_text() to find text in PDF files. This returns
GList of pointers to PopplerRectangles. Then I use
poppler_page_render_selection() to mark the found text.
What is wrong is that PopplerRectangles returned by
poppler_page_find_text() are incompatible with those that
poppler_page_render_selection() requests, which is why the wrong text
is selected.
I have found that to make those two compatible, I have to do the
following to PopplerRectangles returned by poppler_page_find_text():
1) SWAP(rectangle.x1, rectangle.x2);
2) SWAP(rectangle.y1, rectangle.y2);
3) rectangle.y1 = page_height - rectangle.y1;
4) rectangle.y2 = page_height - rectangle.y2;
But this does not solve the problem because the marked text cycles
between right and wrong again while resizing the window.
I have created a small program that illustrates the problem. Here it
is: https://pastebin.com/h3F56Yv7 (I've also sent an attachment but
last time you didn't get it so this paste is a fallback in case you
don't get it again.)
You ought to supply two arguments when running the program: the
absolute path to a PDF file and the text you want to search for,
respectively. Pay attention to the selected text with and without
lines 54-57.
How can I make the found text to be marked properly? This "workaround"
does not work very well and it is an ugly solution anyway.
/* OS: Arch Linux
* Kernel: Linux 4.19.4
* Compiler: GCC 8.2.1
* GUI toolkit: GTK+ 3.24
* PDF renderer: Poppler 0.71
*
* Compile with "gcc main.c `pkg-config --cflags --libs gtk+-3.0 poppler-glib`"
* Run with "a.out /absolute/path/to/file.pdf text-to-find"
*/
#include <gtk/gtk.h>
#include <poppler.h>
#include <string.h>
#define PAGE 0
#define SWAP(x, y) do {x = x + y;y = x - y;x = x - y;} while (0)
struct Program
{
GtkWidget *window;
GtkWidget *drawing_area;
PopplerDocument *doc;
PopplerPage *page;
GList *selections;
};
// render page and found text onto drawing area
static gboolean render_page(GtkWidget *drawing_area, cairo_t *cr, struct Program *program)
{
PopplerColor fg, bg;
fg.red = 0xffff;
fg.blue = 0xffff;
fg.green = 0xffff;
bg.red = 0x0;
bg.blue = 0x0;
bg.green = 0x0;
double page_width, page_height;
poppler_page_get_size(program->page, &page_width, &page_height);
cairo_set_source_rgb(cr, 1.0, 1.0, 1.0);
cairo_rectangle(cr, 0.0, 0.0, page_width, page_height);
cairo_fill(cr);
poppler_page_render(program->page, cr);
for (GList *item = program->selections;item != NULL;item = item->next)
{
PopplerRectangle *rectangle = (PopplerRectangle *) item->data;
// comment out these lines to get the right selection!
/*
SWAP(rectangle->x1, rectangle->x2);
SWAP(rectangle->y1, rectangle->y2);
rectangle->y1 = page_height - rectangle->y1;
rectangle->y2 = page_height - rectangle->y2;
*/
poppler_page_render_selection(program->page, cr, rectangle, NULL, POPPLER_SELECTION_GLYPH, &fg, &bg);
}
return TRUE;
}
// open document and page
static gboolean open_page(struct Program *program, char *filename)
{
char *filename_uri = (char *) g_malloc(sizeof(char) * (strlen(filename) + strlen("file://") + 1));
strcpy(filename_uri, "file://");
strcat(filename_uri, filename);
program->doc = poppler_document_new_from_file(filename_uri, NULL, NULL);
if (program->doc == NULL)
return FALSE;
program->page = poppler_document_get_page(program->doc , PAGE);
g_free(filename_uri);
return TRUE;
}
// search for text
static void find_text(struct Program *program, char *text)
{
program->selections = poppler_page_find_text(program->page, text);
}
// create and show window
static void create_window(struct Program *program)
{
program->window = gtk_window_new(GTK_WINDOW_TOPLEVEL);
gtk_window_set_default_size(GTK_WINDOW(program->window), 400, 600);
g_signal_connect(program->window, "delete-event", G_CALLBACK(gtk_main_quit), NULL);
program->drawing_area = gtk_drawing_area_new();
g_signal_connect(program->drawing_area, "draw", G_CALLBACK(render_page), program);
gtk_container_add(GTK_CONTAINER(program->window), program->drawing_area);
gtk_widget_show_all(program->window);
}
int main(int argc, char **argv)
{
if (argc != 3)
return -1;
gtk_init(&argc, &argv);
struct Program program;
create_window(&program);
if (!open_page(&program, argv[1]))
return -1;
find_text(&program, argv[2]);
gtk_widget_queue_draw(program.drawing_area);
gtk_main();
return 0;
}
_______________________________________________
poppler mailing list
[email protected]
https://lists.freedesktop.org/mailman/listinfo/poppler