From: Радомир Хаџић <[email protected]>
Date: Fri, 30 Nov 2018 21:56:12 +0100
Subject: Searching for text in PDF files is wrong
To: gtk-app-devel-list <[email protected]>

Hi.

I use poppler_page_find_text() to find text in PDF files. This returns
GList of pointers to PopplerRectangles. Then I use
poppler_page_render_selection() to mark the found text.

What is wrong is that PopplerRectangles returned by
poppler_page_find_text() are incompatible with those that
poppler_page_render_selection() requests, which is why the wrong text
is selected.

I have found that to make those two compatible, I have to do the
following to PopplerRectangles returned by poppler_page_find_text():
1) SWAP(rectangle.x1, rectangle.x2);
2) SWAP(rectangle.y1, rectangle.y2);
3) rectangle.y1 = page_height - rectangle.y1;
4) rectangle.y2 = page_height - rectangle.y2;
But this does not solve the problem because the marked text cycles
between right and wrong again while resizing the window.

I have created a small program that illustrates the problem. Here it
is: https://pastebin.com/h3F56Yv7 (I've also sent an attachment but
last time you didn't get it so this paste is a fallback in case you
don't get it again.)
You ought to supply two arguments when running the program: the
absolute path to a PDF file and the text you want to search for,
respectively. Pay attention to the selected text with and without
lines 54-57.

How can I make the found text to be marked properly? This "workaround"
does not work very well and it is an ugly solution anyway.
/* OS: Arch Linux
 * Kernel: Linux 4.19.4
 * Compiler: GCC 8.2.1
 * GUI toolkit: GTK+ 3.24
 * PDF renderer: Poppler 0.71
 *
 * Compile with "gcc main.c `pkg-config --cflags --libs gtk+-3.0 poppler-glib`"
 * Run with "a.out /absolute/path/to/file.pdf text-to-find"
 */

#include <gtk/gtk.h>
#include <poppler.h>
#include <string.h>
#define PAGE 0
#define SWAP(x, y) do {x = x + y;y = x - y;x = x - y;} while (0)

struct Program
{
	GtkWidget *window;
	GtkWidget *drawing_area;
	PopplerDocument *doc;
	PopplerPage *page;
	GList *selections;
};

// render page and found text onto drawing area
static gboolean render_page(GtkWidget *drawing_area, cairo_t *cr, struct Program *program)
{
	PopplerColor fg, bg;

	fg.red = 0xffff;
	fg.blue = 0xffff;
	fg.green = 0xffff;

	bg.red = 0x0;
	bg.blue = 0x0;
	bg.green = 0x0;

	double page_width, page_height;
	poppler_page_get_size(program->page, &page_width, &page_height);

	cairo_set_source_rgb(cr, 1.0, 1.0, 1.0);
	cairo_rectangle(cr, 0.0, 0.0, page_width, page_height);
	cairo_fill(cr);

	poppler_page_render(program->page, cr);

	for (GList *item = program->selections;item != NULL;item = item->next)
	{
		PopplerRectangle *rectangle = (PopplerRectangle *) item->data;

		// comment out these lines to get the right selection!
		/*
		SWAP(rectangle->x1, rectangle->x2);
		SWAP(rectangle->y1, rectangle->y2);
		rectangle->y1 = page_height - rectangle->y1;
		rectangle->y2 = page_height - rectangle->y2;
		*/

		poppler_page_render_selection(program->page, cr, rectangle, NULL, POPPLER_SELECTION_GLYPH, &fg, &bg);
	}

	return TRUE;
}

// open document and page
static gboolean open_page(struct Program *program, char *filename)
{
	char *filename_uri = (char *) g_malloc(sizeof(char) * (strlen(filename) + strlen("file://") + 1));
	strcpy(filename_uri, "file://");
	strcat(filename_uri, filename);

	program->doc = poppler_document_new_from_file(filename_uri, NULL, NULL);
	if (program->doc == NULL)
		return FALSE;

	program->page = poppler_document_get_page(program->doc , PAGE);

	g_free(filename_uri);

	return TRUE;
}

// search for text
static void find_text(struct Program *program, char *text)
{
	program->selections = poppler_page_find_text(program->page, text);
}

// create and show window
static void create_window(struct Program *program)
{
	program->window = gtk_window_new(GTK_WINDOW_TOPLEVEL);
	gtk_window_set_default_size(GTK_WINDOW(program->window), 400, 600);
	g_signal_connect(program->window, "delete-event", G_CALLBACK(gtk_main_quit), NULL);

	program->drawing_area = gtk_drawing_area_new();
	g_signal_connect(program->drawing_area, "draw", G_CALLBACK(render_page), program);

	gtk_container_add(GTK_CONTAINER(program->window), program->drawing_area);

	gtk_widget_show_all(program->window);
}

int main(int argc, char **argv)
{
	if (argc != 3)
		return -1;

	gtk_init(&argc, &argv);

	struct Program program;

	create_window(&program);

	if (!open_page(&program, argv[1]))
		return -1;

	find_text(&program, argv[2]);

	gtk_widget_queue_draw(program.drawing_area);

	gtk_main();

	return 0;
}
_______________________________________________
poppler mailing list
[email protected]
https://lists.freedesktop.org/mailman/listinfo/poppler

Reply via email to