glib/demo/Makefile.am | 4 glib/demo/find.c | 282 ++++++++++++++++++++++++++++++++++++++++++++++ glib/demo/find.h | 31 +++++ glib/demo/main.c | 6 glib/demo/text.c | 175 ++++++++++++++++++++++++++++ glib/demo/text.h | 31 +++++ glib/poppler-page.cc | 148 ++++++++++++++++-------- glib/poppler-private.h | 4 poppler/CairoOutputDev.cc | 64 +++++++++- poppler/CairoOutputDev.h | 14 +- poppler/TextOutputDev.cc | 218 +++++++++++++++++++++-------------- poppler/TextOutputDev.h | 45 +++++-- qt4/src/poppler-page.cc | 2 13 files changed, 870 insertions(+), 154 deletions(-)
New commits: commit ba91b889c3b50239e339938f3c9d31fffcd87d44 Author: Carlos Garcia Campos <[email protected]> Date: Sat Dec 20 19:29:40 2008 +0100 Make destructor private in TextPage diff --git a/poppler/TextOutputDev.h b/poppler/TextOutputDev.h index 2bb91c2..f2b7eab 100644 --- a/poppler/TextOutputDev.h +++ b/poppler/TextOutputDev.h @@ -466,9 +466,6 @@ public: // Constructor. TextPage(GBool rawOrderA); - // Destructor. - ~TextPage(); - void incRefCnt(); void decRefCnt(); @@ -563,7 +560,10 @@ public: #endif private: - + + // Destructor. + ~TextPage(); + void clear(); void assignColumns(TextLineFrag *frags, int nFrags, int rot); int dumpFragment(Unicode *text, int len, UnicodeMap *uMap, GooString *s); commit 0da16537aa83f6ed6d8895c7e54266263a71c1cf Author: Carlos Garcia Campos <[email protected]> Date: Fri Dec 19 19:08:21 2008 +0100 Refactor actual text code adding a new ActualText class It's used by both Text and Cairo ouput devices avoiding duplicated code in such classes. diff --git a/poppler/CairoOutputDev.cc b/poppler/CairoOutputDev.cc index 42bcaac..843b40b 100644 --- a/poppler/CairoOutputDev.cc +++ b/poppler/CairoOutputDev.cc @@ -138,7 +138,7 @@ CairoOutputDev::CairoOutputDev() { knockoutCount = 0; text = NULL; - actualTextBMCLevel = 0; + actualText = NULL; } CairoOutputDev::~CairoOutputDev() { @@ -157,7 +157,9 @@ CairoOutputDev::~CairoOutputDev() { if (shape) cairo_pattern_destroy (shape); if (text) - text->decRefCnt(); + text->decRefCnt(); + if (actualText) + delete actualText; } void CairoOutputDev::setCairo(cairo_t *cairo) @@ -185,11 +187,15 @@ void CairoOutputDev::setTextPage(TextPage *text) { if (this->text) this->text->decRefCnt(); + if (actualText) + delete actualText; if (text) { this->text = text; this->text->incRefCnt(); + actualText = new ActualText(text); } else { this->text = NULL; + actualText = NULL; } } @@ -608,28 +614,7 @@ void CairoOutputDev::drawChar(GfxState *state, double x, double y, if (!text) return; - - if (actualTextBMCLevel == 0) { - text->addChar(state, x, y, dx, dy, code, nBytes, u, uLen); - } else { - // Inside ActualText span. - if (newActualTextSpan) { - actualText_x = x; - actualText_y = y; - actualText_dx = dx; - actualText_dy = dy; - newActualTextSpan = gFalse; - } else { - if (x < actualText_x) - actualText_x = x; - if (y < actualText_y) - actualText_y = y; - if (x + dx > actualText_x + actualText_dx) - actualText_dx = x + dx - actualText_x; - if (y + dy > actualText_y + actualText_dy) - actualText_dy = y + dy - actualText_y; - } - } + actualText->addChar (state, x, y, dx, dy, code, nBytes, u, uLen); } void CairoOutputDev::endString(GfxState *state) @@ -774,81 +759,14 @@ void CairoOutputDev::endTextObject(GfxState *state) { void CairoOutputDev::beginMarkedContent(char *name, Dict *properties) { - Object obj; - - if (!text) - return; - - if (actualTextBMCLevel > 0) { - // Already inside a ActualText span. - actualTextBMCLevel++; - return; - } - - if (properties->lookup("ActualText", &obj)) { - if (obj.isString()) { - actualText = obj.getString(); - actualTextBMCLevel = 1; - newActualTextSpan = gTrue; - } - } + if (text) + actualText->beginMC(properties); } void CairoOutputDev::endMarkedContent(GfxState *state) { - char *uniString = NULL; - Unicode *uni; - int length, i; - - if (!text) - return; - - if (actualTextBMCLevel > 0) { - actualTextBMCLevel--; - if (actualTextBMCLevel == 0) { - // ActualText span closed. Output the span text and the - // extents of all the glyphs inside the span - - if (newActualTextSpan) { - // No content inside span. - actualText_x = state->getCurX(); - actualText_y = state->getCurY(); - actualText_dx = 0; - actualText_dy = 0; - } - - if (!actualText->hasUnicodeMarker()) { - if (actualText->getLength() > 0) { - //non-unicode string -- assume pdfDocEncoding and - //try to convert to UTF16BE - uniString = pdfDocEncodingToUTF16(actualText, &length); - } else { - length = 0; - } - } else { - uniString = actualText->getCString(); - length = actualText->getLength(); - } - - if (length < 3) - length = 0; - else - length = length/2 - 1; - uni = new Unicode[length]; - for (i = 0 ; i < length; i++) - uni[i] = (uniString[2 + i*2]<<8) + uniString[2 + i*2+1]; - - text->addChar(state, - actualText_x, actualText_y, - actualText_dx, actualText_dy, - 0, 1, uni, length); - - delete [] uni; - if (!actualText->hasUnicodeMarker()) - delete [] uniString; - delete actualText; - } - } + if (text) + actualText->endMC(state); } static inline int splashRound(SplashCoord x) { diff --git a/poppler/CairoOutputDev.h b/poppler/CairoOutputDev.h index 3273d74..1e410c1 100644 --- a/poppler/CairoOutputDev.h +++ b/poppler/CairoOutputDev.h @@ -258,12 +258,7 @@ protected: GBool prescaleImages; TextPage *text; // text for the current page - int actualTextBMCLevel; // > 0 when inside ActualText span. Incremented - // for each nested BMC inside the span. - GooString *actualText; // replacement text for the span - GBool newActualTextSpan; // true at start of span. used to init the extent - double actualText_x, actualText_y; // extent of the text inside the span - double actualText_dx, actualText_dy; + ActualText *actualText; cairo_pattern_t *group; cairo_pattern_t *shape; diff --git a/poppler/TextOutputDev.cc b/poppler/TextOutputDev.cc index 1cf3319..816cc8c 100644 --- a/poppler/TextOutputDev.cc +++ b/poppler/TextOutputDev.cc @@ -4494,6 +4494,118 @@ TextWordList *TextPage::makeWordList(GBool physLayout) { #endif //------------------------------------------------------------------------ +// ActualText +//------------------------------------------------------------------------ +ActualText::ActualText(TextPage *out) { + out->incRefCnt(); + text = out; + actualText = NULL; + actualTextBMCLevel = 0; +} + +ActualText::~ActualText() { + if (actualText) + delete actualText; + text->decRefCnt(); +} + +void ActualText::addChar(GfxState *state, double x, double y, + double dx, double dy, + CharCode c, int nBytes, Unicode *u, int uLen) { + if (actualTextBMCLevel == 0) { + text->addChar(state, x, y, dx, dy, c, nBytes, u, uLen); + } else { + // Inside ActualText span. + if (newActualTextSpan) { + actualText_x = x; + actualText_y = y; + actualText_dx = dx; + actualText_dy = dy; + newActualTextSpan = gFalse; + } else { + if (x < actualText_x) + actualText_x = x; + if (y < actualText_y) + actualText_y = y; + if (x + dx > actualText_x + actualText_dx) + actualText_dx = x + dx - actualText_x; + if (y + dy > actualText_y + actualText_dy) + actualText_dy = y + dy - actualText_y; + } + } +} + +void ActualText::beginMC(Dict *properties) { + if (actualTextBMCLevel > 0) { + // Already inside a ActualText span. + actualTextBMCLevel++; + return; + } + + Object obj; + if (properties->lookup("ActualText", &obj)) { + if (obj.isString()) { + actualText = obj.getString(); + actualTextBMCLevel = 1; + newActualTextSpan = gTrue; + } + } +} + +void ActualText::endMC(GfxState *state) { + char *uniString = NULL; + Unicode *uni; + int length, i; + + if (actualTextBMCLevel > 0) { + actualTextBMCLevel--; + if (actualTextBMCLevel == 0) { + // ActualText span closed. Output the span text and the + // extents of all the glyphs inside the span + + if (newActualTextSpan) { + // No content inside span. + actualText_x = state->getCurX(); + actualText_y = state->getCurY(); + actualText_dx = 0; + actualText_dy = 0; + } + + if (!actualText->hasUnicodeMarker()) { + if (actualText->getLength() > 0) { + //non-unicode string -- assume pdfDocEncoding and + //try to convert to UTF16BE + uniString = pdfDocEncodingToUTF16(actualText, &length); + } else { + length = 0; + } + } else { + uniString = actualText->getCString(); + length = actualText->getLength(); + } + + if (length < 3) + length = 0; + else + length = length/2 - 1; + uni = new Unicode[length]; + for (i = 0 ; i < length; i++) + uni[i] = (uniString[2 + i*2]<<8) + uniString[2 + i*2+1]; + + text->addChar(state, + actualText_x, actualText_y, + actualText_dx, actualText_dy, + 0, 1, uni, length); + + delete [] uni; + if (!actualText->hasUnicodeMarker()) + delete [] uniString; + delete actualText; + } + } +} + +//------------------------------------------------------------------------ // TextOutputDev //------------------------------------------------------------------------ @@ -4532,7 +4644,7 @@ TextOutputDev::TextOutputDev(char *fileName, GBool physLayoutA, // set up text object text = new TextPage(rawOrderA); - actualTextBMCLevel = 0; + actualText = new ActualText(text); } TextOutputDev::TextOutputDev(TextOutputFunc func, void *stream, @@ -4544,8 +4656,8 @@ TextOutputDev::TextOutputDev(TextOutputFunc func, void *stream, rawOrder = rawOrderA; doHTML = gFalse; text = new TextPage(rawOrderA); + actualText = new ActualText(text); ok = gTrue; - actualTextBMCLevel = 0; } TextOutputDev::~TextOutputDev() { @@ -4558,6 +4670,7 @@ TextOutputDev::~TextOutputDev() { if (text) { text->decRefCnt(); } + delete actualText; } void TextOutputDev::startPage(int pageNum, GfxState *state) { @@ -4586,100 +4699,17 @@ void TextOutputDev::drawChar(GfxState *state, double x, double y, double dx, double dy, double originX, double originY, CharCode c, int nBytes, Unicode *u, int uLen) { - if (actualTextBMCLevel == 0) { - text->addChar(state, x, y, dx, dy, c, nBytes, u, uLen); - } else { - // Inside ActualText span. - if (newActualTextSpan) { - actualText_x = x; - actualText_y = y; - actualText_dx = dx; - actualText_dy = dy; - newActualTextSpan = gFalse; - } else { - if (x < actualText_x) - actualText_x = x; - if (y < actualText_y) - actualText_y = y; - if (x + dx > actualText_x + actualText_dx) - actualText_dx = x + dx - actualText_x; - if (y + dy > actualText_y + actualText_dy) - actualText_dy = y + dy - actualText_y; - } - } + actualText->addChar(state, x, y, dx, dy, c, nBytes, u, uLen); } void TextOutputDev::beginMarkedContent(char *name, Dict *properties) { - Object obj; - - if (actualTextBMCLevel > 0) { - // Already inside a ActualText span. - actualTextBMCLevel++; - return; - } - - if (properties->lookup("ActualText", &obj)) { - if (obj.isString()) { - actualText = obj.getString(); - actualTextBMCLevel = 1; - newActualTextSpan = gTrue; - } - } + actualText->beginMC(properties); } void TextOutputDev::endMarkedContent(GfxState *state) { - char *uniString = NULL; - Unicode *uni; - int length, i; - - if (actualTextBMCLevel > 0) { - actualTextBMCLevel--; - if (actualTextBMCLevel == 0) { - // ActualText span closed. Output the span text and the - // extents of all the glyphs inside the span - - if (newActualTextSpan) { - // No content inside span. - actualText_x = state->getCurX(); - actualText_y = state->getCurY(); - actualText_dx = 0; - actualText_dy = 0; - } - - if (!actualText->hasUnicodeMarker()) { - if (actualText->getLength() > 0) { - //non-unicode string -- assume pdfDocEncoding and - //try to convert to UTF16BE - uniString = pdfDocEncodingToUTF16(actualText, &length); - } else { - length = 0; - } - } else { - uniString = actualText->getCString(); - length = actualText->getLength(); - } - - if (length < 3) - length = 0; - else - length = length/2 - 1; - uni = new Unicode[length]; - for (i = 0 ; i < length; i++) - uni[i] = (uniString[2 + i*2]<<8) + uniString[2 + i*2+1]; - - text->addChar(state, - actualText_x, actualText_y, - actualText_dx, actualText_dy, - 0, 1, uni, length); - - delete [] uni; - if (!actualText->hasUnicodeMarker()) - delete [] uniString; - delete actualText; - } - } + actualText->endMC(state); } void TextOutputDev::stroke(GfxState *state) { diff --git a/poppler/TextOutputDev.h b/poppler/TextOutputDev.h index 2b019ff..2bb91c2 100644 --- a/poppler/TextOutputDev.h +++ b/poppler/TextOutputDev.h @@ -614,6 +614,33 @@ private: }; //------------------------------------------------------------------------ +// ActualText +//------------------------------------------------------------------------ + +class ActualText { +public: + // Create an ActualText + ActualText(TextPage *out); + ~ActualText(); + + void addChar(GfxState *state, double x, double y, + double dx, double dy, + CharCode c, int nBytes, Unicode *u, int uLen); + void beginMC(Dict *properties); + void endMC(GfxState *state); + +private: + TextPage *text; + int actualTextBMCLevel; // > 0 when inside ActualText span. Incremented + // for each nested BMC inside the span. + GooString *actualText; // replacement text for the span + GBool newActualTextSpan; // true at start of span. used to init the extent + double actualText_x, actualText_y; // extent of the text inside the span + double actualText_dx, actualText_dy; +}; + + +//------------------------------------------------------------------------ // TextOutputDev //------------------------------------------------------------------------ @@ -755,12 +782,7 @@ private: GBool doHTML; // extra processing for HTML conversion GBool ok; // set up ok? - int actualTextBMCLevel; // > 0 when inside ActualText span. Incremented - // for each nested BMC inside the span. - GooString *actualText; // replacement text for the span - GBool newActualTextSpan; // true at start of span. used to init the extent - double actualText_x, actualText_y; // extent of the text inside the span - double actualText_dx, actualText_dy; + ActualText *actualText; }; #endif commit 0f8ab301c633133eea3dbd4f2254f31c50e3c4a9 Author: Carlos Garcia Campos <[email protected]> Date: Sun Dec 14 13:12:34 2008 +0100 [glib-demo] Add find demo diff --git a/glib/demo/Makefile.am b/glib/demo/Makefile.am index 958be90..13bccb3 100644 --- a/glib/demo/Makefile.am +++ b/glib/demo/Makefile.am @@ -16,6 +16,8 @@ poppler_glib_demo_SOURCES = \ annots.c \ attachments.c \ attachments.h \ + find.h \ + find.c \ fonts.h \ fonts.c \ forms.h \ diff --git a/glib/demo/find.c b/glib/demo/find.c new file mode 100644 index 0000000..08e41e2 --- /dev/null +++ b/glib/demo/find.c @@ -0,0 +1,282 @@ +/* + * Copyright (C) 2008 Carlos Garcia Campos <[email protected]> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA 02110-1301, USA. + */ + +#include "find.h" + +enum { + TITLE_COLUMN, + X1_COLUMN, + Y1_COLUMN, + X2_COLUMN, + Y2_COLUMN, + + VISIBLE_COLUMN, + N_COLUMNS +}; + +typedef struct { + PopplerDocument *doc; + + GtkTreeModel *model; + GtkWidget *entry; + GtkWidget *progress; + + gint n_pages; + gint page_index; + + guint idle_id; +} PgdFindDemo; + +static void +pgd_find_free (PgdFindDemo *demo) +{ + if (!demo) + return; + + if (demo->idle_id > 0) { + g_source_remove (demo->idle_id); + demo->idle_id = 0; + } + + if (demo->doc) { + g_object_unref (demo->doc); + demo->doc = NULL; + } + + if (demo->model) { + g_object_unref (demo->model); + demo->model = NULL; + } + + g_free (demo); +} + +static void +pgd_find_update_progress (PgdFindDemo *demo, + gint scanned) +{ + gchar *str; + + str = g_strdup_printf ("Searching ... (%d%%)", + MIN (scanned * 100 / demo->n_pages, 100)); + gtk_progress_bar_set_text (GTK_PROGRESS_BAR (demo->progress), str); + gtk_progress_bar_set_fraction (GTK_PROGRESS_BAR (demo->progress), + MIN ((gdouble)scanned / demo->n_pages, 1.0)); + g_free (str); +} + +static gboolean +pgd_find_find_text (PgdFindDemo *demo) +{ + PopplerPage *page; + GList *matches; + GTimer *timer; + + page = poppler_document_get_page (demo->doc, demo->page_index); + if (!page) { + demo->page_index++; + return demo->page_index < demo->n_pages; + } + + timer = g_timer_new (); + matches = poppler_page_find_text (page, gtk_entry_get_text (GTK_ENTRY (demo->entry))); + g_timer_stop (timer); + if (matches) { + GtkTreeIter iter; + gchar *str; + GList *l; + gint n_match = 0; + + str = g_strdup_printf ("%d matches found on page %d in %.4f seconds", + g_list_length (matches), demo->page_index + 1, + g_timer_elapsed (timer, NULL)); + + gtk_tree_store_append (GTK_TREE_STORE (demo->model), &iter, NULL); + gtk_tree_store_set (GTK_TREE_STORE (demo->model), &iter, + TITLE_COLUMN, str, + VISIBLE_COLUMN, FALSE, + -1); + g_free (str); + + for (l = matches; l && l->data; l = g_list_next (l)) { + PopplerRectangle *rect = (PopplerRectangle *)l->data; + GtkTreeIter iter_child; + gchar *x1, *y1, *x2, *y2; + + str = g_strdup_printf ("Match %d", ++n_match); + x1 = g_strdup_printf ("%.2f", rect->x1); + y1 = g_strdup_printf ("%.2f", rect->y1); + x2 = g_strdup_printf ("%.2f", rect->x2); + y2 = g_strdup_printf ("%.2f", rect->y2); + + gtk_tree_store_append (GTK_TREE_STORE (demo->model), &iter_child, &iter); + gtk_tree_store_set (GTK_TREE_STORE (demo->model), &iter_child, + TITLE_COLUMN, str, + X1_COLUMN, x1, + Y1_COLUMN, y1, + X2_COLUMN, x2, + Y2_COLUMN, y2, + VISIBLE_COLUMN, TRUE, + -1); + g_free (str); + g_free (x1); + g_free (y1); + g_free (x2); + g_free (y2); + g_free (rect); + } + g_list_free (matches); + } + + g_timer_destroy (timer); + g_object_unref (page); + + demo->page_index++; + pgd_find_update_progress (demo, demo->page_index); + + return demo->page_index < demo->n_pages; +} + +static void +pgd_find_button_clicked (GtkButton *button, + PgdFindDemo *demo) +{ + gtk_tree_store_clear (GTK_TREE_STORE (demo->model)); + demo->page_index = 0; + pgd_find_update_progress (demo, demo->page_index); + if (demo->idle_id > 0) + g_source_remove (demo->idle_id); + demo->idle_id = g_idle_add ((GSourceFunc)pgd_find_find_text, demo); +} + +static void +pgd_find_button_sensitivity_cb (GtkWidget *button, + GtkEntry *entry) +{ + const gchar *text; + + text = gtk_entry_get_text (entry); + gtk_widget_set_sensitive (button, text != NULL && text[0] != '\0'); +} + +GtkWidget * +pgd_find_create_widget (PopplerDocument *document) +{ + PgdFindDemo *demo; + GtkWidget *vbox, *hbox; + GtkWidget *button; + GtkWidget *swindow; + GtkWidget *treeview; + GtkCellRenderer *renderer; + + demo = g_new0 (PgdFindDemo, 1); + + demo->doc = g_object_ref (document); + + demo->n_pages = poppler_document_get_n_pages (document); + + vbox = gtk_vbox_new (FALSE, 12); + + hbox = gtk_hbox_new (FALSE, 6); + + demo->entry = gtk_entry_new (); + gtk_box_pack_start (GTK_BOX (hbox), demo->entry, FALSE, TRUE, 0); + gtk_widget_show (demo->entry); + + demo->progress = gtk_progress_bar_new (); + gtk_progress_bar_set_ellipsize (GTK_PROGRESS_BAR (demo->progress), + PANGO_ELLIPSIZE_END); + gtk_box_pack_start (GTK_BOX (hbox), demo->progress, TRUE, TRUE, 0); + gtk_widget_show (demo->progress); + + button = gtk_button_new_with_label ("Find"); + gtk_widget_set_sensitive (button, FALSE); + g_signal_connect (G_OBJECT (button), "clicked", + G_CALLBACK (pgd_find_button_clicked), + (gpointer)demo); + g_signal_connect_swapped (G_OBJECT (demo->entry), "changed", + G_CALLBACK (pgd_find_button_sensitivity_cb), + (gpointer)button); + gtk_box_pack_end (GTK_BOX (hbox), button, FALSE, FALSE, 0); + gtk_widget_show (button); + + gtk_box_pack_start (GTK_BOX (vbox), hbox, FALSE, TRUE, 6); + gtk_widget_show (hbox); + + swindow = gtk_scrolled_window_new (NULL, NULL); + gtk_scrolled_window_set_policy (GTK_SCROLLED_WINDOW (swindow), + GTK_POLICY_AUTOMATIC, GTK_POLICY_AUTOMATIC); + + demo->model = GTK_TREE_MODEL ( + gtk_tree_store_new (N_COLUMNS, + G_TYPE_STRING, + G_TYPE_STRING, G_TYPE_STRING, + G_TYPE_STRING, G_TYPE_STRING, + G_TYPE_BOOLEAN)); + treeview = gtk_tree_view_new_with_model (GTK_TREE_MODEL (demo->model)); + gtk_tree_view_set_rules_hint (GTK_TREE_VIEW (treeview), TRUE); + gtk_tree_selection_set_mode (gtk_tree_view_get_selection (GTK_TREE_VIEW (treeview)), + GTK_SELECTION_NONE); + + renderer = gtk_cell_renderer_text_new (); + gtk_tree_view_insert_column_with_attributes (GTK_TREE_VIEW (treeview), + TITLE_COLUMN, "Matches", + renderer, + "text", TITLE_COLUMN, + NULL); + + renderer = gtk_cell_renderer_text_new (); + gtk_tree_view_insert_column_with_attributes (GTK_TREE_VIEW (treeview), + X1_COLUMN, "X1", + renderer, + "text", X1_COLUMN, + "visible", VISIBLE_COLUMN, + NULL); + renderer = gtk_cell_renderer_text_new (); + gtk_tree_view_insert_column_with_attributes (GTK_TREE_VIEW (treeview), + Y1_COLUMN, "Y1", + renderer, + "text", Y1_COLUMN, + "visible", VISIBLE_COLUMN, + NULL); + renderer = gtk_cell_renderer_text_new (); + gtk_tree_view_insert_column_with_attributes (GTK_TREE_VIEW (treeview), + X2_COLUMN, "X2", + renderer, + "text", X2_COLUMN, + "visible", VISIBLE_COLUMN, + NULL); + renderer = gtk_cell_renderer_text_new (); + gtk_tree_view_insert_column_with_attributes (GTK_TREE_VIEW (treeview), + Y2_COLUMN, "Y2", + renderer, + "text", Y2_COLUMN, + "visible", VISIBLE_COLUMN, + NULL); + gtk_container_add (GTK_CONTAINER (swindow), treeview); + gtk_widget_show (treeview); + + gtk_box_pack_start (GTK_BOX (vbox), swindow, TRUE, TRUE, 0); + gtk_widget_show (swindow); + + g_object_weak_ref (G_OBJECT (vbox), + (GWeakNotify)pgd_find_free, + (gpointer)demo); + + return vbox; +} diff --git a/glib/demo/find.h b/glib/demo/find.h new file mode 100644 index 0000000..36f95c1 --- /dev/null +++ b/glib/demo/find.h @@ -0,0 +1,31 @@ +/* + * Copyright (C) 2008 Carlos Garcia Campos <[email protected]> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA 02110-1301, USA. + */ + +#include <gtk/gtk.h> +#include <poppler.h> + +#ifndef _FIND_H_ +#define _FIND_H_ + +G_BEGIN_DECLS + +GtkWidget *pgd_find_create_widget (PopplerDocument *document); + +G_END_DECLS + +#endif /* _FIND_H_ */ diff --git a/glib/demo/main.c b/glib/demo/main.c index b272114..cf5cb75 100644 --- a/glib/demo/main.c +++ b/glib/demo/main.c @@ -33,6 +33,7 @@ #include "attachments.h" #include "layers.h" #include "text.h" +#include "find.h" enum { PGD_TITLE_COLUMN, @@ -59,7 +60,8 @@ static const PopplerGlibDemo demo_list[] = { { "Annots", pgd_annots_create_widget }, { "Attachments", pgd_attachments_create_widget }, { "Layers", pgd_layers_create_widget }, - { "Text", pgd_text_create_widget } + { "Text", pgd_text_create_widget }, + { "Find", pgd_find_create_widget } }; static void commit 88df9e9aa9adb53f0a9714ea404d46f111495df3 Author: Carlos Garcia Campos <[email protected]> Date: Sun Dec 14 11:54:35 2008 +0100 [glib-demo] Add Text demo diff --git a/glib/demo/Makefile.am b/glib/demo/Makefile.am index 781c974..958be90 100644 --- a/glib/demo/Makefile.am +++ b/glib/demo/Makefile.am @@ -34,6 +34,8 @@ poppler_glib_demo_SOURCES = \ page.c \ render.h \ render.c \ + text.h \ + text.c \ transitions.h \ transitions.c \ utils.h \ diff --git a/glib/demo/main.c b/glib/demo/main.c index ebbb595..b272114 100644 --- a/glib/demo/main.c +++ b/glib/demo/main.c @@ -32,6 +32,7 @@ #include "annots.h" #include "attachments.h" #include "layers.h" +#include "text.h" enum { PGD_TITLE_COLUMN, @@ -57,7 +58,8 @@ static const PopplerGlibDemo demo_list[] = { { "Images", pgd_images_create_widget }, { "Annots", pgd_annots_create_widget }, { "Attachments", pgd_attachments_create_widget }, - { "Layers", pgd_layers_create_widget } + { "Layers", pgd_layers_create_widget }, + { "Text", pgd_text_create_widget } }; static void diff --git a/glib/demo/text.c b/glib/demo/text.c new file mode 100644 index 0000000..e119082 --- /dev/null +++ b/glib/demo/text.c @@ -0,0 +1,175 @@ +/* + * Copyright (C) 2008 Carlos Garcia Campos <[email protected]> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA 02110-1301, USA. + */ + +#include <string.h> + +#include "text.h" + +typedef struct { + PopplerDocument *doc; + + GtkWidget *timer_label; + GtkTextBuffer *buffer; + + gint page; +} PgdTextDemo; + +static void +pgd_text_free (PgdTextDemo *demo) +{ + if (!demo) + return; + + if (demo->doc) { + g_object_unref (demo->doc); + demo->doc = NULL; + } + + if (demo->buffer) { + g_object_unref (demo->buffer); + demo->buffer = NULL; + } + + g_free (demo); +} + +static void +pgd_text_get_text (GtkWidget *button, + PgdTextDemo *demo) +{ + PopplerPage *page; + PopplerRectangle rect; + gdouble width, height; + gchar *text; + GTimer *timer; + + page = poppler_document_get_page (demo->doc, demo->page); + if (!page) + return; + + poppler_page_get_size (page, &width, &height); + rect.x1 = rect.y1 = 0; + rect.x2 = width; + rect.y2 = height; + + timer = g_timer_new (); + text = poppler_page_get_text (page, POPPLER_SELECTION_GLYPH, &rect); + g_timer_stop (timer); + + if (text) { + gchar *str; + + str = g_strdup_printf ("<i>got text in %.4f seconds</i>", + g_timer_elapsed (timer, NULL)); + gtk_label_set_markup (GTK_LABEL (demo->timer_label), str); + g_free (str); + } else { + gtk_label_set_markup (GTK_LABEL (demo->timer_label), "<i>No text found</i>"); + } + + g_timer_destroy (timer); + g_object_unref (page); + + if (text) { + gtk_text_buffer_set_text (demo->buffer, text, strlen (text)); + g_free (text); + } +} + +static void +pgd_text_page_selector_value_changed (GtkSpinButton *spinbutton, + PgdTextDemo *demo) +{ + demo->page = (gint)gtk_spin_button_get_value (spinbutton) - 1; +} + +GtkWidget * +pgd_text_create_widget (PopplerDocument *document) +{ + PgdTextDemo *demo; + GtkWidget *label; + GtkWidget *vbox; + GtkWidget *hbox, *page_selector; + GtkWidget *button; + GtkWidget *swindow, *textview; + gchar *str; + gint n_pages; + + demo = g_new0 (PgdTextDemo, 1); + + demo->doc = g_object_ref (document); + + n_pages = poppler_document_get_n_pages (document); + + vbox = gtk_vbox_new (FALSE, 12); + + hbox = gtk_hbox_new (FALSE, 6); + + label = gtk_label_new ("Page:"); + gtk_box_pack_start (GTK_BOX (hbox), label, FALSE, TRUE, 0); + gtk_widget_show (label); + + page_selector = gtk_spin_button_new_with_range (1, n_pages, 1); + g_signal_connect (G_OBJECT (page_selector), "value-changed", + G_CALLBACK (pgd_text_page_selector_value_changed), + (gpointer)demo); + gtk_box_pack_start (GTK_BOX (hbox), page_selector, FALSE, TRUE, 0); + gtk_widget_show (page_selector); + + str = g_strdup_printf ("of %d", n_pages); + label = gtk_label_new (str); + gtk_box_pack_start (GTK_BOX (hbox), label, FALSE, TRUE, 0); + gtk_widget_show (label); + g_free (str); + + button = gtk_button_new_with_label ("Get Text"); + g_signal_connect (G_OBJECT (button), "clicked", + G_CALLBACK (pgd_text_get_text), + (gpointer)demo); + gtk_box_pack_end (GTK_BOX (hbox), button, FALSE, FALSE, 0); + gtk_widget_show (button); + + gtk_box_pack_start (GTK_BOX (vbox), hbox, FALSE, TRUE, 0); + gtk_widget_show (hbox); + + demo->timer_label = gtk_label_new (NULL); + gtk_label_set_markup (GTK_LABEL (demo->timer_label), "<i>No text found</i>"); + g_object_set (G_OBJECT (demo->timer_label), "xalign", 1.0, NULL); + gtk_box_pack_start (GTK_BOX (vbox), demo->timer_label, FALSE, TRUE, 0); + gtk_widget_show (demo->timer_label); + + swindow = gtk_scrolled_window_new (NULL, NULL); + gtk_scrolled_window_set_policy (GTK_SCROLLED_WINDOW (swindow), + GTK_POLICY_AUTOMATIC, + GTK_POLICY_AUTOMATIC); + + demo->buffer = gtk_text_buffer_new (NULL); + textview = gtk_text_view_new_with_buffer (demo->buffer); + + gtk_container_add (GTK_CONTAINER (swindow), textview); + gtk_widget_show (textview); + + gtk_box_pack_start (GTK_BOX (vbox), swindow, TRUE, TRUE, 0); + gtk_widget_show (swindow); + + g_object_weak_ref (G_OBJECT (vbox), + (GWeakNotify)pgd_text_free, + demo); + + return vbox; +} diff --git a/glib/demo/text.h b/glib/demo/text.h new file mode 100644 index 0000000..87a1143 --- /dev/null +++ b/glib/demo/text.h @@ -0,0 +1,31 @@ +/* + * Copyright (C) 2008 Carlos Garcia Campos <[email protected]> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA 02110-1301, USA. + */ + +#include <gtk/gtk.h> +#include <poppler.h> + +#ifndef _TEXT_H_ +#define _TEXT_H_ + +G_BEGIN_DECLS + +GtkWidget *pgd_text_create_widget (PopplerDocument *document); + +G_END_DECLS + +#endif /* _TEXT_H_ */ commit 5b0f2355d55a5104820fd0bf16b4e76b25959de4 Author: Carlos Garcia Campos <[email protected]> Date: Sun Dec 14 11:49:00 2008 +0100 [glib] Use TextPage instead of TextOutputDev when cairo is enabled diff --git a/glib/poppler-page.cc b/glib/poppler-page.cc index 405e0e6..29d124d 100644 --- a/glib/poppler-page.cc +++ b/glib/poppler-page.cc @@ -71,10 +71,15 @@ poppler_page_finalize (GObject *object) if (page->annots != NULL) delete page->annots; +#if defined (HAVE_CAIRO) + if (page->text != NULL) + page->text->decRefCnt(); +#else if (page->gfx != NULL) - delete page->gfx; + delete page->gfx; if (page->text_dev != NULL) delete page->text_dev; +#endif /* page->page is owned by the document */ } @@ -230,6 +235,7 @@ poppler_page_get_transition (PopplerPage *page) return transition; } +#if !defined (HAVE_CAIRO) static TextOutputDev * poppler_page_get_text_output_dev (PopplerPage *page) { @@ -254,9 +260,28 @@ poppler_page_get_text_output_dev (PopplerPage *page) return page->text_dev; } +#endif /* !defined (HAVE_CAIRO) */ #if defined (HAVE_CAIRO) +static TextPage * +poppler_page_get_text_page (PopplerPage *page) +{ + if (page->text == NULL) { + cairo_t *cr; + cairo_surface_t *surface; + + surface = cairo_image_surface_create (CAIRO_FORMAT_RGB24, 1, 1); + cr = cairo_create (surface); + poppler_page_render (page, cr); + cairo_destroy (cr); + cairo_surface_destroy (surface); + + } + + return page->text; +} + #ifdef POPPLER_WITH_GDK typedef struct { unsigned char *cairo_data; @@ -515,6 +540,9 @@ _poppler_page_render (PopplerPage *page, output_dev->setCairo (cairo); output_dev->setPrinting (printing); + if (!printing) + output_dev->setTextPage (page->text); + /* NOTE: instead of passing -1 we should/could use cairo_clip_extents() * to get a bounding box */ cairo_save (cairo); @@ -530,7 +558,8 @@ _poppler_page_render (PopplerPage *page, printing ? poppler_print_annot_cb : NULL, NULL); cairo_restore (cairo); - output_dev->setCairo (NULL); + output_dev->setCairo (NULL); + output_dev->setTextPage (NULL); } /** @@ -549,6 +578,9 @@ poppler_page_render (PopplerPage *page, { g_return_if_fail (POPPLER_IS_PAGE (page)); + if (!page->text) + page->text = new TextPage(gFalse); + _poppler_page_render (page, cairo, gFalse); } @@ -670,8 +702,8 @@ poppler_page_render_selection (PopplerPage *page, PopplerColor *glyph_color, PopplerColor *background_color) { - TextOutputDev *text_dev; CairoOutputDev *output_dev; + TextPage *text; SelectionStyle selection_style = selectionStyleGlyph; PDFRectangle pdf_selection(selection->x1, selection->y1, selection->x2, selection->y2); @@ -704,24 +736,15 @@ poppler_page_render_selection (PopplerPage *page, break; } - text_dev = poppler_page_get_text_output_dev (page); output_dev = page->document->output_dev; output_dev->setCairo (cairo); - text_dev->drawSelection (output_dev, 1.0, 0, - &pdf_selection, selection_style, - &gfx_glyph_color, &gfx_background_color); + text = poppler_page_get_text_page (page); + text->drawSelection (output_dev, 1.0, 0, + &pdf_selection, selection_style, + &gfx_glyph_color, &gfx_background_color); output_dev->setCairo (NULL); - - /* We'll need a function to destroy page->text_dev and page->gfx - * when the application wants to get rid of them. - * - * Two improvements: 1) make GfxFont refcounted and let TextPage and - * friends hold a reference to the GfxFonts they need so we can free - * up Gfx early. 2) use a TextPage directly when rendering the page - * so we don't have to use TextOutputDev and render a second - * time. */ } #endif /* HAVE_CAIRO */ @@ -886,7 +909,6 @@ poppler_page_render_selection_to_pixbuf (PopplerPage *page, GdkColor *glyph_color, GdkColor *background_color) { - TextOutputDev *text_dev; OutputDev *output_dev; OutputDevData data; SelectionStyle selection_style = selectionStyleGlyph; @@ -921,19 +943,24 @@ poppler_page_render_selection_to_pixbuf (PopplerPage *page, break; } - text_dev = poppler_page_get_text_output_dev (page); output_dev = page->document->output_dev; poppler_page_prepare_output_dev (page, scale, rotation, TRUE, &data); - text_dev->drawSelection (output_dev, scale, rotation, +#if defined (HAVE_CAIRO) + TextPage *text; + + text = poppler_page_get_text_page (page); + text->drawSelection (output_dev, scale, rotation, &pdf_selection, selection_style, &gfx_glyph_color, &gfx_background_color); +#else + TextOutputDev *text_dev; - poppler_page_copy_to_pixbuf (page, pixbuf, &data); - - poppler_page_set_selection_alpha (page, scale, pixbuf, style, selection); - + text_dev = poppler_page_get_text_output_dev (page); + text_dev->drawSelection (output_dev, scale, rotation, + &pdf_selection, selection_style, + &gfx_glyph_color, &gfx_background_color); /* We'll need a function to destroy page->text_dev and page->gfx * when the application wants to get rid of them. * @@ -942,6 +969,11 @@ poppler_page_render_selection_to_pixbuf (PopplerPage *page, * up Gfx early. 2) use a TextPage directly when rendering the page * so we don't have to use TextOutputDev and render a second * time. */ +#endif + + poppler_page_copy_to_pixbuf (page, pixbuf, &data); + + poppler_page_set_selection_alpha (page, scale, pixbuf, style, selection); } #endif /* POPPLER_WITH_GDK */ @@ -1013,7 +1045,6 @@ poppler_page_get_selection_region (PopplerPage *page, PopplerSelectionStyle style, PopplerRectangle *selection) { - TextOutputDev *text_dev; PDFRectangle poppler_selection; SelectionStyle selection_style = selectionStyleGlyph; GooList *list; @@ -1037,11 +1068,21 @@ poppler_page_get_selection_region (PopplerPage *page, selection_style = selectionStyleLine; break; } - + +#if defined (HAVE_CAIRO) + TextPage *text; + + text = poppler_page_get_text_page (page); + list = text->getSelectionRegion(&poppler_selection, + selection_style, scale); +#else + TextOutputDev *text_dev; + text_dev = poppler_page_get_text_output_dev (page); - list = text_dev->getSelectionRegion(&poppler_selection, + list = text_dev->getSelectionRegion(&poppler_selection, selection_style, scale); - +#endif + for (i = 0; i < list->getLength(); i++) { PDFRectangle *selection_rect = (PDFRectangle *) list->get(i); PopplerRectangle *rect; @@ -1089,7 +1130,6 @@ poppler_page_get_text (PopplerPage *page, PopplerSelectionStyle style, PopplerRectangle *selection) { - TextOutputDev *text_dev; GooString *sel_text; double height; char *result; @@ -1099,9 +1139,7 @@ poppler_page_get_text (PopplerPage *page, g_return_val_if_fail (POPPLER_IS_PAGE (page), FALSE); g_return_val_if_fail (selection != NULL, NULL); - text_dev = poppler_page_get_text_output_dev (page); poppler_page_get_size (page, NULL, &height); - pdf_selection.x1 = selection->x1; pdf_selection.y1 = height - selection->y2; pdf_selection.x2 = selection->x2; @@ -1120,7 +1158,18 @@ poppler_page_get_text (PopplerPage *page, break; } +#if defined (HAVE_CAIRO) + TextPage *text; + + text = poppler_page_get_text_page (page); + sel_text = text->getSelectionText (&pdf_selection, selection_style); +#else + TextOutputDev *text_dev; + + text_dev = poppler_page_get_text_output_dev (page); sel_text = text_dev->getSelectionText (&pdf_selection, selection_style); +#endif + result = g_strdup (sel_text->getCString ()); delete sel_text; @@ -1142,35 +1191,41 @@ poppler_page_find_text (PopplerPage *page, const char *text) { PopplerRectangle *match; - TextOutputDev *output_dev; - PDFDoc *doc; GList *matches; double xMin, yMin, xMax, yMax; gunichar *ucs4; glong ucs4_len; double height; - +#if defined (HAVE_CAIRO) + TextPage *text_dev; +#else + TextOutputDev *text_dev; +#endif + g_return_val_if_fail (POPPLER_IS_PAGE (page), FALSE); g_return_val_if_fail (text != NULL, FALSE); - ucs4 = g_utf8_to_ucs4_fast (text, -1, &ucs4_len); - - output_dev = new TextOutputDev (NULL, gTrue, gFalse, gFalse); - doc = page->document->doc; +#if defined (HAVE_CAIRO) + text_dev = poppler_page_get_text_page (page); +#else + text_dev = new TextOutputDev (NULL, gTrue, gFalse, gFalse); + page->page->display (text_dev, 72, 72, 0, + gFalse, gTrue, gFalse, + page->document->doc->getCatalog()); +#endif + ucs4 = g_utf8_to_ucs4_fast (text, -1, &ucs4_len); poppler_page_get_size (page, NULL, &height); - page->page->display (output_dev, 72, 72, 0, gFalse, - gTrue, gFalse, doc->getCatalog()); matches = NULL; xMin = 0; yMin = 0; - while (output_dev->findText (ucs4, ucs4_len, - gFalse, gTrue, // startAtTop, stopAtBottom - gTrue, gFalse, // startAtLast, stopAtLast - gFalse, gFalse, // caseSensitive, backwards - &xMin, &yMin, &xMax, &yMax)) + while (text_dev->findText (ucs4, ucs4_len, + gFalse, gTrue, // startAtTop, stopAtBottom + gTrue, gFalse, // startAtLast, stopAtLast + gFalse, gFalse, // caseSensitive, backwards + &xMin, &yMin, &xMax, &yMax)) { match = g_new (PopplerRectangle, 1); match->x1 = xMin; @@ -1180,7 +1235,10 @@ poppler_page_find_text (PopplerPage *page, matches = g_list_prepend (matches, match); } - delete output_dev; +#if !defined (HAVE_CAIRO) + delete text_dev; +#endif + g_free (ucs4); return g_list_reverse (matches); diff --git a/glib/poppler-private.h b/glib/poppler-private.h index 663b8e8..c4380ea 100644 --- a/glib/poppler-private.h +++ b/glib/poppler-private.h @@ -59,8 +59,12 @@ struct _PopplerPage PopplerDocument *document; Page *page; int index; +#if defined (HAVE_CAIRO) + TextPage *text; +#else TextOutputDev *text_dev; Gfx *gfx; +#endif Annots *annots; }; commit 3ced71fb68d62308db7b9535367eafefb55d1cde Author: Carlos Garcia Campos <[email protected]> Date: Sun Dec 14 11:18:00 2008 +0100 Add optionally text support to CairoOutputDev If a TextPage is set, it'll be used when rendering so that we don't need to use TextOutputDev and render again. diff --git a/poppler/CairoOutputDev.cc b/poppler/CairoOutputDev.cc index 8ac2201..42bcaac 100644 --- a/poppler/CairoOutputDev.cc +++ b/poppler/CairoOutputDev.cc @@ -49,6 +49,7 @@ #include "Link.h" #include "CharCodeToUnicode.h" #include "FontEncodingTables.h" +#include "PDFDocEncoding.h" #include <fofi/FoFiTrueType.h> #include <splash/SplashBitmap.h> #include "CairoOutputDev.h" @@ -135,6 +136,9 @@ CairoOutputDev::CairoOutputDev() { shape = NULL; cairo_shape = NULL; knockoutCount = 0; + + text = NULL; + actualTextBMCLevel = 0; } CairoOutputDev::~CairoOutputDev() { @@ -152,6 +156,8 @@ CairoOutputDev::~CairoOutputDev() { cairo_pattern_destroy (mask); if (shape) cairo_pattern_destroy (shape); + if (text) + text->decRefCnt(); } void CairoOutputDev::setCairo(cairo_t *cairo) @@ -175,6 +181,18 @@ void CairoOutputDev::setCairo(cairo_t *cairo) } } +void CairoOutputDev::setTextPage(TextPage *text) +{ + if (this->text) + this->text->decRefCnt(); + if (text) { + this->text = text; + this->text->incRefCnt(); + } else { + this->text = NULL; + } +} + void CairoOutputDev::startDoc(XRef *xrefA, Catalog *catalogA, CairoFontEngine *parentFontEngine) { xref = xrefA; @@ -197,6 +215,16 @@ void CairoOutputDev::startPage(int pageNum, GfxState *state) { cairo_pattern_destroy(stroke_pattern); stroke_pattern = cairo_pattern_create_rgb(0., 0., 0.); + + if (text) + text->startPage(state); +} + +void CairoOutputDev::endPage() { + if (text) { + text->endPage(); + text->coalesce(gTrue, gFalse); + } } void CairoOutputDev::drawLink(Link *link, Catalog *catalog) { @@ -416,6 +444,10 @@ void CairoOutputDev::updateFont(GfxState *state) { needFontUpdate = gFalse; + //FIXME: use cairo font engine? + if (text) + text->updateFont(state); + currentFont = fontEngine->getFont (state->getFont(), xref, catalog, printing); if (!currentFont) @@ -567,13 +599,37 @@ void CairoOutputDev::drawChar(GfxState *state, double x, double y, double originX, double originY, CharCode code, int nBytes, Unicode *u, int uLen) { - if (!currentFont) + if (currentFont) { + glyphs[glyphCount].index = currentFont->getGlyph (code, u, uLen); + glyphs[glyphCount].x = x - originX; + glyphs[glyphCount].y = y - originY; + glyphCount++; + } + + if (!text) return; - glyphs[glyphCount].index = currentFont->getGlyph (code, u, uLen); - glyphs[glyphCount].x = x - originX; - glyphs[glyphCount].y = y - originY; - glyphCount++; + if (actualTextBMCLevel == 0) { + text->addChar(state, x, y, dx, dy, code, nBytes, u, uLen); + } else { + // Inside ActualText span. + if (newActualTextSpan) { + actualText_x = x; + actualText_y = y; + actualText_dx = dx; + actualText_dy = dy; + newActualTextSpan = gFalse; + } else { + if (x < actualText_x) + actualText_x = x; + if (y < actualText_y) + actualText_y = y; + if (x + dx > actualText_x + actualText_dx) + actualText_dx = x + dx - actualText_x; + if (y + dy > actualText_y + actualText_dy) + actualText_dy = y + dy - actualText_y; + } + } } void CairoOutputDev::endString(GfxState *state) @@ -714,7 +770,85 @@ void CairoOutputDev::endTextObject(GfxState *state) { cairo_path_destroy (textClipPath); textClipPath = NULL; } +} + +void CairoOutputDev::beginMarkedContent(char *name, Dict *properties) +{ + Object obj; + + if (!text) + return; + + if (actualTextBMCLevel > 0) { + // Already inside a ActualText span. + actualTextBMCLevel++; + return; + } + + if (properties->lookup("ActualText", &obj)) { + if (obj.isString()) { + actualText = obj.getString(); + actualTextBMCLevel = 1; + newActualTextSpan = gTrue; + } + } +} + +void CairoOutputDev::endMarkedContent(GfxState *state) +{ + char *uniString = NULL; + Unicode *uni; + int length, i; + + if (!text) + return; + + if (actualTextBMCLevel > 0) { + actualTextBMCLevel--; + if (actualTextBMCLevel == 0) { + // ActualText span closed. Output the span text and the + // extents of all the glyphs inside the span + + if (newActualTextSpan) { + // No content inside span. + actualText_x = state->getCurX(); + actualText_y = state->getCurY(); + actualText_dx = 0; + actualText_dy = 0; + } + if (!actualText->hasUnicodeMarker()) { + if (actualText->getLength() > 0) { + //non-unicode string -- assume pdfDocEncoding and + //try to convert to UTF16BE + uniString = pdfDocEncodingToUTF16(actualText, &length); + } else { + length = 0; + } + } else { + uniString = actualText->getCString(); + length = actualText->getLength(); + } + + if (length < 3) + length = 0; + else + length = length/2 - 1; + uni = new Unicode[length]; + for (i = 0 ; i < length; i++) + uni[i] = (uniString[2 + i*2]<<8) + uniString[2 + i*2+1]; + + text->addChar(state, + actualText_x, actualText_y, + actualText_dx, actualText_dy, + 0, 1, uni, length); + + delete [] uni; + if (!actualText->hasUnicodeMarker()) + delete [] uniString; + delete actualText; + } + } } static inline int splashRound(SplashCoord x) { diff --git a/poppler/CairoOutputDev.h b/poppler/CairoOutputDev.h index 46be9dc..3273d74 100644 --- a/poppler/CairoOutputDev.h +++ b/poppler/CairoOutputDev.h @@ -36,6 +36,7 @@ #include "goo/gtypes.h" #include <cairo-ft.h> #include "OutputDev.h" +#include "TextOutputDev.h" #include "GfxState.h" class GfxState; @@ -107,7 +108,7 @@ public: virtual void startPage(int pageNum, GfxState *state); // End a page. - virtual void endPage() { } + virtual void endPage(); //----- link borders virtual void drawLink(Link *link, Catalog *catalog); @@ -158,6 +159,10 @@ public: virtual void endType3Char(GfxState *state); virtual void endTextObject(GfxState *state); + //----- grouping operators + virtual void beginMarkedContent(char *name, Dict *properties); + virtual void endMarkedContent(GfxState *state); + //----- image drawing virtual void drawImageMask(GfxState *state, Object *ref, Stream *str, int width, int height, GBool invert, @@ -186,7 +191,6 @@ public: int maskWidth, int maskHeight, GBool maskInvert); - //----- transparency groups and soft masks virtual void beginTransparencyGroup(GfxState * /*state*/, double * /*bbox*/, GfxColorSpace * /*blendingColorSpace*/, @@ -205,13 +209,14 @@ public: double llx, double lly, double urx, double ury); //----- special access - + // Called to indicate that a new PDF document has been loaded. void startDoc(XRef *xrefA, Catalog *catalogA, CairoFontEngine *fontEngine = NULL); GBool isReverseVideo() { return gFalse; } void setCairo (cairo_t *cr); + void setTextPage (TextPage *text); void setPrinting (GBool printing) { this->printing = printing; needFontUpdate = gTrue; } void setInType3Char(GBool inType3Char) { this->inType3Char = inType3Char; } @@ -252,6 +257,14 @@ protected: GBool prescaleImages; + TextPage *text; // text for the current page + int actualTextBMCLevel; // > 0 when inside ActualText span. Incremented + // for each nested BMC inside the span. + GooString *actualText; // replacement text for the span + GBool newActualTextSpan; // true at start of span. used to init the extent + double actualText_x, actualText_y; // extent of the text inside the span + double actualText_dx, actualText_dy; + cairo_pattern_t *group; cairo_pattern_t *shape; cairo_pattern_t *mask; commit 0bdad35cc4cfdb8da5acaf44678920b7a0025f99 Author: Carlos Garcia Campos <[email protected]> Date: Sun Dec 14 11:14:12 2008 +0100 Add refcount support to TextPage diff --git a/poppler/TextOutputDev.cc b/poppler/TextOutputDev.cc index 9b49ad9..1cf3319 100644 --- a/poppler/TextOutputDev.cc +++ b/poppler/TextOutputDev.cc @@ -1772,6 +1772,7 @@ TextWord *TextWordList::get(int idx) { TextPage::TextPage(GBool rawOrderA) { int rot; + refCnt = 1; rawOrder = rawOrderA; curWord = NULL; charPos = 0; @@ -1810,6 +1811,15 @@ TextPage::~TextPage() { deleteGooList(links, TextLink); } +void TextPage::incRefCnt() { + refCnt++; +} + +void TextPage::decRefCnt() { + if (--refCnt == 0) + delete this; +} + void TextPage::startPage(GfxState *state) { clear(); if (state) { @@ -4546,7 +4556,7 @@ TextOutputDev::~TextOutputDev() { fclose((FILE *)outputStream); } if (text) { - delete text; + text->decRefCnt(); } } diff --git a/poppler/TextOutputDev.h b/poppler/TextOutputDev.h index 3dc5f6f..2b019ff 100644 --- a/poppler/TextOutputDev.h +++ b/poppler/TextOutputDev.h @@ -469,6 +469,9 @@ public: // Destructor. ~TextPage(); + void incRefCnt(); + void decRefCnt(); + // Start a new page. void startPage(GfxState *state); @@ -599,6 +602,8 @@ private: GooList *underlines; // [TextUnderline] GooList *links; // [TextLink] + int refCnt; + friend class TextLine; friend class TextLineFrag; friend class TextBlock; diff --git a/qt4/src/poppler-page.cc b/qt4/src/poppler-page.cc index e6d5ed9..50c733e 100644 --- a/qt4/src/poppler-page.cc +++ b/qt4/src/poppler-page.cc @@ -335,7 +335,7 @@ bool Page::search(const QString &text, QRectF &rect, SearchDirection direction, found = textPage->findText( u.data(), len, gTrue, gFalse, gFalse, gTrue, sCase, gFalse, &sLeft, &sTop, &sRight, &sBottom ); - delete textPage; + textPage->decRefCnt(); rect.setLeft( sLeft ); rect.setTop( sTop ); _______________________________________________ poppler mailing list [email protected] http://lists.freedesktop.org/mailman/listinfo/poppler
