A Dijous, 26 d'agost de 2010, Paweł Wiejacha va escriure:
> On 26.08.2010 23:36, Albert Astals Cid wrote:
> > Do you really have a PDF with a Dictionary of 2000 entries?
>
> Sure. http://students.mimuw.edu.pl/~pw248348/opos/perf_tests.tar.gz
> perf_tests/pdfs/104418018297-AttenInSuspensionsIrregularlyShapedSedimentPar
> ticles.pdf page 6
Attached my suggestion as patch for a Dict sort for "big" Dicts (>= 32
entries) (tests show little or no speed increase on small dicts).
Comments?
Albert
>
> regards,
> Paweł Wiejacha.
> _______________________________________________
> poppler mailing list
> [email protected]
> http://lists.freedesktop.org/mailman/listinfo/poppler
diff --git a/poppler/Dict.cc b/poppler/Dict.cc
index a899bca..8174c40 100644
--- a/poppler/Dict.cc
+++ b/poppler/Dict.cc
@@ -29,6 +29,7 @@
#pragma implementation
#endif
+#include <algorithm>
#include <stddef.h>
#include <string.h>
#include "goo/gmem.h"
@@ -40,11 +41,37 @@
// Dict
//------------------------------------------------------------------------
+static const int SORT_LENGTH_LOWER_LIMIT = 32;
+
+static bool cmpDictEntries(const DictEntry &e1, const DictEntry &e2)
+{
+ return strcmp(e1.key, e2.key) < 0;
+}
+
+static int binarySearch(const char *key, DictEntry *entries, int length)
+{
+ int first = 0;
+ int end = length - 1;
+ while (first <= end) {
+ const int middle = (first + end) / 2;
+ const int res = strcmp(key, entries[middle].key);
+ if (res == 0) {
+ return middle;
+ } else if (res < 0) {
+ end = middle - 1;
+ } else {
+ first = middle + 1;
+ }
+ }
+ return -1;
+}
+
Dict::Dict(XRef *xrefA) {
xref = xrefA;
entries = NULL;
size = length = 0;
ref = 1;
+ sorted = gFalse;
}
Dict::Dict(Dict* dictA) {
@@ -52,6 +79,7 @@ Dict::Dict(Dict* dictA) {
size = length = dictA->length;
ref = 1;
+ sorted = dictA->sorted;
entries = (DictEntry *)gmallocn(size, sizeof(DictEntry));
for (int i=0; i<length; i++) {
entries[i].key = strdup(dictA->entries[i].key);
@@ -70,6 +98,12 @@ Dict::~Dict() {
}
void Dict::add(char *key, Object *val) {
+ if (sorted) {
+ // We use add on very few occasions so
+ // virtually this will never be hit
+ sorted = gFalse;
+ }
+
if (length == size) {
if (length == 0) {
size = 8;
@@ -84,16 +118,38 @@ void Dict::add(char *key, Object *val) {
}
inline DictEntry *Dict::find(char *key) {
+ if (!sorted && length >= SORT_LENGTH_LOWER_LIMIT)
+ {
+ sorted = true;
+ std::sort(entries, entries+length, cmpDictEntries);
+ }
+
+ if (sorted) {
+ const int pos = binarySearch(key, entries, length);
+ if (pos != -1) {
+ return &entries[pos];
+ }
+ } else {
int i;
for (i = length - 1; i >=0; --i) {
if (!strcmp(key, entries[i].key))
return &entries[i];
}
+ }
return NULL;
}
void Dict::remove(char *key) {
+ if (sorted) {
+ const int pos = binarySearch(key, entries, length);
+ if (pos != -1) {
+ length -= 1;
+ if (pos != length) {
+ memmove(&entries[pos], &entries[pos + 1], (length - pos) * sizeof(DictEntry));
+ }
+ }
+ } else {
int i;
bool found = false;
DictEntry tmp;
@@ -112,6 +168,7 @@ void Dict::remove(char *key) {
if (i!=length) //don't copy the last entry if it is deleted
entries[i] = tmp;
}
+}
void Dict::set(char *key, Object *val) {
DictEntry *e;
diff --git a/poppler/Dict.h b/poppler/Dict.h
index a76bc89..dbb3893 100644
--- a/poppler/Dict.h
+++ b/poppler/Dict.h
@@ -89,6 +89,7 @@ public:
private:
+ GBool sorted;
XRef *xref; // the xref table for this PDF file
DictEntry *entries; // array of entries
int size; // size of <entries> array
_______________________________________________
poppler mailing list
[email protected]
http://lists.freedesktop.org/mailman/listinfo/poppler