Also cleaned up the includes a bit...

Am 08.05.2018 um 08:01 schrieb Adam Reichold:
> Hello again again,
> 
> Am 08.05.2018 um 07:37 schrieb Adam Reichold:
>> Hello again,
>>
>> Am 08.05.2018 um 00:12 schrieb Albert Astals Cid:
>>> El dilluns, 7 de maig de 2018, a les 23:03:00 CEST, Adam Reichold va 
>>> escriure:
>>>> Hello,
>>>>
>>>> it is probably already too much purely technical code churn for this
>>>> release cycle, but the use of memmove got me thinking how an
>>>> implementation of Dict with more STL usage would look like.
>>>>
>>>> The result is attached, requesting comments on whether this is
>>>> considered useful or not.
>>>
>>> The remove implementation is wrong since it breaks sorting.
>>
>> Could you explain in more detail? My thinking was that any sublist of a
>> sorted list is also sorted and hence removing an element of a list
>> cannot break its sorting? (And since we do not increase the size by
>> removing an element, we cannot reach SORT_LENGTH_LOWER_LIMIT if we were
>> not there already.)
> 
> Sorry, that took a minute... Because of the swap... But actually, there
> already is std::vector::erase which does all the tricks like using
> memmove if it is statically safe, so it should actually be simpler.
> 
> Fixed version attached...
> 
>> Best regards,
>> Adam
>>
>>> Cheers,
>>>   Albert
>>>
>>>>
>>>> Best regards,
>>>> Adam
>>>>
>>>> Am 07.05.2018 um 19:14 schrieb Albert Astals Cid:
>>>>>  poppler/Array.cc |    2 +-
>>>>>  poppler/Dict.cc  |    2 +-
>>>>>  2 files changed, 2 insertions(+), 2 deletions(-)
>>>>>
>>>>> New commits:
>>>>> commit 07b8f838b3d8859a3ad34a3140bb24475bd6ac2c
>>>>> Author: Albert Astals Cid <[email protected]>
>>>>> Date:   Mon May 7 19:13:07 2018 +0200
>>>>>
>>>>>     cast to void * to bypass new gcc -Wclass-memaccess warning
>>>>>     
>>>>>     Yes what we're doing there is a bit nasty but it works :D
>>>>>
>>>>> diff --git a/poppler/Array.cc b/poppler/Array.cc
>>>>> index 3276349f..e8aa34ea 100644
>>>>> --- a/poppler/Array.cc
>>>>> +++ b/poppler/Array.cc
>>>>> @@ -112,7 +112,7 @@ void Array::remove(int i) {
>>>>>
>>>>>  #endif
>>>>>  
>>>>>    }
>>>>>    --length;
>>>>>
>>>>> -  memmove( elems + i, elems + i + 1, sizeof(elems[0]) * (length - i) );
>>>>> +  memmove( static_cast<void*>(elems + i), elems + i + 1, sizeof(elems[0])
>>>>> * (length - i) );> 
>>>>>  }
>>>>>  
>>>>>  Object Array::get(int i, int recursion) const {
>>>>>
>>>>> diff --git a/poppler/Dict.cc b/poppler/Dict.cc
>>>>> index a431f7eb..bc86fd77 100644
>>>>> --- a/poppler/Dict.cc
>>>>> +++ b/poppler/Dict.cc
>>>>> @@ -201,7 +201,7 @@ void Dict::remove(const char *key) {
>>>>>
>>>>>        gfree(entries[pos].key);
>>>>>        entries[pos].val.free();
>>>>>        if (pos != length) {
>>>>>
>>>>> -        memmove(&entries[pos], &entries[pos + 1], (length - pos) *
>>>>> sizeof(DictEntry)); +        memmove(static_cast<void*>(&entries[pos]),
>>>>> &entries[pos + 1], (length - pos) * sizeof(DictEntry));> 
>>>>>        }
>>>>>      
>>>>>      }
>>>>>    
>>>>>    } else {
>>>>>
>>>>> _______________________________________________
>>>>> poppler mailing list
>>>>> [email protected]
>>>>> https://lists.freedesktop.org/mailman/listinfo/poppler
>>>
>>>
>>>
>>>
>>> _______________________________________________
>>> poppler mailing list
>>> [email protected]
>>> https://lists.freedesktop.org/mailman/listinfo/poppler
>>>
>>
>>
>>
>> _______________________________________________
>> poppler mailing list
>> [email protected]
>> https://lists.freedesktop.org/mailman/listinfo/poppler
>>
>>
>>
>> _______________________________________________
>> poppler mailing list
>> [email protected]
>> https://lists.freedesktop.org/mailman/listinfo/poppler
From 62b907bd20dda803b1cddf28eda8e07500a84ea0 Mon Sep 17 00:00:00 2001
From: Adam Reichold <[email protected]>
Date: Mon, 7 May 2018 22:58:43 +0200
Subject: [PATCH] Try to simplify Dict by implementing it in terms of
 std::vector and std::string.

---
 poppler/Catalog.cc |   2 +-
 poppler/Catalog.h  |   2 +-
 poppler/Dict.cc    | 240 ++++++++++++++++-------------------------------------
 poppler/Dict.h     |  46 +++++-----
 poppler/Form.cc    |   2 +-
 poppler/Object.h   |   4 +-
 6 files changed, 99 insertions(+), 197 deletions(-)

diff --git a/poppler/Catalog.cc b/poppler/Catalog.cc
index d259bda6..eba626e6 100644
--- a/poppler/Catalog.cc
+++ b/poppler/Catalog.cc
@@ -428,7 +428,7 @@ int Catalog::numDests()
   return obj->dictGetLength();
 }
 
-char *Catalog::getDestsName(int i)
+const char *Catalog::getDestsName(int i)
 {
   Object *obj;
 
diff --git a/poppler/Catalog.h b/poppler/Catalog.h
index 0467159a..2c18b4a2 100644
--- a/poppler/Catalog.h
+++ b/poppler/Catalog.h
@@ -163,7 +163,7 @@ public:
   int numDests();
 
   // Get the i'th named destination name in name-dict
-  char *getDestsName(int i);
+  const char *getDestsName(int i);
 
   // Get the i'th named destination link destination in name-dict
   LinkDest *getDestsDest(int i);
diff --git a/poppler/Dict.cc b/poppler/Dict.cc
index bc86fd77..5cf2cb25 100644
--- a/poppler/Dict.cc
+++ b/poppler/Dict.cc
@@ -35,10 +35,7 @@
 #endif
 
 #include <algorithm>
-#include <stddef.h>
-#include <string.h>
-#include "goo/gmem.h"
-#include "Object.h"
+
 #include "XRef.h"
 #include "Dict.h"
 
@@ -51,247 +48,150 @@
 // Dict
 //------------------------------------------------------------------------
 
-static const int SORT_LENGTH_LOWER_LIMIT = 32;
-
-static inline bool cmpDictEntries(const DictEntry &e1, const DictEntry &e2)
-{
-  return strcmp(e1.key, e2.key) < 0;
-}
+constexpr int SORT_LENGTH_LOWER_LIMIT = 32;
 
-static int binarySearch(const char *key, DictEntry *entries, int length)
-{
-  int first = 0;
-  int end = length - 1;
-  while (first <= end) {
-    const int middle = (first + end) / 2;
-    const int res = strcmp(key, entries[middle].key);
-    if (res == 0) {
-      return middle;
-    } else if (res < 0) {
-      end = middle - 1;
-    } else {
-      first = middle + 1;
-    }
+struct Dict::CmpDictEntry {
+  bool operator()(const DictEntry &lhs, const DictEntry &rhs) const {
+    return lhs.first < rhs.first;
   }
-  return -1;
-}
+  bool operator()(const DictEntry &lhs, const char *rhs) const {
+    return lhs.first < rhs;
+  }
+  bool operator()(const char *lhs, const DictEntry &rhs) const {
+    return lhs < rhs.first;
+  }
+};
 
 Dict::Dict(XRef *xrefA) {
   xref = xrefA;
-  entries = nullptr;
-  size = length = 0;
   ref = 1;
-  sorted = gFalse;
 #ifdef MULTITHREADED
   gInitMutex(&mutex);
 #endif
+
+  sorted = false;
 }
 
-Dict::Dict(Dict* dictA) {
+Dict::Dict(const Dict* dictA) {
   xref = dictA->xref;
-  size = length = dictA->length;
   ref = 1;
 #ifdef MULTITHREADED
   gInitMutex(&mutex);
 #endif
 
-  sorted = dictA->sorted;
-  entries = (DictEntry *)gmallocn(size, sizeof(DictEntry));
-  for (int i=0; i<length; i++) {
-    entries[i].key = copyString(dictA->entries[i].key);
-    entries[i].val.initNullAfterMalloc();
-    entries[i].val = dictA->entries[i].val.copy();
+  entries.reserve(dictA->entries.size());
+  for (const auto& entry : dictA->entries) {
+    entries.emplace_back(entry.first, entry.second.copy());
   }
+
+  sorted = dictA->sorted;
 }
 
-Dict *Dict::copy(XRef *xrefA) {
+Dict *Dict::copy(XRef *xrefA) const {
   dictLocker();
   Dict *dictA = new Dict(this);
   dictA->xref = xrefA;
-  for (int i=0; i<length; i++) {
-    if (dictA->entries[i].val.getType() == objDict) {
-       Dict *copy = dictA->entries[i].val.getDict()->copy(xrefA);
-       dictA->entries[i].val = Object(copy);
+  for (auto &entry : dictA->entries) {
+    if (entry.second.getType() == objDict) {
+      entry.second = Object(entry.second.getDict()->copy(xrefA));
     }
   }
   return dictA;
 }
 
 Dict::~Dict() {
-  int i;
-
-  for (i = 0; i < length; ++i) {
-    gfree(entries[i].key);
-    entries[i].val.free();
-  }
-  gfree(entries);
 #ifdef MULTITHREADED
   gDestroyMutex(&mutex);
 #endif
 }
 
-int Dict::incRef() {
+void Dict::add(const char *key, Object &&val) {
   dictLocker();
-  ++ref;
-  return ref;
-}
-
-int Dict::decRef() {
-  dictLocker();
-  --ref;
-  return ref;
-}
-
-void Dict::add(char *key, Object &&val) {
-  dictLocker();
-  if (sorted) {
-    // We use add on very few occasions so
-    // virtually this will never be hit
-    sorted = gFalse;
-  }
-
-  if (length == size) {
-    if (length == 0) {
-      size = 8;
-    } else {
-      size *= 2;
+  if (entries.size() >= SORT_LENGTH_LOWER_LIMIT) {
+    if (!sorted) {
+      std::sort(entries.begin(), entries.end(), CmpDictEntry{});
+      sorted = true;
     }
-    entries = (DictEntry *)greallocn(entries, size, sizeof(DictEntry));
+    const auto pos = std::upper_bound(entries.begin(), entries.end(), key, CmpDictEntry{});
+    entries.emplace(pos, key, std::move(val));
+  } else {
+    entries.emplace_back(key, std::move(val));
+    sorted = false;
   }
-  entries[length].key = key;
-  entries[length].val.initNullAfterMalloc();
-  entries[length].val = std::move(val);
-  ++length;
 }
 
-inline DictEntry *Dict::find(const char *key) const {
-  if (!sorted && length >= SORT_LENGTH_LOWER_LIMIT)
-  {
-      dictLocker();
-      sorted = gTrue;
-      std::sort(entries, entries+length, cmpDictEntries);
-  }
-
+inline const Dict::DictEntry *Dict::find(const char *key) const {
   if (sorted) {
-    const int pos = binarySearch(key, entries, length);
-    if (pos != -1) {
-      return &entries[pos];
+    const auto pos = std::lower_bound(entries.begin(), entries.end(), key, CmpDictEntry{});
+    if (pos != entries.end() && pos->first == key) {
+      return &*pos;
     }
   } else {
-    int i;
-
-    for (i = length - 1; i >=0; --i) {
-      if (!strcmp(key, entries[i].key))
-        return &entries[i];
+    const auto pos = std::find_if(entries.rbegin(), entries.rend(), [key](const DictEntry& entry) {
+      return entry.first == key;
+    });
+    if (pos != entries.rend()) {
+      return &*pos;
     }
   }
   return nullptr;
 }
 
-GBool Dict::hasKey(const char *key) const {
-  return find(key) != nullptr;
+inline Dict::DictEntry *Dict::find(const char *key) {
+  return const_cast<DictEntry *>(const_cast<const Dict *>(this)->find(key));
 }
 
 void Dict::remove(const char *key) {
   dictLocker();
-  if (sorted) {
-    const int pos = binarySearch(key, entries, length);
-    if (pos != -1) {
-      length -= 1;
-      gfree(entries[pos].key);
-      entries[pos].val.free();
-      if (pos != length) {
-        memmove(static_cast<void*>(&entries[pos]), &entries[pos + 1], (length - pos) * sizeof(DictEntry));
-      }
-    }
-  } else {
-    int i; 
-    bool found = false;
-    if(length == 0) {
-      return;
-    }
-
-    for(i=0; i<length; i++) {
-      if (!strcmp(key, entries[i].key)) {
-        found = true;
-        break;
-      }
-    }
-    if(!found) {
-      return;
-    }
-    //replace the deleted entry with the last entry
-    gfree(entries[i].key);
-    entries[i].val.free();
-    length -= 1;
-    if (i!=length) {
-      //don't copy the last entry if it is deleted
-      entries[i].key = entries[length].key;
-      entries[i].val = std::move(entries[length].val);
-    }
+  if (auto *entry = find(key)) {
+    entries.erase(std::vector<DictEntry>::iterator{entry});
   }
 }
 
 void Dict::set(const char *key, Object &&val) {
-  DictEntry *e;
   if (val.isNull()) {
     remove(key);
     return;
   }
-  e = find (key);
-  if (e) {
-    dictLocker();
-    e->val = std::move(val);
+  dictLocker();
+  if (auto *entry = find(key)) {
+    entry->second = std::move(val);
   } else {
-    add (copyString(key), std::move(val));
+    add(key, std::move(val));
   }
 }
 
 
 GBool Dict::is(const char *type) const {
-  DictEntry *e;
-
-  return (e = find("Type")) && e->val.isName(type);
+  if (const auto *entry = find("Type")) {
+    return entry->second.isName(type);
+  }
+  return gFalse;
 }
 
 Object Dict::lookup(const char *key, int recursion) const {
-  DictEntry *e;
-
-  return (e = find(key)) ? e->val.fetch(xref, recursion) : Object(objNull);
+  if (const auto *entry = find(key)) {
+    return entry->second.fetch(xref, recursion);
+  }
+  return Object(objNull);
 }
 
 Object Dict::lookupNF(const char *key) const {
-  DictEntry *e;
-
-  return (e = find(key)) ? e->val.copy() : Object(objNull);
+  if (const auto *entry = find(key)) {
+    return entry->second.copy();
+  }
+  return Object(objNull);
 }
 
 GBool Dict::lookupInt(const char *key, const char *alt_key, int *value) const
 {
-  GBool success = gFalse;
-  Object obj1 = lookup ((char *) key);
-  if (obj1.isNull () && alt_key != nullptr) {
-    obj1.free ();
-    obj1 = lookup ((char *) alt_key);
+  auto obj1 = lookup(key);
+  if (obj1.isNull() && alt_key != nullptr) {
+    obj1 = lookup(alt_key);
   }
-  if (obj1.isInt ()) {
-    *value = obj1.getInt ();
-    success = gTrue;
+  if (obj1.isInt()) {
+    *value = obj1.getInt();
+    return gTrue;
   }
-
-  obj1.free ();
-
-  return success;
-}
-
-char *Dict::getKey(int i) const {
-  return entries[i].key;
-}
-
-Object Dict::getVal(int i) const {
-  return entries[i].val.fetch(xref);
-}
-
-Object Dict::getValNF(int i) const {
-  return entries[i].val.copy();
+  return gFalse;
 }
diff --git a/poppler/Dict.h b/poppler/Dict.h
index 9fd732a1..276d8c58 100644
--- a/poppler/Dict.h
+++ b/poppler/Dict.h
@@ -33,6 +33,11 @@
 #pragma interface
 #endif
 
+#include <atomic>
+#include <string>
+#include <vector>
+#include <utility>
+
 #include "poppler-config.h"
 #include "Object.h"
 #include "goo/GooMutex.h"
@@ -41,18 +46,13 @@
 // Dict
 //------------------------------------------------------------------------
 
-struct DictEntry {
-  char *key;
-  Object val;
-};
-
 class Dict {
 public:
 
   // Constructor.
   Dict(XRef *xrefA);
-  Dict(Dict* dictA);
-  Dict *copy(XRef *xrefA);
+  Dict(const Dict *dictA);
+  Dict *copy(XRef *xrefA) const;
 
   // Destructor.
   ~Dict();
@@ -61,11 +61,11 @@ public:
   Dict& operator=(const Dict &) = delete;
 
   // Get number of entries.
-  int getLength() const { return length; }
+  int getLength() const { return static_cast<int>(entries.size()); }
 
-  // Add an entry.  NB: does not copy key.
+  // Add an entry.
   // val becomes a dead object after the call
-  void add(char *key, Object &&val);
+  void add(const char *key, Object &&val);
 
   // Update the value of an existing entry, otherwise create it
   // val becomes a dead object after the call
@@ -83,9 +83,9 @@ public:
   GBool lookupInt(const char *key, const char *alt_key, int *value) const;
 
   // Iterative accessors.
-  char *getKey(int i) const;
-  Object getVal(int i) const;
-  Object getValNF(int i) const;
+  const char *getKey(int i) const { return entries[i].first.c_str(); }
+  Object getVal(int i) const { return entries[i].second.fetch(xref); }
+  Object getValNF(int i) const { return entries[i].second.copy(); }
 
   // Set the xref pointer.  This is only used in one special case: the
   // trailer dictionary, which is read before the xref table is
@@ -94,26 +94,28 @@ public:
   
   XRef *getXRef() const { return xref; }
   
-  GBool hasKey(const char *key) const;
+  GBool hasKey(const char *key) const { return find(key) != nullptr; }
 
 private:
   friend class Object; // for incRef/decRef
 
   // Reference counting.
-  int incRef();
-  int decRef();
+  int incRef() { return ++ref; }
+  int decRef() { return --ref; }
+
+  using DictEntry = std::pair<std::string, Object>;
+  struct CmpDictEntry;
 
-  mutable GBool sorted;
+  bool sorted;
   XRef *xref;			// the xref table for this PDF file
-  DictEntry *entries;		// array of entries
-  int size;			// size of <entries> array
-  int length;			// number of entries in dictionary
-  int ref;			// reference count
+  std::vector<DictEntry> entries;
+  std::atomic_int ref;			// reference count
 #ifdef MULTITHREADED
   mutable GooMutex mutex;
 #endif
 
-  DictEntry *find(const char *key) const;
+  const DictEntry *find(const char *key) const;
+  DictEntry *find(const char *key);
 };
 
 #endif
diff --git a/poppler/Form.cc b/poppler/Form.cc
index 820944e8..a4aab1a5 100644
--- a/poppler/Form.cc
+++ b/poppler/Form.cc
@@ -193,7 +193,7 @@ FormWidgetButton::FormWidgetButton (PDFDoc *docA, Object *aobj, unsigned num, Re
     Object obj2 = obj1.dictLookup("N");
     if (obj2.isDict()) {
       for (int i = 0; i < obj2.dictGetLength(); i++) {
-        char *key = obj2.dictGetKey(i);
+        const char *key = obj2.dictGetKey(i);
         if (strcmp (key, "Off") != 0) {
           onStr = new GooString (key);
           break;
diff --git a/poppler/Object.h b/poppler/Object.h
index 97978f46..bcfd8ba9 100644
--- a/poppler/Object.h
+++ b/poppler/Object.h
@@ -269,7 +269,7 @@ public:
   GBool dictIs(const char *dictType) const;
   Object dictLookup(const char *key, int recursion = 0) const;
   Object dictLookupNF(const char *key) const;
-  char *dictGetKey(int i) const;
+  const char *dictGetKey(int i) const;
   Object dictGetVal(int i) const;
   Object dictGetValNF(int i) const;
 
@@ -374,7 +374,7 @@ inline Object Object::dictLookup(const char *key, int recursion) const
 inline Object Object::dictLookupNF(const char *key) const
   { OBJECT_TYPE_CHECK(objDict); return dict->lookupNF(key); }
 
-inline char *Object::dictGetKey(int i) const
+inline const char *Object::dictGetKey(int i) const
   { OBJECT_TYPE_CHECK(objDict); return dict->getKey(i); }
 
 inline Object Object::dictGetVal(int i) const
-- 
2.16.3

Attachment: signature.asc
Description: OpenPGP digital signature

_______________________________________________
poppler mailing list
[email protected]
https://lists.freedesktop.org/mailman/listinfo/poppler

Reply via email to