Hi all,
  as an experiment and to encourage some discussion I prepared an
alternate implementation of TrieNode which uses a std::map instead of
an array to store a node's children.

The expected result is a worst case performance degradation on insert
and delete from O(N) to O(N log R) where N is the length of the
c-string being looked up, and R is the size of the alphabet (as R =
256, we're talking about 8x worse).

The expected benefit is a noticeable reduction in terms of memory use,
especially for sparse key-spaces; it'd be useful e.g. in some lookup
cases.

Comments?

-- 
    Francesco
=== modified file 'lib/libTrie/Trie.cc'
--- lib/libTrie/Trie.cc	2014-06-03 10:49:08 +0000
+++ lib/libTrie/Trie.cc	2014-06-03 14:16:21 +0000
@@ -48,7 +48,7 @@
         return head->add(aString, theLength, privatedata, transform);
     }
 
-    head = new TrieNode;
+    head = new node_type;
 
     return head->add(aString, theLength, privatedata, transform);
 }

=== modified file 'lib/libTrie/Trie.h'
--- lib/libTrie/Trie.h	2014-06-03 10:49:08 +0000
+++ lib/libTrie/Trie.h	2014-06-03 14:20:17 +0000
@@ -58,7 +58,8 @@
     bool add(char const *, size_t, void *);
 
 private:
-    TrieNode *head;
+    typedef MapTrieNode node_type;
+    node_type *head;
 
     /* transfor each 8 bits in the element */
     TrieCharTransform *transform;

=== modified file 'lib/libTrie/TrieNode.cc'
--- lib/libTrie/TrieNode.cc	2014-06-03 11:59:56 +0000
+++ lib/libTrie/TrieNode.cc	2014-06-03 13:09:24 +0000
@@ -24,13 +24,13 @@
 #include <unistd.h>
 #endif
 
-TrieNode::TrieNode() : _privateData(NULL)
+ArrayTrieNode::ArrayTrieNode() : _privateData(NULL)
 {
     for (int i = 0; i < 256; ++i)
         internal[i] = NULL;
 }
 
-TrieNode::~TrieNode()
+ArrayTrieNode::~ArrayTrieNode()
 {
     for (int i = 0; i < 256; ++i)
         delete internal[i];
@@ -38,7 +38,7 @@
 
 /* as for find */
 bool
-TrieNode::add(char const *aString, size_t theLength, void *privatedata, TrieCharTransform *transform)
+ArrayTrieNode::add(char const *aString, size_t theLength, void *privatedata, TrieCharTransform *transform)
 {
     /* We trust that privatedata and existant keys have already been checked */
 
@@ -46,7 +46,7 @@
         int index = transform ? (*transform)(*aString): *aString;
 
         if (!internal[index])
-            internal[index] = new TrieNode;
+            internal[index] = new ArrayTrieNode;
 
         return internal[index]->add(aString + 1, theLength - 1, privatedata, transform);
     } else {

=== modified file 'lib/libTrie/TrieNode.h'
--- lib/libTrie/TrieNode.h	2014-06-03 10:49:08 +0000
+++ lib/libTrie/TrieNode.h	2014-06-03 14:12:01 +0000
@@ -29,14 +29,14 @@
 * i.e. M-ary internal node sizes etc
 */
 
-class TrieNode
+class ArrayTrieNode
 {
 
 public:
-    TrieNode();
-    ~TrieNode();
-    TrieNode(TrieNode const &);
-    TrieNode &operator= (TrieNode const &);
+    ArrayTrieNode();
+    ~ArrayTrieNode();
+    ArrayTrieNode(ArrayTrieNode const &);
+    ArrayTrieNode &operator= (ArrayTrieNode const &);
 
     /* Find a string.
     * If found, return the private data.
@@ -58,7 +58,7 @@
     * internal[0] is the terminal node for
     * a string and may not be used
     */
-    TrieNode * internal[256];
+    ArrayTrieNode * internal[256];
 
     /* If a string ends here, non NULL */
     void *_privateData;
@@ -66,7 +66,7 @@
 
 /* recursive. TODO? make iterative */
 void *
-TrieNode::find (char const *aString, size_t theLength, TrieCharTransform *transform, bool const prefix) const
+ArrayTrieNode::find (char const *aString, size_t theLength, TrieCharTransform *transform, bool const prefix) const
 {
     if (theLength) {
         int index = -1;
@@ -92,4 +92,79 @@
         return _privateData;
     }
 }
+
+#include <map>
+class MapTrieNode {
+public:
+    MapTrieNode() : _privateData(NULL) {}
+    inline ~MapTrieNode();
+    MapTrieNode(MapTrieNode const &);
+    MapTrieNode &operator=(MapTrieNode const &);
+
+    inline void *find(char const *, size_t, TrieCharTransform *, bool const prefix) const;
+    inline bool add (char const *, size_t, void *, TrieCharTransform *);
+
+private:
+    typedef std::map<unsigned char, MapTrieNode *> internalmap;
+    internalmap internal;
+    void *_privateData;
+};
+
+MapTrieNode::~MapTrieNode()
+{
+    using std::map;
+    for (internalmap::iterator i =internal.begin(); i != internal.end(); ++i)
+        if (i->second)
+            delete i->second;
+}
+
+bool
+MapTrieNode::add(char const *aString, size_t theLength, void *privatedata, TrieCharTransform *transform)
+{
+    if (theLength) {
+        int index = transform ? (*transform)(*aString): *aString;
+        const internalmap::iterator i = internal.find(index);
+
+        if (i == internal.end())
+            internal[index] = new MapTrieNode;
+
+        return internal[index]->add(aString + 1, theLength - 1, privatedata, transform);
+    } else {
+        /* terminal node */
+
+        if (_privateData)
+            return false;
+
+        _privateData = privatedata;
+
+        return true;
+    }
+}
+
+void *
+MapTrieNode::find (char const *aString, size_t theLength, TrieCharTransform *transform, bool const prefix) const
+{
+    if (theLength) {
+        unsigned char pos = transform ? (*transform) (*aString) : *aString;
+
+        internalmap::const_iterator i = internal.find(pos);
+        if (i != internal.end()) {
+            void *result;
+            result = i->second->find(aString + 1, theLength - 1, transform, prefix);
+
+            if (result)
+                return result;
+        }
+
+        if (prefix)
+            return _privateData;
+
+        return NULL;
+    } else {
+        /* terminal node */
+        return _privateData;
+    }
+
+}
+
 #endif /* LIBTRIE_TRIENODE_H */

Reply via email to