Markos Zaharioudakis has proposed merging lp:~zorba-coders/zorba/markos-scratch 
into lp:zorba.

Requested reviews:
  Markos Zaharioudakis (markos-za)

For more details, see:
https://code.launchpad.net/~zorba-coders/zorba/markos-scratch/+merge/112827

Optimized hash function used for nodes (fixes bug #1010051) + some 
hashmap/hashset cleanup
-- 
https://code.launchpad.net/~zorba-coders/zorba/markos-scratch/+merge/112827
Your team Zorba Coders is subscribed to branch lp:zorba.
=== modified file 'ChangeLog'
--- ChangeLog	2012-06-29 13:25:20 +0000
+++ ChangeLog	2012-06-29 18:25:24 +0000
@@ -17,6 +17,7 @@
   * Improved hoist rule: tighter hoisting of expressions (also fixes bug #967428,
     which is only a performance bug)
   * Optimized hash sets used by fn:distinct-values and nodes-distinct
+  * Optimized hash function used for nodes (also fixes bug #1010051)
   * No more serialization of compiler expressions.
   * Big rewrite of plan serializer internals, resulting in big performance improvement.
 

=== modified file 'src/runtime/sequences/SequencesImpl.cpp'
--- src/runtime/sequences/SequencesImpl.cpp	2012-06-28 04:14:03 +0000
+++ src/runtime/sequences/SequencesImpl.cpp	2012-06-29 18:25:24 +0000
@@ -48,7 +48,6 @@
 #include "store/api/iterator.h"
 #include "store/api/item_factory.h"
 #include "store/api/pul.h"
-#include "store/util/hashset_node_handle.h"
 
 #include "context/static_context.h"
 #include "context/dynamic_context.h"

=== modified file 'src/runtime/sequences/SequencesImpl.h'
--- src/runtime/sequences/SequencesImpl.h	2012-06-28 04:14:03 +0000
+++ src/runtime/sequences/SequencesImpl.h	2012-06-29 18:25:24 +0000
@@ -34,13 +34,6 @@
 namespace zorba
 {
 
-namespace store 
-{
-  class NodeHashSet;
-}
-
-class ValueCollCompareParam;
-
 
 /////////////////////////////////////////////////////////////////////////////////
 //                                                                             //

=== modified file 'src/runtime/sequences/pregenerated/sequences.h'
--- src/runtime/sequences/pregenerated/sequences.h	2012-06-28 04:14:03 +0000
+++ src/runtime/sequences/pregenerated/sequences.h	2012-06-29 18:25:24 +0000
@@ -34,9 +34,7 @@
 
 
 namespace zorba {
-namespace store{
-  class NodeHashSet;
-}
+class NodeHandleHashSet;
 class AtomicItemHandleHashSet;
 /**
  * 
@@ -708,7 +706,7 @@
 class HashSemiJoinIteratorState : public PlanIteratorState
 {
 public:
-  store::NodeHashSet* theRightInput; //
+  NodeHandleHashSet* theRightInput; //
 
   HashSemiJoinIteratorState();
 

=== modified file 'src/runtime/sequences/sequences_impl.cpp'
--- src/runtime/sequences/sequences_impl.cpp	2012-06-28 04:14:03 +0000
+++ src/runtime/sequences/sequences_impl.cpp	2012-06-29 18:25:24 +0000
@@ -53,10 +53,10 @@
 #include <store/api/item_factory.h>
 #include "store/api/temp_seq.h"
 #include <store/api/pul.h>
-#include <store/util/hashset_node_handle.h>
 
 #include <context/static_context.h>
 
+#include "zorbautils/hashset_node_itemh.h"
 #include "zorbautils/hashset_atomic_itemh.h"
 
 namespace zorbatm = zorba::time;
@@ -1106,7 +1106,7 @@
 ********************************************************************************/
 HashSemiJoinIteratorState::HashSemiJoinIteratorState()
 {
-  theRightInput = new store::NodeHashSet();
+  theRightInput = new NodeHandleHashSet(1024, false);
 }
 
 

=== modified file 'src/runtime/spec/sequences/sequences.xml'
--- src/runtime/spec/sequences/sequences.xml	2012-06-28 04:14:03 +0000
+++ src/runtime/spec/sequences/sequences.xml	2012-06-29 18:25:24 +0000
@@ -15,7 +15,7 @@
 <zorba:header>
     <zorba:include form="Quoted">runtime/base/narybase.h</zorba:include>
     <zorba:include form="Quoted">runtime/core/path_iterators.h</zorba:include>
-    <zorba:fwd-decl ns="store">NodeHashSet</zorba:fwd-decl>
+    <zorba:fwd-decl ns="zorba">NodeHandleHashSet</zorba:fwd-decl>
     <zorba:fwd-decl ns="zorba">AtomicItemHandleHashSet</zorba:fwd-decl>
 </zorba:header>
 
@@ -680,7 +680,7 @@
 
   <zorba:state generateInit="false" generateReset="false"
                generateConstructor="false" generateDestructor="false">
-    <zorba:member type="store::NodeHashSet*" name="theRightInput" brief=""/>
+    <zorba:member type="NodeHandleHashSet*" name="theRightInput" brief=""/>
   </zorba:state>
 
   <zorba:constructor>

=== modified file 'src/store/naive/node_items.cpp'
--- src/store/naive/node_items.cpp	2012-06-28 04:14:03 +0000
+++ src/store/naive/node_items.cpp	2012-06-29 18:25:24 +0000
@@ -471,9 +471,29 @@
 /*******************************************************************************
 
 ********************************************************************************/
-void XmlNode::setTreeInternal(const XmlTree* aNewTree)
-{
-  theUnion.treeRCPtr = (long*)aNewTree;
+bool XmlNode::equals(const store::Item* other, long, const XQPCollator*) const
+{
+  assert(!isConnectorNode());
+  return this == other;
+}
+
+
+/*******************************************************************************
+
+********************************************************************************/
+uint32_t XmlNode::hash(long, const XQPCollator*) const
+{
+  assert(!isConnectorNode());
+  return reinterpret_cast<uintptr_t>(this);
+}
+
+
+/*******************************************************************************
+
+********************************************************************************/
+void XmlNode::setTreeInternal(const XmlTree* newTree)
+{
+  theUnion.treeRCPtr = (long*)newTree;
 }
 
 

=== modified file 'src/store/naive/node_items.h'
--- src/store/naive/node_items.h	2012-06-28 04:14:03 +0000
+++ src/store/naive/node_items.h	2012-06-29 18:25:24 +0000
@@ -387,6 +387,7 @@
 
 private:
   void setTreeInternal(const XmlTree* t);
+
   void setTree(const XmlTree* t);
 
   void destroyInternal(bool removeType);
@@ -435,20 +436,9 @@
   bool equals(
       const store::Item* other,
       long timezone = 0,
-      const XQPCollator* aCollation = 0) const
-  {
-    assert(!isConnectorNode());
-    return this == other;
-  }
-
-  uint32_t hash(long timezone = 0, const XQPCollator* aCollation = 0) const
-  {
-    assert(!isConnectorNode());
-    XmlNode* node = const_cast<XmlNode*>(this);
-    return hashfun::h32((void*)(&node), sizeof(node), FNV_32_INIT);
-  }
-
-  inline long compare2(const XmlNode* other) const;
+      const XQPCollator* aCollation = 0) const;
+
+  uint32_t hash(long timezone = 0, const XQPCollator* aCollation = 0) const;
 
   void getBaseURI(zstring& uri) const
   {
@@ -524,6 +514,8 @@
   GuideNode* getDataGuide() const { return getTree()->getDataGuide(); }
 #endif
 
+  inline long compare2(const XmlNode* other) const;
+
   virtual XmlNode* copyInternal(
       InternalNode* rootParent,
       InternalNode* parent,

=== removed file 'src/store/util/hashset_node_handle.h'
--- src/store/util/hashset_node_handle.h	2012-06-28 04:14:03 +0000
+++ src/store/util/hashset_node_handle.h	1970-01-01 00:00:00 +0000
@@ -1,70 +0,0 @@
-/*
- * Copyright 2006-2008 The FLWOR Foundation.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- * 
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#ifndef ZORBA_STORE_UTIL_NODE_HANDLE_HASHSET
-#define ZORBA_STORE_UTIL_NODE_HANDLE_HASHSET
-
-#include "common/common.h"
-#include "zorbautils/hashset.h"
-
-namespace zorba { namespace store {
-
-
-/*******************************************************************************
-  A hash-based set container of node rchandles, where equality is based on
-  node identity
-********************************************************************************/
-class NodeHashSet
-{
-public:
-
-  class CompareFunction
-  {
-  public:
-    bool equal(const Item_t& t1, const Item_t& t2) const
-    {
-      return t1->equals(t2, 0);
-    }
-
-    uint32_t hash(const Item_t& t) const
-    {
-      return t->hash(0);
-    }
-  };
-
-private:
-  HashSet<Item_t, CompareFunction>  theSet;
-
-public:
-  NodeHashSet(ulong size = 1024) : theSet(size, false) 
-  {
-  }
-
-  void clear() { theSet.clear(); }
-
-  bool empty() const { return theSet.empty(); }
-
-  bool exists(const Item_t& key) { return theSet.exists(key); }
-
-  bool insert(Item_t& key) { return theSet.insert(key); }
-
-  bool erase(const Item_t& key) { return theSet.erase(key); }
-};
-
-} // namespace store
-} // namespace zorba
-
-#endif
-/* vim:set et sw=2 ts=2: */

=== modified file 'src/zorbautils/hashmap.h'
--- src/zorbautils/hashmap.h	2012-06-28 04:14:03 +0000
+++ src/zorbautils/hashmap.h	2012-06-29 18:25:24 +0000
@@ -311,6 +311,15 @@
 /*******************************************************************************
 
 ********************************************************************************/
+Mutex* get_mutex() const
+{
+  return theMutexp;
+}
+
+
+/*******************************************************************************
+
+********************************************************************************/
 void set_load_factor(double v)
 {
   theLoadFactor = v;
@@ -410,13 +419,13 @@
   Return true if the set already contains an item that is "equal" to the given
   item; otherwise return false.
 ********************************************************************************/
-bool exists(const T& item)
+bool exists(const T& item) const
 {
   ulong hval = hash(item);
 
   SYNC_CODE(AutoMutex lock(theMutexp);)
 
-  HashEntry<T, V>* entry = bucket(hval);
+  const HashEntry<T, V>* entry = bucket(hval);
 
   if (entry->isFree())
     return false;

=== modified file 'src/zorbautils/hashmap_itemh.h'
--- src/zorbautils/hashmap_itemh.h	2012-06-28 04:14:03 +0000
+++ src/zorbautils/hashmap_itemh.h	2012-06-29 18:25:24 +0000
@@ -62,14 +62,24 @@
 };
 
 
+/*******************************************************************************
+  A hash-based map mapping item handles to data items of type V. Equality is
+  based on the store::Item::equals() method.
+
+  It is used to map annotation names to annotation ids.
+
+  NOTE: Although the map uses raw item pointers instead of rchandles, reference
+        counting is still done, but done manually (see insert and clear methods)
+********************************************************************************/
 template <class V>
-class ItemHandleHashMap : public HashMap<store::Item*,
-                                         V,
-                                         ItemHandleHashMapCmp>
+class ItemHandleHashMap
 {
 public:
   typedef typename HashMap<store::Item*, V, ItemHandleHashMapCmp>::iterator iterator;
 
+private:
+  HashMap<store::Item*, V, ItemHandleHashMapCmp> theMap;
+
 public:
   ItemHandleHashMap(
         long timezone,
@@ -77,27 +87,18 @@
         ulong size,
         bool sync) 
     :
-    HashMap<store::Item*, V, ItemHandleHashMapCmp>(
-            ItemHandleHashMapCmp(timezone, collation),
-            size,
-            sync)
+    theMap(ItemHandleHashMapCmp(timezone, collation), size, sync)
   {
   }
   
  ~ItemHandleHashMap()
   {
-    iterator ite = this->begin();
-    iterator end = this->end();
-
-    for (; ite != end; ++ite)
-    {
-      (*ite).first->removeReference();
-    }
+    clear();
   }
 
   void clear()
   {
-    SYNC_CODE(AutoMutex lock(this->theMutexp);)
+    SYNC_CODE(AutoMutex lock(theMap.get_mutex());)
 
     iterator ite = this->begin();
     iterator end = this->end();
@@ -107,33 +108,38 @@
       (*ite).first->removeReference();
     }
 
-    HashMap<store::Item*, V, ItemHandleHashMapCmp>::clearNoSync();
+    theMap.clearNoSync();
   }
   
-  void eraseEntry(
-      HashEntry<store::Item*, V>* entry,
-      HashEntry<store::Item*, V>* preventry)
-  {
-    entry->theItem->removeReference();
+  iterator begin() const { return theMap.begin(); }
 
-    HashMap<store::Item*, V, ItemHandleHashMapCmp>::eraseEntry(entry, preventry);
-  }
+  iterator end() const { return theMap.end(); }
 
   iterator find(const store::Item_t& item)
   {
-    return HashMap<store::Item*, V, ItemHandleHashMapCmp>::find(item.getp());
+    return theMap.find(item.getp());
   }
 
   bool insert(const store::Item_t& item, V& value)
   {
-    bool inserted = 
-    HashMap<store::Item*, V, ItemHandleHashMapCmp>::insert(item.getp(), value);
+    bool inserted = theMap.insert(item.getp(), value);
                     
     if (inserted)
       item->addReference();
 
     return inserted;
   }
+
+  bool erase(const store::Item_t& key)
+  {
+    bool found = theMap.erase(key.getp());
+
+    if (found)
+      key->removeReference();
+
+    return found;
+  }
+
 };
 
 }

=== removed file 'src/zorbautils/hashmap_str_obj.h'
--- src/zorbautils/hashmap_str_obj.h	2012-06-28 04:14:03 +0000
+++ src/zorbautils/hashmap_str_obj.h	1970-01-01 00:00:00 +0000
@@ -1,81 +0,0 @@
-/*
- * Copyright 2006-2008 The FLWOR Foundation.
- * 
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- * 
- * http://www.apache.org/licenses/LICENSE-2.0
- * 
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#pragma once
-#ifndef ZORBA_HASHMAP_CHAR_PTR_TO_CLASS_POINTER
-#define ZORBA_HASHMAP_CHAR_PTR_TO_CLASS_POINTER
-
-#include "zorbautils/hashmap.h"
-#include "zorbautils/hashfun.h"
-
-namespace zorba {
-
-class CompareCharPtr
-{
-public:
-  static uint32_t hash(const char* str)
-  {
-    return hashfun::h32(str, FNV_32_INIT);
-  }
-
-  static bool equal(const char* s1, const char* s2)
-  {
-    return !strcmp(s1, s2);
-  }
-};
-
-
-template<typename T>
-class HashCharPtrObj : public HashMap<const char *, T, CompareCharPtr>
-{
-protected:
-  bool theIsOwner;
-
-public:
-  HashCharPtrObj(bool sync, bool owner) 
-    :
-    HashMap<const char *, T, CompareCharPtr>(1024, sync),
-    theIsOwner(owner)
-  {
-  }
-
-  virtual ~HashCharPtrObj() 
-  {
-    if (theIsOwner)
-      freeAll();
-  }
-
-  void freeAll()
-  {
-    typename HashMap<const char *, T, CompareCharPtr>::iterator  it;
-    for(it = HashMap<const char *, T, CompareCharPtr>::begin();
-        it != HashMap<const char *, T, CompareCharPtr>::end();
-        ++it)
-    {
-      free((void*)(*it).first);
-    }
-  }
-};
-
-
-} // namespace zorba
-
-#endif
-/*
- * Local variables:
- * mode: c++
- * End:
- */
-/* vim:set et sw=2 ts=2: */

=== modified file 'src/zorbautils/hashset.h'
--- src/zorbautils/hashset.h	2012-06-28 04:14:03 +0000
+++ src/zorbautils/hashset.h	2012-06-29 18:25:24 +0000
@@ -77,7 +77,7 @@
   Return true if the set already contains an item that is "equal" to the given
   item; otherwise return false.
 ********************************************************************************/
-bool exists(const T& item)
+bool exists(const T& item) const
 {
   return HashMap<T, DummyHashValue, C>::exists(item);
 }

=== modified file 'src/zorbautils/hashset_node_itemh.h'
--- src/zorbautils/hashset_node_itemh.h	2012-06-28 13:52:31 +0000
+++ src/zorbautils/hashset_node_itemh.h	2012-06-29 18:25:24 +0000
@@ -31,8 +31,10 @@
   A hash-based set container of item handles, where equality is based on
   object identity (i.e. pointer equality) rather than object value.
 
-  It is used by the NodeDistinctIterator and for the population of a general
-  index. 
+  It is used 
+  1. by the NodeDistinctIterator
+  2. for the population of a general index. 
+  3. by the HashSemiJoinIterator, which implements op:intersect and op:except.
 
   NOTE: Although the set uses raw item pointers instead of rchandles, reference
         counting is still done, but done manually (see insert and clear methods)
@@ -65,16 +67,20 @@
 
   void clear();
 
-  bool exists(store::Item* const key) 
+  bool empty() const { return theSet.empty(); }
+
+  bool exists(const store::Item_t& key) const 
   {
-    return theSet.exists(key); 
+    return theSet.exists(key.getp());
   }
 
-  bool insert(store::Item* key) 
+  bool insert(const store::Item_t& key)
   {
     assert(key->isNode());
 
-    bool inserted = theSet.insert(key);
+    store::Item* key2 = key.getp();
+
+    bool inserted = theSet.insert(key2);
 
     if (inserted) 
     {
@@ -82,6 +88,16 @@
     }
     return inserted;
   }
+
+  bool erase(const store::Item_t& key)
+  {
+    bool found = theSet.erase(key.getp());
+
+    if (found)
+      key->removeReference();
+
+    return found;
+  }
 };
 
 

-- 
Mailing list: https://launchpad.net/~zorba-coders
Post to     : zorba-coders@lists.launchpad.net
Unsubscribe : https://launchpad.net/~zorba-coders
More help   : https://help.launchpad.net/ListHelp

Reply via email to