Markos Zaharioudakis has proposed merging lp:~zorba-coders/zorba/markos-scratch into lp:zorba.
Requested reviews: Markos Zaharioudakis (markos-za) For more details, see: https://code.launchpad.net/~zorba-coders/zorba/markos-scratch/+merge/112827 Optimized hash function used for nodes (fixes bug #1010051) + some hashmap/hashset cleanup -- https://code.launchpad.net/~zorba-coders/zorba/markos-scratch/+merge/112827 Your team Zorba Coders is subscribed to branch lp:zorba.
=== modified file 'ChangeLog' --- ChangeLog 2012-06-29 13:25:20 +0000 +++ ChangeLog 2012-06-29 18:25:24 +0000 @@ -17,6 +17,7 @@ * Improved hoist rule: tighter hoisting of expressions (also fixes bug #967428, which is only a performance bug) * Optimized hash sets used by fn:distinct-values and nodes-distinct + * Optimized hash function used for nodes (also fixes bug #1010051) * No more serialization of compiler expressions. * Big rewrite of plan serializer internals, resulting in big performance improvement. === modified file 'src/runtime/sequences/SequencesImpl.cpp' --- src/runtime/sequences/SequencesImpl.cpp 2012-06-28 04:14:03 +0000 +++ src/runtime/sequences/SequencesImpl.cpp 2012-06-29 18:25:24 +0000 @@ -48,7 +48,6 @@ #include "store/api/iterator.h" #include "store/api/item_factory.h" #include "store/api/pul.h" -#include "store/util/hashset_node_handle.h" #include "context/static_context.h" #include "context/dynamic_context.h" === modified file 'src/runtime/sequences/SequencesImpl.h' --- src/runtime/sequences/SequencesImpl.h 2012-06-28 04:14:03 +0000 +++ src/runtime/sequences/SequencesImpl.h 2012-06-29 18:25:24 +0000 @@ -34,13 +34,6 @@ namespace zorba { -namespace store -{ - class NodeHashSet; -} - -class ValueCollCompareParam; - ///////////////////////////////////////////////////////////////////////////////// // // === modified file 'src/runtime/sequences/pregenerated/sequences.h' --- src/runtime/sequences/pregenerated/sequences.h 2012-06-28 04:14:03 +0000 +++ src/runtime/sequences/pregenerated/sequences.h 2012-06-29 18:25:24 +0000 @@ -34,9 +34,7 @@ namespace zorba { -namespace store{ - class NodeHashSet; -} +class NodeHandleHashSet; class AtomicItemHandleHashSet; /** * @@ -708,7 +706,7 @@ class HashSemiJoinIteratorState : public PlanIteratorState { public: - store::NodeHashSet* theRightInput; // + NodeHandleHashSet* theRightInput; // HashSemiJoinIteratorState(); === modified file 'src/runtime/sequences/sequences_impl.cpp' --- src/runtime/sequences/sequences_impl.cpp 2012-06-28 04:14:03 +0000 +++ src/runtime/sequences/sequences_impl.cpp 2012-06-29 18:25:24 +0000 @@ -53,10 +53,10 @@ #include <store/api/item_factory.h> #include "store/api/temp_seq.h" #include <store/api/pul.h> -#include <store/util/hashset_node_handle.h> #include <context/static_context.h> +#include "zorbautils/hashset_node_itemh.h" #include "zorbautils/hashset_atomic_itemh.h" namespace zorbatm = zorba::time; @@ -1106,7 +1106,7 @@ ********************************************************************************/ HashSemiJoinIteratorState::HashSemiJoinIteratorState() { - theRightInput = new store::NodeHashSet(); + theRightInput = new NodeHandleHashSet(1024, false); } === modified file 'src/runtime/spec/sequences/sequences.xml' --- src/runtime/spec/sequences/sequences.xml 2012-06-28 04:14:03 +0000 +++ src/runtime/spec/sequences/sequences.xml 2012-06-29 18:25:24 +0000 @@ -15,7 +15,7 @@ <zorba:header> <zorba:include form="Quoted">runtime/base/narybase.h</zorba:include> <zorba:include form="Quoted">runtime/core/path_iterators.h</zorba:include> - <zorba:fwd-decl ns="store">NodeHashSet</zorba:fwd-decl> + <zorba:fwd-decl ns="zorba">NodeHandleHashSet</zorba:fwd-decl> <zorba:fwd-decl ns="zorba">AtomicItemHandleHashSet</zorba:fwd-decl> </zorba:header> @@ -680,7 +680,7 @@ <zorba:state generateInit="false" generateReset="false" generateConstructor="false" generateDestructor="false"> - <zorba:member type="store::NodeHashSet*" name="theRightInput" brief=""/> + <zorba:member type="NodeHandleHashSet*" name="theRightInput" brief=""/> </zorba:state> <zorba:constructor> === modified file 'src/store/naive/node_items.cpp' --- src/store/naive/node_items.cpp 2012-06-28 04:14:03 +0000 +++ src/store/naive/node_items.cpp 2012-06-29 18:25:24 +0000 @@ -471,9 +471,29 @@ /******************************************************************************* ********************************************************************************/ -void XmlNode::setTreeInternal(const XmlTree* aNewTree) -{ - theUnion.treeRCPtr = (long*)aNewTree; +bool XmlNode::equals(const store::Item* other, long, const XQPCollator*) const +{ + assert(!isConnectorNode()); + return this == other; +} + + +/******************************************************************************* + +********************************************************************************/ +uint32_t XmlNode::hash(long, const XQPCollator*) const +{ + assert(!isConnectorNode()); + return reinterpret_cast<uintptr_t>(this); +} + + +/******************************************************************************* + +********************************************************************************/ +void XmlNode::setTreeInternal(const XmlTree* newTree) +{ + theUnion.treeRCPtr = (long*)newTree; } === modified file 'src/store/naive/node_items.h' --- src/store/naive/node_items.h 2012-06-28 04:14:03 +0000 +++ src/store/naive/node_items.h 2012-06-29 18:25:24 +0000 @@ -387,6 +387,7 @@ private: void setTreeInternal(const XmlTree* t); + void setTree(const XmlTree* t); void destroyInternal(bool removeType); @@ -435,20 +436,9 @@ bool equals( const store::Item* other, long timezone = 0, - const XQPCollator* aCollation = 0) const - { - assert(!isConnectorNode()); - return this == other; - } - - uint32_t hash(long timezone = 0, const XQPCollator* aCollation = 0) const - { - assert(!isConnectorNode()); - XmlNode* node = const_cast<XmlNode*>(this); - return hashfun::h32((void*)(&node), sizeof(node), FNV_32_INIT); - } - - inline long compare2(const XmlNode* other) const; + const XQPCollator* aCollation = 0) const; + + uint32_t hash(long timezone = 0, const XQPCollator* aCollation = 0) const; void getBaseURI(zstring& uri) const { @@ -524,6 +514,8 @@ GuideNode* getDataGuide() const { return getTree()->getDataGuide(); } #endif + inline long compare2(const XmlNode* other) const; + virtual XmlNode* copyInternal( InternalNode* rootParent, InternalNode* parent, === removed file 'src/store/util/hashset_node_handle.h' --- src/store/util/hashset_node_handle.h 2012-06-28 04:14:03 +0000 +++ src/store/util/hashset_node_handle.h 1970-01-01 00:00:00 +0000 @@ -1,70 +0,0 @@ -/* - * Copyright 2006-2008 The FLWOR Foundation. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#ifndef ZORBA_STORE_UTIL_NODE_HANDLE_HASHSET -#define ZORBA_STORE_UTIL_NODE_HANDLE_HASHSET - -#include "common/common.h" -#include "zorbautils/hashset.h" - -namespace zorba { namespace store { - - -/******************************************************************************* - A hash-based set container of node rchandles, where equality is based on - node identity -********************************************************************************/ -class NodeHashSet -{ -public: - - class CompareFunction - { - public: - bool equal(const Item_t& t1, const Item_t& t2) const - { - return t1->equals(t2, 0); - } - - uint32_t hash(const Item_t& t) const - { - return t->hash(0); - } - }; - -private: - HashSet<Item_t, CompareFunction> theSet; - -public: - NodeHashSet(ulong size = 1024) : theSet(size, false) - { - } - - void clear() { theSet.clear(); } - - bool empty() const { return theSet.empty(); } - - bool exists(const Item_t& key) { return theSet.exists(key); } - - bool insert(Item_t& key) { return theSet.insert(key); } - - bool erase(const Item_t& key) { return theSet.erase(key); } -}; - -} // namespace store -} // namespace zorba - -#endif -/* vim:set et sw=2 ts=2: */ === modified file 'src/zorbautils/hashmap.h' --- src/zorbautils/hashmap.h 2012-06-28 04:14:03 +0000 +++ src/zorbautils/hashmap.h 2012-06-29 18:25:24 +0000 @@ -311,6 +311,15 @@ /******************************************************************************* ********************************************************************************/ +Mutex* get_mutex() const +{ + return theMutexp; +} + + +/******************************************************************************* + +********************************************************************************/ void set_load_factor(double v) { theLoadFactor = v; @@ -410,13 +419,13 @@ Return true if the set already contains an item that is "equal" to the given item; otherwise return false. ********************************************************************************/ -bool exists(const T& item) +bool exists(const T& item) const { ulong hval = hash(item); SYNC_CODE(AutoMutex lock(theMutexp);) - HashEntry<T, V>* entry = bucket(hval); + const HashEntry<T, V>* entry = bucket(hval); if (entry->isFree()) return false; === modified file 'src/zorbautils/hashmap_itemh.h' --- src/zorbautils/hashmap_itemh.h 2012-06-28 04:14:03 +0000 +++ src/zorbautils/hashmap_itemh.h 2012-06-29 18:25:24 +0000 @@ -62,14 +62,24 @@ }; +/******************************************************************************* + A hash-based map mapping item handles to data items of type V. Equality is + based on the store::Item::equals() method. + + It is used to map annotation names to annotation ids. + + NOTE: Although the map uses raw item pointers instead of rchandles, reference + counting is still done, but done manually (see insert and clear methods) +********************************************************************************/ template <class V> -class ItemHandleHashMap : public HashMap<store::Item*, - V, - ItemHandleHashMapCmp> +class ItemHandleHashMap { public: typedef typename HashMap<store::Item*, V, ItemHandleHashMapCmp>::iterator iterator; +private: + HashMap<store::Item*, V, ItemHandleHashMapCmp> theMap; + public: ItemHandleHashMap( long timezone, @@ -77,27 +87,18 @@ ulong size, bool sync) : - HashMap<store::Item*, V, ItemHandleHashMapCmp>( - ItemHandleHashMapCmp(timezone, collation), - size, - sync) + theMap(ItemHandleHashMapCmp(timezone, collation), size, sync) { } ~ItemHandleHashMap() { - iterator ite = this->begin(); - iterator end = this->end(); - - for (; ite != end; ++ite) - { - (*ite).first->removeReference(); - } + clear(); } void clear() { - SYNC_CODE(AutoMutex lock(this->theMutexp);) + SYNC_CODE(AutoMutex lock(theMap.get_mutex());) iterator ite = this->begin(); iterator end = this->end(); @@ -107,33 +108,38 @@ (*ite).first->removeReference(); } - HashMap<store::Item*, V, ItemHandleHashMapCmp>::clearNoSync(); + theMap.clearNoSync(); } - void eraseEntry( - HashEntry<store::Item*, V>* entry, - HashEntry<store::Item*, V>* preventry) - { - entry->theItem->removeReference(); + iterator begin() const { return theMap.begin(); } - HashMap<store::Item*, V, ItemHandleHashMapCmp>::eraseEntry(entry, preventry); - } + iterator end() const { return theMap.end(); } iterator find(const store::Item_t& item) { - return HashMap<store::Item*, V, ItemHandleHashMapCmp>::find(item.getp()); + return theMap.find(item.getp()); } bool insert(const store::Item_t& item, V& value) { - bool inserted = - HashMap<store::Item*, V, ItemHandleHashMapCmp>::insert(item.getp(), value); + bool inserted = theMap.insert(item.getp(), value); if (inserted) item->addReference(); return inserted; } + + bool erase(const store::Item_t& key) + { + bool found = theMap.erase(key.getp()); + + if (found) + key->removeReference(); + + return found; + } + }; } === removed file 'src/zorbautils/hashmap_str_obj.h' --- src/zorbautils/hashmap_str_obj.h 2012-06-28 04:14:03 +0000 +++ src/zorbautils/hashmap_str_obj.h 1970-01-01 00:00:00 +0000 @@ -1,81 +0,0 @@ -/* - * Copyright 2006-2008 The FLWOR Foundation. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#pragma once -#ifndef ZORBA_HASHMAP_CHAR_PTR_TO_CLASS_POINTER -#define ZORBA_HASHMAP_CHAR_PTR_TO_CLASS_POINTER - -#include "zorbautils/hashmap.h" -#include "zorbautils/hashfun.h" - -namespace zorba { - -class CompareCharPtr -{ -public: - static uint32_t hash(const char* str) - { - return hashfun::h32(str, FNV_32_INIT); - } - - static bool equal(const char* s1, const char* s2) - { - return !strcmp(s1, s2); - } -}; - - -template<typename T> -class HashCharPtrObj : public HashMap<const char *, T, CompareCharPtr> -{ -protected: - bool theIsOwner; - -public: - HashCharPtrObj(bool sync, bool owner) - : - HashMap<const char *, T, CompareCharPtr>(1024, sync), - theIsOwner(owner) - { - } - - virtual ~HashCharPtrObj() - { - if (theIsOwner) - freeAll(); - } - - void freeAll() - { - typename HashMap<const char *, T, CompareCharPtr>::iterator it; - for(it = HashMap<const char *, T, CompareCharPtr>::begin(); - it != HashMap<const char *, T, CompareCharPtr>::end(); - ++it) - { - free((void*)(*it).first); - } - } -}; - - -} // namespace zorba - -#endif -/* - * Local variables: - * mode: c++ - * End: - */ -/* vim:set et sw=2 ts=2: */ === modified file 'src/zorbautils/hashset.h' --- src/zorbautils/hashset.h 2012-06-28 04:14:03 +0000 +++ src/zorbautils/hashset.h 2012-06-29 18:25:24 +0000 @@ -77,7 +77,7 @@ Return true if the set already contains an item that is "equal" to the given item; otherwise return false. ********************************************************************************/ -bool exists(const T& item) +bool exists(const T& item) const { return HashMap<T, DummyHashValue, C>::exists(item); } === modified file 'src/zorbautils/hashset_node_itemh.h' --- src/zorbautils/hashset_node_itemh.h 2012-06-28 13:52:31 +0000 +++ src/zorbautils/hashset_node_itemh.h 2012-06-29 18:25:24 +0000 @@ -31,8 +31,10 @@ A hash-based set container of item handles, where equality is based on object identity (i.e. pointer equality) rather than object value. - It is used by the NodeDistinctIterator and for the population of a general - index. + It is used + 1. by the NodeDistinctIterator + 2. for the population of a general index. + 3. by the HashSemiJoinIterator, which implements op:intersect and op:except. NOTE: Although the set uses raw item pointers instead of rchandles, reference counting is still done, but done manually (see insert and clear methods) @@ -65,16 +67,20 @@ void clear(); - bool exists(store::Item* const key) + bool empty() const { return theSet.empty(); } + + bool exists(const store::Item_t& key) const { - return theSet.exists(key); + return theSet.exists(key.getp()); } - bool insert(store::Item* key) + bool insert(const store::Item_t& key) { assert(key->isNode()); - bool inserted = theSet.insert(key); + store::Item* key2 = key.getp(); + + bool inserted = theSet.insert(key2); if (inserted) { @@ -82,6 +88,16 @@ } return inserted; } + + bool erase(const store::Item_t& key) + { + bool found = theSet.erase(key.getp()); + + if (found) + key->removeReference(); + + return found; + } };
-- Mailing list: https://launchpad.net/~zorba-coders Post to : zorba-coders@lists.launchpad.net Unsubscribe : https://launchpad.net/~zorba-coders More help : https://help.launchpad.net/ListHelp