This is an automated email from the ASF dual-hosted git repository. amc pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/trafficserver.git
The following commit(s) were added to refs/heads/master by this push: new 5706e3ba67 libswoc: Remove legacy IntrusiveHashMap (#10104) 5706e3ba67 is described below commit 5706e3ba675844c5b6df5b51b93e478b87d591b0 Author: Alan M. Carroll <a...@apache.org> AuthorDate: Thu Jul 27 14:18:31 2023 -0500 libswoc: Remove legacy IntrusiveHashMap (#10104) --- .../internal-libraries/index.en.rst | 1 - .../internal-libraries/intrusive-hash-map.en.rst | 213 ------ include/tscore/IntrusiveHashMap.h | 724 --------------------- src/tscore/CMakeLists.txt | 1 - src/tscore/Makefile.am | 1 - src/tscore/unit_tests/test_IntrusiveHashMap.cc | 240 ------- 6 files changed, 1180 deletions(-) diff --git a/doc/developer-guide/internal-libraries/index.en.rst b/doc/developer-guide/internal-libraries/index.en.rst index 32dfb332b6..73ec59e528 100644 --- a/doc/developer-guide/internal-libraries/index.en.rst +++ b/doc/developer-guide/internal-libraries/index.en.rst @@ -35,6 +35,5 @@ development team. MemSpan.en TextView.en buffer-writer.en - intrusive-hash-map.en intrusive-list.en scalar.en diff --git a/doc/developer-guide/internal-libraries/intrusive-hash-map.en.rst b/doc/developer-guide/internal-libraries/intrusive-hash-map.en.rst deleted file mode 100644 index 150e9a3594..0000000000 --- a/doc/developer-guide/internal-libraries/intrusive-hash-map.en.rst +++ /dev/null @@ -1,213 +0,0 @@ -.. Licensed to the Apache Software Foundation (ASF) under one - or more contributor license agreements. See the NOTICE file distributed with this work for - additional information regarding copyright ownership. The ASF licenses this file to you under the - Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with - the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software distributed under the License - is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express - or implied. See the License for the specific language governing permissions and limitations under - the License. - -.. include:: ../../common.defs - -.. _lib-intrusive-hash-map: -.. highlight:: cpp -.. default-domain:: cpp - -IntrusiveHashMap -**************** - -:class:`IntrusiveHashMap` provides a "hashed" or "unordered" set, using intrusive links. It provides a -container for elements, each of which has a :arg:`key`. A hash function is applied to a key to -generate a :arg:`hash id` which is used to group the elements in to buckets for fast lookup. This -container is a mix of :code:`std::unordered_set` and :code:`std::unordered_map`. There is no -separation between elements and keys, but each element can contain non-key data. - -Iteration over elements is provided and is constant time. - -In order to optimize lookup, the container can increase the number of buckets used. This is called -"expansion" and when it occurs is control by the "expansion policy" of the container. The policy can -be to be automatic or only done explicitly. - -Usage -===== - -To use an :class:`IntrusiveHashMap` the element must provide support for the container. This is done -through an associated descriptor class which provides the operations needed to manipulate the elements -in the container. - - -Details -======= - -.. class:: template < typename H > IntrusiveHashMap - - :tparam H: Element operations. - - An unordered map using a hash function. The properties of the map are determined by types and - operations provided by the descriptor type :arg:`H`. The following types are derived from :arg:`H` - and defined in the container type. - - .. type:: value_type - - The type of elements in the container, deduced from the return types of the link accessor methods - in :arg:`H`. - - .. type:: key_type - - The type of the key used for hash computations. Deduced from the return type of the key - accessor. An instance of this type is never default constructed nor modified, therefore it can - be a reference if the key type is expensive to copy. - - .. type:: hash_id - - The type of the hash of a :type:`key_type`. Deduced from the return type of the hash function. - This must be a numeric type. - - :arg:`H` - This describes the hash map, primarily via the operations required for the map. The related types are deduced - from the function return types. This is designed to be compatible with :class:`IntrusiveDList`. - - .. function:: static key_type key_of(value_type * v) - - Key accessor - return the key of the element :arg:`v`. - - .. function:: static hash_id hash_of(key_type key) - - Hash function - compute the hash value of the :arg:`key`. - - .. function:: static bool equal(key_type lhs, key_type rhs) - - Key comparison - two keys are equal if this function returns :code:`true`. - - .. function:: static IntrusiveHashMap::value_type * & next_ptr(IntrusiveHashMap::value_type * v) - - Return a reference to the next element pointer embedded in the element :arg:`v`. - - .. function:: static IntrusiveHashMap::value_type * & prev_ptr(IntrusiveHashMap::value_type * v) - - Return a reference to the previous element pointer embedded in the element :arg:`v`. - - .. type:: iterator - - An STL compliant iterator over elements in the container. - - .. type:: range - - An STL compliant half open range of elements, represented by a pair of iterators. - - .. function:: IntrusiveHashMap & insert(value_type * v) - - Insert the element :arg:`v` into the container. If there are already elements with the same - key, :arg:`v` is inserted after these elements. - - There is no :code:`emplace` because :arg:`v` is put in the container, not a copy of :arg:`v`. - For the same reason :arg:`v` must be constructed before calling this method, the container - will never create an element instance. - - .. function:: iterator begin() - - Return an iterator to the first element in the container. - - .. function:: iterator end() - - Return an iterator to past the last element in the container. - - .. function:: iterator find(value_type * v) - - Search for :arg:`v` in the container. If found, return an iterator referring to :arg:`v`. If not - return the end iterator. This validates :arg:`v` is in the container. - - .. function:: range equal_range(key_type key) - - Find all elements with a key that is equal to :arg:`key`. The returned value is a half open - range starting with the first matching element to one past the last matching element. All - element in the range will have a key that is equal to :arg:`key`. If no element has a matching - key the range will be empty. - - .. function:: IntrusiveHashMap & erase(iterator spot) - - Remove the element referred to by :arg:`spot` from the container. - - .. function:: iterator iterator_for(value_type * v) - - Return an iterator for :arg:`v`. This is very fast, faster than :func:`IntrusiveHashMap::find` - but less safe because no validation done on :arg:`v`. If it not in the container (either in no - container or a different one) further iteration on the returned iterator will go badly. It is - useful inside range :code:`for` loops when it is guaranteed the element is in the container. - - .. function:: template <typename F> IntrusiveHashMap & apply(F && f) - - :tparam F: A functional type with the signature :code:`void (value_type*)`. - - This applies the function :arg:`f` to every element in the container in such a way that - modification of the element does not interfere with the iteration. The most common use is to - :code:`delete` the elements during cleanup. The common idiom :: - - for ( auto & elt : container) delete &elt; - - is problematic because the iteration links are in the deleted element causing the computation - of the next element to be a use after free. Using :func:`IntrusiveHashMap::apply` enables safe - cleanup. :: - - container.apply([](value_type & v) { delete & v; }); - - Because the links are intrusive it is possible for other classes or the element class to - modify them. In such cases this method provides a safe way to invoke such mechanisms. - -Design Notes -============ - -This is a refresh of an previously existing class, :code:`TSHahTable`. The switch to C++ 11 and then -C++ 17 made it possible to do much better in terms of the internal implementation and API. The -overall functionality is the roughly the same but with an easier API, compatibility with -:class:`IntrusiveDList`, improved compliance with STL container standards, and better internal -implementation. - -The biggest change is that elements are stored in a single global list rather than per hash bucket. -The buckets serve only as entry points in to the global list and to count the number of elements -per bucket. This simplifies the implementation of iteration, so that the old :code:`Location` nested -class can be removed. Elements with equal keys can be handled in the same way as with STL -containers, via iterator ranges, instead of a custom pseudo-iterator class. - -Notes on :func:`IntrusiveHashMap::apply` ----------------------------------------- - -This was added after some experience with use of the container. Initially it was added to make -cleaning up the container easier. Without it, cleanup looks like :: - - for ( auto spot = map.begin(), limit = map.end() ; spot != limit ; delete &( * spot++)) { - ; // empty - } - -Instead one can do :: - - map.apply([](value_type& v) { delete &v; }); - -The post increment operator guarantees that :arg:`spot` has been updated before the current element is destroyed. -However, it turns out to be handy in other map modifying operations. In the unit tests there is -this code - -.. literalinclude:: ../../../src/tscore/unit_tests/test_IntrusiveHashMap.cc - :lines: 129-132 - -This removes all elements that do not have the payload "dup". As another design note, -:func:`IntrusiveHashMap::iterator_for` here serves to bypass validation checking on the target for -:func:`IntrusiveHashMap::erase`, which is proper because :func:`IntrusiveHashMap::apply` guarantees -:arg:`thing` is in the map. - -Without :code:`apply` this is needed :: - - auto idx = map.begin(); - while (idx != map.end()) { - auto x{idx++}; - if ("dup"sv != x->_payload) { - map.erase(x); - } - } - -The latter is more verbose and more importantly less obvious, depending on a subtle interaction with -post increment. diff --git a/include/tscore/IntrusiveHashMap.h b/include/tscore/IntrusiveHashMap.h deleted file mode 100644 index bbf90a77cc..0000000000 --- a/include/tscore/IntrusiveHashMap.h +++ /dev/null @@ -1,724 +0,0 @@ -/** @file - - Instrusive hash map. - - @section license License - - Licensed to the Apache Software Foundation (ASF) under one - or more contributor license agreements. See the NOTICE file - distributed with this work for additional information - regarding copyright ownership. The ASF licenses this file - to you under the Apache License, Version 2.0 (the - "License"); you may not use this file except in compliance - with the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -#pragma once - -#include <vector> -#include <array> -#include <algorithm> -#include "tscpp/util/IntrusiveDList.h" - -/** Intrusive Hash Table. - - Values stored in this container are not destroyed when the container is destroyed or removed from the container. - They must be released by the client. - - Duplicate keys are allowed. Clients must walk the list for multiple entries. - @see @c Location::operator++() - - By default the table automatically expands to limit the average chain length. This can be tuned. If set - to @c MANUAL then the table will expand @b only when explicitly requested to do so by the client. - @see @c ExpansionPolicy - @see @c setExpansionPolicy() - @see @c setExpansionLimit() - @see @c expand() - - The hash table is configured by a descriptor class. This must contain the following members - - - The static method <tt>key_type key_of(value_type *)</tt> which returns the key for an instance of @c value_type. - - - The static method <tt>bool equal(key_type lhs, key_type rhs)</tt> which checks if two instances of @c Key are the same. - - - The static method <tt>hash_id hash_of(key_type)</tt> which computes the hash value of the key. @c ID must a numeric type. - - - The static method <tt>value_type *& next_ptr(value_type *)</tt> which returns a reference to a forward pointer. - - - The static method <tt>value_type *& prev_ptr(value_type *)</tt> which returns a reference to a backwards pointer. - - These are the required members, it is permitted to have other methods (if the descriptor is used for other purposes) - or to provide overloads of the methods. Note this is compatible with @c IntrusiveDList. - - Several internal types are deduced from these arguments. - - @a Key is the return type of @a key_of and represents the key that distinguishes instances of @a value_type. Two - instances of @c value_type are considered the same if their respective @c Key values are @c equal. @c Key is - presumed cheap to copy. If the underlying key is not a simple type then @a Key should be a constant pointer or a - constant reference. The hash table will never attempt to modify a key. - - @a ID The numeric type that is the hash value for an instance of @a Key. - - Example for @c Http1ServerSession keyed by the origin server IP address. - - @code - struct Descriptor { - static sockaddr const* key_of(Http1ServerSession const* value) { return &value->ip.sa } - static bool equal(sockaddr const* lhs, sockaddr const* rhs) { return ats_ip_eq(lhs, rhs); } - static uint32_t hash_of(sockaddr const* key) { return ats_ip_hash(key); } - static Http1ServerSession *& next_ptr(Http1ServerSession * ssn) { return ssn->_next; } - static Http1ServerSession *& prev_ptr(Http1ServerSession * ssn) { return ssn->_prev; } - }; - using Table = IntrusiveHashMap<Descriptor>; - @endcode - - */ -template <typename H> class IntrusiveHashMap -{ - using self_type = IntrusiveHashMap; - -public: - /// Type of elements in the map. - using value_type = typename std::remove_pointer<typename std::remove_reference<decltype(H::next_ptr(nullptr))>::type>::type; - /// Key type for the elements. - using key_type = decltype(H::key_of(static_cast<value_type *>(nullptr))); - /// The numeric hash ID computed from a key. - using hash_id = decltype(H::hash_of(H::key_of(static_cast<value_type *>(nullptr)))); - - /// When the hash table is expanded. - enum ExpansionPolicy { - MANUAL, ///< Client must explicitly expand the table. - AVERAGE, ///< Table expands if average chain length exceeds limit. [default] - MAXIMUM ///< Table expands if any chain length exceeds limit. - }; - -protected: - /** List of elements. - * All table elements are in this list. The buckets reference their starting element in the list, or nothing if - * no elements are in that bucket. - */ - using List = ts::IntrusiveDList<H>; - - /// A bucket for the hash map. - struct Bucket { - /// Support for ts::IntrusiveDList<Bucket::Linkage>, definitions and link storage. - struct Linkage { - static Bucket *&next_ptr(Bucket *b); ///< Access next pointer. - static Bucket *&prev_ptr(Bucket *b); ///< Access prev pointer. - Bucket *_prev{nullptr}; ///< Prev pointer. - Bucket *_next{nullptr}; ///< Next pointer. - } _link; - - value_type *_v{nullptr}; ///< First element in the bucket. - size_t _count{0}; ///< Number of elements in the bucket. - - /** Marker for the chain having different keys. - - This is used to determine if expanding the hash map would be useful - buckets that are not mixed - will not be changed by an expansion. - */ - bool _mixed_p{false}; - - /// Compute the limit value for iteration in this bucket. - /// This is the value of the next bucket, or @c nullptr if no next bucket. - value_type *limit() const; - - /// Verify @a v is in this bucket. - bool contains(value_type *v) const; - - void clear(); ///< Reset to initial state. - }; - -public: - /// The default starting number of buckets. - static size_t constexpr DEFAULT_BUCKET_COUNT = 7; ///< POOMA. - /// The default expansion policy limit. - static size_t constexpr DEFAULT_EXPANSION_LIMIT = 4; ///< Value from previous version. - /// Expansion policy if not specified in constructor. - static ExpansionPolicy constexpr DEFAULT_EXPANSION_POLICY = AVERAGE; - - using iterator = typename List::iterator; - using const_iterator = typename List::const_iterator; - - /// A range of elements in the map. - /// It is a half open range, [first, last) in the usual STL style. - /// @internal I tried @c std::pair as a base for this but was unable to get STL container operations to work. - struct range : public std::pair<iterator, iterator> { - using super_type = std::pair<iterator, iterator>; ///< Super type. - using super_type::super_type; ///< Use super type constructors. - - // These methods enable treating the range as a view in to the hash map. - - /// Return @a first - iterator const &begin() const; - /// Return @a last - iterator const &end() const; - }; - - /// A range of constant elements in the map. - struct const_range : public std::pair<const_iterator, const_iterator> { - using super_type = std::pair<const_iterator, const_iterator>; ///< Super type. - - /// Allow implicit conversion of range to const_range. - const_range(range const &r); - - using super_type::super_type; ///< Use super type constructors. - - // These methods enable treating the range as a view in to the hash map. - - /// Return @a first - const_iterator const &begin() const; - /// Return @a last - const_iterator const &end() const; - }; - - /// Construct, starting with @n buckets. - /// This doubles as the default constructor. - IntrusiveHashMap(size_t n = DEFAULT_BUCKET_COUNT); - - /** Remove all values from the table. - - The values are not cleaned up. The values are not touched in this method, therefore it is safe - to destroy them first and then @c clear this table. - */ - self_type &clear(); - - iterator begin(); ///< First element. - const_iterator begin() const; ///< First element. - iterator end(); ///< Past last element. - const_iterator end() const; ///< Past last element. - - /** Insert a value in to the table. - The @a value must @b NOT already be in a table of this type. - @note The value itself is put in the table, @b not a copy. - */ - void insert(value_type *v); - - /** Find an element with a key equal to @a key. - - @return A element with a matching key, or the end iterator if not found. - */ - const_iterator find(key_type key) const; - iterator find(key_type key); - - /** Get an iterator for an existing value @a v. - - @return An iterator that references @a v, or the end iterator if @a v is not in the table. - */ - const_iterator find(value_type const *v) const; - iterator find(value_type *v); - - /** Find the range of objects with keys equal to @a key. - - @return A iterator pair of [first, last) items with equal keys. - */ - const_range equal_range(key_type key) const; - range equal_range(key_type key); - - /** Get an @c iterator for the value @a v. - - This is a bit obscure but needed in certain cases. Because the interface assumes iterators are always valid - this avoid containment checks, which is useful if @a v is already known to be in the container. - */ - iterator iterator_for(value_type *v); - const_iterator iterator_for(const value_type *v) const; - - /** Remove the value at @a loc from the table. - - @note This does @b not clean up the removed elements. Use carefully to avoid leaks. - - @return An iterator the next value past @a loc. [STL compatibility] - */ - iterator erase(iterator const &loc); - - /// Remove all elements in the @c range @a r. - iterator erase(range const &r); - - /// Remove all elements in the range (start, limit] - iterator erase(iterator const &start, iterator const &limit); - - /// Remove a @a value from the container. - /// @a value is checked for being a member of the container. - /// @return @c true if @a value was in the container and removed, @c false if it was not in the container. - bool erase(value_type *value); - - /** Apply @a F(value_type&) to every element in the hash map. - * - * This is similar to a range for loop except the iteration is done in a way where destruction or alternation of - * the element does @b not affect the iterator. Primarily this is useful for @c delete to clean up the elements - * but it can have other uses. - * - * @tparam F A functional object of the form <tt>void F(value_type&)</tt> - * @param f The function to apply. - * @return - */ - template <typename F> self_type &apply(F &&f); - - /** Expand the hash if needed. - - Useful primarily when the expansion policy is set to @c MANUAL. - */ - void expand(); - - /// Number of elements in the map. - size_t count() const; - - /// Number of buckets in the array. - size_t bucket_count() const; - - /// Set the expansion policy to @a policy. - self_type &set_expansion_policy(ExpansionPolicy policy); - - /// Get the current expansion policy - ExpansionPolicy get_expansion_policy() const; - - /// Set the limit value for the expansion policy. - self_type &set_expansion_limit(size_t n); - - /// Set the limit value for the expansion policy. - size_t get_expansion_limit() const; - -protected: - /// The type of storage for the buckets. - using Table = std::vector<Bucket>; - - List _list; ///< Elements in the table. - Table _table; ///< Array of buckets. - - /// List of non-empty buckets. - ts::IntrusiveDList<typename Bucket::Linkage> _active_buckets; - - Bucket *bucket_for(key_type key); - - ExpansionPolicy _expansion_policy{DEFAULT_EXPANSION_POLICY}; ///< When to expand the table. - size_t _expansion_limit{DEFAULT_EXPANSION_LIMIT}; ///< Limit value for expansion. - - // noncopyable - IntrusiveHashMap(const IntrusiveHashMap &) = delete; - IntrusiveHashMap &operator=(const IntrusiveHashMap &) = delete; - - // Hash table size prime list. - static constexpr std::array<size_t, 29> PRIME = { - {1, 3, 7, 13, 31, 61, 127, 251, 509, 1021, - 2039, 4093, 8191, 16381, 32749, 65521, 131071, 262139, 524287, 1048573, - 2097143, 4194301, 8388593, 16777213, 33554393, 67108859, 134217689, 268435399, 536870909} - }; -}; - -template <typename H> -auto -IntrusiveHashMap<H>::Bucket::Linkage::next_ptr(Bucket *b) -> Bucket *& -{ - return b->_link._next; -} - -template <typename H> -auto -IntrusiveHashMap<H>::Bucket::Linkage::prev_ptr(Bucket *b) -> Bucket *& -{ - return b->_link._prev; -} - -// This is designed so that if the bucket is empty, then @c nullptr is returned, which will immediately terminate -// a search loop on an empty bucket because that will start with a nullptr candidate, matching the limit. -template <typename H> -auto -IntrusiveHashMap<H>::Bucket::limit() const -> value_type * -{ - Bucket *n{_link._next}; - return n ? n->_v : nullptr; -}; - -template <typename H> -void -IntrusiveHashMap<H>::Bucket::clear() -{ - _v = nullptr; - _count = 0; - _mixed_p = false; - // These can be left set during an expansion, when the bucket did have elements before but not - // after. Therefore make sure they are cleared. - _link._next = _link._prev = nullptr; -} - -template <typename H> -bool -IntrusiveHashMap<H>::Bucket::contains(value_type *v) const -{ - value_type *x = _v; - value_type *limit = this->limit(); - while (x != limit && x != v) { - x = H::next_ptr(x); - } - return x == v; -} - -// --------------------- -template <typename H> -auto -IntrusiveHashMap<H>::range::begin() const -> iterator const & -{ - return super_type::first; -} -template <typename H> -auto -IntrusiveHashMap<H>::range::end() const -> iterator const & -{ - return super_type::second; -} - -template <typename H> -auto -IntrusiveHashMap<H>::const_range::begin() const -> const_iterator const & -{ - return super_type::first; -} - -template <typename H> -auto -IntrusiveHashMap<H>::const_range::end() const -> const_iterator const & -{ - return super_type::second; -} - -// --------------------- - -template <typename H> IntrusiveHashMap<H>::IntrusiveHashMap(size_t n) -{ - if (n) { - _table.resize(*std::lower_bound(PRIME.begin(), PRIME.end(), n)); - } -} - -template <typename H> -auto -IntrusiveHashMap<H>::bucket_for(key_type key) -> Bucket * -{ - return &_table[H::hash_of(key) % _table.size()]; -} - -template <typename H> -auto -IntrusiveHashMap<H>::begin() -> iterator -{ - return _list.begin(); -} - -template <typename H> -auto -IntrusiveHashMap<H>::begin() const -> const_iterator -{ - return _list.begin(); -} - -template <typename H> -auto -IntrusiveHashMap<H>::end() -> iterator -{ - return _list.end(); -} - -template <typename H> -auto -IntrusiveHashMap<H>::end() const -> const_iterator -{ - return _list.end(); -} - -template <typename H> -auto -IntrusiveHashMap<H>::clear() -> self_type & -{ - for (auto &b : _table) { - b.clear(); - } - // Clear container data. - _list.clear(); - _active_buckets.clear(); - return *this; -} - -template <typename H> -auto -IntrusiveHashMap<H>::find(key_type key) -> iterator -{ - Bucket *b = this->bucket_for(key); - value_type *v = b->_v; - value_type *limit = b->limit(); - while (v != limit && !H::equal(key, H::key_of(v))) { - v = H::next_ptr(v); - } - return v == limit ? _list.end() : _list.iterator_for(v); -} - -template <typename H> -auto -IntrusiveHashMap<H>::find(key_type key) const -> const_iterator -{ - return const_cast<self_type *>(this)->find(key); -} - -template <typename H> -auto -IntrusiveHashMap<H>::equal_range(key_type key) -> range -{ - iterator first{this->find(key)}; - iterator last{first}; - iterator limit{this->end()}; - - while (last != limit && H::equal(key, H::key_of(&*last))) { - ++last; - } - - return range{first, last}; -} - -template <typename H> -auto -IntrusiveHashMap<H>::equal_range(key_type key) const -> const_range -{ - return const_cast<self_type *>(this)->equal_range(key); -} - -template <typename H> -auto -IntrusiveHashMap<H>::iterator_for(const value_type *v) const -> const_iterator -{ - return _list.iterator_for(v); -} - -template <typename H> -auto -IntrusiveHashMap<H>::iterator_for(value_type *v) -> iterator -{ - return _list.iterator_for(v); -} - -template <typename H> -auto -IntrusiveHashMap<H>::find(value_type *v) -> iterator -{ - Bucket *b = this->bucket_for(H::key_of(v)); - return b->contains(v) ? _list.iterator_for(v) : this->end(); -} - -template <typename H> -auto -IntrusiveHashMap<H>::find(value_type const *v) const -> const_iterator -{ - return const_cast<self_type *>(this)->find(const_cast<value_type *>(v)); -} - -template <typename H> -void -IntrusiveHashMap<H>::insert(value_type *v) -{ - auto key = H::key_of(v); - Bucket *bucket = this->bucket_for(key); - value_type *spot = bucket->_v; - bool mixed_p = false; // Found a different key in the bucket. - - if (nullptr == spot) { // currently empty bucket, set it and add to active list. - _list.append(v); - bucket->_v = v; - _active_buckets.append(bucket); - } else { - value_type *limit = bucket->limit(); - - // First search the bucket to see if the key is already in it. - while (spot != limit && !H::equal(key, H::key_of(spot))) { - spot = H::next_ptr(spot); - } - if (spot != bucket->_v) { - mixed_p = true; // found some other key, it's going to be mixed. - } - _list.insert_before(spot, v); - if (spot == bucket->_v) { // added before the bucket start, update the start. - bucket->_v = v; - } - bucket->_mixed_p = mixed_p; - } - ++bucket->_count; - - // auto expand if appropriate. - if ((AVERAGE == _expansion_policy && (_list.count() / _table.size()) > _expansion_limit) || - (MAXIMUM == _expansion_policy && bucket->_count > _expansion_limit && bucket->_mixed_p)) { - this->expand(); - } -} - -template <typename H> -auto -IntrusiveHashMap<H>::erase(iterator const &loc) -> iterator -{ - value_type *v = loc; - iterator zret = ++(this->iterator_for(v)); // get around no const_iterator -> iterator. - Bucket *b = this->bucket_for(H::key_of(v)); - value_type *nv = H::next_ptr(v); - value_type *limit = b->limit(); - if (b->_v == v) { // removed first element in bucket, update bucket - if (limit == nv) { // that was also the only element, deactivate bucket - _active_buckets.erase(b); - b->clear(); - } else { - b->_v = nv; - --b->_count; - } - } - _list.erase(loc); - return zret; -} - -template <typename H> -bool -IntrusiveHashMap<H>::erase(value_type *v) -{ - auto loc = this->iterator_for(v); - if (loc != this->end()) { - this->erase(loc); - return true; - } - return false; -} - -template <typename H> -auto -IntrusiveHashMap<H>::erase(iterator const &start, iterator const &limit) -> iterator -{ - auto spot{start}; - Bucket *bucket{this->bucket_for(spot)}; - while (spot != limit) { - auto target = bucket; - bucket = bucket->_link._next; // bump now to avoid forward iteration problems in case of bucket removal. - value_type *v_limit = bucket ? bucket->_v : nullptr; - while (spot != v_limit && spot != limit) { - --(target->_count); - ++spot; - } - if (target->_count == 0) { - _active_buckets.erase(target); - } - } - _list.erase(start, limit); - return _list.iterator_for(limit); // convert from const_iterator back to iterator -}; - -template <typename H> -auto -IntrusiveHashMap<H>::erase(range const &r) -> iterator -{ - return this->erase(r.first, r.second); -} - -namespace detail -{ -// Make @c apply more convenient by allowing the function to take a reference type or pointer type to the container -// elements. The pointer type is the base, plus a shim to convert from a reference type functor to a pointer pointer -// type. The complex return type definition forces only one, but not both, to be valid for a particular functor. This -// also must be done via free functions and not method overloads because the compiler forces a match up of method -// definitions and declarations before any template instantiation. - -template <typename H, typename F> -auto -IntrusiveHashMapApply(IntrusiveHashMap<H> &map, F &&f) - -> decltype(f(*static_cast<typename IntrusiveHashMap<H>::value_type *>(nullptr)), map) -{ - return map.apply([&f](typename IntrusiveHashMap<H>::value_type *v) { return f(*v); }); -} - -template <typename H, typename F> -auto -IntrusiveHashMapApply(IntrusiveHashMap<H> &map, F &&f) - -> decltype(f(static_cast<typename IntrusiveHashMap<H>::value_type *>(nullptr)), map) -{ - auto spot{map.begin()}; - auto limit{map.end()}; - while (spot != limit) { - f(spot++); // post increment means @a spot is updated before @a f is applied. - } - return map; -} -} // namespace detail - -template <typename H> -template <typename F> -auto -IntrusiveHashMap<H>::apply(F &&f) -> self_type & -{ - return detail::IntrusiveHashMapApply(*this, f); -}; - -template <typename H> -void -IntrusiveHashMap<H>::expand() -{ - ExpansionPolicy org_expansion_policy = _expansion_policy; // save for restore. - value_type *old = _list.head(); // save for repopulating. - auto old_size = _table.size(); - - // Reset to empty state. - this->clear(); - _table.resize(*std::lower_bound(PRIME.begin(), PRIME.end(), old_size + 1)); - - _expansion_policy = MANUAL; // disable any auto expand while we're expanding. - while (old) { - value_type *v = old; - old = H::next_ptr(old); - this->insert(v); - } - // stashed array gets cleaned up when @a tmp goes out of scope. - _expansion_policy = org_expansion_policy; // reset to original value. -} - -template <typename H> -size_t -IntrusiveHashMap<H>::count() const -{ - return _list.count(); -} - -template <typename H> -size_t -IntrusiveHashMap<H>::bucket_count() const -{ - return _table.size(); -} - -template <typename H> -auto -IntrusiveHashMap<H>::set_expansion_policy(ExpansionPolicy policy) -> self_type & -{ - _expansion_policy = policy; - return *this; -} - -template <typename H> -auto -IntrusiveHashMap<H>::get_expansion_policy() const -> ExpansionPolicy -{ - return _expansion_policy; -} - -template <typename H> -auto -IntrusiveHashMap<H>::set_expansion_limit(size_t n) -> self_type & -{ - _expansion_limit = n; - return *this; -} - -template <typename H> -size_t -IntrusiveHashMap<H>::get_expansion_limit() const -{ - return _expansion_limit; -} -/* ---------------------------------------------------------------------------------------------- */ diff --git a/src/tscore/CMakeLists.txt b/src/tscore/CMakeLists.txt index 24772dcda4..d1e25c7dcd 100644 --- a/src/tscore/CMakeLists.txt +++ b/src/tscore/CMakeLists.txt @@ -144,7 +144,6 @@ add_executable(test_tscore unit_tests/test_HKDF.cc unit_tests/test_Histogram.cc unit_tests/test_History.cc - unit_tests/test_IntrusiveHashMap.cc unit_tests/test_IntrusivePtr.cc unit_tests/test_List.cc unit_tests/test_MMH.cc diff --git a/src/tscore/Makefile.am b/src/tscore/Makefile.am index 1a49ca5d07..707be7af7e 100644 --- a/src/tscore/Makefile.am +++ b/src/tscore/Makefile.am @@ -162,7 +162,6 @@ test_tscore_SOURCES = \ unit_tests/test_History.cc \ unit_tests/test_ink_inet.cc \ unit_tests/test_ink_memory.cc \ - unit_tests/test_IntrusiveHashMap.cc \ unit_tests/test_IntrusivePtr.cc \ unit_tests/test_layout.cc \ unit_tests/test_List.cc \ diff --git a/src/tscore/unit_tests/test_IntrusiveHashMap.cc b/src/tscore/unit_tests/test_IntrusiveHashMap.cc deleted file mode 100644 index 6cda72128d..0000000000 --- a/src/tscore/unit_tests/test_IntrusiveHashMap.cc +++ /dev/null @@ -1,240 +0,0 @@ -/** @file - - IntrusiveHashMap unit tests. - - @section license License - - Licensed to the Apache Software Foundation (ASF) under one - or more contributor license agreements. See the NOTICE file - distributed with this work for additional information - regarding copyright ownership. The ASF licenses this file - to you under the Apache License, Version 2.0 (the - "License"); you may not use this file except in compliance - with the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -#include <iostream> -#include <string_view> -#include <string> -#include <bitset> -#include <random> -#include <tscore/IntrusiveHashMap.h> -#include <tscpp/util/ts_bw_format.h> -#include "catch.hpp" - -// ------------- -// --- TESTS --- -// ------------- - -using namespace std::literals; - -namespace -{ -struct Thing { - std::string _payload; - int _n{0}; - - Thing(std::string_view text) : _payload(text) {} - Thing(std::string_view text, int x) : _payload(text), _n(x) {} - - Thing *_next{nullptr}; - Thing *_prev{nullptr}; -}; - -struct ThingMapDescriptor { - static Thing *& - next_ptr(Thing *thing) - { - return thing->_next; - } - static Thing *& - prev_ptr(Thing *thing) - { - return thing->_prev; - } - static std::string_view - key_of(Thing *thing) - { - return thing->_payload; - } - static constexpr std::hash<std::string_view> hasher{}; - static auto - hash_of(std::string_view s) -> decltype(hasher(s)) - { - return hasher(s); - } - static bool - equal(std::string_view const &lhs, std::string_view const &rhs) - { - return lhs == rhs; - } -}; - -using Map = IntrusiveHashMap<ThingMapDescriptor>; - -} // namespace - -TEST_CASE("IntrusiveHashMap", "[libts][IntrusiveHashMap]") -{ - Map map; - map.insert(new Thing("bob")); - REQUIRE(map.count() == 1); - map.insert(new Thing("dave")); - map.insert(new Thing("persia")); - REQUIRE(map.count() == 3); - // Need to be bit careful cleaning up, since the link pointers are in the objects and deleting - // the object makes it unsafe to use an iterator referencing that object. For a full cleanup, - // the best option is to first delete everything, then clean up the map. - map.apply([](Thing *thing) { delete thing; }); - map.clear(); - REQUIRE(map.count() == 0); - - size_t nb = map.bucket_count(); - std::bitset<64> marks; - for (unsigned int i = 1; i <= 63; ++i) { - std::string name; - swoc::bwprint(name, "{} squared is {}", i, i * i); - Thing *thing = new Thing(name); - thing->_n = i; - map.insert(thing); - REQUIRE(map.count() == i); - REQUIRE(map.find(name) != map.end()); - } - REQUIRE(map.count() == 63); - REQUIRE(map.bucket_count() > nb); - for (auto &thing : map) { - REQUIRE(0 == marks[thing._n]); - marks[thing._n] = true; - } - marks[0] = true; - REQUIRE(marks.all()); - map.insert(new Thing("dup"sv, 79)); - map.insert(new Thing("dup"sv, 80)); - map.insert(new Thing("dup"sv, 81)); - - auto r = map.equal_range("dup"sv); - REQUIRE(r.first != r.second); - REQUIRE(r.first->_payload == "dup"sv); - REQUIRE(r.first->_n == 81); - - Map::iterator idx; - - // Erase all the non-"dup" and see if the range is still correct. - map.apply([&map](Thing *thing) { - if (thing->_payload != "dup"sv) { - map.erase(map.iterator_for(thing)); - delete thing; - } - }); - r = map.equal_range("dup"sv); - REQUIRE(r.first != r.second); - idx = r.first; - REQUIRE(idx->_payload == "dup"sv); - REQUIRE(idx->_n == 81); - REQUIRE((++idx)->_payload == "dup"sv); - REQUIRE(idx->_n != r.first->_n); - REQUIRE(idx->_n == 79); - REQUIRE((++idx)->_payload == "dup"sv); - REQUIRE(idx->_n != r.first->_n); - REQUIRE(idx->_n == 80); - REQUIRE(++idx == map.end()); - // Verify only the "dup" items are left. - for (auto &&elt : map) { - REQUIRE(elt._payload == "dup"sv); - } - // clean up the last bits. - map.apply([](Thing *thing) { delete thing; }); -}; - -// Some more involved tests. -TEST_CASE("IntrusiveHashMapManyStrings", "[IntrusiveHashMap]") -{ - std::vector<std::string> strings; - - std::uniform_int_distribution<short> char_gen{'a', 'z'}; - std::uniform_int_distribution<short> length_gen{20, 40}; - std::minstd_rand randu; - constexpr int N = 1009; - - Map ihm; - - strings.reserve(N); - for (int i = 0; i < N; ++i) { - auto len = length_gen(randu); - std::string s; - s.reserve(len); - for (decltype(len) j = 0; j < len; ++j) { - s += char_gen(randu); - } - strings.push_back(s); - } - - // Fill the IntrusiveHashMap - for (int i = 0; i < N; ++i) { - ihm.insert(new Thing{strings[i], i}); - } - - REQUIRE(ihm.count() == N); - - // Do some lookups - just require the whole loop, don't artificially inflate the test count. - bool miss_p = false; - for (int j = 0, idx = 17; j < N; ++j, idx = (idx + 17) % N) { - if (auto spot = ihm.find(strings[idx]); spot == ihm.end() || spot->_n != idx) { - miss_p = true; - } - } - REQUIRE(miss_p == false); - - // Let's try some duplicates when there's a lot of data in the map. - miss_p = false; - for (int idx = 23; idx < N; idx += 23) { - ihm.insert(new Thing(strings[idx], 2000 + idx)); - } - for (int idx = 23; idx < N; idx += 23) { - auto spot = ihm.find(strings[idx]); - if (spot == ihm.end() || spot->_n != 2000 + idx || ++spot == ihm.end() || spot->_n != idx) { - miss_p = true; - } - } - REQUIRE(miss_p == false); - - // Do a different stepping, special cases the intersection with the previous stepping. - miss_p = false; - for (int idx = 31; idx < N; idx += 31) { - ihm.insert(new Thing(strings[idx], 3000 + idx)); - } - for (int idx = 31; idx < N; idx += 31) { - auto spot = ihm.find(strings[idx]); - if (spot == ihm.end() || spot->_n != 3000 + idx || ++spot == ihm.end() || (idx != (23 * 31) && spot->_n != idx) || - (idx == (23 * 31) && spot->_n != 2000 + idx)) { - miss_p = true; - } - } - REQUIRE(miss_p == false); - - // Check for misses. - miss_p = false; - for (int i = 0; i < 99; ++i) { - char s[41]; - auto len = length_gen(randu); - for (decltype(len) j = 0; j < len; ++j) { - s[j] = char_gen(randu); - } - std::string_view name(s, len); - auto spot = ihm.find(name); - if (spot != ihm.end() && name != spot->_payload) { - miss_p = true; - } - } - REQUIRE(miss_p == false); - - ihm.apply([](Thing *thing) { delete thing; }); -};