http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/9ccd5a31/storage/FastHashTable.hpp ---------------------------------------------------------------------- diff --git a/storage/FastHashTable.hpp b/storage/FastHashTable.hpp deleted file mode 100644 index 4a95cd9..0000000 --- a/storage/FastHashTable.hpp +++ /dev/null @@ -1,2515 +0,0 @@ -/** - * Copyright 2011-2015 Quickstep Technologies LLC. - * Copyright 2015-2016 Pivotal Software, Inc. - * Copyright 2016, Quickstep Research Group, Computer Sciences Department, - * University of WisconsinâMadison. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - **/ - -#ifndef QUICKSTEP_STORAGE_FAST_HASH_TABLE_HPP_ -#define QUICKSTEP_STORAGE_FAST_HASH_TABLE_HPP_ - -#include <atomic> -#include <cstddef> -#include <cstdlib> -#include <type_traits> -#include <vector> - -#include "catalog/CatalogTypedefs.hpp" -#include "storage/HashTableBase.hpp" -#include "storage/StorageBlob.hpp" -#include "storage/StorageBlockInfo.hpp" -#include "storage/StorageConstants.hpp" -#include "storage/StorageManager.hpp" -#include "storage/TupleReference.hpp" -#include "storage/ValueAccessor.hpp" -#include "storage/ValueAccessorUtil.hpp" -#include "threading/SpinMutex.hpp" -#include "threading/SpinSharedMutex.hpp" -#include "types/Type.hpp" -#include "types/TypedValue.hpp" -#include "utility/BloomFilter.hpp" -#include "utility/HashPair.hpp" -#include "utility/Macros.hpp" - -namespace quickstep { - -/** \addtogroup Storage - * @{ - */ - -/** - * @brief Base class for the hash table implementation in which the payload can - * be just a bunch of bytes. This implementation is suitable for - * aggregation hash table with multiple aggregation handles (e.g. SUM, - * MAX, MIN etc). - * - * At present there is one implementation for this base class. - * 1. SeparateChainingHashTable - Keys/values are stored in a separate - * region of memory from the base hash table slot array. Every bucket - * has a "next" pointer so that entries that collide (i.e. map to the - * same base slot) form chains of pointers with each other. Although - * this implementation has some extra indirection compared to - * LinearOpenAddressingHashTable, it does not have the same - * vulnerabilities to key skew, and it additionally supports a very - * efficient bucket-preallocation mechanism that minimizes cache - * coherency overhead when multiple threads are building a HashTable. - * - * @note If you need to create a HashTable and not just use it as a client, see - * HashTableFactory, which simplifies the process of creating a - * HashTable. - * - * @param resizable Whether this hash table is resizable (using memory from a - * StorageManager) or not (using a private, fixed memory allocation). - * @param serializable If true, this hash table can safely be saved to and - * loaded from disk. If false, some out of band memory may be used (e.g. - * to store variable length keys). - * @param force_key_copy If true, inserted keys are always copied into this - * HashTable's memory. If false, pointers to external values may be - * stored instead. force_key_copy should be true if the hash table will - * outlive the external key values which are inserted into it. Note that - * if serializable is true and force_key_copy is false, then relative - * offsets will be used instead of absolute pointers to keys, meaning - * that the pointed-to keys must be serialized and deserialized in - * exactly the same relative byte order (e.g. as part of the same - * StorageBlock), and keys must not change position relative to this - * HashTable (beware TupleStorageSubBlocks that may self-reorganize when - * modified). If serializable and resizable are both true, then - * force_key_copy must also be true. - * @param allow_duplicate_keys If true, multiple values can be mapped to the - * same key. If false, one and only one value may be mapped. - **/ -template <bool resizable, - bool serializable, - bool force_key_copy, - bool allow_duplicate_keys> -class FastHashTable : public HashTableBase<resizable, - serializable, - force_key_copy, - allow_duplicate_keys> { - static_assert(!(serializable && resizable && !force_key_copy), - "A HashTable must have force_key_copy=true when serializable " - "and resizable are both true."); - - public: - // Shadow template parameters. This is useful for shared test harnesses. - static constexpr bool template_resizable = resizable; - static constexpr bool template_serializable = serializable; - static constexpr bool template_force_key_copy = force_key_copy; - static constexpr bool template_allow_duplicate_keys = allow_duplicate_keys; - - // Some HashTable implementations (notably LinearOpenAddressingHashTable) - // use a special hash code to represent an empty bucket, and another special - // code to indicate that a bucket is currently being inserted into. For those - // HashTables, this is a surrogate hash value for empty buckets. Keys which - // actually hash to this value should have their hashes mutated (e.g. by - // adding 1). We use zero, since we will often be using memory which is - // already zeroed-out and this saves us the trouble of a memset. This has - // some downside, as the hash function we use is the identity hash for - // integers, and the integer 0 is common in many data sets and must be - // adjusted (and will then spuriously collide with 1). Nevertheless, this - // expense is outweighed by no longer having to memset large regions of - // memory when initializing a HashTable. - static constexpr unsigned char kEmptyHashByte = 0x0; - static constexpr std::size_t kEmptyHash = 0x0; - - // A surrogate hash value for a bucket which is currently being inserted - // into. As with kEmptyHash, keys which actually hash to this value should - // have their hashes adjusted. - static constexpr std::size_t kPendingHash = ~kEmptyHash; - - /** - * @brief Virtual destructor. - **/ - virtual ~FastHashTable() { - if (resizable) { - if (blob_.valid()) { - if (serializable) { - DEV_WARNING( - "Destroying a resizable serializable HashTable's underlying " - "StorageBlob."); - } - const block_id blob_id = blob_->getID(); - blob_.release(); - storage_manager_->deleteBlockOrBlobFile(blob_id); - } - } - } - - /** - * @brief Get the ID of the StorageBlob used to store a resizable HashTable. - * - * @warning This method must not be used for a non-resizable HashTable. - * - * @return The ID of the StorageBlob used to store this HashTable. - **/ - inline block_id getBlobId() const { - DEBUG_ASSERT(resizable); - return blob_->getID(); - } - - /** - * @brief Erase all entries in this hash table. - * - * @warning This method is not guaranteed to be threadsafe. - **/ - virtual void clear() = 0; - - /** - * @brief Add a new entry into the hash table. - * - * @warning The key must not be null. - * @warning This method is threadsafe with regard to other calls to put(), - * putCompositeKey(), putValueAccessor(), and - * putValueAccessorCompositeKey(), but should not be used - * simultaneously with upsert(), upsertCompositeKey(), - * upsertValueAccessor(), or upsertValueAccessorCompositeKey(). - * @note This version is for single scalar keys, see also putCompositeKey(). - * @note If the hash table is (close to) full and resizable is true, this - * routine might result in rebuilding the entire hash table. - * - * @param key The key. - * @param value The value payload. - * @return HashTablePutResult::kOK if an entry was successfully inserted, - * HashTablePutResult::kDuplicateKey if allow_duplicate_keys is false - * and key was a duplicate, or HashTablePutResult::kOutOfSpace if - * resizable is false and storage space for the hash table has been - * exhausted. - **/ - HashTablePutResult put(const TypedValue &key, const std::uint8_t &value); - - /** - * @brief Add a new entry into the hash table (composite key version). - * - * @warning No component of the key may be null. - * @warning This method is threadsafe with regard to other calls to put(), - * putCompositeKey(), putValueAccessor(), and - * putValueAccessorCompositeKey(), but should not be used - * simultaneously with upsert(), upsertCompositeKey(), - * upsertValueAccessor(), or upsertValueAccessorCompositeKey(). - * @note This version is for composite keys, see also put(). - * @note If the hash table is (close to) full and resizable is true, this - * routine might result in rebuilding the entire hash table. - * - * @param key The components of the key. - * @param value The value payload. - * @return HashTablePutResult::kOK if an entry was successfully inserted, - * HashTablePutResult::kDuplicateKey if allow_duplicate_keys is false - * and key was a duplicate, or HashTablePutResult::kOutOfSpace if - * resizable is false and storage space for the hash table has been - * exhausted. - **/ - - HashTablePutResult putCompositeKey(const std::vector<TypedValue> &key, - const std::uint8_t *value_ptr); - - /** - * @brief Add (multiple) new entries into the hash table from a - * ValueAccessor. - * - * @warning This method is threadsafe with regard to other calls to put(), - * putCompositeKey(), putValueAccessor(), and - * putValueAccessorCompositeKey(), but should not be used - * simultaneously with upsert(), upsertCompositeKey(), - * upsertValueAccessor(), or upsertValueAccessorCompositeKey(). - * @note This version is for scalar keys, see also - * putValueAccessorCompositeKey(). - * @note If the hash table fills up while this call is in progress and - * resizable is true, this might result in rebuilding the entire hash - * table. - * - * @param accessor A ValueAccessor which will be used to access keys. - * beginIteration() should be called on accessor before calling this - * method. - * @param key_attr_id The attribute ID of the keys to be read from accessor. - * @param check_for_null_keys If true, each key will be checked to see if it - * is null before inserting it (null keys are skipped). This must be - * set to true if some of the keys that will be read from accessor may - * be null. - * @param functor A pointer to a functor, which should provide a call - * operator that takes const ValueAccessor& as an argument (or better - * yet, a templated call operator which takes a const reference to - * some subclass of ValueAccessor as an argument) and returns either - * a ValueT or a reference to a ValueT. The functor should generate - * the appropriate mapped value for the current tuple the accessor is - * iterating on. - * @return HashTablePutResult::kOK if all keys and generated values from - * accessor were successfully inserted. - * HashTablePutResult::kOutOfSpace is returned if this hash-table is - * non-resizable and ran out of space (note that some entries may - * still have been inserted, and accessor's iteration will be left on - * the first tuple which could not be inserted). - * HashTablePutResult::kDuplicateKey is returned if - * allow_duplicate_keys is false and a duplicate key is encountered - * (as with HashTablePutResult::kOutOfSpace, some entries may have - * been inserted, and accessor will be left on the tuple with a - * duplicate key). - **/ - template <typename FunctorT> - HashTablePutResult putValueAccessor(ValueAccessor *accessor, - const attribute_id key_attr_id, - const bool check_for_null_keys, - FunctorT *functor); - - /** - * @brief Add (multiple) new entries into the hash table from a - * ValueAccessor (composite key version). - * - * @warning This method is threadsafe with regard to other calls to put(), - * putCompositeKey(), putValueAccessor(), and - * putValueAccessorCompositeKey(), but should not be used - * simultaneously with upsert(), upsertCompositeKey(), - * upsertValueAccessor(), or upsertValueAccessorCompositeKey(). - * @note This version is for composite keys, see also putValueAccessor(). - * @note If the hash table fills up while this call is in progress and - * resizable is true, this might result in rebuilding the entire hash - * table. - * - * @param accessor A ValueAccessor which will be used to access keys. - * beginIteration() should be called on accessor before calling this - * method. - * @param key_attr_ids The attribute IDs of each key component to be read - * from accessor. - * @param check_for_null_keys If true, each key will be checked to see if it - * has a null component before inserting it (null keys are skipped). - * This must be set to true if some of the keys that will be read from - * accessor may be null. - * @param functor A pointer to a functor, which should provide a call - * operator that takes const ValueAccessor& as an argument (or better - * yet, a templated call operator which takes a const reference to - * some subclass of ValueAccessor as an argument) and returns either - * a ValueT or a reference to a ValueT. The functor should generate - * the appropriate mapped value for the current tuple the accessor is - * iterating on. - * @return HashTablePutResult::kOK if all keys and generated values from - * accessor were successfully inserted. - * HashTablePutResult::kOutOfSpace is returned if this hash-table is - * non-resizable and ran out of space (note that some entries may - * still have been inserted, and accessor's iteration will be left on - * the first tuple which could not be inserted). - * HashTablePutResult::kDuplicateKey is returned if - * allow_duplicate_keys is false and a duplicate key is encountered - * (as with HashTablePutResult::kOutOfSpace, some entries may have - * been inserted, and accessor will be left on the tuple with a - * duplicate key). - **/ - template <typename FunctorT> - HashTablePutResult putValueAccessorCompositeKey( - ValueAccessor *accessor, - const std::vector<attribute_id> &key_attr_ids, - const bool check_for_null_keys, - FunctorT *functor); - - /** - * @brief Apply a functor to the value mapped to a key, first inserting a new - * value if one is not already present. - * - * @warning The key must not be null. - * @warning This method is only usable if allow_duplicate_keys is false. - * @warning This method is threadsafe with regard to other calls to upsert(), - * upsertCompositeKey(), upsertValueAccessor(), and - * upsertValueAccessorCompositeKey(), but should not be used - * simultaneously with put(), putCompositeKey(), putValueAccessor(), - * or putValueAccessorCompositeKey(). - * @warning The ValueT* pointer passed to functor's call operator is only - * guaranteed to be valid for the duration of the call. The functor - * should not store a copy of the pointer and assume that it remains - * valid. - * @warning Although this method itself is threadsafe, the ValueT object - * accessed by functor is not guaranteed to be (although it is - * guaranteed that its initial insertion will be atomic). If it is - * possible for multiple threads to call upsert() with the same key - * at the same time, then their access to ValueT should be made - * threadsafe (e.g. with the use of atomic types, mutexes, or some - * other external synchronization). - * @note This version is for single scalar keys, see also - * upsertCompositeKey(). - * @note If the hash table is (close to) full and resizable is true, this - * routine might result in rebuilding the entire hash table. - * - * @param key The key. - * @param initial_value If there was not already a preexisting entry in this - * HashTable for the specified key, then the value will be initialized - * with a copy of initial_value. This parameter is ignored if a value - * is already present for key. - * @param functor A pointer to a functor, which should provide a call - * operator which takes ValueT* as an argument. The call operator will - * be invoked once on the value corresponding to key (which may be - * newly inserted and default-constructed). - * @return True on success, false if upsert failed because there was not - * enough space to insert a new entry in this HashTable. - **/ - template <typename FunctorT> - bool upsert(const TypedValue &key, - const std::uint8_t *initial_value_ptr, - FunctorT *functor); - - /** - * @brief Apply a functor to the value mapped to a key, first inserting a new - * value if one is not already present. - * - * @warning The key must not be null. - * @warning This method is only usable if allow_duplicate_keys is false. - * @warning This method is threadsafe with regard to other calls to upsert(), - * upsertCompositeKey(), upsertValueAccessor(), and - * upsertValueAccessorCompositeKey(), but should not be used - * simultaneously with put(), putCompositeKey(), putValueAccessor(), - * or putValueAccessorCompositeKey(). - * @warning The ValueT* pointer passed to functor's call operator is only - * guaranteed to be valid for the duration of the call. The functor - * should not store a copy of the pointer and assume that it remains - * valid. - * @warning Although this method itself is threadsafe, the ValueT object - * accessed by functor is not guaranteed to be (although it is - * guaranteed that its initial insertion will be atomic). If it is - * possible for multiple threads to call upsertCompositeKey() with - * the same key at the same time, then their access to ValueT should - * be made threadsafe (e.g. with the use of atomic types, mutexes, - * or some other external synchronization). - * @note This version is for composite keys, see also upsert(). - * @note If the hash table is (close to) full and resizable is true, this - * routine might result in rebuilding the entire hash table. - * - * @param key The key. - * @param initial_value If there was not already a preexisting entry in this - * HashTable for the specified key, then the value will be initialized - * with a copy of initial_value. This parameter is ignored if a value - * is already present for key. - * @param functor A pointer to a functor, which should provide a call - * operator which takes ValueT* as an argument. The call operator will - * be invoked once on the value corresponding to key (which may be - * newly inserted and default-constructed). - * @return True on success, false if upsert failed because there was not - * enough space to insert a new entry in this HashTable. - **/ - template <typename FunctorT> - bool upsertCompositeKeyFast(const std::vector<TypedValue> &key, - const std::uint8_t *init_value_ptr, - FunctorT *functor); - - template <typename FunctorT> - bool upsertCompositeKeyFast(const std::vector<TypedValue> &key, - const std::uint8_t *init_value_ptr, - FunctorT *functor, - int index); - - bool upsertCompositeKeyFast(const std::vector<TypedValue> &key, - const std::uint8_t *init_value_ptr, - const std::uint8_t *source_state); - - /** - * @brief Apply a functor to (multiple) entries in this hash table, with keys - * drawn from a ValueAccessor. New values are first inserted if not - * already present. - * - * @warning This method is only usable if allow_duplicate_keys is false. - * @warning This method is threadsafe with regard to other calls to upsert(), - * upsertCompositeKey(), upsertValueAccessor(), and - * upsertValueAccessorCompositeKey(), but should not be used - * simultaneously with put(), putCompositeKey(), putValueAccessor(), - * or putValueAccessorCompositeKey(). - * @warning The ValueAccessor reference and ValueT* pointer passed to - * functor's call operator are only guaranteed to be valid for the - * duration of the call. The functor should not store a copy of - * these pointers and assume that they remain valid. - * @warning Although this method itself is threadsafe, the ValueT object - * accessed by functor is not guaranteed to be (although it is - * guaranteed that its initial insertion will be atomic). If it is - * possible for multiple threads to call upsertValueAccessor() with - * the same key at the same time, then their access to ValueT should - * be made threadsafe (e.g. with the use of atomic types, mutexes, - * or some other external synchronization). - * @note This version is for single scalar keys, see also - * upsertValueAccessorCompositeKey(). - * @note If the hash table is (close to) full and resizable is true, this - * routine might result in rebuilding the entire hash table. - * - * @param accessor A ValueAccessor which will be used to access keys. - * beginIteration() should be called on accessor before calling this - * method. - * @param key_attr_id The attribute ID of the keys to be read from accessor. - * @param check_for_null_keys If true, each key will be checked to see if it - * is null before upserting it (null keys are skipped). This must be - * set to true if some of the keys that will be read from accessor may - * be null. - * @param functor A pointer to a functor, which should provide a call - * operator that takes two arguments: const ValueAccessor& (or better - * yet, a templated call operator which takes a const reference to - * some subclass of ValueAccessor as its first argument) and ValueT*. - * The call operator will be invoked once for every tuple with a - * non-null key in accessor. - * @return True on success, false if upsert failed because there was not - * enough space to insert new entries for all the keys in accessor - * (note that some entries may still have been upserted, and - * accessor's iteration will be left on the first tuple which could - * not be inserted). - **/ - bool upsertValueAccessorFast( - const std::vector<attribute_id> &argument_ids, - ValueAccessor *accessor, - const attribute_id key_attr_id, - const bool check_for_null_keys); - - /** - * @brief Apply a functor to (multiple) entries in this hash table, with keys - * drawn from a ValueAccessor. New values are first inserted if not - * already present. Composite key version. - * - * @warning This method is only usable if allow_duplicate_keys is false. - * @warning This method is threadsafe with regard to other calls to upsert(), - * upsertCompositeKey(), upsertValueAccessor(), and - * upsertValueAccessorCompositeKey(), but should not be used - * simultaneously with put(), putCompositeKey(), putValueAccessor(), - * or putValueAccessorCompositeKey(). - * @warning The ValueAccessor reference and ValueT* pointer passed to - * functor's call operator are only guaranteed to be valid for the - * duration of the call. The functor should not store a copy of - * these pointers and assume that they remain valid. - * @warning Although this method itself is threadsafe, the ValueT object - * accessed by functor is not guaranteed to be (although it is - * guaranteed that its initial insertion will be atomic). If it is - * possible for multiple threads to call upsertValueAccessor() with - * the same key at the same time, then their access to ValueT should - * be made threadsafe (e.g. with the use of atomic types, mutexes, - * or some other external synchronization). - * @note This version is for composite keys, see also upsertValueAccessor(). - * @note If the hash table is (close to) full and resizable is true, this - * routine might result in rebuilding the entire hash table. - * - * @param accessor A ValueAccessor which will be used to access keys. - * beginIteration() should be called on accessor before calling this - * method. - * @param key_attr_ids The attribute IDs of each key component to be read - * from accessor. - * @param check_for_null_keys If true, each key will be checked to see if it - * is null before upserting it (null keys are skipped). This must be - * set to true if some of the keys that will be read from accessor may - * be null. - * @param functor A pointer to a functor, which should provide a call - * operator that takes two arguments: const ValueAccessor& (or better - * yet, a templated call operator which takes a const reference to - * some subclass of ValueAccessor as its first argument) and ValueT*. - * The call operator will be invoked once for every tuple with a - * non-null key in accessor. - * @return True on success, false if upsert failed because there was not - * enough space to insert new entries for all the keys in accessor - * (note that some entries may still have been upserted, and - * accessor's iteration will be left on the first tuple which could - * not be inserted). - **/ - bool upsertValueAccessorCompositeKeyFast( - const std::vector<attribute_id> &argument, - ValueAccessor *accessor, - const std::vector<attribute_id> &key_attr_ids, - const bool check_for_null_keys) override; - - /** - * @brief Determine the number of entries (key-value pairs) contained in this - * HashTable. - * @note For some HashTable implementations, this is O(1), but for others it - * may be O(n) where n is the number of buckets. - * - * @warning This method assumes that no concurrent calls to put(), - * putCompositeKey(), putValueAccessor(), - * putValueAccessorCompositeKey(), upsert(), upsertCompositeKey(), - * upsertValueAccessor(), or upsertValueAccessorCompositeKey() are - * taking place (i.e. that this HashTable is immutable for the - * duration of the call). Concurrent calls to getSingle(), - * getSingleCompositeKey(), getAll(), getAllCompositeKey(), - * getAllFromValueAccessor(), getAllFromValueAccessorCompositeKey(), - * forEach(), and forEachCompositeKey() are safe. - * - * @return The number of entries in this HashTable. - **/ - virtual std::size_t numEntries() const = 0; - - /** - * @brief Lookup a key against this hash table to find a matching entry. - * - * @warning Only usable with the hash table that does not allow duplicate - * keys. - * @warning The key must not be null. - * @warning This method assumes that no concurrent calls to put(), - * putCompositeKey(), putValueAccessor(), - * putValueAccessorCompositeKey(), upsert(), upsertCompositeKey(), - * upsertValueAccessor(), or upsertValueAccessorCompositeKey() are - * taking place (i.e. that this HashTable is immutable for the - * duration of the call and as long as the returned pointer may be - * dereferenced). Concurrent calls to getSingle(), - * getSingleCompositeKey(), getAll(), getAllCompositeKey(), - * getAllFromValueAccessor(), getAllFromValueAccessorCompositeKey(), - * forEach(), and forEachCompositeKey() are safe. - * @note This version is for single scalar keys. See also - * getSingleCompositeKey(). - * - * @param key The key to look up. - * @return The value of a matched entry if a matching key is found. - * Otherwise, return NULL. - **/ - virtual const std::uint8_t* getSingle(const TypedValue &key) const = 0; - - /** - * @brief Lookup a composite key against this hash table to find a matching - * entry. - * - * @warning Only usable with the hash table that does not allow duplicate - * keys. - * @warning The key must not be null. - * @warning This method assumes that no concurrent calls to put(), - * putCompositeKey(), putValueAccessor(), - * putValueAccessorCompositeKey(), upsert(), upsertCompositeKey(), - * upsertValueAccessor(), or upsertValueAccessorCompositeKey() are - * taking place (i.e. that this HashTable is immutable for the - * duration of the call and as long as the returned pointer may be - * dereferenced). Concurrent calls to getSingle(), - * getSingleCompositeKey(), getAll(), getAllCompositeKey(), - * getAllFromValueAccessor(), getAllFromValueAccessorCompositeKey(), - * forEach(), and forEachCompositeKey() are safe. - * @note This version is for composite keys. See also getSingle(). - * - * @param key The key to look up. - * @return The value of a matched entry if a matching key is found. - * Otherwise, return NULL. - **/ - virtual const std::uint8_t* getSingleCompositeKey( - const std::vector<TypedValue> &key) const = 0; - virtual const std::uint8_t *getSingleCompositeKey( - const std::vector<TypedValue> &key, int index) const = 0; - - /** - * @brief Lookup a key against this hash table to find matching entries. - * - * @warning The key must not be null. - * @warning This method assumes that no concurrent calls to put(), - * putCompositeKey(), putValueAccessor(), - * putValueAccessorCompositeKey(), upsert(), upsertCompositeKey(), - * upsertValueAccessor(), or upsertValueAccessorCompositeKey() are - * taking place (i.e. that this HashTable is immutable for the - * duration of the call and as long as the returned pointer may be - * dereferenced). Concurrent calls to getSingle(), - * getSingleCompositeKey(), getAll(), getAllCompositeKey(), - * getAllFromValueAccessor(), getAllFromValueAccessorCompositeKey(), - * forEach(), and forEachCompositeKey() are safe. - * @note It is more efficient to call getSingle() if the hash table does not - * allow duplicate keys. - * @note This version is for single scalar keys. See also - * getAllCompositeKey(). - * - * @param key The key to look up. - * @param values A vector to hold values of all matching entries. Matches - * will be appended to the vector. - **/ - virtual void getAll(const TypedValue &key, - std::vector<const std::uint8_t *> *values) const = 0; - - /** - * @brief Lookup a composite key against this hash table to find matching - * entries. - * - * @warning The key must not be null. - * @warning This method assumes that no concurrent calls to put(), - * putCompositeKey(), putValueAccessor(), - * putValueAccessorCompositeKey(), upsert(), upsertCompositeKey(), - * upsertValueAccessor(), or upsertValueAccessorCompositeKey() are - * taking place (i.e. that this HashTable is immutable for the - * duration of the call and as long as the returned pointer may be - * dereferenced). Concurrent calls to getSingle(), - * getSingleCompositeKey(), getAll(), getAllCompositeKey(), - * getAllFromValueAccessor(), getAllFromValueAccessorCompositeKey(), - * forEach(), and forEachCompositeKey() are safe. - * @note It is more efficient to call getSingleCompositeKey() if the hash - * table does not allow duplicate keys. - * @note This version is for composite keys. See also getAll(). - * - * @param key The key to look up. - * @param values A vector to hold values of all matching entries. Matches - * will be appended to the vector. - **/ - virtual void getAllCompositeKey( - const std::vector<TypedValue> &key, - std::vector<const std::uint8_t *> *values) const = 0; - - /** - * @brief Lookup (multiple) keys from a ValueAccessor and apply a functor to - * the matching values. - * - * @warning This method assumes that no concurrent calls to put(), - * putCompositeKey(), putValueAccessor(), - * putValueAccessorCompositeKey(), upsert(), upsertCompositeKey(), - * upsertValueAccessor(), or upsertValueAccessorCompositeKey() are - * taking place (i.e. that this HashTable is immutable for the - * duration of the call and as long as the returned pointer may be - * dereferenced). Concurrent calls to getSingle(), - * getSingleCompositeKey(), getAll(), getAllCompositeKey(), - * getAllFromValueAccessor(), getAllFromValueAccessorCompositeKey(), - * forEach(), and forEachCompositeKey() are safe. - * @note This version is for single scalar keys. See also - * getAllFromValueAccessorCompositeKey(). - * - * @param accessor A ValueAccessor which will be used to access keys. - * beginIteration() should be called on accessor before calling this - * method. - * @param key_attr_id The attribute ID of the keys to be read from accessor. - * @param check_for_null_keys If true, each key will be checked to see if it - * is null before looking it up (null keys are skipped). This must be - * set to true if some of the keys that will be read from accessor may - * be null. - * @param functor A pointer to a functor, which should provide a call - * operator that takes 2 arguments: const ValueAccessor& (or better - * yet, a templated call operator which takes a const reference to - * some subclass of ValueAccessor as its first argument) and - * const ValueT&. The functor will be invoked once for each pair of a - * key taken from accessor and matching value. - **/ - template <typename FunctorT> - void getAllFromValueAccessor(ValueAccessor *accessor, - const attribute_id key_attr_id, - const bool check_for_null_keys, - FunctorT *functor) const; - - /** - * @brief Lookup (multiple) keys from a ValueAccessor, apply a functor to the - * matching values and additionally call a recordMatch() function of - * the functor when the first match for a key is found. - * @warning This method assumes that no concurrent calls to put(), - * putCompositeKey(), putValueAccessor(), - * putValueAccessorCompositeKey(), upsert(), upsertCompositeKey(), - * upsertValueAccessor(), or upsertValueAccessorCompositeKey() are - * taking place (i.e. that this HashTable is immutable for the - * duration of the call and as long as the returned pointer may be - * dereferenced). Concurrent calls to getSingle(), - * getSingleCompositeKey(), getAll(), getAllCompositeKey(), - * getAllFromValueAccessor(), getAllFromValueAccessorCompositeKey(), - * forEach(), and forEachCompositeKey() are safe. - * @note This version is for single scalar keys. See also - * getAllFromValueAccessorCompositeKeyWithExtraWorkForFirstMatch(). - * - * @param accessor A ValueAccessor which will be used to access keys. - * beginIteration() should be called on accessor before calling this - * method. - * @param key_attr_id The attribute ID of the keys to be read from accessor. - * @param check_for_null_keys If true, each key will be checked to see if it - * is null before looking it up (null keys are skipped). This must be - * set to true if some of the keys that will be read from accessor may - * be null. - * @param functor A pointer to a functor, which should provide two functions: - * 1) An operator that takes 2 arguments: const ValueAccessor& (or - * better - * yet, a templated call operator which takes a const reference to - * some subclass of ValueAccessor as its first argument) and - * const ValueT&. The operator will be invoked once for each pair of a - * key taken from accessor and matching value. - * 2) A function hasMatch that takes 1 argument: const ValueAccessor&. - * The function will be called only once for a key from accessor when - * the first match is found. - */ - template <typename FunctorT> - void getAllFromValueAccessorWithExtraWorkForFirstMatch( - ValueAccessor *accessor, - const attribute_id key_attr_id, - const bool check_for_null_keys, - FunctorT *functor) const; - - /** - * @brief Lookup (multiple) keys from a ValueAccessor, apply a functor to the - * matching values and additionally call a recordMatch() function of - * the functor when the first match for a key is found. Composite key - * version. - * @warning This method assumes that no concurrent calls to put(), - * putCompositeKey(), putValueAccessor(), - * putValueAccessorCompositeKey(), upsert(), upsertCompositeKey(), - * upsertValueAccessor(), or upsertValueAccessorCompositeKey() are - * taking place (i.e. that this HashTable is immutable for the - * duration of the call and as long as the returned pointer may be - * dereferenced). Concurrent calls to getSingle(), - * getSingleCompositeKey(), getAll(), getAllCompositeKey(), - * getAllFromValueAccessor(), getAllFromValueAccessorCompositeKey(), - * forEach(), and forEachCompositeKey() are safe. - * - * @param accessor A ValueAccessor which will be used to access keys. - * beginIteration() should be called on accessor before calling this - * method. - * @param key_attr_id The attribute ID of the keys to be read from accessor. - * @param check_for_null_keys If true, each key will be checked to see if it - * is null before looking it up (null keys are skipped). This must be - * set to true if some of the keys that will be read from accessor may - * be null. - * @param functor A pointer to a functor, which should provide two functions: - * 1) An operator that takes 2 arguments: const ValueAccessor& (or - * better - * yet, a templated call operator which takes a const reference to - * some subclass of ValueAccessor as its first argument) and - * const ValueT&. The operator will be invoked once for each pair of a - * key taken from accessor and matching value. - * 2) A function hasMatch that takes 1 argument: const ValueAccessor&. - * The function will be called only once for a key from accessor when - * the first match is found. - */ - template <typename FunctorT> - void getAllFromValueAccessorCompositeKeyWithExtraWorkForFirstMatch( - ValueAccessor *accessor, - const std::vector<attribute_id> &key_attr_ids, - const bool check_for_null_keys, - FunctorT *functor) const; - - /** - * @brief Lookup (multiple) keys from a ValueAccessor and apply a functor to - * the matching values. Composite key version. - * - * @warning This method assumes that no concurrent calls to put(), - * putCompositeKey(), putValueAccessor(), - * putValueAccessorCompositeKey(), upsert(), upsertCompositeKey(), - * upsertValueAccessor(), or upsertValueAccessorCompositeKey() are - * taking place (i.e. that this HashTable is immutable for the - * duration of the call and as long as the returned pointer may be - * dereferenced). Concurrent calls to getSingle(), - * getSingleCompositeKey(), getAll(), getAllCompositeKey(), - * getAllFromValueAccessor(), getAllFromValueAccessorCompositeKey(), - * forEach(), and forEachCompositeKey() are safe. - * @note This version is for composite keys. See also - * getAllFromValueAccessor(). - * - * @param accessor A ValueAccessor which will be used to access keys. - * beginIteration() should be called on accessor before calling this - * method. - * @param key_attr_ids The attribute IDs of each key component to be read - * from accessor. - * @param check_for_null_keys If true, each key will be checked to see if it - * has a null component before inserting it (null keys are skipped). - * This must be set to true if some of the keys that will be read from - * accessor may be null. - * @param functor A pointer to a functor, which should provide a call - * operator that takes 2 arguments: const ValueAccessor& (or better - * yet, a templated call operator which takes a const reference to - * some subclass of ValueAccessor as its first argument) and - * const ValueT&. The functor will be invoked once for each pair of a - * key taken from accessor and matching value. - **/ - template <typename FunctorT> - void getAllFromValueAccessorCompositeKey( - ValueAccessor *accessor, - const std::vector<attribute_id> &key_attr_ids, - const bool check_for_null_keys, - FunctorT *functor) const; - - /** - * @brief Apply the functor to each key with a match in the hash table. - * - * @param accessor A ValueAccessor which will be used to access keys. - * beginIteration() should be called on accessor before calling this - * method. - * @param key_attr_id The attribute ID of the keys to be read from accessor. - * @param check_for_null_keys If true, each key will be checked to see if it - * is null before looking it up (null keys are skipped). This must be - * set to true if some of the keys that will be read from accessor may - * be null. - * @param functor A pointer to a functor which should provide an operator that - * takes 1 argument: const ValueAccessor&. The operator will be called - * only once for a key from accessor if there is a match. - */ - template <typename FunctorT> - void runOverKeysFromValueAccessorIfMatchFound(ValueAccessor *accessor, - const attribute_id key_attr_id, - const bool check_for_null_keys, - FunctorT *functor) const { - return runOverKeysFromValueAccessor<true>( - accessor, key_attr_id, check_for_null_keys, functor); - } - - /** - * @brief Apply the functor to each key with a match in the hash table. - * - * @param accessor A ValueAccessor which will be used to access keys. - * beginIteration() should be called on accessor before calling this - * method. - * @param key_attr_id The attribute ID of the keys to be read from accessor. - * @param check_for_null_keys If true, each key will be checked to see if it - * is null before looking it up (null keys are skipped). This must be - * set to true if some of the keys that will be read from accessor may - * be null. - * @param functor A pointer to a functor which should provide an operator that - * takes 1 argument: const ValueAccessor&. The operator will be called - * only once for a key from accessor if there is a match. - */ - template <typename FunctorT> - void runOverKeysFromValueAccessorIfMatchFoundCompositeKey( - ValueAccessor *accessor, - const std::vector<attribute_id> &key_attr_ids, - const bool check_for_null_keys, - FunctorT *functor) const { - return runOverKeysFromValueAccessorCompositeKey<true>( - accessor, key_attr_ids, check_for_null_keys, functor); - } - - /** - * @brief Apply the functor to each key without a match in the hash table. - * - * @param accessor A ValueAccessor which will be used to access keys. - * beginIteration() should be called on accessor before calling this - * method. - * @param key_attr_id The attribute ID of the keys to be read from accessor. - * @param check_for_null_keys If true, each key will be checked to see if it - * is null before looking it up (null keys are skipped). This must be - * set to true if some of the keys that will be read from accessor may - * be null. - * @param functor A pointer to a functor which should provide an operator that - * takes 1 argument: const ValueAccessor&. The operator will be called - * only once for a key from accessor if there is no match. - */ - template <typename FunctorT> - void runOverKeysFromValueAccessorIfMatchNotFound( - ValueAccessor *accessor, - const attribute_id key_attr_id, - const bool check_for_null_keys, - FunctorT *functor) const { - return runOverKeysFromValueAccessor<false>( - accessor, key_attr_id, check_for_null_keys, functor); - } - - /** - * @brief Apply the functor to each key without a match in the hash table. - * - * @param accessor A ValueAccessor which will be used to access keys. - * beginIteration() should be called on accessor before calling this - * method. - * @param key_attr_id The attribute ID of the keys to be read from accessor. - * @param check_for_null_keys If true, each key will be checked to see if it - * is null before looking it up (null keys are skipped). This must be - * set to true if some of the keys that will be read from accessor may - * be null. - * @param functor A pointer to a functor which should provide an operator that - * takes 1 argument: const ValueAccessor&. The operator will be called - * only once for a key from accessor if there is no match. - */ - template <typename FunctorT> - void runOverKeysFromValueAccessorIfMatchNotFoundCompositeKey( - ValueAccessor *accessor, - const std::vector<attribute_id> &key_attr_ids, - const bool check_for_null_keys, - FunctorT *functor) const { - return runOverKeysFromValueAccessorCompositeKey<false>( - accessor, key_attr_ids, check_for_null_keys, functor); - } - - /** - * @brief Apply a functor to each key, value pair in this hash table. - * - * @warning This method assumes that no concurrent calls to put(), - * putCompositeKey(), putValueAccessor(), - * putValueAccessorCompositeKey(), upsert(), upsertCompositeKey(), - * upsertValueAccessor(), or upsertValueAccessorCompositeKey() are - * taking place (i.e. that this HashTable is immutable for the - * duration of the call and as long as the returned pointer may be - * dereferenced). Concurrent calls to getSingle(), - * getSingleCompositeKey(), getAll(), getAllCompositeKey(), - * getAllFromValueAccessor(), getAllFromValueAccessorCompositeKey(), - * forEach(), and forEachCompositeKey() are safe. - * @note This version is for single scalar keys. See also - * forEachCompositeKey(). - * - * @param functor A pointer to a functor, which should provide a call - * operator which takes 2 arguments: const TypedValue&, const ValueT&. - * The call operator will be invoked once on each key, value pair in - * this hash table (note that if allow_duplicate_keys is true, - * the call may occur multiple times for the same key with different - * values). - * @return The number of key-value pairs visited. - **/ - template <typename FunctorT> - std::size_t forEach(FunctorT *functor) const; - - /** - * @brief Apply a functor to each key, value pair in this hash table. - * - * @warning This method assumes that no concurrent calls to put(), - * putCompositeKey(), putValueAccessor(), - * putValueAccessorCompositeKey(), upsert(), upsertCompositeKey(), - * upsertValueAccessor(), or upsertValueAccessorCompositeKey() are - * taking place (i.e. that this HashTable is immutable for the - * duration of the call and as long as the returned pointer may be - * dereferenced). Concurrent calls to getSingle(), - * getSingleCompositeKey(), getAll(), getAllCompositeKey(), - * getAllFromValueAccessor(), getAllFromValueAccessorCompositeKey(), - * forEach(), and forEachCompositeKey() are safe. - * @note This version is for composite keys. See also forEach(). - * - * @param functor A pointer to a functor, which should provide a call - * operator which takes 2 arguments: const std::vector<TypedValue>&, - * const ValueT&. The call operator will be invoked once on each key, - * value pair in this hash table (note that if allow_duplicate_keys is - * true, the call may occur multiple times for the same key with - * different values). - * @return The number of key-value pairs visited. - **/ - template <typename FunctorT> - std::size_t forEachCompositeKeyFast(FunctorT *functor) const; - - template <typename FunctorT> - std::size_t forEachCompositeKeyFast(FunctorT *functor, int index) const; - - /** - * @brief A call to this function will cause a bloom filter to be built - * during the build phase of this hash table. - **/ - inline void enableBuildSideBloomFilter() { - has_build_side_bloom_filter_ = true; - } - - /** - * @brief A call to this function will cause a set of bloom filters to be - * probed during the probe phase of this hash table. - **/ - inline void enableProbeSideBloomFilter() { - has_probe_side_bloom_filter_ = true; - } - - /** - * @brief This function sets the pointer to the bloom filter to be - * used during the build phase of this hash table. - * @warning Should call enable_build_side_bloom_filter() first to enable - * bloom filter usage during build phase. - * @note The ownership of the bloom filter lies with the caller. - * - * @param bloom_filter The pointer to the bloom filter. - **/ - inline void setBuildSideBloomFilter(BloomFilter *bloom_filter) { - build_bloom_filter_ = bloom_filter; - } - - /** - * @brief This function adds a pointer to the list of bloom filters to be - * used during the probe phase of this hash table. - * @warning Should call enable_probe_side_bloom_filter() first to enable - * bloom filter usage during probe phase. - * @note The ownership of the bloom filter lies with the caller. - * - * @param bloom_filter The pointer to the bloom filter. - **/ - inline void addProbeSideBloomFilter(const BloomFilter *bloom_filter) { - probe_bloom_filters_.emplace_back(bloom_filter); - } - - /** - * @brief This function adds a vector of attribute ids corresponding to a - * bloom filter used during the probe phase of this hash table. - * @warning Should call enable_probe_side_bloom_filter() first to enable - * bloom filter usage during probe phase. - * - * @param probe_attribute_ids The vector of attribute ids to use for probing - * the bloom filter. - **/ - inline void addProbeSideAttributeIds( - std::vector<attribute_id> &&probe_attribute_ids) { - probe_attribute_ids_.push_back(probe_attribute_ids); - } - - protected: - /** - * @brief Constructor for new resizable hash table. - * - * @param key_types A vector of one or more types (>1 indicates a composite - * key). - * @param num_entries The estimated number of entries this hash table will - * hold. - * @param storage_manager The StorageManager to use (a StorageBlob will be - * allocated to hold this hash table's contents). - * @param adjust_hashes If true, the hash of a key should be modified by - * applying AdjustHash() so that it does not collide with one of the - * special values kEmptyHash or kPendingHash. If false, the hash is - * used as-is. - * @param use_scalar_literal_hash If true, the key is a single scalar literal - * (non-composite) that it is safe to use the simplified hash function - * TypedValue::getHashScalarLiteral() on. If false, the generic - * TypedValue::getHash() method will be used. - * @param preallocate_supported If true, this HashTable overrides - * preallocateForBulkInsert() to allow bulk-allocation of resources - * (i.e. buckets and variable-length key storage) in a single up-front - * pass when bulk-inserting entries. If false, resources are allocated - * on the fly for each entry. - **/ - FastHashTable(const std::vector<const Type *> &key_types, - const std::size_t num_entries, - const std::vector<AggregationHandle *> &handles, - const std::vector<std::size_t> &payload_sizes, - StorageManager *storage_manager, - const bool adjust_hashes, - const bool use_scalar_literal_hash, - const bool preallocate_supported) - : key_types_(key_types), - scalar_key_inline_(true), - key_inline_(nullptr), - adjust_hashes_(adjust_hashes), - use_scalar_literal_hash_(use_scalar_literal_hash), - preallocate_supported_(preallocate_supported), - handles_(handles), - num_handles_(handles.size()), - total_payload_size_(std::accumulate( - payload_sizes.begin(), payload_sizes.end(), sizeof(SpinMutex))), - storage_manager_(storage_manager), - hash_table_memory_(nullptr), - hash_table_memory_size_(0) { - DEBUG_ASSERT(resizable); - std::size_t running_sum = sizeof(SpinMutex); - for (auto size : payload_sizes) { - payload_offsets_.emplace_back(running_sum); - running_sum += size; - } - } - - /** - * @brief Constructor for non-resizable hash table. - * - * @param key_types A vector of one or more types (>1 indicates a composite - * key). - * @param hash_table_memory A pointer to memory to use for this hash table. - * @param hash_table_memory_size The size of hash_table_memory in bytes. - * @param new_hash_table If true, this hash table is being constructed for - * the first time and hash_table_memory will be cleared. If false, - * reload a pre-existing hash table. - * @param hash_table_memory_zeroed If new_hash_table is true, setting this to - * true means that this HashTable will assume that hash_table_memory - * has already been zeroed-out (any newly-allocated block or blob - * memory from StorageManager is zeroed-out). If false, this HashTable - * will explicitly zero-fill its memory as neccessary. This parameter - * has no effect when new_hash_table is false. - * @param adjust_hashes If true, the hash of a key should be modified by - * applying AdjustHash() so that it does not collide with one of the - * special values kEmptyHash or kPendingHash. If false, the hash is - * used as-is. - * @param use_scalar_literal_hash If true, the key is a single scalar literal - * (non-composite) that it is safe to use the simplified hash function - * TypedValue::getHashScalarLiteral() on. If false, the generic - * TypedValue::getHash() method will be used. - * @param preallocate_supported If true, this HashTable overrides - * preallocateForBulkInsert() to allow bulk-allocation of resources - * (i.e. buckets and variable-length key storage) in a single up-front - * pass when bulk-inserting entries. If false, resources are allocated - * on the fly for each entry. - **/ - FastHashTable(const std::vector<const Type *> &key_types, - void *hash_table_memory, - const std::size_t hash_table_memory_size, - const bool new_hash_table, - const bool hash_table_memory_zeroed, - const bool adjust_hashes, - const bool use_scalar_literal_hash, - const bool preallocate_supported) - : key_types_(key_types), - scalar_key_inline_(true), - key_inline_(nullptr), - adjust_hashes_(adjust_hashes), - use_scalar_literal_hash_(use_scalar_literal_hash), - preallocate_supported_(preallocate_supported), - storage_manager_(nullptr), - hash_table_memory_(hash_table_memory), - hash_table_memory_size_(hash_table_memory_size) { - DEBUG_ASSERT(!resizable); - } - - // Adjust 'hash' so that it is not exactly equal to either of the special - // values kEmptyHash or kPendingHash. - inline constexpr static std::size_t AdjustHash(const std::size_t hash) { - return hash + (hash == kEmptyHash) - (hash == kPendingHash); - } - - // Set information about which key components are stored inline. This usually - // comes from a HashTableKeyManager, and is set by the constructor of a - // subclass of HashTable. - inline void setKeyInline(const std::vector<bool> *key_inline) { - scalar_key_inline_ = key_inline->front(); - key_inline_ = key_inline; - } - - // Generate a hash for a composite key by hashing each component of 'key' and - // mixing their bits with CombineHashes(). - inline std::size_t hashCompositeKey(const std::vector<TypedValue> &key) const; - - // If 'force_key_copy' is true and some part of a composite key is - // variable-length, calculate the total number of bytes for variable-length - // key components that need to be copied. Otherwise, return 0 to indicate - // that no variable-length copy is required. - inline std::size_t calculateVariableLengthCompositeKeyCopySize( - const std::vector<TypedValue> &key) const; - - // Helpers for put. If this HashTable is resizable, 'resize_shared_mutex_' - // should be locked in shared mode before calling either of these methods. - virtual HashTablePutResult putInternal( - const TypedValue &key, - const std::size_t variable_key_size, - const std::uint8_t &value, - HashTablePreallocationState *prealloc_state) = 0; - - virtual HashTablePutResult putCompositeKeyInternalFast( - const std::vector<TypedValue> &key, - const std::size_t variable_key_size, - const std::uint8_t *init_value_ptr, - HashTablePreallocationState *prealloc_state) = 0; - - // Helpers for upsert. Both return a pointer to the value corresponding to - // 'key'. If this HashTable is resizable, 'resize_shared_mutex_' should be - // locked in shared mode while calling and using the returned pointer. May - // return NULL if there is not enough space to insert a new key, in which - // case a resizable HashTable should release the 'resize_shared_mutex_' and - // call resize(), then try again. - virtual std::uint8_t *upsertInternalFast( - const TypedValue &key, - const std::size_t variable_key_size, - const std::uint8_t *init_value_ptr) = 0; - - virtual std::uint8_t *upsertCompositeKeyInternalFast( - const std::vector<TypedValue> &key, - const std::uint8_t *init_value_ptr, - const std::size_t variable_key_size) = 0; - - // Helpers for forEach. Each return true on success, false if no more entries - // exist to iterate over. After a successful call, '*key' is overwritten with - // the key of the next entry, '*value' points to the associated value, and - // '*entry_num' is incremented to the next (implementation defined) entry to - // check ('*entry_num' should initially be set to zero). - virtual bool getNextEntry(TypedValue *key, - const std::uint8_t **value, - std::size_t *entry_num) const = 0; - virtual bool getNextEntryCompositeKey(std::vector<TypedValue> *key, - const std::uint8_t **value, - std::size_t *entry_num) const = 0; - - // Helpers for getAllFromValueAccessor. Each return true on success, false if - // no more entries exist for the specified key. After a successful call, - // '*value' points to the associated value, and '*entry_num' is incremented - // to the next (implementation defined) entry to check ('*entry_num' should - // initially be set to zero). - virtual bool getNextEntryForKey(const TypedValue &key, - const std::size_t hash_code, - const std::uint8_t **value, - std::size_t *entry_num) const = 0; - virtual bool getNextEntryForCompositeKey(const std::vector<TypedValue> &key, - const std::size_t hash_code, - const std::uint8_t **value, - std::size_t *entry_num) const = 0; - - // Return true if key exists in the hash table. - virtual bool hasKey(const TypedValue &key) const = 0; - virtual bool hasCompositeKey(const std::vector<TypedValue> &key) const = 0; - - // For a resizable HashTable, grow to accomodate more entries. If - // 'extra_buckets' is not zero, it may serve as a "hint" to implementations - // that at least the requested number of extra buckets are required when - // resizing (mainly used in putValueAccessor() and - // putValueAccessorCompositeKey() when 'preallocate_supported_' is true). - // Implementations are free to ignore 'extra_buckets'. If - // 'extra_variable_storage' is not zero, implementations will attempt to - // allocate at least enough additional variable-key storage space to - // accomodate the number of bytes specified. 'retry_num' is intended ONLY for - // when resize() recursively calls itself and should not be set to nonzero by - // any other caller. - virtual void resize(const std::size_t extra_buckets, - const std::size_t extra_variable_storage, - const std::size_t retry_num = 0) = 0; - - // In the case where 'allow_duplicate_keys' is true, it is possible to - // pre-calculate the number of key-value entries and the amount of - // variable-length key storage that will be needed to insert all the - // entries from a ValueAccessor in putValueAccessor() or - // putValueAccessorCompositeKey() before actually inserting anything. Some - // HashTable implemetations (notably SeparateChainingHashTable) can achieve - // better performance by ammortizing the cost of allocating certain resources - // (buckets and variable-length key storage) in one up-front allocation. This - // method is intended to support that. Returns true and fills in - // '*prealloc_state' if pre-allocation was successful. Returns false if a - // resize() is needed. - virtual bool preallocateForBulkInsert( - const std::size_t total_entries, - const std::size_t total_variable_key_size, - HashTablePreallocationState *prealloc_state) { - FATAL_ERROR( - "Called HashTable::preallocateForBulkInsert() on a HashTable " - "implementation that does not support preallocation."); - } - - // Type(s) of keys. - const std::vector<const Type *> key_types_; - - // Information about whether key components are stored inline or in a - // separate variable-length storage region. This is usually determined by a - // HashTableKeyManager and set by calling setKeyInline(). - bool scalar_key_inline_; - const std::vector<bool> *key_inline_; - - // Whether hashes should be adjusted by AdjustHash() before being used. - const bool adjust_hashes_; - // Whether it is safe to use the simplified TypedValue::getHashScalarLiteral() - // method instead of the generic TypedValue::getHash() method. - const bool use_scalar_literal_hash_; - // Whether preallocateForBulkInsert() is supported by this HashTable. - const bool preallocate_supported_; - - const std::vector<AggregationHandle *> handles_; - const unsigned int num_handles_; - const std::size_t total_payload_size_; - std::vector<std::size_t> payload_offsets_; - - // Used only when resizable is true: - StorageManager *storage_manager_; - MutableBlobReference blob_; - // Locked in shared mode for most operations, exclusive mode during resize. - // Not locked at all for non-resizable HashTables. - alignas(kCacheLineBytes) SpinSharedMutex<true> resize_shared_mutex_; - - // Used only when resizable is false: - void *hash_table_memory_; - const std::size_t hash_table_memory_size_; - - private: - // Assign '*key_vector' with the attribute values specified by 'key_attr_ids' - // at the current position of 'accessor'. If 'check_for_null_keys' is true, - // stops and returns true if any of the values is null, otherwise returns - // false. - template <typename ValueAccessorT> - inline static bool GetCompositeKeyFromValueAccessor( - const ValueAccessorT &accessor, - const std::vector<attribute_id> &key_attr_ids, - const bool check_for_null_keys, - std::vector<TypedValue> *key_vector) { - for (std::vector<attribute_id>::size_type key_idx = 0; - key_idx < key_attr_ids.size(); - ++key_idx) { - (*key_vector)[key_idx] = accessor.getTypedValue(key_attr_ids[key_idx]); - if (check_for_null_keys && (*key_vector)[key_idx].isNull()) { - return true; - } - } - return false; - } - - // If run_if_match_found is true, apply the functor to each key if a match is - // found; otherwise, apply the functor if no match is found. - template <bool run_if_match_found, typename FunctorT> - void runOverKeysFromValueAccessor(ValueAccessor *accessor, - const attribute_id key_attr_id, - const bool check_for_null_keys, - FunctorT *functor) const; - - template <bool run_if_match_found, typename FunctorT> - void runOverKeysFromValueAccessorCompositeKey( - ValueAccessor *accessor, - const std::vector<attribute_id> &key_attr_ids, - const bool check_for_null_keys, - FunctorT *functor) const; - - // Method containing the actual logic implementing getAllFromValueAccessor(). - // Has extra template parameters that control behavior to avoid some - // inner-loop branching. - template <typename FunctorT, - bool check_for_null_keys, - bool adjust_hashes_template, - bool use_scalar_literal_hash_template> - void getAllFromValueAccessorImpl(ValueAccessor *accessor, - const attribute_id key_attr_id, - FunctorT *functor) const; - - // Data structures used for bloom filter optimized semi-joins. - bool has_build_side_bloom_filter_ = false; - bool has_probe_side_bloom_filter_ = false; - BloomFilter *build_bloom_filter_; - std::vector<const BloomFilter *> probe_bloom_filters_; - std::vector<std::vector<attribute_id>> probe_attribute_ids_; - DISALLOW_COPY_AND_ASSIGN(FastHashTable); -}; - -/** - * @brief An instantiation of the HashTable template for use in aggregations. - * @note This has force_key_copy = true, so that we don't have dangling pointers - * to blocks that are evicted. - **/ -using AggregationStateFastHashTable = FastHashTable<true, false, true, false>; - -/** @} */ - -// ---------------------------------------------------------------------------- -// Implementations of template class methods follow. - -template <bool resizable, - bool serializable, - bool force_key_copy, - bool allow_duplicate_keys> -HashTablePutResult -FastHashTable<resizable, serializable, force_key_copy, allow_duplicate_keys>:: - put(const TypedValue &key, const std::uint8_t &value) { - const std::size_t variable_size = - (force_key_copy && !scalar_key_inline_) ? key.getDataSize() : 0; - if (resizable) { - HashTablePutResult result = HashTablePutResult::kOutOfSpace; - while (result == HashTablePutResult::kOutOfSpace) { - { - SpinSharedMutexSharedLock<true> lock(resize_shared_mutex_); - result = putInternal(key, variable_size, value, nullptr); - } - if (result == HashTablePutResult::kOutOfSpace) { - resize(0, variable_size); - } - } - return result; - } else { - return putInternal(key, variable_size, value, nullptr); - } -} - -template <bool resizable, - bool serializable, - bool force_key_copy, - bool allow_duplicate_keys> -HashTablePutResult -FastHashTable<resizable, serializable, force_key_copy, allow_duplicate_keys>:: - putCompositeKey(const std::vector<TypedValue> &key, - const std::uint8_t *init_value_ptr) { - const std::size_t variable_size = - calculateVariableLengthCompositeKeyCopySize(key); - if (resizable) { - HashTablePutResult result = HashTablePutResult::kOutOfSpace; - while (result == HashTablePutResult::kOutOfSpace) { - { - SpinSharedMutexSharedLock<true> lock(resize_shared_mutex_); - result = putCompositeKeyInternalFast( - key, variable_size, init_value_ptr, nullptr); - } - if (result == HashTablePutResult::kOutOfSpace) { - resize(0, variable_size); - } - } - return result; - } else { - return putCompositeKeyInternalFast( - key, variable_size, init_value_ptr, nullptr); - } -} - -template <bool resizable, - bool serializable, - bool force_key_copy, - bool allow_duplicate_keys> -template <typename FunctorT> -HashTablePutResult -FastHashTable<resizable, serializable, force_key_copy, allow_duplicate_keys>:: - putValueAccessor(ValueAccessor *accessor, - const attribute_id key_attr_id, - const bool check_for_null_keys, - FunctorT *functor) { - HashTablePutResult result = HashTablePutResult::kOutOfSpace; - std::size_t variable_size; - HashTablePreallocationState prealloc_state; - bool using_prealloc = allow_duplicate_keys && preallocate_supported_; - return InvokeOnAnyValueAccessor( - accessor, - [&](auto *accessor) -> HashTablePutResult { // NOLINT(build/c++11) - if (using_prealloc) { - std::size_t total_entries = 0; - std::size_t total_variable_key_size = 0; - if (check_for_null_keys || (force_key_copy && !scalar_key_inline_)) { - // If we need to filter out nulls OR make variable copies, make a - // prepass over the ValueAccessor. - while (accessor->next()) { - TypedValue key = accessor->getTypedValue(key_attr_id); - if (check_for_null_keys && key.isNull()) { - continue; - } - ++total_entries; - total_variable_key_size += (force_key_copy && !scalar_key_inline_) - ? key.getDataSize() - : 0; - } - accessor->beginIteration(); - } else { - total_entries = accessor->getNumTuples(); - } - if (resizable) { - bool prealloc_succeeded = false; - while (!prealloc_succeeded) { - { - SpinSharedMutexSharedLock<true> lock(resize_shared_mutex_); - prealloc_succeeded = this->preallocateForBulkInsert( - total_entries, total_variable_key_size, &prealloc_state); - } - if (!prealloc_succeeded) { - this->resize(total_entries, total_variable_key_size); - } - } - } else { - using_prealloc = this->preallocateForBulkInsert( - total_entries, total_variable_key_size, &prealloc_state); - } - } - std::unique_ptr<BloomFilter> thread_local_bloom_filter; - if (has_build_side_bloom_filter_) { - thread_local_bloom_filter.reset( - new BloomFilter(build_bloom_filter_->getRandomSeed(), - build_bloom_filter_->getNumberOfHashes(), - build_bloom_filter_->getBitArraySize())); - } - if (resizable) { - while (result == HashTablePutResult::kOutOfSpace) { - { - result = HashTablePutResult::kOK; - SpinSharedMutexSharedLock<true> lock(resize_shared_mutex_); - while (accessor->next()) { - TypedValue key = accessor->getTypedValue(key_attr_id); - if (check_for_null_keys && key.isNull()) { - continue; - } - variable_size = (force_key_copy && !scalar_key_inline_) - ? key.getDataSize() - : 0; - result = this->putInternal( - key, - variable_size, - (*functor)(*accessor), - using_prealloc ? &prealloc_state : nullptr); - // Insert into bloom filter, if enabled. - if (has_build_side_bloom_filter_) { - thread_local_bloom_filter->insertUnSafe( - static_cast<const std::uint8_t *>(key.getDataPtr()), - key.getDataSize()); - } - if (result == HashTablePutResult::kDuplicateKey) { - DEBUG_ASSERT(!using_prealloc); - return result; - } else if (result == HashTablePutResult::kOutOfSpace) { - DEBUG_ASSERT(!using_prealloc); - break; - } - } - } - if (result == HashTablePutResult::kOutOfSpace) { - this->resize(0, variable_size); - accessor->previous(); - } - } - } else { - while (accessor->next()) { - TypedValue key = accessor->getTypedValue(key_attr_id); - if (check_for_null_keys && key.isNull()) { - continue; - } - variable_size = - (force_key_copy && !scalar_key_inline_) ? key.getDataSize() : 0; - result = - this->putInternal(key, - variable_size, - (*functor)(*accessor), - using_prealloc ? &prealloc_state : nullptr); - // Insert into bloom filter, if enabled. - if (has_build_side_bloom_filter_) { - thread_local_bloom_filter->insertUnSafe( - static_cast<const std::uint8_t *>(key.getDataPtr()), - key.getDataSize()); - } - if (result != HashTablePutResult::kOK) { - return result; - } - } - } - // Update the build side bloom filter with thread local copy, if - // available. - if (has_build_side_bloom_filter_) { - build_bloom_filter_->bitwiseOr(thread_local_bloom_filter.get()); - } - - return HashTablePutResult::kOK; - }); -} - -template <bool resizable, - bool serializable, - bool force_key_copy, - bool allow_duplicate_keys> -template <typename FunctorT> -HashTablePutResult -FastHashTable<resizable, serializable, force_key_copy, allow_duplicate_keys>:: - putValueAccessorCompositeKey(ValueAccessor *accessor, - const std::vector<attribute_id> &key_attr_ids, - const bool check_for_null_keys, - FunctorT *functor) { - DEBUG_ASSERT(key_types_.size() == key_attr_ids.size()); - HashTablePutResult result = HashTablePutResult::kOutOfSpace; - std::size_t variable_size; - HashTablePreallocationState prealloc_state; - bool using_prealloc = allow_duplicate_keys && preallocate_supported_; - std::vector<TypedValue> key_vector; - key_vector.resize(key_attr_ids.size()); - return InvokeOnAnyValueAccessor( - accessor, - [&](auto *accessor) -> HashTablePutResult { // NOLINT(build/c++11) - if (using_prealloc) { - std::size_t total_entries = 0; - std::size_t total_variable_key_size = 0; - if (check_for_null_keys || force_key_copy) { - // If we need to filter out nulls OR make variable copies, make a - // prepass over the ValueAccessor. - while (accessor->next()) { - if (this->GetCompositeKeyFromValueAccessor(*accessor, - key_attr_ids, - check_for_null_keys, - &key_vector)) { - continue; - } - ++total_entries; - total_variable_key_size += - this->calculateVariableLengthCompositeKeyCopySize(key_vector); - } - accessor->beginIteration(); - } else { - total_entries = accessor->getNumTuples(); - } - if (resizable) { - bool prealloc_succeeded = false; - while (!prealloc_succeeded) { - { - SpinSharedMutexSharedLock<true> lock(resize_shared_mutex_); - prealloc_succeeded = this->preallocateForBulkInsert( - total_entries, total_variable_key_size, &prealloc_state); - } - if (!prealloc_succeeded) { - this->resize(total_entries, total_variable_key_size); - } - } - } else { - using_prealloc = this->preallocateForBulkInsert( - total_entries, total_variable_key_size, &prealloc_state); - } - } - if (resizable) { - while (result == HashTablePutResult::kOutOfSpace) { - { - result = HashTablePutResult::kOK; - SpinSharedMutexSharedLock<true> lock(resize_shared_mutex_); - while (accessor->next()) { - if (this->GetCompositeKeyFromValueAccessor(*accessor, - key_attr_ids, - check_for_null_keys, - &key_vector)) { - continue; - } - variable_size = - this->calculateVariableLengthCompositeKeyCopySize( - key_vector); - result = this->putCompositeKeyInternal( - key_vector, - variable_size, - (*functor)(*accessor), - using_prealloc ? &prealloc_state : nullptr); - if (result == HashTablePutResult::kDuplicateKey) { - DEBUG_ASSERT(!using_prealloc); - return result; - } else if (result == HashTablePutResult::kOutOfSpace) { - DEBUG_ASSERT(!using_prealloc); - break; - } - } - } - if (result == HashTablePutResult::kOutOfSpace) { - this->resize(0, variable_size); - accessor->previous(); - } - } - } else { - while (accessor->next()) { - if (this->GetCompositeKeyFromValueAccessor(*accessor, - key_attr_ids, - check_for_null_keys, - &key_vector)) { - continue; - } - variable_size = - this->calculateVariableLengthCompositeKeyCopySize(key_vector); - result = this->putCompositeKeyInternal( - key_vector, - variable_size, - (*functor)(*accessor), - using_prealloc ? &prealloc_state : nullptr); - if (result != HashTablePutResult::kOK) { - return result; - } - } - } - - return HashTablePutResult::kOK; - }); -} - -template <bool resizable, - bool serializable, - bool force_key_copy, - bool allow_duplicate_keys> -template <typename FunctorT> -bool FastHashTable<resizable, - serializable, - force_key_copy, - allow_duplicate_keys>::upsert(const TypedValue &key, - const std::uint8_t - *initial_value_ptr, - FunctorT *functor) { - DEBUG_ASSERT(!allow_duplicate_keys); - const std::size_t variable_size = - (force_key_copy && !scalar_key_inline_) ? key.getDataSize() : 0; - if (resizable) { - for (;;) { - { - SpinSharedMutexSharedLock<true> resize_lock(resize_shared_mutex_); - std::uint8_t *value = - upsertInternalFast(key, variable_size, initial_value_ptr); - if (value != nullptr) { - (*functor)(value); - return true; - } - } - resize(0, force_key_copy && !scalar_key_inline_ ? key.getDataSize() : 0); - } - } else { - std::uint8_t *value = - upsertInternalFast(key, variable_size, initial_value_ptr); - if (value == nullptr) { - return false; - } else { - (*functor)(value); - return true; - } - } -} - -class HashTableMergerFast { - public: - /** - * @brief Constructor - * - * @param handle The Aggregation handle being used. - * @param destination_hash_table The destination hash table to which other - * hash tables will be merged. - **/ - explicit HashTableMergerFast( - AggregationStateHashTableBase *destination_hash_table) - : destination_hash_table_( - static_cast<FastHashTable<true, false, true, false> *>( - destination_hash_table)) {} - - /** - * @brief The operator for the functor. - * - * @param group_by_key The group by key being merged. - * @param source_state The aggregation state for the given key in the source - * aggregation hash table. - **/ - inline void operator()(const std::vector<TypedValue> &group_by_key, - const std::uint8_t *source_state) { - const std::uint8_t *original_state = - destination_hash_table_->getSingleCompositeKey(group_by_key); - if (original_state != nullptr) { - // The CHECK is required as upsertCompositeKey can return false if the - // hash table runs out of space during the upsert process. The ideal - // solution will be to retry again if the upsert fails. - CHECK(destination_hash_table_->upsertCompositeKeyFast( - group_by_key, original_state, source_state)); - } else { - destination_hash_table_->putCompositeKey(group_by_key, source_state); - } - } - - private: - FastHashTable<true, false, true, false> *destination_hash_table_; - - DISALLOW_COPY_AND_ASSIGN(HashTableMergerFast); -}; - -template <bool resizable, - bool serializable, - bool force_key_copy, - bool allow_duplicate_keys> -template <typename FunctorT> -bool FastHashTable<resizable, - serializable, - force_key_copy, - allow_duplicate_keys>:: - upsertCompositeKeyFast(const std::vector<TypedValue> &key, - const std::uint8_t *init_value_ptr, - FunctorT *functor) { - DEBUG_ASSERT(!allow_duplicate_keys); - const std::size_t variable_size = - calculateVariableLengthCompositeKeyCopySize(key); - if (resizable) { - for (;;) { - { - SpinSharedMutexSharedLock<true> resize_lock(resize_shared_mutex_); - std::uint8_t *value = - upsertCompositeKeyInternalFast(key, init_value_ptr, variable_size); - if (value != nullptr) { - (*functor)(value); - return true; - } - } - resize(0, variable_size); - } - } else { - std::uint8_t *value = - upsertCompositeKeyInternalFast(key, init_value_ptr, variable_size); - if (value == nullptr) { - return false; - } else { - (*functor)(value); - return true; - } - } -} - -template <bool resizable, - bool serializable, - bool force_key_copy, - bool allow_duplicate_keys> -template <typename FunctorT> -bool FastHashTable<resizable, - serializable, - force_key_copy, - allow_duplicate_keys>:: - upsertCompositeKeyFast(const std::vector<TypedValue> &key, - const std::uint8_t *init_value_ptr, - FunctorT *functor, - int index) { - DEBUG_ASSERT(!allow_duplicate_keys); - const std::size_t variable_size = - calculateVariableLengthCompositeKeyCopySize(key); - if (resizable) { - for (;;) { - { - SpinSharedMutexSharedLock<true> resize_lock(resize_shared_mutex_); - std::uint8_t *value = - upsertCompositeKeyInternalFast(key, init_value_ptr, variable_size); - if (value != nullptr) { - (*functor)(value + payload_offsets_[index]); - return true; - } - } - resize(0, variable_size); - } - } else { - std::uint8_t *value = - upsertCompositeKeyInternalFast(key, init_value_ptr, variable_size); - if (value == nullptr) { - return false; - } else { - (*functor)(value + payload_offsets_[index]); - return true; - } - } -} - -template <bool resizable, - bool serializable, - bool force_key_copy, - bool allow_duplicate_keys> -bool FastHashTable<resizable, - serializable, - force_key_copy, - allow_duplicate_keys>:: - upsertCompositeKeyFast(const std::vector<TypedValue> &key, - const std::uint8_t *init_value_ptr, - const std::uint8_t *source_state) { - DEBUG_ASSERT(!allow_duplicate_keys); - const std::size_t variable_size = - calculateVariableLengthCompositeKeyCopySize(key); - if (resizable) { - for (;;) { - { - SpinSharedMutexSharedLock<true> resize_lock(resize_shared_mutex_); - std::uint8_t *value = - upsertCompositeKeyInternalFast(key, init_value_ptr, variable_size); - if (value != nullptr) { - SpinMutexLock lock(*(reinterpret_cast<SpinMutex *>(value))); - for (unsigned int k = 0; k < num_handles_; ++k) { - handles_[k]->mergeStatesFast(source_state + payload_offsets_[k], - value + payload_offsets_[k]); - } - return true; - } - } - resize(0, variable_size); - } - } else { - std::uint8_t *value = - upsertCompositeKeyInternalFast(key, init_value_ptr, variable_size); -
<TRUNCATED>