Repository: incubator-quickstep Updated Branches: refs/heads/adaptive-bloom-filters [created] 946332c38
Initial commit Project: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/commit/946332c3 Tree: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/tree/946332c3 Diff: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/diff/946332c3 Branch: refs/heads/adaptive-bloom-filters Commit: 946332c387cccc00e43666c72bb5a4b80f633a48 Parents: 2d39b8e Author: Jianqiao Zhu <jianq...@cs.wisc.edu> Authored: Sat Jun 11 23:14:00 2016 -0500 Committer: Jianqiao Zhu <jianq...@cs.wisc.edu> Committed: Sun Jun 12 20:02:28 2016 -0500 ---------------------------------------------------------------------- CMakeLists.txt | 1 + cli/QuickstepCli.cpp | 10 ++ compression/CompressionDictionaryLite.hpp | 42 ++++++ storage/BasicColumnStoreValueAccessor.hpp | 26 +++- storage/CMakeLists.txt | 2 + storage/CompressedColumnStoreValueAccessor.hpp | 22 +++ .../CompressedPackedRowStoreValueAccessor.hpp | 22 +++ storage/HashTable.hpp | 37 +++-- storage/PackedRowStoreValueAccessor.hpp | 25 +++- storage/SplitRowStoreValueAccessor.hpp | 45 ++++++ storage/ValueAccessor.hpp | 36 +++++ types/containers/ColumnVector.hpp | 35 +++++ types/containers/ColumnVectorsValueAccessor.hpp | 17 +++ utility/BloomFilterAdapter.hpp | 128 +++++++++++++++++ utility/CMakeLists.txt | 10 ++ utility/EventProfiler.cpp | 28 ++++ utility/EventProfiler.hpp | 138 +++++++++++++++++++ 17 files changed, 600 insertions(+), 24 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/946332c3/CMakeLists.txt ---------------------------------------------------------------------- diff --git a/CMakeLists.txt b/CMakeLists.txt index ef7fd50..4c566ae 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -722,6 +722,7 @@ target_link_libraries(quickstep_cli_shell quickstep_queryoptimizer_QueryProcessor quickstep_storage_PreloaderThread quickstep_threading_ThreadIDBasedMap + quickstep_utility_EventProfiler quickstep_utility_Macros quickstep_utility_PtrVector quickstep_utility_SqlError http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/946332c3/cli/QuickstepCli.cpp ---------------------------------------------------------------------- diff --git a/cli/QuickstepCli.cpp b/cli/QuickstepCli.cpp index 558d6eb..d59b021 100644 --- a/cli/QuickstepCli.cpp +++ b/cli/QuickstepCli.cpp @@ -69,6 +69,7 @@ typedef quickstep::LineReaderDumb LineReaderImpl; #include "storage/PreloaderThread.hpp" #include "threading/ThreadIDBasedMap.hpp" +#include "utility/EventProfiler.hpp" #include "utility/Macros.hpp" #include "utility/PtrVector.hpp" #include "utility/SqlError.hpp" @@ -150,6 +151,8 @@ DEFINE_bool(initialize_db, false, "If true, initialize a database."); DEFINE_bool(print_query, false, "Print each input query statement. This is useful when running a " "large number of queries in a batch."); +DEFINE_string(profile_output, "", + "Output file name for writing the profiled events."); } // namespace quickstep @@ -419,6 +422,13 @@ int main(int argc, char* argv[]) { printf("Time: %s ms\n", quickstep::DoubleToStringWithSignificantDigits( time_ms.count(), 3).c_str()); + + if (!quickstep::FLAGS_profile_output.empty()) { + std::ofstream ofs(quickstep::FLAGS_profile_output, std::ios::out); + quickstep::simple_profiler.writeToStream(ofs); + quickstep::simple_profiler.clear(); + ofs.close(); + } } catch (const std::exception &e) { fprintf(stderr, "QUERY EXECUTION ERROR: %s\n", e.what()); break; http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/946332c3/compression/CompressionDictionaryLite.hpp ---------------------------------------------------------------------- diff --git a/compression/CompressionDictionaryLite.hpp b/compression/CompressionDictionaryLite.hpp index 45019c0..8c7741f 100644 --- a/compression/CompressionDictionaryLite.hpp +++ b/compression/CompressionDictionaryLite.hpp @@ -174,6 +174,15 @@ class CompressionDictionaryLite { } } + template <bool check_null = true> + inline std::pair<const void*, std::size_t> getUntypedValueAndByteLengthForCode(const std::uint32_t code) const { + if (type_is_variable_length_) { + return variableLengthGetUntypedValueAndByteLengthHelper<std::uint32_t, check_null>(code); + } else { + return fixedLengthGetUntypedValueAndByteLengthHelper<std::uint32_t, check_null>(code); + } + } + /** * @brief Get the value represented by the specified code as a TypedValue. * @note This version is for codes of 8 bits or less. Also see @@ -255,6 +264,39 @@ class CompressionDictionaryLite { return retval; } + template <typename CodeType, bool check_null = true> + inline std::pair<const void*, std::size_t> fixedLengthGetUntypedValueAndByteLengthHelper( + const CodeType code) const { + if (check_null && (code == getNullCode())) { + return std::make_pair(nullptr, 0); + } + DCHECK_LT(code, numberOfCodes()); + return std::make_pair(static_cast<const char*>(dictionary_memory_) + + 2 * sizeof(std::uint32_t) // Header. + + code * type_fixed_byte_length_, // Index into value array. + type_fixed_byte_length_); + } + + template <typename CodeType, bool check_null = true> + inline std::pair<const void*, std::size_t> variableLengthGetUntypedValueAndByteLengthHelper( + const CodeType code) const { + if (check_null && (code == getNullCode())) { + return std::make_pair(nullptr, 0); + } + DCHECK_LT(code, numberOfCodes()); + + const std::uint32_t value_offset = static_cast<const std::uint32_t*>(dictionary_memory_)[code + 2]; + const void *data_ptr = variable_length_data_region_ + value_offset; + DCHECK_LT(data_ptr, static_cast<const char*>(dictionary_memory_) + dictionary_memory_size_); + + std::size_t data_size = (code == *static_cast<const std::uint32_t*>(dictionary_memory_) - 1) ? + (static_cast<const char*>(dictionary_memory_) + + dictionary_memory_size_ + - static_cast<const char*>(data_ptr)) + : (static_cast<const std::uint32_t*>(dictionary_memory_)[code + 3] - value_offset); + return std::make_pair(data_ptr, data_size); + } + template <typename CodeType> inline TypedValue fixedLengthGetTypedValueHelper(const CodeType code) const { if (code == getNullCode()) { http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/946332c3/storage/BasicColumnStoreValueAccessor.hpp ---------------------------------------------------------------------- diff --git a/storage/BasicColumnStoreValueAccessor.hpp b/storage/BasicColumnStoreValueAccessor.hpp index 759e187..7907fd5 100644 --- a/storage/BasicColumnStoreValueAccessor.hpp +++ b/storage/BasicColumnStoreValueAccessor.hpp @@ -18,6 +18,8 @@ #ifndef QUICKSTEP_STORAGE_BASIC_COLUMN_STORE_VALUE_ACCESSOR_HPP_ #define QUICKSTEP_STORAGE_BASIC_COLUMN_STORE_VALUE_ACCESSOR_HPP_ +#include <cstddef> +#include <utility> #include <vector> #include "catalog/CatalogRelationSchema.hpp" @@ -43,7 +45,8 @@ class BasicColumnStoreValueAccessorHelper { : relation_(relation), num_tuples_(num_tuples), column_stripes_(column_stripes), - column_null_bitmaps_(column_null_bitmaps) { + column_null_bitmaps_(column_null_bitmaps), + attr_max_lengths_(relation.getMaximumAttributeByteLengths()) { } inline tuple_id numPackedTuples() const { @@ -61,9 +64,23 @@ class BasicColumnStoreValueAccessorHelper { return nullptr; } - // TODO(chasseur): Consider cacheing the byte lengths of attributes. - return static_cast<const char*>(column_stripes_[attr]) - + (tuple * relation_.getAttributeById(attr)->getType().maximumByteLength()); + return static_cast<const char*>(column_stripes_[attr]) + (tuple * attr_max_lengths_[attr]); + } + + template <bool check_null> + inline std::pair<const void*, std::size_t> getAttributeValueAndByteLength(const tuple_id tuple, + const attribute_id attr) const { + DEBUG_ASSERT(tuple < num_tuples_); + DEBUG_ASSERT(relation_.hasAttributeWithId(attr)); + if (check_null + && (!column_null_bitmaps_.elementIsNull(attr)) + && column_null_bitmaps_[attr].getBit(tuple)) { + return std::make_pair(nullptr, 0); + } + + const std::size_t attr_length = attr_max_lengths_[attr]; + return std::make_pair(static_cast<const char*>(column_stripes_[attr]) + (tuple * attr_length), + attr_length); } inline TypedValue getAttributeValueTyped(const tuple_id tuple, @@ -80,6 +97,7 @@ class BasicColumnStoreValueAccessorHelper { const tuple_id num_tuples_; const std::vector<void*> &column_stripes_; const PtrVector<BitVector<false>, true> &column_null_bitmaps_; + const std::vector<std::size_t> &attr_max_lengths_; DISALLOW_COPY_AND_ASSIGN(BasicColumnStoreValueAccessorHelper); }; http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/946332c3/storage/CMakeLists.txt ---------------------------------------------------------------------- diff --git a/storage/CMakeLists.txt b/storage/CMakeLists.txt index a77976a..5d29055 100644 --- a/storage/CMakeLists.txt +++ b/storage/CMakeLists.txt @@ -663,6 +663,8 @@ target_link_libraries(quickstep_storage_HashTable quickstep_types_Type quickstep_types_TypedValue quickstep_utility_BloomFilter + quickstep_utility_BloomFilterAdapter + quickstep_utility_EventProfiler quickstep_utility_HashPair quickstep_utility_Macros) target_link_libraries(quickstep_storage_HashTableBase http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/946332c3/storage/CompressedColumnStoreValueAccessor.hpp ---------------------------------------------------------------------- diff --git a/storage/CompressedColumnStoreValueAccessor.hpp b/storage/CompressedColumnStoreValueAccessor.hpp index 64eb315..984dea3 100644 --- a/storage/CompressedColumnStoreValueAccessor.hpp +++ b/storage/CompressedColumnStoreValueAccessor.hpp @@ -52,6 +52,7 @@ class CompressedColumnStoreValueAccessorHelper { const PtrVector<BitVector<false>, true> &uncompressed_column_null_bitmaps) : relation_(relation), num_tuples_(num_tuples), + attr_max_lengths_(relation.getMaximumAttributeByteLengths()), compression_info_(compression_info), dictionary_coded_attributes_(dictionary_coded_attributes), truncated_attributes_(truncated_attributes), @@ -84,6 +85,26 @@ class CompressedColumnStoreValueAccessorHelper { } } + template <bool check_null> + inline std::pair<const void*, std::size_t> getAttributeValueAndByteLength(const tuple_id tuple, + const attribute_id attr) const { + if (dictionary_coded_attributes_[attr]) { + return dictionaries_.atUnchecked(attr).getUntypedValueAndByteLengthForCode<check_null>( + getCode(tuple, attr)); + } else if (truncated_attributes_[attr]) { + if (truncated_attribute_is_int_[attr]) { + int_buffer_ = getCode(tuple, attr); + return std::make_pair(&int_buffer_, sizeof(int_buffer_)); + } else { + long_buffer_ = getCode(tuple, attr); + return std::make_pair(&long_buffer_, sizeof(long_buffer_)); + } + } else { + return std::make_pair(getAttributePtr<check_null>(tuple, attr), + attr_max_lengths_[attr]); + } + } + inline TypedValue getAttributeValueTyped(const tuple_id tuple, const attribute_id attr) const { if (dictionary_coded_attributes_[attr]) { @@ -138,6 +159,7 @@ class CompressedColumnStoreValueAccessorHelper { const CatalogRelationSchema &relation_; const tuple_id num_tuples_; + const std::vector<std::size_t> &attr_max_lengths_; const CompressedBlockInfo &compression_info_; const std::vector<bool> &dictionary_coded_attributes_; http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/946332c3/storage/CompressedPackedRowStoreValueAccessor.hpp ---------------------------------------------------------------------- diff --git a/storage/CompressedPackedRowStoreValueAccessor.hpp b/storage/CompressedPackedRowStoreValueAccessor.hpp index 024b0ec..7058aec 100644 --- a/storage/CompressedPackedRowStoreValueAccessor.hpp +++ b/storage/CompressedPackedRowStoreValueAccessor.hpp @@ -58,6 +58,7 @@ class CompressedPackedRowStoreValueAccessorHelper { num_tuples_(num_tuples), tuple_length_bytes_(tuple_length_bytes), attribute_offsets_(attribute_offsets), + attr_max_lengths_(relation.getMaximumAttributeByteLengths()), compression_info_(compression_info), dictionary_coded_attributes_(dictionary_coded_attributes), truncated_attributes_(truncated_attributes), @@ -92,6 +93,26 @@ class CompressedPackedRowStoreValueAccessorHelper { } } + template <bool check_null> + inline std::pair<const void*, std::size_t> getAttributeValueAndByteLength(const tuple_id tuple, + const attribute_id attr) const { + if (dictionary_coded_attributes_[attr]) { + return dictionaries_.atUnchecked(attr).getUntypedValueAndByteLengthForCode<check_null>( + getCode(tuple, attr)); + } else if (truncated_attributes_[attr]) { + if (truncated_attribute_is_int_[attr]) { + int_buffer_ = getCode(tuple, attr); + return std::make_pair(&int_buffer_, sizeof(int_buffer_)); + } else { + long_buffer_ = getCode(tuple, attr); + return std::make_pair(&long_buffer_, sizeof(long_buffer_)); + } + } else { + return std::make_pair(getAttributePtr<check_null>(tuple, attr), + attr_max_lengths_[attr]); + } + } + inline TypedValue getAttributeValueTyped(const tuple_id tuple, const attribute_id attr) const { if (dictionary_coded_attributes_[attr]) { @@ -150,6 +171,7 @@ class CompressedPackedRowStoreValueAccessorHelper { const tuple_id num_tuples_; const std::size_t tuple_length_bytes_; const std::vector<std::size_t> &attribute_offsets_; + const std::vector<std::size_t> &attr_max_lengths_; const CompressedBlockInfo &compression_info_; const std::vector<bool> &dictionary_coded_attributes_; http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/946332c3/storage/HashTable.hpp ---------------------------------------------------------------------- diff --git a/storage/HashTable.hpp b/storage/HashTable.hpp index be31fd9..23150be 100644 --- a/storage/HashTable.hpp +++ b/storage/HashTable.hpp @@ -20,10 +20,13 @@ #ifndef QUICKSTEP_STORAGE_HASH_TABLE_HPP_ #define QUICKSTEP_STORAGE_HASH_TABLE_HPP_ +#include <algorithm> #include <atomic> #include <cstddef> #include <cstdlib> +#include <memory> #include <type_traits> +#include <utility> #include <vector> #include "catalog/CatalogTypedefs.hpp" @@ -39,6 +42,8 @@ #include "types/Type.hpp" #include "types/TypedValue.hpp" #include "utility/BloomFilter.hpp" +#include "utility/BloomFilterAdapter.hpp" +#include "utility/EventProfiler.hpp" #include "utility/HashPair.hpp" #include "utility/Macros.hpp" @@ -2246,26 +2251,19 @@ void HashTable<ValueT, resizable, serializable, force_key_copy, allow_duplicate_ InvokeOnAnyValueAccessor( accessor, [&](auto *accessor) -> void { // NOLINT(build/c++11) + std::unique_ptr<BloomFilterAdapter> bloom_filter_adapter = nullptr; + if (has_probe_side_bloom_filter_) { + bloom_filter_adapter.reset( + new BloomFilterAdapter(probe_bloom_filters_, probe_attribute_ids_)); + } + + std::string tag = has_probe_side_bloom_filter_ ? "plus_bloom_filter" : "hash_probe"; + auto *container = simple_profiler.getContainer(); + container->startEvent(tag); while (accessor->next()) { - // Probe any bloom filters, if enabled. - if (has_probe_side_bloom_filter_) { - DCHECK_EQ(probe_bloom_filters_.size(), probe_attribute_ids_.size()); - // Check if the key is contained in the BloomFilters or not. - bool bloom_miss = false; - for (std::size_t i = 0; i < probe_bloom_filters_.size() && !bloom_miss; ++i) { - const BloomFilter *bloom_filter = probe_bloom_filters_[i]; - for (const attribute_id &attr_id : probe_attribute_ids_[i]) { - TypedValue bloom_key = accessor->getTypedValue(attr_id); - if (!bloom_filter->contains(static_cast<const std::uint8_t*>(bloom_key.getDataPtr()), - bloom_key.getDataSize())) { - bloom_miss = true; - break; - } - } - } - if (bloom_miss) { - continue; // On a bloom filter miss, probing the hash table can be skipped. - } + // Check if the key is contained in the BloomFilters or not. + if (has_probe_side_bloom_filter_ && bloom_filter_adapter->miss(accessor)) { + continue; } TypedValue key = accessor->getTypedValue(key_attr_id); @@ -2285,6 +2283,7 @@ void HashTable<ValueT, resizable, serializable, force_key_copy, allow_duplicate_ } } } + container->endEvent(tag); }); } http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/946332c3/storage/PackedRowStoreValueAccessor.hpp ---------------------------------------------------------------------- diff --git a/storage/PackedRowStoreValueAccessor.hpp b/storage/PackedRowStoreValueAccessor.hpp index 03a975e..cbd273e 100644 --- a/storage/PackedRowStoreValueAccessor.hpp +++ b/storage/PackedRowStoreValueAccessor.hpp @@ -18,6 +18,8 @@ #ifndef QUICKSTEP_STORAGE_PACKED_ROW_STORE_VALUE_ACCESSOR_HPP_ #define QUICKSTEP_STORAGE_PACKED_ROW_STORE_VALUE_ACCESSOR_HPP_ +#include <utility> + #include "catalog/CatalogRelationSchema.hpp" #include "catalog/CatalogTypedefs.hpp" #include "storage/StorageBlockInfo.hpp" @@ -40,7 +42,8 @@ class PackedRowStoreValueAccessorHelper { : relation_(relation), num_tuples_(num_tuples), tuple_storage_(tuple_storage), - null_bitmap_(null_bitmap) { + null_bitmap_(null_bitmap), + attr_max_lengths_(relation.getMaximumAttributeByteLengths()) { } inline tuple_id numPackedTuples() const { @@ -65,6 +68,25 @@ class PackedRowStoreValueAccessorHelper { + relation_.getFixedLengthAttributeOffset(attr); // Attribute offset within tuple. } + template <bool check_null> + inline std::pair<const void*, std::size_t> getAttributeValueAndByteLength(const tuple_id tuple, + const attribute_id attr) const { + DEBUG_ASSERT(tuple < num_tuples_); + DEBUG_ASSERT(relation_.hasAttributeWithId(attr)); + if (check_null) { + const int nullable_idx = relation_.getNullableAttributeIndex(attr); + if ((nullable_idx != -1) + && null_bitmap_->getBit(tuple * relation_.numNullableAttributes() + nullable_idx)) { + return std::make_pair(nullptr, 0); + } + } + + return std::make_pair(static_cast<const char*>(tuple_storage_) + + (tuple * relation_.getFixedByteLength()) + + relation_.getFixedLengthAttributeOffset(attr), + attr_max_lengths_[attr]); + } + inline TypedValue getAttributeValueTyped(const tuple_id tuple, const attribute_id attr) const { const Type &attr_type = relation_.getAttributeById(attr)->getType(); @@ -79,6 +101,7 @@ class PackedRowStoreValueAccessorHelper { const tuple_id num_tuples_; const void *tuple_storage_; const BitVector<false> *null_bitmap_; + const std::vector<std::size_t> &attr_max_lengths_; DISALLOW_COPY_AND_ASSIGN(PackedRowStoreValueAccessorHelper); }; http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/946332c3/storage/SplitRowStoreValueAccessor.hpp ---------------------------------------------------------------------- diff --git a/storage/SplitRowStoreValueAccessor.hpp b/storage/SplitRowStoreValueAccessor.hpp index 9ea1a3a..19937f2 100644 --- a/storage/SplitRowStoreValueAccessor.hpp +++ b/storage/SplitRowStoreValueAccessor.hpp @@ -100,6 +100,11 @@ class SplitRowStoreValueAccessor : public ValueAccessor { return getUntypedValueAtAbsolutePosition<check_null>(attr_id, current_position_); } + template <bool check_null = true> + inline std::pair<const void*, std::size_t> getUntypedValueAndByteLength(const attribute_id attr_id) const { + return getUntypedValueAndByteLengthAtAbsolutePosition<check_null>(attr_id, current_position_); + } + inline TypedValue getTypedValue(const attribute_id attr_id) const { return getTypedValueAtAbsolutePosition(attr_id, current_position_); } @@ -140,6 +145,44 @@ class SplitRowStoreValueAccessor : public ValueAccessor { } } + template <bool check_null = true> + inline std::pair<const void*, std::size_t> getUntypedValueAndByteLengthAtAbsolutePosition(const attribute_id attr_id, + const tuple_id tid) const { + DEBUG_ASSERT(occupancy_bitmap_.getBit(tid)); + DEBUG_ASSERT(relation_.hasAttributeWithId(attr_id)); + const char *tuple_slot = static_cast<const char*>(tuple_storage_) + + tuple_slot_bytes_ * tid; + if (check_null) { + const int nullable_idx = relation_.getNullableAttributeIndex(attr_id); + if (nullable_idx != -1) { + // const_cast is safe here. We will only be using read-only methods of + // BitVector. + BitVector<true> tuple_null_bitmap(const_cast<void*>(static_cast<const void*>(tuple_slot)), + relation_.numNullableAttributes()); + if (tuple_null_bitmap.getBit(nullable_idx)) { + return std::make_pair(nullptr, 0); + } + } + } + + const int variable_length_idx = relation_.getVariableLengthAttributeIndex(attr_id); + if (variable_length_idx == -1) { + // Fixed-length, stored in-line in slot. + return std::make_pair(tuple_slot + per_tuple_null_bitmap_bytes_ + + relation_.getFixedLengthAttributeOffset(attr_id), + attr_max_lengths_[attr_id]); + + } else { + // Variable-length, stored at back of block. + const std::uint32_t *pos_ptr = reinterpret_cast<const std::uint32_t*>( + tuple_slot + per_tuple_null_bitmap_bytes_ + + relation_.getFixedByteLength() + + variable_length_idx * 2 * sizeof(std::uint32_t)); + return std::make_pair(static_cast<const char*>(tuple_storage_) + pos_ptr[0], + pos_ptr[1]); + } + } + inline TypedValue getTypedValueAtAbsolutePosition(const attribute_id attr_id, const tuple_id tid) const { DEBUG_ASSERT(occupancy_bitmap_.getBit(tid)); @@ -317,6 +360,7 @@ class SplitRowStoreValueAccessor : public ValueAccessor { tuple_storage_(tuple_storage), tuple_slot_bytes_(tuple_slot_bytes), per_tuple_null_bitmap_bytes_(per_tuple_null_bitmap_bytes), + attr_max_lengths_(relation.getMaximumAttributeByteLengths()), current_position_(std::numeric_limits<std::size_t>::max()) { } @@ -327,6 +371,7 @@ class SplitRowStoreValueAccessor : public ValueAccessor { const void *tuple_storage_; const std::size_t tuple_slot_bytes_; const std::size_t per_tuple_null_bitmap_bytes_; + const std::vector<std::size_t> &attr_max_lengths_; std::size_t current_position_; http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/946332c3/storage/ValueAccessor.hpp ---------------------------------------------------------------------- diff --git a/storage/ValueAccessor.hpp b/storage/ValueAccessor.hpp index e2a898e..e9370cc 100644 --- a/storage/ValueAccessor.hpp +++ b/storage/ValueAccessor.hpp @@ -375,6 +375,11 @@ class TupleIdSequenceAdapterValueAccessor : public ValueAccessor { return accessor_->template getUntypedValueAtAbsolutePosition<check_null>(attr_id, *current_position_); } + template <bool check_null = true> + inline std::pair<const void*, std::size_t> getUntypedValueAndByteLength(const attribute_id attr_id) const { + return accessor_->template getUntypedValueAndByteLengthAtAbsolutePosition<check_null>(attr_id, *current_position_); + } + inline TypedValue getTypedValue(const attribute_id attr_id) const { return accessor_->getTypedValueAtAbsolutePosition(attr_id, *current_position_); } @@ -387,6 +392,13 @@ class TupleIdSequenceAdapterValueAccessor : public ValueAccessor { } // Pass-through. + template <bool check_null = true> + inline std::pair<const void*, std::size_t> getUntypedValueAndByteLengthAtAbsolutePosition(const attribute_id attr_id, + const tuple_id tid) const { + return accessor_->template getUntypedValueAndByteLengthAtAbsolutePosition<check_null>(attr_id, tid); + } + + // Pass-through. inline TypedValue getTypedValueAtAbsolutePosition(const attribute_id attr_id, const tuple_id tid) const { return accessor_->getTypedValueAtAbsolutePosition(attr_id, tid); @@ -560,6 +572,12 @@ class OrderedTupleIdSequenceAdapterValueAccessor : public ValueAccessor { id_sequence_[current_position_]); } + template <bool check_null = true> + inline std::pair<const void*, std::size_t> getUntypedValueAndByteLength(const attribute_id attr_id) const { + return accessor_->template getUntypedValueAndByteLengthAtAbsolutePosition<check_null>( + attr_id, id_sequence_[current_position_]); + } + inline TypedValue getTypedValue(const attribute_id attr_id) const { return accessor_->getTypedValueAtAbsolutePosition(attr_id, id_sequence_[current_position_]); } @@ -571,6 +589,13 @@ class OrderedTupleIdSequenceAdapterValueAccessor : public ValueAccessor { "OrderedTupleIdSequenceAdapterValueAccessor"); } + template <bool check_null = true> + inline std::pair<const void*, std::size_t> getUntypedValueAndByteLengthAtAbsolutePosition(const attribute_id attr_id, + const tuple_id tid) const { + FATAL_ERROR("getUntypedValueAndByteLengthAtAbsolutePosition() not implemented in " + "OrderedTupleIdSequenceAdapterValueAccessor"); + } + inline TypedValue getTypedValueAtAbsolutePosition(const attribute_id attr_id, const tuple_id tid) const { FATAL_ERROR("getTypedValueAtAbsolutePosition() not implemented in " @@ -737,6 +762,11 @@ class PackedTupleStorageSubBlockValueAccessor : public ValueAccessor { return getUntypedValueAtAbsolutePosition<check_null>(attr_id, current_tuple_); } + template <bool check_null = true> + inline std::pair<const void*, std::size_t> getUntypedValueAndByteLength(const attribute_id attr_id) const { + return getUntypedValueAndByteLengthAtAbsolutePosition<check_null>(attr_id, current_tuple_); + } + inline TypedValue getTypedValue(const attribute_id attr_id) const { return getTypedValueAtAbsolutePosition(attr_id, current_tuple_); } @@ -747,6 +777,12 @@ class PackedTupleStorageSubBlockValueAccessor : public ValueAccessor { return helper_.template getAttributeValue<check_null>(tid, attr_id); } + template <bool check_null = true> + inline std::pair<const void*, std::size_t> getUntypedValueAndByteLengthAtAbsolutePosition(const attribute_id attr_id, + const tuple_id tid) const { + return helper_.template getAttributeValueAndByteLength<check_null>(tid, attr_id); + } + inline TypedValue getTypedValueAtAbsolutePosition(const attribute_id attr_id, const tuple_id tid) const { return helper_.getAttributeValueTyped(tid, attr_id); http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/946332c3/types/containers/ColumnVector.hpp ---------------------------------------------------------------------- diff --git a/types/containers/ColumnVector.hpp b/types/containers/ColumnVector.hpp index 76968ba..a9349ee 100644 --- a/types/containers/ColumnVector.hpp +++ b/types/containers/ColumnVector.hpp @@ -193,6 +193,22 @@ class NativeColumnVector : public ColumnVector { } /** + * @brief Get the untyped pointer to a value as well as the value's byte length + * in this NativeColumnVector as a pair. + * + * @param position The position of the value to get. + * @return A pair containing the untyped pointer to the value at position and + * the value's byte length. + **/ + template <bool check_null = true> + inline std::pair<const void*, std::size_t> getUntypedValueAndByteLength(const std::size_t position) const { + DCHECK_LT(position, actual_length_); + return (check_null && null_bitmap_ && null_bitmap_->getBit(position)) + ? std::make_pair(nullptr, 0) + : std::make_pair(static_cast<const char*>(values_) + (position * type_length_), type_length_); + } + + /** * @brief Get a value in this NativeColumnVector as a TypedValue. * * @param position The position of the value to get. @@ -453,6 +469,25 @@ class IndirectColumnVector : public ColumnVector { } /** + * @brief Get the untyped pointer to a value as well as the value's byte length + * in this IndirectColumnVector as a pair. + * + * @param position The position of the value to get. + * @return A pair containing the untyped pointer to the value at position and + * the value's byte length. + **/ + template <bool check_null = true> + inline std::pair<const void*, std::size_t> getUntypedValueAndByteLength(const std::size_t position) const { + DCHECK_LT(position, values_.size()); + if (check_null && type_is_nullable_ && values_[position].isNull()) { + return std::make_pair(nullptr, 0); + } else { + const TypedValue &value = values_[position]; + return std::make_pair(value.getDataPtr(), value.getDataSize()); + } + } + + /** * @brief Get a value in this IndirectColumnVector as a TypedValue. * * @param position The position of the value to get. http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/946332c3/types/containers/ColumnVectorsValueAccessor.hpp ---------------------------------------------------------------------- diff --git a/types/containers/ColumnVectorsValueAccessor.hpp b/types/containers/ColumnVectorsValueAccessor.hpp index f1d29a2..d69d1d8 100644 --- a/types/containers/ColumnVectorsValueAccessor.hpp +++ b/types/containers/ColumnVectorsValueAccessor.hpp @@ -124,6 +124,11 @@ class ColumnVectorsValueAccessor : public ValueAccessor { return getUntypedValueAtAbsolutePosition<check_null>(attr_id, current_position_); } + template <bool check_null = true> + inline std::pair<const void*, std::size_t> getUntypedValueAndByteLength(const attribute_id attr_id) const { + return getUntypedValueAndByteLengthAtAbsolutePosition<check_null>(attr_id, current_position_); + } + inline TypedValue getTypedValue(const attribute_id attr_id) const { return getTypedValueAtAbsolutePosition(attr_id, current_position_); } @@ -140,6 +145,18 @@ class ColumnVectorsValueAccessor : public ValueAccessor { } } + template <bool check_null = true> + inline std::pair<const void*, std::size_t> getUntypedValueAndByteLengthAtAbsolutePosition(const attribute_id attr_id, + const tuple_id tid) const { + DCHECK(attributeIdInRange(attr_id)); + DCHECK(tupleIdInRange(tid)); + if (column_native_[attr_id]) { + return static_cast<const NativeColumnVector&>(*columns_[attr_id]).getUntypedValueAndByteLength<check_null>(tid); + } else { + return static_cast<const IndirectColumnVector&>(*columns_[attr_id]).getUntypedValueAndByteLength<check_null>(tid); + } + } + inline TypedValue getTypedValueAtAbsolutePosition(const attribute_id attr_id, const tuple_id tid) const { DCHECK(attributeIdInRange(attr_id)); http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/946332c3/utility/BloomFilterAdapter.hpp ---------------------------------------------------------------------- diff --git a/utility/BloomFilterAdapter.hpp b/utility/BloomFilterAdapter.hpp new file mode 100644 index 0000000..5deb275 --- /dev/null +++ b/utility/BloomFilterAdapter.hpp @@ -0,0 +1,128 @@ +/** + * Copyright 2016, Quickstep Research Group, Computer Sciences Department, + * University of WisconsinâMadison. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + **/ + +#ifndef QUICKSTEP_UTILITY_BLOOM_FILTER_ADAPTER_HPP +#define QUICKSTEP_UTILITY_BLOOM_FILTER_ADAPTER_HPP + +#include <algorithm> +#include <cstddef> +#include <cstdint> +#include <memory> +#include <utility> +#include <vector> + +#include "catalog/CatalogTypedefs.hpp" +#include "utility/BloomFilter.hpp" +#include "utility/Macros.hpp" + +#include "glog/logging.h" + +namespace quickstep { + +/** \addtogroup Utility + * @{ + */ + +class BloomFilterAdapter { + public: + BloomFilterAdapter(const std::vector<const BloomFilter*> &bloom_filters, + const std::vector<std::vector<attribute_id>> &attribute_ids) + : num_bloom_filters_(bloom_filters.size()) { + DCHECK_EQ(bloom_filters.size(), attribute_ids.size()); + + bloom_filter_entries_.reserve(num_bloom_filters_); + bloom_filter_entry_indices_.reserve(num_bloom_filters_); + + for (std::size_t i = 0; i < num_bloom_filters_; ++i) { + bloom_filter_entries_.emplace_back(bloom_filters[i], attribute_ids[i]); + bloom_filter_entry_indices_.emplace_back(i); + } + } + + template <typename ValueAccessorT> + inline bool miss(const ValueAccessorT *accessor) { + return missImpl<ValueAccessorT, true>(accessor); + } + + template <typename ValueAccessorT, bool adapt_filters> + inline bool missImpl(const ValueAccessorT *accessor) { + for (std::size_t i = 0; i < num_bloom_filters_; ++i) { + const std::size_t entry_idx = bloom_filter_entry_indices_[i]; + BloomFilterEntry &entry = bloom_filter_entries_[entry_idx]; + if (adapt_filters) { + ++entry.cnt; + } + + const BloomFilter *bloom_filter = entry.bloom_filter; + for (const attribute_id &attr_id : entry.attribute_ids) { + const std::pair<const void*, std::size_t> value_and_byte_length = + accessor->getUntypedValueAndByteLength(attr_id); + if (!bloom_filter->contains(static_cast<const std::uint8_t*>(value_and_byte_length.first), + value_and_byte_length.second)) { + if (adapt_filters) { + // Record miss + ++entry.miss; + + // Update entry order + if (i > 0) { + const std::size_t prev_entry_idx = bloom_filter_entry_indices_[i-1]; + if (entry.isBetterThan(bloom_filter_entries_[prev_entry_idx])) { + bloom_filter_entry_indices_[i-1] = entry_idx; + bloom_filter_entry_indices_[i] = prev_entry_idx; + } + } + } + return true; + } + } + } + return false; + } + + private: + struct BloomFilterEntry { + BloomFilterEntry(const BloomFilter *in_bloom_filter, + const std::vector<attribute_id> &in_attribute_ids) + : bloom_filter(in_bloom_filter), + attribute_ids(in_attribute_ids), + miss(0), + cnt(0) { + } + + inline bool isBetterThan(const BloomFilterEntry& other) { + return static_cast<std::uint64_t>(miss) * other.cnt + > static_cast<std::uint64_t>(cnt + 5) * (other.miss + 5); + } + + const BloomFilter *bloom_filter; + const std::vector<attribute_id> &attribute_ids; + std::uint32_t miss; + std::uint32_t cnt; + }; + + const std::size_t num_bloom_filters_; + std::vector<BloomFilterEntry> bloom_filter_entries_; + std::vector<std::size_t> bloom_filter_entry_indices_; + + DISALLOW_COPY_AND_ASSIGN(BloomFilterAdapter); +}; + +/** @} */ + +} // namespace quickstep + +#endif // QUICKSTEP_UTILITY_BLOOM_FILTER_ADAPTER_HPP http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/946332c3/utility/CMakeLists.txt ---------------------------------------------------------------------- diff --git a/utility/CMakeLists.txt b/utility/CMakeLists.txt index 2d3db8f..de0e737 100644 --- a/utility/CMakeLists.txt +++ b/utility/CMakeLists.txt @@ -159,6 +159,7 @@ add_library(quickstep_utility_Alignment ../empty_src.cpp Alignment.hpp) add_library(quickstep_utility_BitManipulation ../empty_src.cpp BitManipulation.hpp) add_library(quickstep_utility_BitVector ../empty_src.cpp BitVector.hpp) add_library(quickstep_utility_BloomFilter ../empty_src.cpp BloomFilter.hpp) +add_library(quickstep_utility_BloomFilterAdapter ../empty_src.cpp BloomFilterAdapter.hpp) add_library(quickstep_utility_BloomFilter_proto ${quickstep_utility_BloomFilter_proto_srcs} ${quickstep_utility_BloomFilter_proto_hdrs}) @@ -166,6 +167,7 @@ add_library(quickstep_utility_CalculateInstalledMemory CalculateInstalledMemory. add_library(quickstep_utility_Cast ../empty_src.cpp Cast.hpp) add_library(quickstep_utility_CheckSnprintf ../empty_src.cpp CheckSnprintf.hpp) add_library(quickstep_utility_DAG ../empty_src.cpp DAG.hpp) +add_library(quickstep_utility_EventProfiler EventProfiler.cpp EventProfiler.hpp) add_library(quickstep_utility_EqualsAnyConstant ../empty_src.cpp EqualsAnyConstant.hpp) add_library(quickstep_utility_Glob Glob.cpp Glob.hpp) add_library(quickstep_utility_HashPair ../empty_src.cpp HashPair.hpp) @@ -216,6 +218,10 @@ target_link_libraries(quickstep_utility_BloomFilter quickstep_threading_SpinSharedMutex quickstep_utility_BloomFilter_proto quickstep_utility_Macros) +target_link_libraries(quickstep_utility_BloomFilterAdapter + quickstep_catalog_CatalogTypedefs + quickstep_utility_BloomFilter + quickstep_utility_Macros) target_link_libraries(quickstep_utility_BloomFilter_proto ${PROTOBUF_LIBRARY}) target_link_libraries(quickstep_utility_CalculateInstalledMemory @@ -225,6 +231,8 @@ target_link_libraries(quickstep_utility_CheckSnprintf target_link_libraries(quickstep_utility_DAG glog quickstep_utility_Macros) +target_link_libraries(quickstep_utility_EventProfiler + quickstep_threading_Mutex) target_link_libraries(quickstep_utility_Glob glog) target_link_libraries(quickstep_utility_MemStream @@ -297,11 +305,13 @@ target_link_libraries(quickstep_utility quickstep_utility_BitManipulation quickstep_utility_BitVector quickstep_utility_BloomFilter + quickstep_utility_BloomFilterAdapter quickstep_utility_BloomFilter_proto quickstep_utility_CalculateInstalledMemory quickstep_utility_Cast quickstep_utility_CheckSnprintf quickstep_utility_DAG + quickstep_utility_EventProfiler quickstep_utility_EqualsAnyConstant quickstep_utility_Glob quickstep_utility_HashPair http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/946332c3/utility/EventProfiler.cpp ---------------------------------------------------------------------- diff --git a/utility/EventProfiler.cpp b/utility/EventProfiler.cpp new file mode 100644 index 0000000..794c67d --- /dev/null +++ b/utility/EventProfiler.cpp @@ -0,0 +1,28 @@ +/** + * Copyright 2016, Quickstep Research Group, Computer Sciences Department, + * University of WisconsinâMadison. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + **/ + +#include "utility/EventProfiler.hpp" + +#include <cstddef> +#include <string> +#include <vector> + +namespace quickstep { + +EventProfiler simple_profiler; + +} // namespace quickstep http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/946332c3/utility/EventProfiler.hpp ---------------------------------------------------------------------- diff --git a/utility/EventProfiler.hpp b/utility/EventProfiler.hpp new file mode 100644 index 0000000..7ad9105 --- /dev/null +++ b/utility/EventProfiler.hpp @@ -0,0 +1,138 @@ +/** + * Copyright 2016, Quickstep Research Group, Computer Sciences Department, + * University of WisconsinâMadison. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + **/ + +#ifndef QUICKSTEP_UTILITY_EVENT_PROFILER_HPP_ +#define QUICKSTEP_UTILITY_EVENT_PROFILER_HPP_ + +#include <chrono> +#include <cstddef> +#include <cstring> +#include <ctime> +#include <iomanip> +#include <map> +#include <ostream> +#include <thread> +#include <type_traits> +#include <utility> +#include <vector> + +#include "threading/Mutex.hpp" + +#include "glog/logging.h" + +namespace quickstep { + +/** \addtogroup Utility + * @{ + */ + +using clock = std::chrono::steady_clock; + +class EventProfiler { + + public: + EventProfiler() + : zero_time(clock::now()) { + } + + struct EventInfo { + clock::time_point start_time; + clock::time_point end_time; + bool is_finished; + + explicit EventInfo(const clock::time_point &start_time_in) + : start_time(start_time_in), + is_finished(false) { + } + + EventInfo() + : start_time(clock::now()), + is_finished(false) { + } + + inline void endEvent() { + end_time = clock::now(); + is_finished = true; + } + }; + + struct EventContainer { + inline void startEvent(const std::string &tag) { + events[tag].emplace_back(clock::now()); + } + + inline void endEvent(const std::string &tag) { + auto &event_info = events.at(tag).back(); + event_info.is_finished = true; + event_info.end_time = clock::now(); + } + + inline std::vector<EventInfo> *getEventLine(const std::string &tag) { + return &events[tag]; + } + + std::map<std::string, std::vector<EventInfo>> events; + }; + + EventContainer *getContainer() { + MutexLock lock(mutex_); + return &thread_map_[std::this_thread::get_id()]; + } + + void writeToStream(std::ostream &os) const { + time_t rawtime; + time(&rawtime); + char event_id[32]; + strftime(event_id, sizeof event_id, "%Y-%m-%d %H:%M:%S", localtime(&rawtime)); + + int thread_id = 0; + for (const auto &thread_ctx : thread_map_) { + for (const auto &event_group : thread_ctx.second.events) { + for (const auto &event_info : event_group.second) { + CHECK(event_info.is_finished) << "Unfinished profiling event"; + + os << std::setprecision(12) + << event_id << "," + << thread_id << "," << event_group.first << "," + << std::chrono::duration<double>(event_info.start_time - zero_time).count() + << "," + << std::chrono::duration<double>(event_info.end_time - zero_time).count() + << "\n"; + } + } + ++thread_id; + } + } + + void clear() { + zero_time = clock::now(); + thread_map_.clear(); + } + + private: + clock::time_point zero_time; + std::map<std::thread::id, EventContainer> thread_map_; + Mutex mutex_; +}; + +extern EventProfiler simple_profiler; + +/** @} */ + +} // namespace quickstep + +#endif // QUICKSTEP_UTILITY_EVENT_PROFILER_HPP_