http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/156290a4/compression/CompressionDictionary.cpp ---------------------------------------------------------------------- diff --git a/compression/CompressionDictionary.cpp b/compression/CompressionDictionary.cpp deleted file mode 100644 index 72c183f..0000000 --- a/compression/CompressionDictionary.cpp +++ /dev/null @@ -1,415 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - **/ - -#include "compression/CompressionDictionary.hpp" - -#include <cstddef> -#include <cstdint> -#include <iterator> -#include <limits> -#include <utility> - -#include "compression/CompressionDictionaryLite.hpp" -#include "types/TypedValue.hpp" -#include "types/operations/comparisons/ComparisonID.hpp" -#include "types/operations/comparisons/ComparisonUtil.hpp" -#include "types/operations/comparisons/EqualComparison.hpp" -#include "utility/Macros.hpp" - -#include "glog/logging.h" - -using std::numeric_limits; -using std::pair; -using std::size_t; -using std::uint32_t; - -namespace quickstep { - -namespace compression_dictionary_internal { - -template <bool variable_length> -class CompressionDictionaryIterator : public std::iterator<std::random_access_iterator_tag, const void*> { - public: - typedef std::iterator<std::random_access_iterator_tag, const void*>::difference_type difference_type; - - CompressionDictionaryIterator(const CompressionDictionary &dictionary, const uint32_t code) - : dictionary_(&dictionary), - code_(code) { - } - - CompressionDictionaryIterator() - : dictionary_(nullptr), - code_(0) { - } - - CompressionDictionaryIterator(const CompressionDictionaryIterator &other) - : dictionary_(other.dictionary_), - code_(other.code_) { - } - - CompressionDictionaryIterator& operator=(const CompressionDictionaryIterator& other) = default; - - // Comparisons. - inline bool operator==(const CompressionDictionaryIterator& other) const { - DCHECK_EQ(dictionary_, other.dictionary_); - return code_ == other.code_; - } - - inline bool operator!=(const CompressionDictionaryIterator& other) const { - DCHECK_EQ(dictionary_, other.dictionary_); - return code_ != other.code_; - } - - inline bool operator<(const CompressionDictionaryIterator& other) const { - DCHECK_EQ(dictionary_, other.dictionary_); - return code_ < other.code_; - } - - inline bool operator<=(const CompressionDictionaryIterator& other) const { - DCHECK_EQ(dictionary_, other.dictionary_); - return code_ <= other.code_; - } - - inline bool operator>(const CompressionDictionaryIterator& other) const { - DCHECK_EQ(dictionary_, other.dictionary_); - return code_ > other.code_; - } - - inline bool operator>=(const CompressionDictionaryIterator& other) const { - DCHECK_EQ(dictionary_, other.dictionary_); - return code_ >= other.code_; - } - - // Increment/decrement. - inline CompressionDictionaryIterator& operator++() { - ++code_; - return *this; - } - - CompressionDictionaryIterator operator++(int) { - CompressionDictionaryIterator result(*this); - ++(*this); - return result; - } - - inline CompressionDictionaryIterator& operator--() { - --code_; - return *this; - } - - CompressionDictionaryIterator operator--(int) { - CompressionDictionaryIterator result(*this); - --(*this); - return result; - } - - // Compound assignment. - inline CompressionDictionaryIterator& operator+=(difference_type n) { - code_ += n; - return *this; - } - - inline CompressionDictionaryIterator& operator-=(difference_type n) { - code_ -= n; - return *this; - } - - // Note: + operator with difference_type on the left is not defined. - CompressionDictionaryIterator operator+(difference_type n) const { - return CompressionDictionaryIterator(dictionary_, code_ + n); - } - - CompressionDictionaryIterator operator-(difference_type n) const { - return CompressionDictionaryIterator(dictionary_, code_ - n); - } - - difference_type operator-(const CompressionDictionaryIterator &other) const { - DCHECK_EQ(dictionary_, other.dictionary_); - return code_ - other.code_; - } - - // Dereference. - inline const void* operator*() const { - DCHECK(dictionary_ != nullptr); - if (variable_length) { - return dictionary_->variableLengthGetUntypedValueHelper<std::uint32_t>(code_); - } else { - return dictionary_->fixedLengthGetUntypedValueHelper<std::uint32_t>(code_); - } - } - - inline const void** operator->() const { - FATAL_ERROR("-> dereference operator unimplemented for CompressionDictionaryIterator."); - } - - const void* operator[](difference_type n) const { - DCHECK(dictionary_ != nullptr); - if (variable_length) { - return dictionary_->variableLengthGetUntypedValueHelper<std::uint32_t>(code_ + n); - } else { - return dictionary_->fixedLengthGetUntypedValueHelper<std::uint32_t>(code_ + n); - } - } - - uint32_t getCode() const { - return code_; - } - - private: - const CompressionDictionary *dictionary_; - uint32_t code_; -}; - -} // namespace compression_dictionary_internal - -using compression_dictionary_internal::CompressionDictionaryIterator; - -uint32_t CompressionDictionary::getCodeForUntypedValue(const void *value) const { - if (value == nullptr) { - return getNullCode() == numeric_limits<uint32_t>::max() ? number_of_codes_including_null_ - : getNullCode(); - } - - uint32_t candidate_code = getLowerBoundCodeForUntypedValue(value); - if (candidate_code >= *static_cast<const uint32_t*>(dictionary_memory_)) { - return number_of_codes_including_null_; - } - - if (CheckUntypedValuesEqual(type_, value, getUntypedValueForCode(candidate_code))) { - return candidate_code; - } else { - return number_of_codes_including_null_; - } -} - -std::pair<uint32_t, uint32_t> CompressionDictionary::getLimitCodesForComparisonUntyped( - const ComparisonID comp, - const void *value) const { - if (value == nullptr) { - return pair<uint32_t, uint32_t>(number_of_codes_including_null_, - number_of_codes_including_null_); - } - - pair<uint32_t, uint32_t> limit_codes; - switch (comp) { - case ComparisonID::kEqual: - limit_codes.first = getCodeForUntypedValue(value); - limit_codes.second = (limit_codes.first == number_of_codes_including_null_) - ? limit_codes.first - : limit_codes.first + 1; - break; - case ComparisonID::kNotEqual: - LOG(FATAL) << "Called CompressionDictionary::getLimitCodesForComparisonUntyped() " - << "with comparison kNotEqual, which is not allowed."; - case ComparisonID::kLess: - limit_codes.first = 0; - limit_codes.second = getLowerBoundCodeForUntypedValue(value, true); - break; - case ComparisonID::kLessOrEqual: - limit_codes.first = 0; - limit_codes.second = getUpperBoundCodeForUntypedValue(value); - break; - case ComparisonID::kGreater: - limit_codes.first = getUpperBoundCodeForUntypedValue(value); - limit_codes.second = *static_cast<const uint32_t*>(dictionary_memory_); - break; - case ComparisonID::kGreaterOrEqual: - limit_codes.first = getLowerBoundCodeForUntypedValue(value); - limit_codes.second = *static_cast<const uint32_t*>(dictionary_memory_); - break; - default: - LOG(FATAL) << "Unknown comparison in CompressionDictionary::getLimitCodesForComparisonUntyped()."; - } - - return limit_codes; -} - -std::uint32_t CompressionDictionary::getCodeForDifferentTypedValue(const TypedValue &value, - const Type &value_type) const { - if (value.isNull()) { - return getNullCode() == numeric_limits<uint32_t>::max() ? number_of_codes_including_null_ - : getNullCode(); - } - - uint32_t candidate_code = getLowerBoundCodeForDifferentTypedValue(value, value_type); - if (candidate_code >= *static_cast<const uint32_t*>(dictionary_memory_)) { - return candidate_code; - } - - if (EqualComparison::Instance().compareTypedValuesChecked( - value, value_type, - getTypedValueForCode(candidate_code), type_)) { - return candidate_code; - } else { - return number_of_codes_including_null_; - } -} - -std::pair<uint32_t, uint32_t> CompressionDictionary::getLimitCodesForComparisonDifferentTyped( - const ComparisonID comp, - const TypedValue &value, - const Type &value_type) const { - if (value.isNull()) { - return pair<uint32_t, uint32_t>(number_of_codes_including_null_, - number_of_codes_including_null_); - } - - pair<uint32_t, uint32_t> limit_codes; - switch (comp) { - case ComparisonID::kEqual: - limit_codes.first = getCodeForDifferentTypedValue(value, value_type); - limit_codes.second = (limit_codes.first == number_of_codes_including_null_) - ? limit_codes.first - : limit_codes.first + 1; - break; - case ComparisonID::kNotEqual: - LOG(FATAL) << "Called CompressionDictionary::getLimitCodesForComparisonTyped() " - << "with comparison kNotEqual, which is not allowed."; - case ComparisonID::kLess: - limit_codes.first = 0; - limit_codes.second = getLowerBoundCodeForDifferentTypedValue(value, value_type, true); - break; - case ComparisonID::kLessOrEqual: - limit_codes.first = 0; - limit_codes.second = getUpperBoundCodeForDifferentTypedValue(value, value_type); - break; - case ComparisonID::kGreater: - limit_codes.first = getUpperBoundCodeForDifferentTypedValue(value, value_type); - limit_codes.second = *static_cast<const uint32_t*>(dictionary_memory_); - break; - case ComparisonID::kGreaterOrEqual: - limit_codes.first = getLowerBoundCodeForDifferentTypedValue(value, value_type); - limit_codes.second = *static_cast<const uint32_t*>(dictionary_memory_); - break; - default: - LOG(FATAL) << "Unknown comparison in CompressionDictionary::getLimitCodesForComparisonTyped()."; - } - - return limit_codes; -} - -uint32_t CompressionDictionary::getLowerBoundCodeForUntypedValue(const void *value, - const bool ignore_null_code) const { - uint32_t code; - if (type_is_variable_length_) { - CompressionDictionaryIterator<true> begin_it(*this, 0); - CompressionDictionaryIterator<true> end_it(*this, - *static_cast<const uint32_t*>(dictionary_memory_)); - code = GetBoundForUntypedValue<CompressionDictionaryIterator<true>, LowerBoundFunctor>( - type_, - begin_it, - end_it, - value).getCode(); - } else { - CompressionDictionaryIterator<false> begin_it(*this, 0); - CompressionDictionaryIterator<false> end_it(*this, - *static_cast<const uint32_t*>(dictionary_memory_)); - code = GetBoundForUntypedValue<CompressionDictionaryIterator<false>, LowerBoundFunctor>( - type_, - begin_it, - end_it, - value).getCode(); - } - - return !ignore_null_code && (code == *static_cast<const uint32_t*>(dictionary_memory_)) - ? number_of_codes_including_null_ - : code; -} - -uint32_t CompressionDictionary::getUpperBoundCodeForUntypedValue(const void *value) const { - if (type_is_variable_length_) { - CompressionDictionaryIterator<true> begin_it(*this, 0); - CompressionDictionaryIterator<true> end_it(*this, - *static_cast<const uint32_t*>(dictionary_memory_)); - return GetBoundForUntypedValue<CompressionDictionaryIterator<true>, UpperBoundFunctor>( - type_, - begin_it, - end_it, - value).getCode(); - } else { - CompressionDictionaryIterator<false> begin_it(*this, 0); - CompressionDictionaryIterator<false> end_it(*this, - *static_cast<const uint32_t*>(dictionary_memory_)); - return GetBoundForUntypedValue<CompressionDictionaryIterator<false>, UpperBoundFunctor>( - type_, - begin_it, - end_it, - value).getCode(); - } -} - -uint32_t CompressionDictionary::getLowerBoundCodeForDifferentTypedValue( - const TypedValue &value, - const Type &value_type, - const bool ignore_null_code) const { - uint32_t code; - if (type_is_variable_length_) { - CompressionDictionaryIterator<true> begin_it(*this, 0); - CompressionDictionaryIterator<true> end_it(*this, - *static_cast<const uint32_t*>(dictionary_memory_)); - code = GetBoundForDifferentTypedValue<CompressionDictionaryIterator<true>, LowerBoundFunctor>( - type_, - begin_it, - end_it, - value, - value_type).getCode(); - } else { - CompressionDictionaryIterator<false> begin_it(*this, 0); - CompressionDictionaryIterator<false> end_it(*this, - *static_cast<const uint32_t*>(dictionary_memory_)); - code = GetBoundForDifferentTypedValue<CompressionDictionaryIterator<false>, LowerBoundFunctor>( - type_, - begin_it, - end_it, - value, - value_type).getCode(); - } - - return !ignore_null_code && (code == *static_cast<const uint32_t*>(dictionary_memory_)) - ? number_of_codes_including_null_ - : code; -} - -uint32_t CompressionDictionary::getUpperBoundCodeForDifferentTypedValue( - const TypedValue &value, - const Type &value_type) const { - if (type_is_variable_length_) { - CompressionDictionaryIterator<true> begin_it(*this, 0); - CompressionDictionaryIterator<true> end_it(*this, - *static_cast<const uint32_t*>(dictionary_memory_)); - return GetBoundForDifferentTypedValue<CompressionDictionaryIterator<true>, UpperBoundFunctor>( - type_, - begin_it, - end_it, - value, - value_type).getCode(); - } else { - CompressionDictionaryIterator<false> begin_it(*this, 0); - CompressionDictionaryIterator<false> end_it(*this, - *static_cast<const uint32_t*>(dictionary_memory_)); - return GetBoundForDifferentTypedValue<CompressionDictionaryIterator<false>, UpperBoundFunctor>( - type_, - begin_it, - end_it, - value, - value_type).getCode(); - } -} - -} // namespace quickstep
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/156290a4/compression/CompressionDictionary.hpp ---------------------------------------------------------------------- diff --git a/compression/CompressionDictionary.hpp b/compression/CompressionDictionary.hpp deleted file mode 100644 index 27592e8..0000000 --- a/compression/CompressionDictionary.hpp +++ /dev/null @@ -1,260 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - **/ - -#ifndef QUICKSTEP_COMPRESSION_COMPRESSION_DICTIONARY_HPP_ -#define QUICKSTEP_COMPRESSION_COMPRESSION_DICTIONARY_HPP_ - -#include <cstddef> -#include <cstdint> -#include <limits> -#include <utility> - -#include "compression/CompressionDictionaryLite.hpp" -#include "types/Type.hpp" -#include "types/TypedValue.hpp" -#include "types/operations/comparisons/ComparisonID.hpp" -#include "utility/Macros.hpp" - -#include "glog/logging.h" - -namespace quickstep { - -namespace compression_dictionary_internal { -template <bool variable_length_type> class CompressionDictionaryIterator; -} - -/** \addtogroup Compression - * @{ - */ - -// In-memory format of a CompressionDictionary is as follows: -// std::uint32_t num_codes - number of codes/values stored in dictionary -// (excluding NULL) -// std::uint32_t null_code - a code representing a NULL value (equal to -// num_codes if NULL is present, or the maximum -// possible value of std::uint32_t if NULL is not -// present) -// For fixed-length types: -// array of num_codes values -// For variable-length types: -// array of num_codes offsets of variable-length values -// variable-length storage region, with values stored at offsets specified -// by previous array. - -/** - * @brief Extends the basic interface provided by CompressionDictionaryLite - * with methods for inverse lookup. - **/ -class CompressionDictionary : public CompressionDictionaryLite { - public: - /** - * @brief Constructor. - * - * @param type The type of values being compressed. LessComparison must be - * applicable to this Type. - * @param dictionary_memory The memory location of the physical dictionary. - * @param dictionary_memory_size The size (in bytes) of the physical - * dictionary at dictionary_memory. - **/ - CompressionDictionary(const Type &type, - const void *dictionary_memory, - const std::size_t dictionary_memory_size) - : CompressionDictionaryLite(type, dictionary_memory, dictionary_memory_size) { - } - - virtual ~CompressionDictionary() { - } - - /** - * @brief Get the compressed code that represents the specified untyped - * value. - * @note This uses a binary search to find the appropriate code. It runs in - * O(log(n)) time. - * - * @param value An untyped pointer to a value, which must be of the exact - * same Type as the Type used to construct this dictionary. - * @return The code for value in this dictionary, or the value of - * numberOfCodes() (the maximum code plus one) if value is not - * contained in this dictionary. - **/ - std::uint32_t getCodeForUntypedValue(const void *value) const; - - /** - * @brief Get the compressed code that represents the specified typed value. - * @note This uses a binary search to find the appropriate code. It runs in - * O(log(n)) time. - * - * @param value A typed value, which can be either the exact same Type as - * the values in this dictionary, or another Type which is comparable - * according to LessComparison. - * @param value_type The Type that value belongs to. - * @return The code for value in this dictionary, or the value of - * numberOfCodes() (the maximum code plus one) if value is not - * contained in this dictionary. - **/ - std::uint32_t getCodeForTypedValue(const TypedValue &value, - const Type &value_type) const { - if (value.isNull()) { - return getNullCode() == std::numeric_limits<std::uint32_t>::max() ? number_of_codes_including_null_ - : getNullCode(); - } else if (value_type.isSubsumedBy(type_)) { - return getCodeForUntypedValue(value.getDataPtr()); - } else { - return getCodeForDifferentTypedValue(value, value_type); - } - } - - /** - * @brief Find the first code which is not less than the specified untyped - * value, similar to std::lower_bound(). - * @warning value must not be NULL. - * - * @param value An untyped pointer to a value, which must be of the exact - * same Type as the Type used to construct this dictionary. - * @return The first code whose corresponding uncompressed value is not less - * than value. May return numberOfCodes() if every value in the - * dictionary is less than value. - **/ - std::uint32_t getLowerBoundCodeForUntypedValue(const void *value, - const bool ignore_null_code = false) const; - - /** - * @brief Find the first code which is not less than the specified typed - * value, similar to std::lower_bound(). - * @warning value must not be NULL. - * - * @param value A typed value, which can be either the exact same Type as - * the values in this dictionary, or another Type which is comparable - * according to LessComparison. - * @param value_type The Type that value belongs to. - * @return The first code whose corresponding uncompressed value is not less - * than value. May return numberOfCodes() if every value in the - * dictionary is less than value. - **/ - std::uint32_t getLowerBoundCodeForTypedValue(const TypedValue &value, - const Type &value_type, - const bool ignore_null_code = false) const { - DCHECK(!value.isNull()); - if (value_type.isSubsumedBy(type_)) { - return getLowerBoundCodeForUntypedValue(value.getDataPtr(), ignore_null_code); - } else { - return getLowerBoundCodeForDifferentTypedValue(value, value_type, ignore_null_code); - } - } - - /** - * @brief Find the first code which is greater than the specified untyped - * value, similar to std::upper_bound(). - * @warning value must not be NULL. - * - * @param value An untyped pointer to a value, which must be of the exact - * same Type as the Type used to construct this dictionary. - * @return The first code whose corresponding uncompressed value is greater - * than value. May return numberOfCodes() if every value in the - * dictionary is less than or equal to value. - **/ - std::uint32_t getUpperBoundCodeForUntypedValue(const void *value) const; - - /** - * @brief Find the first code which is greater than the specified typed - * value, similar to std::upper_bound(). - * @warning value must not be NULL. - * - * @param value A typed value, which can be either the exact same Type as - * the values in this dictionary, or another Type which is comparable - * according to LessComparison. - * @param value_type The Type that value belongs to. - * @return The first code whose corresponding uncompressed value is greater - * than value. May return numberOfCodes() if every value in the - * dictionary is less than or equal to value. - **/ - std::uint32_t getUpperBoundCodeForTypedValue(const TypedValue &value, - const Type &value_type) const { - DCHECK(!value.isNull()); - if (value_type.isSubsumedBy(type_)) { - return getUpperBoundCodeForUntypedValue(value.getDataPtr()); - } else { - return getUpperBoundCodeForDifferentTypedValue(value, value_type); - } - } - - /** - * @brief Determine the range of codes that match a specified comparison with - * a specified untyped value. - * - * @param comp The comparison to evaluate. - * @param value An untyped pointer to a value, which must be of the exact - * same Type as the Type used to construct this dictionary. - * @return The limits of the range of codes which match the predicate - * "coded-value comp value". The range is [first, second) (i.e. it - * is inclusive of first but not second). - **/ - std::pair<std::uint32_t, std::uint32_t> getLimitCodesForComparisonUntyped( - const ComparisonID comp, - const void *value) const; - - /** - * @brief Determine the range of codes that match a specified comparison with - * a specified typed value. - * - * @param comp The comparison to evaluate. - * @param value A typed value, which can be either the exact same Type as - * the values in this dictionary, or another Type which is comparable - * according to LessComparison. - * @param value_type The Type that value belongs to. - * @return The limits of the range of codes which match the predicate - * "coded-value comp value". The range is [first, second) (i.e. it - * is inclusive of first but not second). - **/ - std::pair<std::uint32_t, std::uint32_t> getLimitCodesForComparisonTyped( - const ComparisonID comp, - const TypedValue &value, - const Type &value_type) const { - if (value_type.isSubsumedBy(type_)) { - return getLimitCodesForComparisonUntyped(comp, - value.isNull() ? nullptr : value.getDataPtr()); - } else { - return getLimitCodesForComparisonDifferentTyped(comp, value, value_type); - } - } - - private: - friend class compression_dictionary_internal::CompressionDictionaryIterator<false>; - friend class compression_dictionary_internal::CompressionDictionaryIterator<true>; - - std::uint32_t getCodeForDifferentTypedValue(const TypedValue &value, - const Type &value_type) const; - std::uint32_t getLowerBoundCodeForDifferentTypedValue(const TypedValue &value, - const Type &value_type, - const bool ignore_null_code = false) const; - std::uint32_t getUpperBoundCodeForDifferentTypedValue(const TypedValue &value, - const Type &value_type) const; - std::pair<std::uint32_t, std::uint32_t> getLimitCodesForComparisonDifferentTyped( - const ComparisonID comp, - const TypedValue &value, - const Type &value_type) const; - - DISALLOW_COPY_AND_ASSIGN(CompressionDictionary); -}; - -/** @} */ - -} // namespace quickstep - -#endif // QUICKSTEP_COMPRESSION_COMPRESSION_DICTIONARY_HPP_ http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/156290a4/compression/CompressionDictionaryBuilder.cpp ---------------------------------------------------------------------- diff --git a/compression/CompressionDictionaryBuilder.cpp b/compression/CompressionDictionaryBuilder.cpp deleted file mode 100644 index 905af91..0000000 --- a/compression/CompressionDictionaryBuilder.cpp +++ /dev/null @@ -1,207 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - **/ - -#include "compression/CompressionDictionaryBuilder.hpp" - -#include <cstddef> -#include <cstdint> -#include <limits> -#include <unordered_map> -#include <utility> -#include <vector> - -#include "types/Type.hpp" -#include "types/TypedValue.hpp" -#include "types/operations/comparisons/ComparisonUtil.hpp" - -#include "glog/logging.h" - -using std::numeric_limits; -using std::pair; -using std::uint32_t; -using std::unordered_map; - -namespace quickstep { - -namespace { - -inline void CopyFixedLengthValuesToLocation(const std::vector<TypedValue> &values, - const std::size_t type_length, - char *location) { - for (const TypedValue &value : values) { - value.copyInto(location); - location += type_length; - } -} - -inline void CopyVariableLengthValuesToLocation(const std::vector<TypedValue> &values, - char *location) { - uint32_t *offset_array_ptr = reinterpret_cast<uint32_t*>(location); - char *values_location = static_cast<char*>(location) - + values.size() * sizeof(uint32_t); - uint32_t value_offset = 0; - for (const TypedValue &value : values) { - *offset_array_ptr = value_offset; - value.copyInto(values_location + value_offset); - - ++offset_array_ptr; - value_offset += value.getDataSize(); - } -} - -} // namespace - -CompressionDictionaryBuilder::CompressionDictionaryBuilder(const Type &type) - : type_(type), - type_is_nullable_(type.isNullable()), - fixed_type_length_(type.isVariableLength() ? 0 : type.maximumByteLength()), - last_inserted_iterator_(value_set_.end()), - num_values_(0), - code_length_bits_(0), - total_value_size_(0), - null_value_present_(false), - last_insert_was_null_(false), - may_contain_out_of_line_literal_(false), - built_(false) { -} - -void CompressionDictionaryBuilder::buildDictionary(void *location) { - *static_cast<uint32_t*>(location) = num_values_ - null_value_present_; - *(static_cast<uint32_t*>(location) + 1) - = null_value_present_ ? num_values_ - 1 - : std::numeric_limits<uint32_t>::max(); - - char *copy_location = static_cast<char*>(location) + 2 * sizeof(uint32_t); - - std::vector<TypedValue> sorted_values; - sorted_values.reserve(value_set_.size()); - if (may_contain_out_of_line_literal_ && !TypedValue::RepresentedInline(type_.getTypeID())) { - // Avoid an intermediate copy of out-of-line data. - for (const pair<const TypedValue, uint32_t> &value_pair : value_set_) { - sorted_values.emplace_back(value_pair.first.makeReferenceToThis()); - } - } else { - for (const pair<const TypedValue, uint32_t> &value_pair : value_set_) { - sorted_values.emplace_back(value_pair.first); - } - } - SortValues(type_, sorted_values.begin(), sorted_values.end()); - - // Update 'value_set_' with the actual codes used for each value so that - // getCodeForValue() can do fast lookups. - for (uint32_t code = 0; code < sorted_values.size(); ++code) { - value_set_.find(sorted_values[code])->second = code; - } - - if (fixed_type_length_) { - CopyFixedLengthValuesToLocation(sorted_values, fixed_type_length_, copy_location); - } else { - CopyVariableLengthValuesToLocation(sorted_values, copy_location); - } - - built_ = true; -} - -bool CompressionDictionaryBuilder::insertEntryInternal(const TypedValue &value, - bool by_reference) { - DCHECK(!built_); - DCHECK(value.isPlausibleInstanceOf(type_.getSignature())); - - if (type_is_nullable_ && value.isNull()) { - last_insert_was_null_ = !null_value_present_; - null_value_present_ = true; - if (last_insert_was_null_) { - ++num_values_; - if ((code_length_bits_ == 0) || (num_values_ == (1u << code_length_bits_) + 1)) { - ++code_length_bits_; - } - } - return last_insert_was_null_; - } - - DCHECK(!value.isNull()); - if (num_values_ == numeric_limits<uint32_t>::max()) { - LOG(FATAL) << "Attempted to insert a value into a CompressionDictionaryBuilder which " - << "would cause it to overflow the limit of " - << numeric_limits<uint32_t>::max() << " entries."; - } - - pair<unordered_map<TypedValue, - uint32_t, - TypedValueHasher, - TypedValueEqualChecker>::iterator, - bool> insertion_result; - if (by_reference) { - insertion_result = value_set_.emplace(value.makeReferenceToThis(), - numeric_limits<uint32_t>::max()); - } else { - insertion_result = value_set_.emplace(value, - numeric_limits<uint32_t>::max()); - } - if (!insertion_result.second) { - return false; - } - if (!by_reference) { - // const_cast here is safe: TypedValue will be modified in-place and - // hash/compare the same. - const_cast<TypedValue&>(insertion_result.first->first).ensureNotReference(); - } - last_inserted_iterator_ = insertion_result.first; - const std::size_t value_length = (fixed_type_length_ == 0) ? insertion_result.first->first.getDataSize() - : fixed_type_length_; - - // Update state. - last_insert_was_null_ = false; - ++num_values_; - if ((code_length_bits_ == 0) || (num_values_ == (1u << code_length_bits_) + 1)) { - ++code_length_bits_; - } - total_value_size_ += value_length; - if (total_value_size_ > numeric_limits<uint32_t>::max()) { - LOG(FATAL) << "Attempted to insert a value into a CompressionDictionaryBuilder which would " - << "overflow the limit of " - << numeric_limits<uint32_t>::max() << " total bytes."; - } - - return true; -} - -void CompressionDictionaryBuilder::undoLastInsert() { - DCHECK(!built_); - if (last_insert_was_null_) { - null_value_present_ = false; - last_insert_was_null_ = false; - } else { - DCHECK(last_inserted_iterator_ != value_set_.end()); - total_value_size_ -= (fixed_type_length_ == 0) ? last_inserted_iterator_->first.getDataSize() - : fixed_type_length_; - value_set_.erase(last_inserted_iterator_); - last_inserted_iterator_ = value_set_.end(); - } - - // Update state. - --num_values_; - if (num_values_ == 0) { - code_length_bits_ = 0; - } else if ((code_length_bits_ > 1) && (num_values_ == (1u << (code_length_bits_ - 1)))) { - --code_length_bits_; - } -} - -} // namespace quickstep http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/156290a4/compression/CompressionDictionaryBuilder.hpp ---------------------------------------------------------------------- diff --git a/compression/CompressionDictionaryBuilder.hpp b/compression/CompressionDictionaryBuilder.hpp deleted file mode 100644 index cad6852..0000000 --- a/compression/CompressionDictionaryBuilder.hpp +++ /dev/null @@ -1,245 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - **/ - -#ifndef QUICKSTEP_COMPRESSION_COMPRESSION_DICTIONARY_BUILDER_HPP_ -#define QUICKSTEP_COMPRESSION_COMPRESSION_DICTIONARY_BUILDER_HPP_ - -#include <cstddef> -#include <cstdint> -#include <unordered_map> - -#include "types/Type.hpp" -#include "types/TypedValue.hpp" -#include "utility/Macros.hpp" - -#include "glog/logging.h" - -namespace quickstep { - -/** \addtogroup Compression - * @{ - */ - -/** - * @brief An object which accumulates typed values and builds a physical - * dictionary for a CompressionDictionary object. Usable with both - * fixed-length and variable-length types. - **/ -class CompressionDictionaryBuilder { - public: - /** - * @brief Constructor. - * - * @param type The Type to build a CompressionDictionary for. Must be - * orderable by LessComparison. - **/ - explicit CompressionDictionaryBuilder(const Type &type); - - /** - * @brief Destructor. - **/ - ~CompressionDictionaryBuilder() { - } - - /** - * @brief Get the number of entries (unique values/codes) in the dictionary - * being built. - * - * @return The number of entries in the dictionary. - **/ - inline std::uint32_t numberOfEntries() const { - return num_values_; - } - - /** - * @brief Get the number of bits needed to represent a code in the dictionary - * being built. - * - * @return The length, in bits, of codes for the dictionary. - **/ - inline std::uint8_t codeLengthBits() const { - return code_length_bits_; - } - - /** - * @brief Get the number of bytes used to represent a code in the dictionary - * being build when all codes are padded up to the next power-of-two - * number of bytes. - * - * @return The length, in bytes, of codes padded up to a power-of-two bytes. - **/ - inline std::uint8_t codeLengthPaddedBytes() const { - if (code_length_bits_ < 9) { - return 1; - } else if (code_length_bits_ < 17) { - return 2; - } else { - return 4; - } - } - - /** - * @brief Get the number of bytes needed to store the physical dictionary - * being built. - * - * @return The size, in bytes, of the dictionary. - **/ - inline std::size_t dictionarySizeBytes() const { - if (fixed_type_length_) { - return 2 * sizeof(std::uint32_t) + total_value_size_; - } else { - return (num_values_ + 2 - null_value_present_) * sizeof(std::uint32_t) - + total_value_size_; - } - } - - /** - * @brief Determine if the dictionary being built contains a particular - * value. - **/ - bool containsValue(const TypedValue &value) const { - DCHECK(value.isPlausibleInstanceOf(type_.getSignature())); - if (value.isNull()) { - return null_value_present_; - } - return value_set_.find(value) != value_set_.end(); - } - - /** - * @brief Lookup the code for a particular value in a dictionary that has - * been built by this CompressionDictionaryBuilder. - * @note This method uses a hash-table lookup for O(1) constant time - * performance in the general case, in contrast to the O(log(n)) - * lookup performance of CompressionDictionary::getCodeForTypedValue(). - * @warning Only call this AFTER buildDictionary(). - * - * @param value A previously-inserted value to look up. - * @return The code that maps to value. - **/ - inline std::uint32_t getCodeForValue(const TypedValue &value) const { - DCHECK(value.isPlausibleInstanceOf(type_.getSignature())); - DCHECK(containsValue(value)); - DCHECK(built_); - if (value.isNull()) { - return num_values_ - 1; - } - return value_set_.find(value)->second; - } - - /** - * @brief Construct a physical dictionary in the specified memory location. - * - * @param location The memory location where the physical dictionary should - * be built. Must have dictionarySizeBytes() available to write at - * location. - **/ - void buildDictionary(void *location); - - /** - * @brief Add a value to the dictionary being built. - * @note This method makes a copy of the value passed in. If the caller can - * guarantee that value remains in existence for the life of this - * CompressionDictionaryBuilder, it is more memory-efficient to use - * insertEntryByReference() instead. - * - * @param value A typed value to add to the dictionary. - * @return True if value has been added, false if it was already present and - * the dictionary was not modified. - **/ - bool insertEntry(const TypedValue &value) { - DCHECK(!built_); - // This is used in buildDictionary() to avoid an intermediate copy of - // out-of-line data. - may_contain_out_of_line_literal_ = true; - return insertEntryInternal(value, false); - } - - /** - * @brief Add a value to the dictionary being built without copying it. - * @warning The caller must ensure that value is not deleted until after - * done using this CompressionDictionaryBuilder. - * - * @param value A typed value to add to the dictionary. - * @return True if value has been added, false if it was already present and - * the dictionary was not modified. - **/ - bool insertEntryByReference(const TypedValue &value) { - DCHECK(!built_); - return insertEntryInternal(value, true); - } - - /** - * @brief Remove the last entry successfully added to the dictionary via - * insertEntry(), reducing the dictionary size and potentially - * reducing the code length in bits. - **/ - void undoLastInsert(); - - private: - struct TypedValueHasher { - inline std::size_t operator()(const TypedValue &value) const { - return value.getHash(); - } - }; - - struct TypedValueEqualChecker { - inline bool operator()(const TypedValue &left, const TypedValue &right) const { - return left.fastEqualCheck(right); - } - }; - - bool insertEntryInternal(const TypedValue &value, bool by_reference); - - const Type &type_; - const bool type_is_nullable_; - std::size_t fixed_type_length_; // 0 for variable-length types. - - // Initially, the uint32_t value associated with each key is simply the max - // for uint32_t. buildDictionary() updates values to be the actual compressed - // codes for each value so that getCodeForValue() can do fast lookups. - std::unordered_map<TypedValue, - std::uint32_t, - TypedValueHasher, - TypedValueEqualChecker> - value_set_; - std::unordered_map<TypedValue, - std::uint32_t, - TypedValueHasher, - TypedValueEqualChecker>::iterator - last_inserted_iterator_; - - std::uint32_t num_values_; - std::uint8_t code_length_bits_; - std::size_t total_value_size_; - bool null_value_present_; - bool last_insert_was_null_; - bool may_contain_out_of_line_literal_; - - // Keeps track of whether buildDictionary() has been called for - // error-checking purposes. - bool built_; - - DISALLOW_COPY_AND_ASSIGN(CompressionDictionaryBuilder); -}; - -/** @} */ - -} // namespace quickstep - -#endif // QUICKSTEP_COMPRESSION_COMPRESSION_DICTIONARY_BUILDER_HPP_ http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/156290a4/compression/CompressionDictionaryLite.cpp ---------------------------------------------------------------------- diff --git a/compression/CompressionDictionaryLite.cpp b/compression/CompressionDictionaryLite.cpp deleted file mode 100644 index d21d43c..0000000 --- a/compression/CompressionDictionaryLite.cpp +++ /dev/null @@ -1,103 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - **/ - -#include "compression/CompressionDictionaryLite.hpp" - -#include <cstddef> -#include <cstdint> - -#include "types/Type.hpp" - -#include "glog/logging.h" - -using std::size_t; -using std::uint32_t; - -namespace quickstep { - -CompressionDictionaryLite::CompressionDictionaryLite( - const Type &type, - const void *dictionary_memory, - const std::size_t dictionary_memory_size) - : type_(type), - type_is_variable_length_(type_.isVariableLength()), - dictionary_memory_(dictionary_memory), - dictionary_memory_size_(dictionary_memory_size), - number_of_codes_including_null_(*static_cast<const uint32_t*>(dictionary_memory_)), - type_fixed_byte_length_(type_.maximumByteLength()) { - uint32_t non_null_codes = number_of_codes_including_null_; - if (*(static_cast<const uint32_t*>(dictionary_memory_) + 1) - == number_of_codes_including_null_) { - ++number_of_codes_including_null_; - } - - for (code_length_bits_ = 32; code_length_bits_ > 0; --code_length_bits_) { - if ((number_of_codes_including_null_ - 1) >> (code_length_bits_ - 1)) { - break; - } - } - - if (type_is_variable_length_) { - if (dictionary_memory_size_ < - 2 * sizeof(uint32_t) + non_null_codes * (sizeof(uint32_t) + type_.minimumByteLength())) { - LOG(FATAL) << "Attempted to create a CompressionDictionary with " - << dictionary_memory_size_ << " bytes of memory, which is insufficient for " - << non_null_codes << " entries of type " << type_.getName() << "."; - } - - variable_length_data_region_ = static_cast<const char*>(dictionary_memory_) - + (non_null_codes + 2) * sizeof(uint32_t); - - DCHECK(paranoidOffsetsCheck()); - } else { - if (non_null_codes * type_fixed_byte_length_ + 2 * sizeof(uint32_t) - < dictionary_memory_size_) { - LOG(FATAL) << "Attempted to create a CompressionDictionary with " - << dictionary_memory_size_ << " bytes of memory, which is insufficient for " - << non_null_codes << " entries of type " << type_.getName() << "."; - } - } - - // NOTE(chasseur): If dictionary_memory_size_ is larger than the required - // amount of memory, it's not strictly an error, but there will be wasted - // space. -} - -CompressionDictionaryLite::~CompressionDictionaryLite() { -} - -bool CompressionDictionaryLite::paranoidOffsetsCheck() const { - uint32_t num_codes = *static_cast<const uint32_t*>(dictionary_memory_); - size_t variable_length_offset = (num_codes + 2) * sizeof(uint32_t); - const uint32_t *offsets_array = static_cast<const uint32_t*>(dictionary_memory_) + 2; - - size_t last_offset = variable_length_offset - 1; - for (uint32_t code = 0; code < num_codes; ++code) { - size_t value_offset = variable_length_offset + offsets_array[code]; - if ((value_offset >= dictionary_memory_size_) - || (value_offset <= last_offset)) { - return false; - } - last_offset = value_offset; - } - - return true; -} - -} // namespace quickstep http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/156290a4/compression/CompressionDictionaryLite.hpp ---------------------------------------------------------------------- diff --git a/compression/CompressionDictionaryLite.hpp b/compression/CompressionDictionaryLite.hpp deleted file mode 100644 index c4c338e..0000000 --- a/compression/CompressionDictionaryLite.hpp +++ /dev/null @@ -1,312 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - **/ - -#ifndef QUICKSTEP_COMPRESSION_COMPRESSION_DICTIONARY_LITE_HPP_ -#define QUICKSTEP_COMPRESSION_COMPRESSION_DICTIONARY_LITE_HPP_ - -#include <cstddef> -#include <cstdint> -#include <limits> - -#include "types/Type.hpp" -#include "types/TypedValue.hpp" -#include "utility/Macros.hpp" - -#include "glog/logging.h" - -namespace quickstep { - -/** \addtogroup Compression - * @{ - */ - -// In-memory format of a CompressionDictionary is as follows: -// std::uint32_t num_codes - number of codes/values stored in dictionary -// (excluding NULL) -// std::uint32_t null_code - a code representing a NULL value (equal to -// num_codes if NULL is present, or the maximum -// possible value of std::uint32_t if NULL is not -// present) -// For fixed-length types: -// array of num_codes values -// For variable-length types: -// array of num_codes offsets of variable-length values -// variable-length storage region, with values stored at offsets specified -// by previous array. - -/** - * @brief A dictionary which maps short integer codes to typed values. - * @note Codes in a CompressionDictionaryLite compare in the same order as the - * underlying values. - **/ -class CompressionDictionaryLite { - public: - /** - * @brief Constructor. - * - * @param type The type of values being compressed. LessComparison must be - * applicable to this Type. - * @param dictionary_memory The memory location of the physical dictionary. - * @param dictionary_memory_size The size (in bytes) of the physical - * dictionary at dictionary_memory. - **/ - CompressionDictionaryLite(const Type &type, - const void *dictionary_memory, - const std::size_t dictionary_memory_size); - - virtual ~CompressionDictionaryLite() = 0; - - /** - * @brief Get the number of code/value mappings in this dictionary. - * - * @return The number of codes/values in this dictionary. - **/ - inline std::uint32_t numberOfCodes() const { - return number_of_codes_including_null_; - } - - /** - * @brief Get the minimum number of bits needed to represent a code for this - * dictionary. - * - * @return The length of codes for this dictionary in bits. - **/ - inline std::uint8_t codeLengthBits() const { - return code_length_bits_; - } - - /** - * @brief Determine whether this dictionary contains a code for NULL values. - * - * @return Whether this dictionary contains NULL. - **/ - inline bool containsNull() const { - return getNullCode() != std::numeric_limits<std::uint32_t>::max(); - } - - /** - * @brief Get the code representing NULL in this dictionary, or the maximum - * possible value of a std::uint32_t if NULL is not present in this - * dictionary. - * - * @return The code representing NULL in this dictionary, or UINT32_MAX if - * NULL is not present in this dictionary. - **/ - inline std::uint32_t getNullCode() const { - return *(static_cast<const std::uint32_t*>(dictionary_memory_) + 1); - } - - /** - * @brief Get an untyped pointer to the value represented by the specified - * code. - * @note This version is for codes of 8 bits or less. Also see - * getUntypedValueForShortCode() and getUntypedValueForCode(). - * @warning It is an error to use this method with a code which does not - * exist in this dictionary, i.e. code must be less than - * numberOfCodes(). - * - * @param code The compressed code to get the value for. - * @return An untyped pointer to the value that corresponds to code. - **/ - template <bool check_null = true> - inline const void* getUntypedValueForByteCode(const std::uint8_t code) const { - if (type_is_variable_length_) { - return variableLengthGetUntypedValueHelper<std::uint8_t, check_null>(code); - } else { - return fixedLengthGetUntypedValueHelper<std::uint8_t, check_null>(code); - } - } - - /** - * @brief Get an untyped pointer to the value represented by the specified - * code. - * @note This version is for codes of 16 bits or less. Also see - * getUntypedValueForByteCode() and getUntypedValueForCode(). - * @warning It is an error to use this method with a code which does not - * exist in this dictionary, i.e. code must be less than - * numberOfCodes(). - * - * @param code The compressed code to get the value for. - * @return An untyped pointer to the value that corresponds to code. - **/ - template <bool check_null = true> - inline const void* getUntypedValueForShortCode(const std::uint16_t code) const { - if (type_is_variable_length_) { - return variableLengthGetUntypedValueHelper<std::uint16_t, check_null>(code); - } else { - return fixedLengthGetUntypedValueHelper<std::uint16_t, check_null>(code); - } - } - - /** - * @brief Get an untyped pointer to the value represented by the specified - * code. - * @note This version is for any code up to the maximum length of 32 bits. - * Also see getUntypedValueForByteCode() and - * getUntypedValueForShortCode(). - * @warning It is an error to use this method with a code which does not - * exist in this dictionary, i.e. code must be less than - * numberOfCodes(). - * - * @param code The compressed code to get the value for. - * @return An untyped pointer to the value that corresponds to code. - **/ - template <bool check_null = true> - inline const void* getUntypedValueForCode(const std::uint32_t code) const { - if (type_is_variable_length_) { - return variableLengthGetUntypedValueHelper<std::uint32_t, check_null>(code); - } else { - return fixedLengthGetUntypedValueHelper<std::uint32_t, check_null>(code); - } - } - - /** - * @brief Get the value represented by the specified code as a TypedValue. - * @note This version is for codes of 8 bits or less. Also see - * getTypedValueForShortCode() and getTypedValueForCode(). - * @warning It is an error to use this method with a code which does not - * exist in this dictionary, i.e. code must be less than - * numberOfCodes(). - * - * @param code The compressed code to get the value for. - * @return The typed value that corresponds to code. - **/ - inline TypedValue getTypedValueForByteCode(const std::uint8_t code) const { - if (type_is_variable_length_) { - return variableLengthGetTypedValueHelper<std::uint8_t>(code); - } else { - return fixedLengthGetTypedValueHelper<std::uint8_t>(code); - } - } - - /** - * @brief Get the value represented by the specified code as a TypedValue. - * @note This version is for codes of 16 bits or less. Also see - * getTypedValueForByteCode() and getTypedValueForCode(). - * @warning It is an error to use this method with a code which does not - * exist in this dictionary, i.e. code must be less than - * numberOfCodes(). - * - * @param code The compressed code to get the value for. - * @return The typed value that corresponds to code. - **/ - inline TypedValue getTypedValueForShortCode(const std::uint16_t code) const { - if (type_is_variable_length_) { - return variableLengthGetTypedValueHelper<std::uint16_t>(code); - } else { - return fixedLengthGetTypedValueHelper<std::uint16_t>(code); - } - } - - /** - * @brief Get the value represented by the specified code as a TypedValue. - * @note This version is for any code up to the maximum length of 32 bits. - * Also see getTypedValueForByteCode() and getTypedValueForShortCode(). - * @warning It is an error to use this method with a code which does not - * exist in this dictionary, i.e. code must be less than - * numberOfCodes(). - * - * @param code The compressed code to get the value for. - * @return The typed value that corresponds to code. - **/ - inline TypedValue getTypedValueForCode(const std::uint32_t code) const { - if (type_is_variable_length_) { - return variableLengthGetTypedValueHelper<std::uint32_t>(code); - } else { - return fixedLengthGetTypedValueHelper<std::uint32_t>(code); - } - } - - protected: - template <typename CodeType, bool check_null = true> - inline const void* fixedLengthGetUntypedValueHelper(const CodeType code) const { - if (check_null && (code == getNullCode())) { - return nullptr; - } - DCHECK_LT(code, numberOfCodes()); - return static_cast<const char*>(dictionary_memory_) - + 2 * sizeof(std::uint32_t) // Header. - + code * type_fixed_byte_length_; // Index into value array. - } - - template <typename CodeType, bool check_null = true> - inline const void* variableLengthGetUntypedValueHelper(const CodeType code) const { - if (check_null && (code == getNullCode())) { - return nullptr; - } - DCHECK_LT(code, numberOfCodes()); - const void *retval = variable_length_data_region_ - + static_cast<const std::uint32_t*>(dictionary_memory_)[code + 2]; - DCHECK_LT(retval, static_cast<const char*>(dictionary_memory_) + dictionary_memory_size_); - return retval; - } - - template <typename CodeType> - inline TypedValue fixedLengthGetTypedValueHelper(const CodeType code) const { - if (code == getNullCode()) { - return TypedValue(type_.getTypeID());; - } - DCHECK_LT(code, numberOfCodes()); - return type_.makeValue(static_cast<const char*>(dictionary_memory_) - + 2 * sizeof(std::uint32_t) // Header. - + code * type_fixed_byte_length_, // Index into value array. - type_fixed_byte_length_); - } - - template <typename CodeType> - inline TypedValue variableLengthGetTypedValueHelper(const CodeType code) const { - if (code == getNullCode()) { - return TypedValue(type_.getTypeID()); - } - DCHECK_LT(code, numberOfCodes()); - - std::uint32_t value_offset = static_cast<const std::uint32_t*>(dictionary_memory_)[code + 2]; - const void *data_ptr = variable_length_data_region_ + value_offset; - - DCHECK_LT(data_ptr, static_cast<const char*>(dictionary_memory_) + dictionary_memory_size_); - - std::size_t data_size = (code == *static_cast<const std::uint32_t*>(dictionary_memory_) - 1) ? - (static_cast<const char*>(dictionary_memory_) - + dictionary_memory_size_ - - static_cast<const char*>(data_ptr)) - : (static_cast<const std::uint32_t*>(dictionary_memory_)[code + 3] - value_offset); - return TypedValue(type_.getTypeID(), data_ptr, data_size); - } - - const Type &type_; - const bool type_is_variable_length_; - const void *dictionary_memory_; - const std::size_t dictionary_memory_size_; - std::uint32_t number_of_codes_including_null_; - std::uint8_t code_length_bits_; - - const std::size_t type_fixed_byte_length_; - const char *variable_length_data_region_; - - private: - bool paranoidOffsetsCheck() const; - - DISALLOW_COPY_AND_ASSIGN(CompressionDictionaryLite); -}; - -/** @} */ - -} // namespace quickstep - -#endif // QUICKSTEP_COMPRESSION_COMPRESSION_DICTIONARY_LITE_HPP_ http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/156290a4/compression/CompressionModule.hpp ---------------------------------------------------------------------- diff --git a/compression/CompressionModule.hpp b/compression/CompressionModule.hpp deleted file mode 100644 index f5e3e9b..0000000 --- a/compression/CompressionModule.hpp +++ /dev/null @@ -1,24 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - **/ - -/** @defgroup Compression - * - * Facilities for ordered dictionary-based compression of values to - * fixed-length integer codes. - **/