Repository: incubator-quickstep Updated Branches: refs/heads/fix-derived-accessor a100b9ecc -> 19e7c94be (forced update)
QUICKSTEP-73: Multi-attribute PartitionSchemeHeader. Project: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/commit/05b47b5a Tree: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/tree/05b47b5a Diff: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/diff/05b47b5a Branch: refs/heads/fix-derived-accessor Commit: 05b47b5a9fa7bd550c284846c705765c99d409ba Parents: 42bf626 Author: Zuyu Zhang <zu...@apache.org> Authored: Sat Mar 18 03:28:38 2017 -0700 Committer: Zuyu Zhang <zu...@apache.org> Committed: Sun Mar 26 15:55:35 2017 -0700 ---------------------------------------------------------------------- catalog/CMakeLists.txt | 5 +- catalog/Catalog.proto | 10 +- catalog/CatalogRelation.cpp | 6 +- catalog/PartitionSchemeHeader.cpp | 97 ++++-- catalog/PartitionSchemeHeader.hpp | 170 ++++++---- catalog/tests/NUMAPlacementScheme_unittest.cpp | 2 +- catalog/tests/PartitionScheme_unittest.cpp | 321 +++++++++---------- parser/tests/Create.test | 38 +++ query_optimizer/ExecutionGenerator.cpp | 2 +- query_optimizer/resolver/Resolver.cpp | 35 +- .../tests/execution_generator/Create.test | 17 + .../tests/logical_generator/Create.test | 21 ++ .../tests/physical_generator/Create.test | 39 +++ query_optimizer/tests/resolver/Create.test | 21 ++ .../tests/HashJoinOperator_unittest.cpp | 4 +- storage/CMakeLists.txt | 1 + storage/InsertDestination.cpp | 48 +-- storage/InsertDestination.hpp | 6 +- storage/InsertDestinationInterface.hpp | 9 +- storage/StorageBlock.cpp | 3 +- 20 files changed, 536 insertions(+), 319 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/05b47b5a/catalog/CMakeLists.txt ---------------------------------------------------------------------- diff --git a/catalog/CMakeLists.txt b/catalog/CMakeLists.txt index 3c64e97..9684cfe 100644 --- a/catalog/CMakeLists.txt +++ b/catalog/CMakeLists.txt @@ -174,7 +174,9 @@ target_link_libraries(quickstep_catalog_PartitionSchemeHeader quickstep_types_TypedValue quickstep_types_TypedValue_proto quickstep_types_operations_comparisons_Comparison + quickstep_types_operations_comparisons_EqualComparison quickstep_types_operations_comparisons_LessComparison + quickstep_utility_CompositeHash quickstep_utility_Macros) # Module all-in-one library: @@ -276,7 +278,8 @@ target_link_libraries(PartitionScheme_unittest quickstep_types_TypeID quickstep_types_TypedValue quickstep_types_operations_comparisons_Comparison - quickstep_types_operations_comparisons_EqualComparison) + quickstep_types_operations_comparisons_EqualComparison + quickstep_utility_CompositeHash) add_test(PartitionScheme_unittest PartitionScheme_unittest) file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/catalog_relation_statistics_test_data) http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/05b47b5a/catalog/Catalog.proto ---------------------------------------------------------------------- diff --git a/catalog/Catalog.proto b/catalog/Catalog.proto index 89cb7e5..4e2fafe 100644 --- a/catalog/Catalog.proto +++ b/catalog/Catalog.proto @@ -39,18 +39,22 @@ message PartitionSchemeHeader { required PartitionType partition_type = 1; required uint64 num_partitions = 2; - required uint32 partition_attribute_id = 3; + repeated uint32 partition_attribute_ids = 3; // The convention for extension numbering is that extensions for a particular // PartitionType should begin from (partition_type + 1) * 16. extensions 16 to max; } +message PartitionValues { + repeated TypedValue partition_values = 1; +} + message RangePartitionSchemeHeader { extend PartitionSchemeHeader { // All required. - optional Type partition_attr_type = 32; - repeated TypedValue partition_range_boundaries = 33; + repeated Type partition_attr_types = 32; + repeated PartitionValues partition_range_boundaries = 33; } } http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/05b47b5a/catalog/CatalogRelation.cpp ---------------------------------------------------------------------- diff --git a/catalog/CatalogRelation.cpp b/catalog/CatalogRelation.cpp index 41c503c..793fc2d 100644 --- a/catalog/CatalogRelation.cpp +++ b/catalog/CatalogRelation.cpp @@ -88,7 +88,11 @@ CatalogRelation::CatalogRelation(const serialization::CatalogRelationSchema &pro const serialization::PartitionScheme &proto_partition_scheme = proto.GetExtension(serialization::CatalogRelation::partition_scheme); - DCHECK(hasAttributeWithId(proto_partition_scheme.header().partition_attribute_id())); +#ifdef QUICKSTEP_DEBUG + for (int i = 0; i < proto_partition_scheme.header().partition_attribute_ids_size(); ++i) { + DCHECK(hasAttributeWithId(proto_partition_scheme.header().partition_attribute_ids(i))); + } +#endif setPartitionScheme(PartitionScheme::ReconstructFromProto(proto_partition_scheme)); http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/05b47b5a/catalog/PartitionSchemeHeader.cpp ---------------------------------------------------------------------- diff --git a/catalog/PartitionSchemeHeader.cpp b/catalog/PartitionSchemeHeader.cpp index 0472fe6..30fa58d 100644 --- a/catalog/PartitionSchemeHeader.cpp +++ b/catalog/PartitionSchemeHeader.cpp @@ -20,10 +20,12 @@ #include "catalog/PartitionSchemeHeader.hpp" #include <cstddef> +#include <unordered_set> #include <utility> #include <vector> #include "catalog/Catalog.pb.h" +#include "catalog/CatalogTypedefs.hpp" #include "types/Type.hpp" #include "types/Type.pb.h" #include "types/TypeFactory.hpp" @@ -32,16 +34,30 @@ #include "glog/logging.h" +using std::move; +using std::size_t; +using std::vector; + namespace quickstep { PartitionSchemeHeader::PartitionSchemeHeader(const PartitionType type, const std::size_t num_partitions, - const attribute_id attr_id) + PartitionAttributeIds &&attr_ids) // NOLINT(whitespace/operators) : partition_type_(type), num_partitions_(num_partitions), - partition_attribute_id_(attr_id) { + partition_attribute_ids_(move(attr_ids)) { DCHECK_GT(num_partitions, 0u); - DCHECK_GE(attr_id, 0); + +#ifdef QUICKSTEP_DEBUG + // Ensure that no duplicated partition attributes exist. + std::unordered_set<attribute_id> partition_attribute_ids; + for (const attribute_id attr_id : partition_attribute_ids_) { + DCHECK_NE(attr_id, kInvalidCatalogId); + + CHECK_EQ(0u, partition_attribute_ids.count(attr_id)); + partition_attribute_ids.insert(attr_id); + } +#endif } bool PartitionSchemeHeader::ProtoIsValid( @@ -60,10 +76,12 @@ bool PartitionSchemeHeader::ProtoIsValid( case serialization::PartitionSchemeHeader::HASH: return true; case serialization::PartitionSchemeHeader::RANGE: { - const std::size_t num_ranges = - proto.ExtensionSize(serialization::RangePartitionSchemeHeader::partition_range_boundaries); - return num_ranges == proto.num_partitions() - 1 && - proto.HasExtension(serialization::RangePartitionSchemeHeader::partition_attr_type); + return proto.HasExtension(serialization::RangePartitionSchemeHeader::partition_range_boundaries) && + proto.ExtensionSize(serialization::RangePartitionSchemeHeader::partition_range_boundaries) == + static_cast<int>(proto.num_partitions() - 1) && + proto.HasExtension(serialization::RangePartitionSchemeHeader::partition_attr_types) && + proto.ExtensionSize(serialization::RangePartitionSchemeHeader::partition_attr_types) == + proto.partition_attribute_ids_size(); } default: // Partition type is unknown. @@ -77,31 +95,43 @@ PartitionSchemeHeader* PartitionSchemeHeader::ReconstructFromProto( << "Attempted to create PartitionSchemeHeader from an invalid proto description:\n" << proto.DebugString(); + PartitionAttributeIds partition_attribute_ids; + for (int i = 0; i < proto.partition_attribute_ids_size(); ++i) { + partition_attribute_ids.push_back(proto.partition_attribute_ids(i)); + } + switch (proto.partition_type()) { case serialization::PartitionSchemeHeader::HASH: { - return new HashPartitionSchemeHeader(proto.num_partitions(), proto.partition_attribute_id()); + return new HashPartitionSchemeHeader(proto.num_partitions(), move(partition_attribute_ids)); } case serialization::PartitionSchemeHeader::RANGE: { - const Type &attr_type = - TypeFactory::ReconstructFromProto(proto.GetExtension( - serialization::RangePartitionSchemeHeader::partition_attr_type)); - - std::vector<TypedValue> partition_ranges; - for (int i = 0; - i < proto.ExtensionSize(serialization::RangePartitionSchemeHeader::partition_range_boundaries); - ++i) { - partition_ranges.emplace_back( - TypedValue::ReconstructFromProto( - proto.GetExtension(serialization::RangePartitionSchemeHeader::partition_range_boundaries, i))); + std::vector<const Type*> attr_types; + for (int i = 0; i < proto.ExtensionSize(serialization::RangePartitionSchemeHeader::partition_attr_types); ++i) { + attr_types.push_back(&TypeFactory::ReconstructFromProto( + proto.GetExtension(serialization::RangePartitionSchemeHeader::partition_attr_types, i))); } - return new RangePartitionSchemeHeader(attr_type, - proto.num_partitions(), - proto.partition_attribute_id(), - std::move(partition_ranges)); + const int partition_ranges_size = + proto.ExtensionSize(serialization::RangePartitionSchemeHeader::partition_range_boundaries); + std::vector<PartitionValues> partition_ranges(partition_ranges_size); + for (int i = 0; i < partition_ranges_size; ++i) { + const auto &proto_partition_values = + proto.GetExtension(serialization::RangePartitionSchemeHeader::partition_range_boundaries, i); + for (int j = 0; j < proto_partition_values.partition_values_size(); ++j) { + partition_ranges[i].emplace_back(TypedValue::ReconstructFromProto( + proto_partition_values.partition_values(j))); + } + } + + return new RangePartitionSchemeHeader(proto.num_partitions(), + move(partition_attribute_ids), + move(attr_types), + move(partition_ranges)); } default: LOG(FATAL) << "Invalid partition scheme header."; + // Avoid '-Werror=return-type' using GCC. + return nullptr; } } @@ -120,7 +150,10 @@ serialization::PartitionSchemeHeader PartitionSchemeHeader::getProto() const { } proto.set_num_partitions(num_partitions_); - proto.set_partition_attribute_id(partition_attribute_id_); + + for (const attribute_id attr_id : partition_attribute_ids_) { + proto.add_partition_attribute_ids(attr_id); + } return proto; } @@ -128,12 +161,18 @@ serialization::PartitionSchemeHeader PartitionSchemeHeader::getProto() const { serialization::PartitionSchemeHeader RangePartitionSchemeHeader::getProto() const { serialization::PartitionSchemeHeader proto = PartitionSchemeHeader::getProto(); - proto.MutableExtension(serialization::RangePartitionSchemeHeader::partition_attr_type) - ->MergeFrom(partition_attr_type_->getProto()); + for (const Type *type : partition_attr_types_) { + proto.AddExtension(serialization::RangePartitionSchemeHeader::partition_attr_types) + ->MergeFrom(type->getProto()); + } + + for (const PartitionValues &partition_range_boundary : partition_range_boundaries_) { + serialization::PartitionValues *proto_range_boundary = + proto.AddExtension(serialization::RangePartitionSchemeHeader::partition_range_boundaries); - for (std::size_t i = 0; i < partition_range_boundaries_.size(); ++i) { - proto.AddExtension(serialization::RangePartitionSchemeHeader::partition_range_boundaries) - ->MergeFrom(partition_range_boundaries_[i].getProto()); + for (const TypedValue &value : partition_range_boundary) { + proto_range_boundary->add_partition_values()->MergeFrom(value.getProto()); + } } return proto; http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/05b47b5a/catalog/PartitionSchemeHeader.hpp ---------------------------------------------------------------------- diff --git a/catalog/PartitionSchemeHeader.hpp b/catalog/PartitionSchemeHeader.hpp index c1d65d1..a03b0e2 100644 --- a/catalog/PartitionSchemeHeader.hpp +++ b/catalog/PartitionSchemeHeader.hpp @@ -29,7 +29,9 @@ #include "catalog/CatalogTypedefs.hpp" #include "types/TypedValue.hpp" #include "types/operations/comparisons/Comparison.hpp" +#include "types/operations/comparisons/EqualComparison.hpp" #include "types/operations/comparisons/LessComparison.hpp" +#include "utility/CompositeHash.hpp" #include "utility/Macros.hpp" #include "glog/logging.h" @@ -48,6 +50,13 @@ class Type; **/ class PartitionSchemeHeader { public: + // A vector for partitioning catalog attributes. + typedef std::vector<attribute_id> PartitionAttributeIds; + + // The values for partition attributes. + // PartitionValues.size() should be equal to PartitionAttributeIds.size(). + typedef std::vector<TypedValue> PartitionValues; + enum PartitionType { kHash = 0, kRange @@ -84,8 +93,8 @@ class PartitionSchemeHeader { * @brief Calculate the partition id into which the attribute value should * be inserted. * - * @param value_of_attribute The attribute value for which the - * partition id is to be determined. + * @param value_of_attributes A vector of attribute values for which the + * partition id is to be determined. * @return The partition id of the partition for the attribute value. **/ // TODO(gerald): Make this method more efficient since currently this is @@ -93,7 +102,7 @@ class PartitionSchemeHeader { // once using a value accessor and create bitmaps for each partition with // tuples that correspond to those partitions. virtual partition_id getPartitionId( - const TypedValue &value_of_attribute) const = 0; + const PartitionValues &value_of_attributes) const = 0; /** * @brief Serialize the Partition Scheme as Protocol Buffer. @@ -121,13 +130,13 @@ class PartitionSchemeHeader { } /** - * @brief Get the partitioning attribute for the relation. + * @brief Get the partitioning attributes for the relation. * - * @return The partitioning attribute with which the relation + * @return The partitioning attributes with which the relation * is partitioned into. **/ - inline attribute_id getPartitionAttributeId() const { - return partition_attribute_id_; + inline const PartitionAttributeIds& getPartitionAttributeIds() const { + return partition_attribute_ids_; } protected: @@ -137,18 +146,18 @@ class PartitionSchemeHeader { * @param type The type of partitioning to be used to partition the * relation. * @param num_partitions The number of partitions to be created. - * @param attr_id The attribute on which the partitioning happens. + * @param attr_ids The attributes on which the partitioning happens. **/ PartitionSchemeHeader(const PartitionType type, const std::size_t num_partitions, - const attribute_id attr_id); + PartitionAttributeIds &&attr_ids); // NOLINT(whitespace/operators) // The type of partitioning: Hash or Range. const PartitionType partition_type_; // The number of partitions. const std::size_t num_partitions_; // The attribute of partioning. - const attribute_id partition_attribute_id_; + const PartitionAttributeIds partition_attribute_ids_; private: DISALLOW_COPY_AND_ASSIGN(PartitionSchemeHeader); @@ -158,16 +167,17 @@ class PartitionSchemeHeader { * @brief Implementation of PartitionSchemeHeader that partitions the tuples in * a relation based on a hash function on the partitioning attribute. **/ -class HashPartitionSchemeHeader : public PartitionSchemeHeader { +class HashPartitionSchemeHeader final : public PartitionSchemeHeader { public: /** * @brief Constructor. * * @param num_partitions The number of partitions to be created. - * @param attribute The attribute on which the partitioning happens. + * @param attributes A vector of attributes on which the partitioning happens. **/ - HashPartitionSchemeHeader(const std::size_t num_partitions, const attribute_id attribute) - : PartitionSchemeHeader(PartitionType::kHash, num_partitions, attribute) { + HashPartitionSchemeHeader(const std::size_t num_partitions, + PartitionAttributeIds &&attributes) // NOLINT(whitespace/operators) + : PartitionSchemeHeader(PartitionType::kHash, num_partitions, std::move(attributes)) { } /** @@ -176,20 +186,13 @@ class HashPartitionSchemeHeader : public PartitionSchemeHeader { ~HashPartitionSchemeHeader() override { } - /** - * @brief Calulate the partition id into which the attribute value - * should be inserted. - * - * @param value_of_attribute The attribute value for which the - * partition id is to be determined. - * @return The partition id of the partition for the attribute value. - **/ partition_id getPartitionId( - const TypedValue &value_of_attribute) const override { + const PartitionValues &value_of_attributes) const override { + DCHECK_EQ(partition_attribute_ids_.size(), value_of_attributes.size()); // TODO(gerald): Optimize for the case where the number of partitions is a // power of 2. We can just mask out the lower-order hash bits rather than // doing a division operation. - return value_of_attribute.getHash() % num_partitions_; + return HashCompositeKey(value_of_attributes) % num_partitions_; } private: @@ -200,39 +203,48 @@ class HashPartitionSchemeHeader : public PartitionSchemeHeader { * @brief Implementation of PartitionSchemeHeader that partitions the tuples in * a relation based on a given value range on the partitioning attribute. **/ -class RangePartitionSchemeHeader : public PartitionSchemeHeader { +class RangePartitionSchemeHeader final : public PartitionSchemeHeader { public: /** * @brief Constructor. * - * @param partition_attribute_type The type of CatalogAttribute that is used - * for partitioning. * @param num_partitions The number of partitions to be created. - * @param attribute The attribute_id on which the partitioning happens. - * @param partition_range The mapping between the partition ids and the upper - * bound of the range boundaries. If two ranges R1 and - * R2 are separated by a boundary value V, then V - * would fall into range R2. For creating a range - * partition scheme with n partitions, you need to - * specify n-1 boundary values. The first partition - * will have all the values less than the first - * boundary and the last partition would have all - * values greater than or equal to the last boundary - * value. + * @param attributes A vector of attribute_ids on which the partitioning + * happens. + * @param partition_attribute_types The types of CatalogAttributes used for + * partitioning. + * @param partition_ranges The mapping between the partition ids and the upper + * bound of the range boundaries. If two ranges R1 and R2 are separated + * by a vector of boundary values V, then V would fall into range R2. + * For creating a range partition scheme with n partitions, you need to + * specify n-1 range boundaries. The first partition will have all the + * values less than the first item of range boundaries and the last + * partition would have all values greater than or equal to the last + * item of range boundaries. **/ - RangePartitionSchemeHeader(const Type &partition_attribute_type, - const std::size_t num_partitions, - const attribute_id attribute, - std::vector<TypedValue> &&partition_range) - : PartitionSchemeHeader(PartitionType::kRange, num_partitions, attribute), - partition_attr_type_(&partition_attribute_type), - partition_range_boundaries_(std::move(partition_range)) { + RangePartitionSchemeHeader(const std::size_t num_partitions, + PartitionAttributeIds &&attributes, // NOLINT(whitespace/operators) + std::vector<const Type*> &&partition_attribute_types, + std::vector<PartitionValues> &&partition_ranges) + : PartitionSchemeHeader(PartitionType::kRange, num_partitions, std::move(attributes)), + partition_attr_types_(std::move(partition_attribute_types)), + partition_range_boundaries_(std::move(partition_ranges)) { + DCHECK_EQ(partition_attribute_ids_.size(), partition_attr_types_.size()); DCHECK_EQ(num_partitions - 1, partition_range_boundaries_.size()); const Comparison &less_comparison_op(LessComparison::Instance()); - less_unchecked_comparator_.reset( - less_comparison_op.makeUncheckedComparatorForTypes( - partition_attribute_type, partition_attribute_type)); + for (const Type *type : partition_attr_types_) { + std::unique_ptr<UncheckedComparator> less_unchecked_comparator( + less_comparison_op.makeUncheckedComparatorForTypes(*type, *type)); + less_unchecked_comparators_.emplace_back(std::move(less_unchecked_comparator)); + } + + const Comparison &equal_comparison_op = EqualComparison::Instance(); + for (const Type *type : partition_attr_types_) { + std::unique_ptr<UncheckedComparator> equal_unchecked_comparator( + equal_comparison_op.makeUncheckedComparatorForTypes(*type, *type)); + equal_unchecked_comparators_.emplace_back(std::move(equal_unchecked_comparator)); + } #ifdef QUICKSTEP_DEBUG checkPartitionRangeBoundaries(); @@ -245,24 +257,18 @@ class RangePartitionSchemeHeader : public PartitionSchemeHeader { ~RangePartitionSchemeHeader() override { } - /** - * @brief Calulate the partition id into which the attribute value - * should be inserted. - * - * @param value_of_attribute The attribute value for which the - * partition id is to be determined. - * @return The partition id of the partition for the attribute value. - **/ partition_id getPartitionId( - const TypedValue &value_of_attribute) const override { + const PartitionValues &value_of_attributes) const override { + DCHECK_EQ(partition_attribute_ids_.size(), value_of_attributes.size()); + partition_id start = 0, end = partition_range_boundaries_.size() - 1; - if (!less_unchecked_comparator_->compareTypedValues(value_of_attribute, partition_range_boundaries_[end])) { + if (!lessThan(value_of_attributes, partition_range_boundaries_[end])) { return num_partitions_ - 1; } while (start < end) { const partition_id mid = start + ((end - start) >> 1); - if (less_unchecked_comparator_->compareTypedValues(value_of_attribute, partition_range_boundaries_[mid])) { + if (lessThan(value_of_attributes, partition_range_boundaries_[mid])) { end = mid; } else { start = mid + 1; @@ -279,7 +285,7 @@ class RangePartitionSchemeHeader : public PartitionSchemeHeader { * * @return The vector of range boundaries for partitions. **/ - inline const std::vector<TypedValue>& getPartitionRangeBoundaries() const { + inline const std::vector<PartitionValues>& getPartitionRangeBoundaries() const { return partition_range_boundaries_; } @@ -288,22 +294,52 @@ class RangePartitionSchemeHeader : public PartitionSchemeHeader { * @brief Check if the partition range boundaries are in ascending order. **/ void checkPartitionRangeBoundaries() { + for (const PartitionValues &partition_range_boundary : partition_range_boundaries_) { + CHECK_EQ(partition_attribute_ids_.size(), partition_range_boundary.size()) + << "A partition boundary has different size than that of partition attributes."; + } + for (partition_id part_id = 1; part_id < partition_range_boundaries_.size(); ++part_id) { - if (less_unchecked_comparator_->compareTypedValues( - partition_range_boundaries_[part_id], - partition_range_boundaries_[part_id - 1])) { - LOG(FATAL) << "Partition boundaries are not in ascending order."; + CHECK(lessThan(partition_range_boundaries_[part_id - 1], partition_range_boundaries_[part_id])) + << "Partition boundaries are not in ascending order."; + } + } + + /** + * @brief Check if the partition values are in the lexicographical order. + * + * @note (l_0, l_1, ..., l_n) < (r_0, r_1, ..., r_n) iff l_0 < r_0, or + * (l_0 == r_0) && (l_1, ..., l_n) < (r_1, ..., r_n). + **/ + bool lessThan(const PartitionValues &lhs, const PartitionValues &rhs) const { + DCHECK_EQ(partition_attribute_ids_.size(), lhs.size()); + DCHECK_EQ(partition_attribute_ids_.size(), rhs.size()); + + for (std::size_t attr_index = 0; attr_index < partition_attribute_ids_.size(); ++attr_index) { + if (less_unchecked_comparators_[attr_index]->compareTypedValues(lhs[attr_index], rhs[attr_index])) { + break; + } else if (equal_unchecked_comparators_[attr_index]->compareTypedValues(lhs[attr_index], rhs[attr_index])) { + if (attr_index == partition_attribute_ids_.size() - 1) { + return false; + } + } else { + return false; } } + + return true; } - const Type* partition_attr_type_; + // The size is equal to 'partition_attribute_ids_.size()'. + const std::vector<const Type*> partition_attr_types_; // The boundaries for each range in the RangePartitionSchemeHeader. // The upper bound of the range is stored here. - const std::vector<TypedValue> partition_range_boundaries_; + const std::vector<PartitionValues> partition_range_boundaries_; - std::unique_ptr<UncheckedComparator> less_unchecked_comparator_; + // Both size are equal to 'partition_attr_types_.size()'. + std::vector<std::unique_ptr<UncheckedComparator>> less_unchecked_comparators_; + std::vector<std::unique_ptr<UncheckedComparator>> equal_unchecked_comparators_; DISALLOW_COPY_AND_ASSIGN(RangePartitionSchemeHeader); }; http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/05b47b5a/catalog/tests/NUMAPlacementScheme_unittest.cpp ---------------------------------------------------------------------- diff --git a/catalog/tests/NUMAPlacementScheme_unittest.cpp b/catalog/tests/NUMAPlacementScheme_unittest.cpp index 6a3b32f..7d13941 100644 --- a/catalog/tests/NUMAPlacementScheme_unittest.cpp +++ b/catalog/tests/NUMAPlacementScheme_unittest.cpp @@ -82,7 +82,7 @@ TEST(NUMAPlacementSchemeTest, NUMAPlacementSchemeSerializationTest) { // Create a HashPartitionSchemeHeader object with 64 partitions and attribute // 0 as the partitioning attribute. std::unique_ptr<PartitionSchemeHeader> partition_scheme_header( - new HashPartitionSchemeHeader(num_partitions, 0)); + new HashPartitionSchemeHeader(num_partitions, { 0 })); // Create a NUMAPlacementScheme object with the num_partitions. std::unique_ptr<NUMAPlacementScheme> placement_scheme( http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/05b47b5a/catalog/tests/PartitionScheme_unittest.cpp ---------------------------------------------------------------------- diff --git a/catalog/tests/PartitionScheme_unittest.cpp b/catalog/tests/PartitionScheme_unittest.cpp index 97a9092..c4f7dec 100644 --- a/catalog/tests/PartitionScheme_unittest.cpp +++ b/catalog/tests/PartitionScheme_unittest.cpp @@ -35,22 +35,22 @@ #include "types/TypedValue.hpp" #include "types/operations/comparisons/Comparison.hpp" #include "types/operations/comparisons/EqualComparison.hpp" +#include "utility/CompositeHash.hpp" #include "gtest/gtest.h" using std::move; using std::size_t; +using std::vector; namespace quickstep { -class Type; - TEST(PartitionSchemeHeaderTest, IntegerHashPartitionSchemeHeaderTest) { const std::size_t num_partitions = 4; std::unique_ptr<PartitionSchemeHeader> partition_scheme_header( - new HashPartitionSchemeHeader(num_partitions, 0)); + new HashPartitionSchemeHeader(num_partitions, { 0 })); EXPECT_EQ(num_partitions, partition_scheme_header->getNumPartitions()); - EXPECT_EQ(0, partition_scheme_header->getPartitionAttributeId()); + EXPECT_EQ(0, partition_scheme_header->getPartitionAttributeIds().front()); const int kSampleInts[] = { 0, 1, 2, 3, 400, 501, 64783970, 78437883, -2784627}; const size_t num_ints = sizeof(kSampleInts) / sizeof(kSampleInts[0]); @@ -59,16 +59,16 @@ TEST(PartitionSchemeHeaderTest, IntegerHashPartitionSchemeHeaderTest) { // an integer is the same as the hash of the integer modulus the number // of partitions. EXPECT_EQ(TypedValue(kSampleInts[i]).getHash() % num_partitions, - partition_scheme_header->getPartitionId(TypedValue(kSampleInts[i]))); + partition_scheme_header->getPartitionId({ TypedValue(kSampleInts[i]) })); } } TEST(PartitionSchemeHeaderTest, LongHashPartitionSchemeHeaderTest) { const std::size_t num_partitions = 8; std::unique_ptr<PartitionSchemeHeader> partition_scheme_header( - new HashPartitionSchemeHeader(num_partitions, 0)); + new HashPartitionSchemeHeader(num_partitions, { 0 })); EXPECT_EQ(num_partitions, partition_scheme_header->getNumPartitions()); - EXPECT_EQ(0, partition_scheme_header->getPartitionAttributeId()); + EXPECT_EQ(0, partition_scheme_header->getPartitionAttributeIds().front()); const std::int64_t kSampleLongs[] = {INT64_C(10), INT64_C(100), INT64_C(1025), @@ -80,16 +80,16 @@ TEST(PartitionSchemeHeaderTest, LongHashPartitionSchemeHeaderTest) { // of partitions. for (size_t i = 0; i < num_longs; ++i) { EXPECT_EQ(TypedValue(kSampleLongs[i]).getHash() % num_partitions, - partition_scheme_header->getPartitionId(TypedValue(kSampleLongs[i]))); + partition_scheme_header->getPartitionId({ TypedValue(kSampleLongs[i]) })); } } TEST(PartitionSchemeHeaderTest, FloatHashPartitionSchemeHeaderTest) { const std::size_t num_partitions = 5; std::unique_ptr<PartitionSchemeHeader> partition_scheme_header( - new HashPartitionSchemeHeader(num_partitions, 0)); + new HashPartitionSchemeHeader(num_partitions, { 0 })); EXPECT_EQ(num_partitions, partition_scheme_header->getNumPartitions()); - EXPECT_EQ(0, partition_scheme_header->getPartitionAttributeId()); + EXPECT_EQ(0, partition_scheme_header->getPartitionAttributeIds().front()); const float kSampleFloats[] = { 285728.895680f, 924005.4989f, -8973494.37438f}; const size_t num_floats = sizeof(kSampleFloats) / sizeof(kSampleFloats[0]); @@ -98,16 +98,16 @@ TEST(PartitionSchemeHeaderTest, FloatHashPartitionSchemeHeaderTest) { // the number of partitions. for (size_t i = 0; i < num_floats; ++i) { EXPECT_EQ(TypedValue(kSampleFloats[i]).getHash() % num_partitions, - partition_scheme_header->getPartitionId(TypedValue(kSampleFloats[i]))); + partition_scheme_header->getPartitionId({ TypedValue(kSampleFloats[i]) })); } } TEST(PartitionSchemeHeaderTest, DoubleHashPartitionSchemeHeaderTest) { const std::size_t num_partitions = 6; std::unique_ptr<PartitionSchemeHeader> partition_scheme_header( - new HashPartitionSchemeHeader(num_partitions, 0)); + new HashPartitionSchemeHeader(num_partitions, { 0 })); EXPECT_EQ(num_partitions, partition_scheme_header->getNumPartitions()); - EXPECT_EQ(0, partition_scheme_header->getPartitionAttributeId()); + EXPECT_EQ(0, partition_scheme_header->getPartitionAttributeIds().front()); const double kSampleDoubles[] = { 1.0378, 763624.46343453, -87238497384.3187431894713}; const size_t num_doubles = sizeof(kSampleDoubles) / sizeof(kSampleDoubles[0]); @@ -117,16 +117,16 @@ TEST(PartitionSchemeHeaderTest, DoubleHashPartitionSchemeHeaderTest) { for (size_t i = 0; i < num_doubles; ++i) { EXPECT_EQ( TypedValue(kSampleDoubles[i]).getHash() % num_partitions, - partition_scheme_header->getPartitionId(TypedValue(kSampleDoubles[i]))); + partition_scheme_header->getPartitionId({ TypedValue(kSampleDoubles[i]) })); } } TEST(PartitionSchemeHeaderTest, CharacterHashPartitionSchemeHeaderTest) { const std::size_t num_partitions = 7; std::unique_ptr<PartitionSchemeHeader> partition_scheme_header( - new HashPartitionSchemeHeader(num_partitions, 0)); + new HashPartitionSchemeHeader(num_partitions, { 0 })); EXPECT_EQ(num_partitions, partition_scheme_header->getNumPartitions()); - EXPECT_EQ(0, partition_scheme_header->getPartitionAttributeId()); + EXPECT_EQ(0, partition_scheme_header->getPartitionAttributeIds().front()); const char *kSampleStrings[] = { "a", "gerald", "ram", "3081289", "=42?", "+-/*&^%", "hello_world"}; const size_t num_strings = sizeof(kSampleStrings) / sizeof(kSampleStrings[0]); @@ -139,17 +139,17 @@ TEST(PartitionSchemeHeaderTest, CharacterHashPartitionSchemeHeaderTest) { kChar, kSampleStrings[i], std::strlen(kSampleStrings[i]) + 1) .getHash() % num_partitions, - partition_scheme_header->getPartitionId(TypedValue( - kChar, kSampleStrings[i], std::strlen(kSampleStrings[i]) + 1))); + partition_scheme_header->getPartitionId({ TypedValue( + kChar, kSampleStrings[i], std::strlen(kSampleStrings[i]) + 1) })); } } TEST(PartitionSchemeHeaderTest, VarCharHashPartitionSchemeHeaderTest) { const std::size_t num_partitions = 7; std::unique_ptr<PartitionSchemeHeader> partition_scheme_header( - new HashPartitionSchemeHeader(num_partitions, 0)); + new HashPartitionSchemeHeader(num_partitions, { 0 })); EXPECT_EQ(num_partitions, partition_scheme_header->getNumPartitions()); - EXPECT_EQ(0, partition_scheme_header->getPartitionAttributeId()); + EXPECT_EQ(0, partition_scheme_header->getPartitionAttributeIds().front()); const char *kSampleStrings[] = { "hello", "world", "1234567", "!@#$^&*", "pa345+="}; const size_t num_strings = sizeof(kSampleStrings) / sizeof(kSampleStrings[0]); @@ -163,131 +163,153 @@ TEST(PartitionSchemeHeaderTest, VarCharHashPartitionSchemeHeaderTest) { .getHash() % num_partitions, partition_scheme_header->getPartitionId( - TypedValue(kVarChar, - kSampleStrings[i], - std::strlen(kSampleStrings[i]) + 1))); + { TypedValue(kVarChar, kSampleStrings[i], std::strlen(kSampleStrings[i]) + 1) })); + } +} + +TEST(PartitionSchemeHeaderTest, MultiAttributeHashPartitionSchemeHeaderTest) { + const std::size_t num_partitions = 4; + constexpr attribute_id kPartitioningFirstAttribute = 0; + constexpr attribute_id kPartitioningLastAttribute = 2; + std::unique_ptr<PartitionSchemeHeader> partition_scheme_header( + new HashPartitionSchemeHeader(num_partitions, { kPartitioningFirstAttribute, kPartitioningLastAttribute })); + EXPECT_EQ(num_partitions, partition_scheme_header->getNumPartitions()); + EXPECT_EQ(kPartitioningFirstAttribute, partition_scheme_header->getPartitionAttributeIds().front()); + EXPECT_EQ(kPartitioningLastAttribute, partition_scheme_header->getPartitionAttributeIds().back()); + const int kSampleInts[] = { + 0, 78437883, -2784627}; + const double kSampleDoubles[] = { + 1.0378, 763624.46343453, -87238497384.3187431894713}; + const size_t num_ints = sizeof(kSampleInts) / sizeof(kSampleInts[0]); + for (size_t i = 0; i < num_ints; ++i) { + const PartitionSchemeHeader::PartitionValues values = + { TypedValue(kSampleInts[i]), TypedValue(kSampleDoubles[i]) }; + // Check if the partition id returned by the partition scheme for + // an integer is the same as the hash of the integer modulus the number + // of partitions. + EXPECT_EQ(HashCompositeKey(values) % num_partitions, + partition_scheme_header->getPartitionId(values)); } } TEST(PartitionSchemeHeaderTest, IntegerRangePartitionSchemeHeaderTest) { - std::vector<TypedValue> partition_range; + vector<PartitionSchemeHeader::PartitionValues> partition_ranges; // Partition boundaries are 0, 10, 20. // Last partition can hold upto infinity. // First partition can hold from -infinity to -1. for (int i = 0; i < 3; ++i) { - partition_range.push_back(TypedValue(i * 10)); + partition_ranges.push_back({ TypedValue(i * 10) }); } std::unique_ptr<PartitionSchemeHeader> partition_scheme_header( - new RangePartitionSchemeHeader(TypeFactory::GetType(kInt), 4, 0, move(partition_range))); + new RangePartitionSchemeHeader(4, { 0 }, { &TypeFactory::GetType(kInt) }, move(partition_ranges))); EXPECT_EQ(4u, partition_scheme_header->getNumPartitions()); // Check if the partition id returned by the Range Partition Scheme for // integers is the same as the partition id into which it is supposed to // be based on the partition boundaries that we have defined. - EXPECT_EQ(0, partition_scheme_header->getPartitionAttributeId()); - EXPECT_EQ(1u, partition_scheme_header->getPartitionId(TypedValue(0))); - EXPECT_EQ(2u, partition_scheme_header->getPartitionId(TypedValue(10))); - EXPECT_EQ(3u, partition_scheme_header->getPartitionId(TypedValue(20))); - EXPECT_EQ(3u, partition_scheme_header->getPartitionId(TypedValue(30))); - EXPECT_EQ(0u, partition_scheme_header->getPartitionId(TypedValue(-4))); - EXPECT_EQ(2u, partition_scheme_header->getPartitionId(TypedValue(15))); - EXPECT_EQ(1u, partition_scheme_header->getPartitionId(TypedValue(6))); - EXPECT_EQ(0u, partition_scheme_header->getPartitionId(TypedValue(-70))); - EXPECT_EQ(3u, partition_scheme_header->getPartitionId(TypedValue(1000))); - EXPECT_EQ(3u, partition_scheme_header->getPartitionId(TypedValue(20000))); + EXPECT_EQ(0, partition_scheme_header->getPartitionAttributeIds().front()); + + EXPECT_EQ(1u, partition_scheme_header->getPartitionId({ TypedValue(0) })); + EXPECT_EQ(2u, partition_scheme_header->getPartitionId({ TypedValue(10) })); + EXPECT_EQ(3u, partition_scheme_header->getPartitionId({ TypedValue(20) })); + EXPECT_EQ(3u, partition_scheme_header->getPartitionId({ TypedValue(30) })); + EXPECT_EQ(0u, partition_scheme_header->getPartitionId({ TypedValue(-4) })); + EXPECT_EQ(2u, partition_scheme_header->getPartitionId({ TypedValue(15) })); + EXPECT_EQ(1u, partition_scheme_header->getPartitionId({ TypedValue(6) })); + EXPECT_EQ(0u, partition_scheme_header->getPartitionId({ TypedValue(-70) })); + EXPECT_EQ(3u, partition_scheme_header->getPartitionId({ TypedValue(1000) })); + EXPECT_EQ(3u, partition_scheme_header->getPartitionId({ TypedValue(20000) })); } TEST(PartitionSchemeHeaderTest, LongRangePartitionSchemeHeaderTest) { - std::vector<TypedValue> partition_range; + vector<PartitionSchemeHeader::PartitionValues> partition_ranges; // Partition boundaries are 0, 10000, 20000, 30000 for (int i = 0; i < 3; ++i) { - partition_range.push_back(TypedValue(i * INT64_C(10000))); + partition_ranges.push_back({ TypedValue(i * INT64_C(10000)) }); } std::unique_ptr<PartitionSchemeHeader> partition_scheme_header( - new RangePartitionSchemeHeader(TypeFactory::GetType(kLong), 4, 0, move(partition_range))); + new RangePartitionSchemeHeader(4, { 0 }, { &TypeFactory::GetType(kLong) }, move(partition_ranges))); EXPECT_EQ(4u, partition_scheme_header->getNumPartitions()); - EXPECT_EQ(0, partition_scheme_header->getPartitionAttributeId()); + EXPECT_EQ(0, partition_scheme_header->getPartitionAttributeIds().front()); // Check if the partition id returned by the Range Partition Scheme for // long numbers is the same as the partition id into which it is supposed to // be based on the partition boundaries that we have defined. - EXPECT_EQ(1u, partition_scheme_header->getPartitionId(TypedValue(INT64_C(0)))); - EXPECT_EQ(2u, partition_scheme_header->getPartitionId(TypedValue(INT64_C(13456)))); - EXPECT_EQ(3u, partition_scheme_header->getPartitionId(TypedValue(INT64_C(20000)))); - EXPECT_EQ(3u, partition_scheme_header->getPartitionId(TypedValue(INT64_C(300123)))); + EXPECT_EQ(1u, partition_scheme_header->getPartitionId({ TypedValue(INT64_C(0)) })); + EXPECT_EQ(2u, partition_scheme_header->getPartitionId({ TypedValue(INT64_C(13456)) })); + EXPECT_EQ(3u, partition_scheme_header->getPartitionId({ TypedValue(INT64_C(20000)) })); + EXPECT_EQ(3u, partition_scheme_header->getPartitionId({ TypedValue(INT64_C(300123)) })); EXPECT_EQ(0u, - partition_scheme_header->getPartitionId(TypedValue(INT64_C(-400000)))); - EXPECT_EQ(2u, partition_scheme_header->getPartitionId(TypedValue(INT64_C(15123)))); - EXPECT_EQ(1u, partition_scheme_header->getPartitionId(TypedValue(INT64_C(6012)))); + partition_scheme_header->getPartitionId({ TypedValue(INT64_C(-400000)) })); + EXPECT_EQ(2u, partition_scheme_header->getPartitionId({ TypedValue(INT64_C(15123)) })); + EXPECT_EQ(1u, partition_scheme_header->getPartitionId({ TypedValue(INT64_C(6012)) })); EXPECT_EQ(0u, - partition_scheme_header->getPartitionId(TypedValue(INT64_C(-7000000)))); + partition_scheme_header->getPartitionId({ TypedValue(INT64_C(-7000000)) })); } TEST(PartitionSchemeHeaderTest, FloatRangePartitionSchemeHeaderTest) { - std::vector<TypedValue> partition_range; + vector<PartitionSchemeHeader::PartitionValues> partition_ranges; // Partition boundaries are 0.0, 10.0, 20.0 for (int i = 0; i < 3; ++i) { - partition_range.push_back(TypedValue(i * 10.0f)); + partition_ranges.push_back({ TypedValue(i * 10.0f) }); } std::unique_ptr<PartitionSchemeHeader> partition_scheme_header( - new RangePartitionSchemeHeader(TypeFactory::GetType(kFloat), 4, 0, move(partition_range))); + new RangePartitionSchemeHeader(4, { 0 }, { &TypeFactory::GetType(kFloat) }, move(partition_ranges))); EXPECT_EQ(4u, partition_scheme_header->getNumPartitions()); - EXPECT_EQ(0, partition_scheme_header->getPartitionAttributeId()); + EXPECT_EQ(0, partition_scheme_header->getPartitionAttributeIds().front()); // Check if the partition id returned by the Range Partition Scheme for // floats is the same as the partition id into which it is supposed to // be based on the partition boundaries that we have defined. - EXPECT_EQ(1u, partition_scheme_header->getPartitionId(TypedValue(0.1f))); - EXPECT_EQ(2u, partition_scheme_header->getPartitionId(TypedValue(10.00000000f))); - EXPECT_EQ(3u, partition_scheme_header->getPartitionId(TypedValue(20.23f))); - EXPECT_EQ(3u, partition_scheme_header->getPartitionId(TypedValue(30.56f))); - EXPECT_EQ(0u, partition_scheme_header->getPartitionId(TypedValue(-4.5f))); - EXPECT_EQ(2u, partition_scheme_header->getPartitionId(TypedValue(15.034f))); - EXPECT_EQ(1u, partition_scheme_header->getPartitionId(TypedValue(6.987f))); - EXPECT_EQ(0u, partition_scheme_header->getPartitionId(TypedValue(-70.384f))); + EXPECT_EQ(1u, partition_scheme_header->getPartitionId({ TypedValue(0.1f) })); + EXPECT_EQ(2u, partition_scheme_header->getPartitionId({ TypedValue(10.00000000f) })); + EXPECT_EQ(3u, partition_scheme_header->getPartitionId({ TypedValue(20.23f) })); + EXPECT_EQ(3u, partition_scheme_header->getPartitionId({ TypedValue(30.56f) })); + EXPECT_EQ(0u, partition_scheme_header->getPartitionId({ TypedValue(-4.5f) })); + EXPECT_EQ(2u, partition_scheme_header->getPartitionId({ TypedValue(15.034f) })); + EXPECT_EQ(1u, partition_scheme_header->getPartitionId({ TypedValue(6.987f) })); + EXPECT_EQ(0u, partition_scheme_header->getPartitionId({ TypedValue(-70.384f) })); } TEST(PartitionSchemeHeaderTest, DoubleRangePartitionSchemeHeaderTest) { - std::vector<TypedValue> partition_range; + vector<PartitionSchemeHeader::PartitionValues> partition_ranges; // Partition boundaries are 0.00000, 10.00000, 20.00000 for (int i = 0; i < 3; ++i) { - partition_range.push_back(TypedValue(i * 10.00000)); + partition_ranges.push_back({ TypedValue(i * 10.00000) }); } std::unique_ptr<PartitionSchemeHeader> partition_scheme_header( - new RangePartitionSchemeHeader(TypeFactory::GetType(kDouble), 4, 0, move(partition_range))); + new RangePartitionSchemeHeader(4, { 0 }, { &TypeFactory::GetType(kDouble) }, move(partition_ranges))); EXPECT_EQ(4u, partition_scheme_header->getNumPartitions()); - EXPECT_EQ(0, partition_scheme_header->getPartitionAttributeId()); + EXPECT_EQ(0, partition_scheme_header->getPartitionAttributeIds().front()); // Check if the partition id returned by the Range Partition Scheme for // doubles is the same as the partition id into which it is supposed to // be based on the partition boundaries that we have defined. - EXPECT_EQ(1u, partition_scheme_header->getPartitionId(TypedValue(0.1897438974))); + EXPECT_EQ(1u, partition_scheme_header->getPartitionId({ TypedValue(0.1897438974) })); EXPECT_EQ(2u, - partition_scheme_header->getPartitionId(TypedValue(10.00000000287489))); + partition_scheme_header->getPartitionId({ TypedValue(10.00000000287489) })); EXPECT_EQ(3u, - partition_scheme_header->getPartitionId(TypedValue(20.23249859403750))); - EXPECT_EQ(3u, partition_scheme_header->getPartitionId(TypedValue(30.567866347563))); + partition_scheme_header->getPartitionId({ TypedValue(20.23249859403750) })); + EXPECT_EQ(3u, partition_scheme_header->getPartitionId({ TypedValue(30.567866347563) })); EXPECT_EQ(0u, - partition_scheme_header->getPartitionId(TypedValue(-4.57583978935689))); + partition_scheme_header->getPartitionId({ TypedValue(-4.57583978935689) })); EXPECT_EQ(2u, - partition_scheme_header->getPartitionId(TypedValue(15.034248758978936))); - EXPECT_EQ(1u, partition_scheme_header->getPartitionId(TypedValue(6.98792489))); + partition_scheme_header->getPartitionId({ TypedValue(15.034248758978936) })); + EXPECT_EQ(1u, partition_scheme_header->getPartitionId({ TypedValue(6.98792489) })); EXPECT_EQ( - 0u, partition_scheme_header->getPartitionId(TypedValue(-70.38454985893768738))); + 0u, partition_scheme_header->getPartitionId({ TypedValue(-70.38454985893768738) })); } TEST(PartitionSchemeHeaderTest, CharacterRangePartitionSchemeHeaderTest) { - std::vector<TypedValue> partition_range; + vector<PartitionSchemeHeader::PartitionValues> partition_ranges; // Partition boundaries are the following 3 characters. const char *kRangeBoundaryStrings[] = {"don", "hippo", "pattasu"}; const size_t num_boundaries = sizeof(kRangeBoundaryStrings) / sizeof(kRangeBoundaryStrings[0]); for (size_t i = 0; i < num_boundaries; ++i) { - partition_range.push_back( - TypedValue(kChar, - kRangeBoundaryStrings[i], - std::strlen(kRangeBoundaryStrings[i]) + 1)); + partition_ranges.push_back( + { TypedValue(kChar, kRangeBoundaryStrings[i], std::strlen(kRangeBoundaryStrings[i]) + 1) }); } std::unique_ptr<PartitionSchemeHeader> partition_scheme_header( - new RangePartitionSchemeHeader(TypeFactory::GetType(kChar, 20, false), 4, 0, move(partition_range))); + new RangePartitionSchemeHeader(4, { 0 }, { &TypeFactory::GetType(kChar, 20, false) }, move(partition_ranges))); EXPECT_EQ(4u, partition_scheme_header->getNumPartitions()); - EXPECT_EQ(0, partition_scheme_header->getPartitionAttributeId()); + EXPECT_EQ(0, partition_scheme_header->getPartitionAttributeIds().front()); const char *kSampleStrings[] = {"amma", "ganesh", "e", @@ -306,27 +328,25 @@ TEST(PartitionSchemeHeaderTest, CharacterRangePartitionSchemeHeaderTest) { for (size_t i = 0; i < num_strings; ++i) { EXPECT_EQ( kExpectedPartitions[i], - partition_scheme_header->getPartitionId(TypedValue( - kChar, kSampleStrings[i], std::strlen(kSampleStrings[i]) + 1))); + partition_scheme_header->getPartitionId({ TypedValue( + kChar, kSampleStrings[i], std::strlen(kSampleStrings[i]) + 1) })); } } TEST(PartitionSchemeHeaderTest, VarCharRangePartitionSchemeHeaderTest) { - std::vector<TypedValue> partition_range; + vector<PartitionSchemeHeader::PartitionValues> partition_ranges; // Partition boundaries are the following 3 strings. const char *kRangeBoundaryStrings[] = { "elephant", "jamaica", "zorgonz"}; const size_t num_boundaries = sizeof(kRangeBoundaryStrings) / sizeof(kRangeBoundaryStrings[0]); for (size_t i = 0; i < num_boundaries; ++i) { - partition_range.push_back( - TypedValue(kVarChar, - kRangeBoundaryStrings[i], - std::strlen(kRangeBoundaryStrings[i]) + 1)); + partition_ranges.push_back( + { TypedValue(kVarChar, kRangeBoundaryStrings[i], std::strlen(kRangeBoundaryStrings[i]) + 1) }); } std::unique_ptr<PartitionSchemeHeader> partition_scheme_header( - new RangePartitionSchemeHeader(TypeFactory::GetType(kVarChar, 20, false), 4, 0, move(partition_range))); + new RangePartitionSchemeHeader(4, { 0 }, { &TypeFactory::GetType(kVarChar, 20, false) }, move(partition_ranges))); EXPECT_EQ(4u, partition_scheme_header->getNumPartitions()); - EXPECT_EQ(0, partition_scheme_header->getPartitionAttributeId()); + EXPECT_EQ(0, partition_scheme_header->getPartitionAttributeIds().front()); const char *kSampleStrings[] = {"apple", "halloween", "mango", @@ -343,15 +363,43 @@ TEST(PartitionSchemeHeaderTest, VarCharRangePartitionSchemeHeaderTest) { for (size_t i = 0; i < num_strings; ++i) { EXPECT_EQ(kExpectedPartitions[i], partition_scheme_header->getPartitionId( - TypedValue(kVarChar, - kSampleStrings[i], - std::strlen(kSampleStrings[i]) + 1))); + { TypedValue(kVarChar, kSampleStrings[i], std::strlen(kSampleStrings[i]) + 1) })); + } +} + +TEST(PartitionSchemeHeaderTest, MultiAttributeRangePartitionSchemeHeaderTest) { + vector<PartitionSchemeHeader::PartitionValues> partition_ranges; + // Partition boundaries are { 0, 0.00000 }, { 10, 10.0000 }, { 20, 20.00000 } + // Last partition can hold upto infinity. + // First partition can hold from { -infinity, -infinity } to { 0, -2^(-1074) }. + for (int i = 0; i < 3; ++i) { + partition_ranges.push_back({ TypedValue(i * 10), TypedValue(i * 10.00000) }); } + + constexpr attribute_id kPartitioningFirstAttribute = 0; + constexpr attribute_id kPartitioningLastAttribute = 2; + std::unique_ptr<PartitionSchemeHeader> partition_scheme_header( + new RangePartitionSchemeHeader(4, { kPartitioningFirstAttribute, kPartitioningLastAttribute }, + { &TypeFactory::GetType(kInt), &TypeFactory::GetType(kDouble) }, + move(partition_ranges))); + EXPECT_EQ(4u, partition_scheme_header->getNumPartitions()); + EXPECT_EQ(kPartitioningFirstAttribute, partition_scheme_header->getPartitionAttributeIds().front()); + EXPECT_EQ(kPartitioningLastAttribute, partition_scheme_header->getPartitionAttributeIds().back()); + + // Check if the partition id returned by the Range Partition Scheme for + // { int, double } is the same as the partition id into which it is supposed + // to be based on the partition boundaries that we have defined. + EXPECT_EQ(0u, partition_scheme_header->getPartitionId({ TypedValue(-70), TypedValue(30.567866347563) })); + EXPECT_EQ(0u, partition_scheme_header->getPartitionId({ TypedValue(0), TypedValue(-4.57583978935689) })); + EXPECT_EQ(1u, partition_scheme_header->getPartitionId({ TypedValue(6), TypedValue(15.034248758978936) })); + EXPECT_EQ(2u, partition_scheme_header->getPartitionId({ TypedValue(10), TypedValue(10.00000000287489) })); + EXPECT_EQ(3u, partition_scheme_header->getPartitionId({ TypedValue(20), TypedValue(20.23249859403750) })); + EXPECT_EQ(3u, partition_scheme_header->getPartitionId({ TypedValue(300), TypedValue(-70.38454985893768738) })); } TEST(PartitionSchemeTest, AddBlocksToPartitionTest) { std::unique_ptr<PartitionScheme> partition_scheme( - new PartitionScheme(new HashPartitionSchemeHeader(4, 0))); + new PartitionScheme(new HashPartitionSchemeHeader(4, { 0 }))); for (int i = 0; i < 10; ++i) { partition_scheme->addBlockToPartition(i, i % 4); } @@ -367,7 +415,7 @@ TEST(PartitionSchemeTest, AddBlocksToPartitionTest) { partition_scheme->getBlocksInPartition(3); EXPECT_EQ(4u, partition_scheme->getPartitionSchemeHeader().getNumPartitions()); - EXPECT_EQ(0, partition_scheme->getPartitionSchemeHeader().getPartitionAttributeId()); + EXPECT_EQ(0, partition_scheme->getPartitionSchemeHeader().getPartitionAttributeIds().front()); // Check if the blocks are present in the partitions that we // expect them to be based on where we inserted them. @@ -420,13 +468,13 @@ TEST(PartitionSchemeTest, AddBlocksToPartitionTest) { TEST(PartitionSchemeTest, RemoveBlocksFromPartitionTest) { std::unique_ptr<PartitionScheme> partition_scheme( - new PartitionScheme(new HashPartitionSchemeHeader(4, 0))); + new PartitionScheme(new HashPartitionSchemeHeader(4, { 0 }))); for (int i = 0; i < 10; ++i) { partition_scheme->addBlockToPartition(i, i % 4); } EXPECT_EQ(4u, partition_scheme->getPartitionSchemeHeader().getNumPartitions()); - EXPECT_EQ(0, partition_scheme->getPartitionSchemeHeader().getPartitionAttributeId()); + EXPECT_EQ(0, partition_scheme->getPartitionSchemeHeader().getPartitionAttributeIds().front()); // remove block 0 from partition 0 partition_scheme->removeBlockFromPartition(0, 0); const std::vector<block_id> blocks_in_partition_zero = @@ -504,7 +552,7 @@ TEST(PartitionSchemeTest, RemoveBlocksFromPartitionTest) { TEST(PartitionSchemeTest, CheckHashPartitionSchemeSerialization) { const std::size_t num_partitions = 4; std::unique_ptr<PartitionScheme> part_scheme( - new PartitionScheme(new HashPartitionSchemeHeader(num_partitions, 0))); + new PartitionScheme(new HashPartitionSchemeHeader(num_partitions, { 0 }))); // Add some blocks to each partition. for (int i = 0; i < 10; ++i) { part_scheme->addBlockToPartition(i, i % num_partitions); @@ -523,8 +571,8 @@ TEST(PartitionSchemeTest, CheckHashPartitionSchemeSerialization) { EXPECT_EQ(header.getNumPartitions(), header_from_proto.getNumPartitions()); // Check the partition attribute id - EXPECT_EQ(header.getPartitionAttributeId(), - header_from_proto.getPartitionAttributeId()); + EXPECT_EQ(header.getPartitionAttributeIds(), + header_from_proto.getPartitionAttributeIds()); // Check the block in each partition for (partition_id part_id = 0; part_id < num_partitions; ++part_id) { // Collect the blocks from C++ Partition Scheme object. @@ -542,75 +590,8 @@ TEST(PartitionSchemeTest, CheckHashPartitionSchemeSerialization) { } } -TEST(PartitionSchemeTest, CheckRangePartitionSchemeSerialization) { - const Type &type = TypeFactory::GetType(kInt); - const std::size_t num_partitions = 4; - std::vector<TypedValue> partition_range; - // Partition boundaries are 0, 10, 20. - // Last partition can hold upto infinity. - // First partition can hold from -infinity to -1. - for (std::size_t i = 0; i < num_partitions - 1; ++i) { - partition_range.push_back(TypedValue(static_cast<int>(i * 10))); - } - std::unique_ptr<PartitionScheme> part_scheme( - new PartitionScheme( - new RangePartitionSchemeHeader(type, num_partitions, 0, move(partition_range)))); - for (int i = 0; i < 10; ++i) { - part_scheme->addBlockToPartition(i * 5, i % num_partitions); - } - std::unique_ptr<PartitionScheme> part_scheme_from_proto; - - part_scheme_from_proto.reset( - PartitionScheme::ReconstructFromProto(part_scheme->getProto())); - - const PartitionSchemeHeader &header = part_scheme->getPartitionSchemeHeader(); - const PartitionSchemeHeader &header_from_proto = part_scheme_from_proto->getPartitionSchemeHeader(); - - // Check the partition type - EXPECT_EQ(header.getPartitionType(), - header_from_proto.getPartitionType()); - - // Check number of partitions - EXPECT_EQ(header.getNumPartitions(), - header_from_proto.getNumPartitions()); - - // Check the partition attribute id - EXPECT_EQ(header.getPartitionAttributeId(), - header_from_proto.getPartitionAttributeId()); - - // Check the partition range boundaries' size. - const std::vector<TypedValue> &range_boundaries_part_scheme = - static_cast<const RangePartitionSchemeHeader&>(header).getPartitionRangeBoundaries(); - const std::vector<TypedValue> &range_boundaries_part_scheme_from_proto = - static_cast<const RangePartitionSchemeHeader&>(header_from_proto).getPartitionRangeBoundaries(); - EXPECT_EQ(range_boundaries_part_scheme.size(), - range_boundaries_part_scheme_from_proto.size()); - - // Check the partition range boundaries' values. - const Comparison &equal_comparison_op(EqualComparison::Instance()); - std::unique_ptr<UncheckedComparator> equal_unchecked_comparator; - equal_unchecked_comparator.reset( - equal_comparison_op.makeUncheckedComparatorForTypes( - TypeFactory::GetType(kInt), TypeFactory::GetType(kInt))); - for (std::size_t i = 0; i < range_boundaries_part_scheme.size(); ++i) { - EXPECT_TRUE(equal_unchecked_comparator->compareTypedValues( - range_boundaries_part_scheme[i], - range_boundaries_part_scheme_from_proto[i])); - } - - // Check the blocks in each partition from both the Partition Scheme's - // C++ object and protocol buffer. - for (partition_id part_id = 0; part_id < num_partitions; ++part_id) { - std::vector<block_id> blocks_in_part_scheme = - part_scheme->getBlocksInPartition(part_id); - std::vector<block_id> blocks_in_part_scheme_from_proto = - part_scheme_from_proto->getBlocksInPartition(part_id); - std::sort(blocks_in_part_scheme.begin(), blocks_in_part_scheme.end()); - std::sort(blocks_in_part_scheme_from_proto.begin(), - blocks_in_part_scheme_from_proto.end()); - EXPECT_EQ(blocks_in_part_scheme, blocks_in_part_scheme_from_proto); - } -} +// TODO(quickstep-team): Add back CheckRangePartitionSchemeSerialization test +// due to QUICKSTEP-86. TEST(PartitionSchemeTest, CheckBlocksInPartitionTest) { std::unique_ptr<PartitionScheme> partition_scheme; @@ -619,7 +600,7 @@ TEST(PartitionSchemeTest, CheckBlocksInPartitionTest) { constexpr attribute_id kPartitioningAttribute = 0; // Create a partition scheme object. partition_scheme.reset( - new PartitionScheme(new HashPartitionSchemeHeader(kNumPartitions, kPartitioningAttribute))); + new PartitionScheme(new HashPartitionSchemeHeader(kNumPartitions, { kPartitioningAttribute }))); // Add blocks to different partitions. for (std::size_t block_id = 0; block_id < kNumBlocks; ++block_id) { partition_scheme->addBlockToPartition(block_id, @@ -630,7 +611,7 @@ TEST(PartitionSchemeTest, CheckBlocksInPartitionTest) { // Check the number of partitions and the partitioning attribute. EXPECT_EQ(kNumPartitions, header.getNumPartitions()); - EXPECT_EQ(kPartitioningAttribute, header.getPartitionAttributeId()); + EXPECT_EQ(kPartitioningAttribute, header.getPartitionAttributeIds().front()); // Check if the blocks are correctly assigned to its partitions. EXPECT_EQ(0u, partition_scheme->getPartitionForBlock(0)); http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/05b47b5a/parser/tests/Create.test ---------------------------------------------------------------------- diff --git a/parser/tests/Create.test b/parser/tests/Create.test index eee44a6..8c11054 100644 --- a/parser/tests/Create.test +++ b/parser/tests/Create.test @@ -209,6 +209,44 @@ CREATE TABLE test (attr INT) PARTITIONS 4 ^ == +# Duplicate partition attributes. +CREATE TABLE test (attr INT) PARTITION BY HASH(attr, attr) PARTITIONS 4 +-- +CreateTableStatement[relation_name=test] ++-attribute_list= +| +-AttributeDefinition[name=attr,type=Int] ++-partition_clause= + +-PartitionClause[partition_type=hash] + +-Number of Partitions=NumericLiteral[numeric_string=4,float_like=false] + +-attribute_name_list= + +-String[value=attr] + +-String[value=attr] +== + +# Multiple partition attributes. +CREATE TABLE test (attr1 INT, attr2 LONG, attr3 FLOAT, attr4 DOUBLE, attr5 CHAR(5), attr6 VARCHAR(4)) +PARTITION BY HASH(attr1, attr2, attr3, attr4, attr5, attr6) PARTITIONS 4 +-- +CreateTableStatement[relation_name=test] ++-attribute_list= +| +-AttributeDefinition[name=attr1,type=Int] +| +-AttributeDefinition[name=attr2,type=Long] +| +-AttributeDefinition[name=attr3,type=Float] +| +-AttributeDefinition[name=attr4,type=Double] +| +-AttributeDefinition[name=attr5,type=Char(5)] +| +-AttributeDefinition[name=attr6,type=VarChar(4)] ++-partition_clause= + +-PartitionClause[partition_type=hash] + +-Number of Partitions=NumericLiteral[numeric_string=4,float_like=false] + +-attribute_name_list= + +-String[value=attr1] + +-String[value=attr2] + +-String[value=attr3] + +-String[value=attr4] + +-String[value=attr5] + +-String[value=attr6] +== + CREATE TABLE test (attr INT) WITH BLOCKPROPERTIES (TYPE rowstore) -- http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/05b47b5a/query_optimizer/ExecutionGenerator.cpp ---------------------------------------------------------------------- diff --git a/query_optimizer/ExecutionGenerator.cpp b/query_optimizer/ExecutionGenerator.cpp index 70b69e0..6fec85b 100644 --- a/query_optimizer/ExecutionGenerator.cpp +++ b/query_optimizer/ExecutionGenerator.cpp @@ -791,7 +791,7 @@ void ExecutionGenerator::convertHashJoin(const P::HashJoinPtr &physical_plan) { build_attribute_ids.size() == 1) { const PartitionSchemeHeader &partition_scheme_header = build_relation->getPartitionScheme()->getPartitionSchemeHeader(); - if (build_attribute_ids[0] == partition_scheme_header.getPartitionAttributeId()) { + if (build_attribute_ids[0] == partition_scheme_header.getPartitionAttributeIds().front()) { // TODO(zuyu): add optimizer support for partitioned hash joins. hash_table_context_proto->set_num_partitions(num_partitions); } http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/05b47b5a/query_optimizer/resolver/Resolver.cpp ---------------------------------------------------------------------- diff --git a/query_optimizer/resolver/Resolver.cpp b/query_optimizer/resolver/Resolver.cpp index df589fd..ed465e5 100644 --- a/query_optimizer/resolver/Resolver.cpp +++ b/query_optimizer/resolver/Resolver.cpp @@ -695,23 +695,7 @@ const S::PartitionSchemeHeader* Resolver::resolvePartitionClause( << "Partition type must be specified and be a string."; } - const PtrList<ParseString> &attribute_name_list = partition_clause->attribute_name_list(); - if (attribute_name_list.size() != 1) { - THROW_SQL_ERROR_AT(partition_clause) - << "Partition is supported on only one attribute."; - } - - const ParseString &partition_attribute_name = attribute_name_list.front(); - const attribute_id attr_id = GetAttributeIdFromName(create_table_statement.attribute_definition_list(), - partition_attribute_name.value()); - if (attr_id == kInvalidAttributeID) { - THROW_SQL_ERROR_AT(&partition_attribute_name) - << "The given attribute was not found."; - } - auto proto = make_unique<S::PartitionSchemeHeader>(); - proto->set_num_partitions(partition_clause->num_partitions()->long_value()); - proto->set_partition_attribute_id(attr_id); const std::string partition_type = ToLower(partition_type_string->value()); if (partition_type == kHashPartitionType) { @@ -724,6 +708,25 @@ const S::PartitionSchemeHeader* Resolver::resolvePartitionClause( THROW_SQL_ERROR_AT(partition_type_string) << "Unrecognized partition type: " << partition_type; } + proto->set_num_partitions(partition_clause->num_partitions()->long_value()); + + std::unordered_set<attribute_id> unique_partition_attrs; + for (const ParseString &partition_attribute_name : partition_clause->attribute_name_list()) { + const attribute_id attr_id = GetAttributeIdFromName(create_table_statement.attribute_definition_list(), + partition_attribute_name.value()); + if (attr_id == kInvalidAttributeID) { + THROW_SQL_ERROR_AT(&partition_attribute_name) + << "The given attribute was not found."; + } else if (unique_partition_attrs.find(attr_id) != unique_partition_attrs.end()) { + THROW_SQL_ERROR_AT(&partition_attribute_name) + << "A duplicate partition attribute was found."; + } + + unique_partition_attrs.insert(attr_id); + + proto->add_partition_attribute_ids(attr_id); + } + return proto.release(); } http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/05b47b5a/query_optimizer/tests/execution_generator/Create.test ---------------------------------------------------------------------- diff --git a/query_optimizer/tests/execution_generator/Create.test b/query_optimizer/tests/execution_generator/Create.test index 4ffa665..b3ef724 100644 --- a/query_optimizer/tests/execution_generator/Create.test +++ b/query_optimizer/tests/execution_generator/Create.test @@ -49,3 +49,20 @@ SELECT * FROM foo3; |attr | +-----------+ +-----------+ +== + +CREATE TABLE foo4 (attr INT) PARTITION BY HASH(attr, attr) PARTITIONS 4; +-- +ERROR: A duplicate partition attribute was found. (1 : 54) +...foo4 (attr INT) PARTITION BY HASH(attr, attr) PARTITIONS 4; + ^ +== + +CREATE TABLE foo5 (attr1 INT, attr2 LONG, attr3 FLOAT, attr4 DOUBLE, attr5 CHAR(5), attr6 VARCHAR(4)) +PARTITION BY HASH(attr1, attr2, attr3, attr4, attr5, attr6) PARTITIONS 4; +SELECT * FROM foo5; +-- ++-----------+--------------------+---------------+------------------------+-----+-----+ +|attr1 |attr2 |attr3 |attr4 |attr5|attr6| ++-----------+--------------------+---------------+------------------------+-----+-----+ ++-----------+--------------------+---------------+------------------------+-----+-----+ http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/05b47b5a/query_optimizer/tests/logical_generator/Create.test ---------------------------------------------------------------------- diff --git a/query_optimizer/tests/logical_generator/Create.test b/query_optimizer/tests/logical_generator/Create.test index 04134f9..aac49fb 100644 --- a/query_optimizer/tests/logical_generator/Create.test +++ b/query_optimizer/tests/logical_generator/Create.test @@ -56,3 +56,24 @@ TopLevelPlan | +-AttributeReference[id=0,name=attr,relation=foo,type=Int] +-output_attributes= +-AttributeReference[id=0,name=attr,relation=foo,type=Int] +== + +CREATE TABLE foo (attr1 INT, attr2 LONG, attr3 FLOAT, attr4 DOUBLE, attr5 CHAR(5), attr6 VARCHAR(4)) +PARTITION BY HASH(attr1, attr2, attr3, attr4, attr5, attr6) PARTITIONS 4; +-- +TopLevelPlan ++-plan=CreateTable[relation=foo] +| +-attributes= +| +-AttributeReference[id=0,name=attr1,relation=foo,type=Int] +| +-AttributeReference[id=1,name=attr2,relation=foo,type=Long] +| +-AttributeReference[id=2,name=attr3,relation=foo,type=Float] +| +-AttributeReference[id=3,name=attr4,relation=foo,type=Double] +| +-AttributeReference[id=4,name=attr5,relation=foo,type=Char(5)] +| +-AttributeReference[id=5,name=attr6,relation=foo,type=VarChar(4)] ++-output_attributes= + +-AttributeReference[id=0,name=attr1,relation=foo,type=Int] + +-AttributeReference[id=1,name=attr2,relation=foo,type=Long] + +-AttributeReference[id=2,name=attr3,relation=foo,type=Float] + +-AttributeReference[id=3,name=attr4,relation=foo,type=Double] + +-AttributeReference[id=4,name=attr5,relation=foo,type=Char(5)] + +-AttributeReference[id=5,name=attr6,relation=foo,type=VarChar(4)] http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/05b47b5a/query_optimizer/tests/physical_generator/Create.test ---------------------------------------------------------------------- diff --git a/query_optimizer/tests/physical_generator/Create.test b/query_optimizer/tests/physical_generator/Create.test index c555371..161cc00 100644 --- a/query_optimizer/tests/physical_generator/Create.test +++ b/query_optimizer/tests/physical_generator/Create.test @@ -133,3 +133,42 @@ TopLevelPlan | +-AttributeReference[id=0,name=attr,relation=foo,type=Int] +-output_attributes= +-AttributeReference[id=0,name=attr,relation=foo,type=Int] +== + +CREATE TABLE foo (attr1 INT, attr2 LONG, attr3 FLOAT, attr4 DOUBLE, attr5 CHAR(5), attr6 VARCHAR(4)) +PARTITION BY HASH(attr1, attr2, attr3, attr4, attr5, attr6) PARTITIONS 4; +-- +[Optimized Logical Plan] +TopLevelPlan ++-plan=CreateTable[relation=foo] +| +-attributes= +| +-AttributeReference[id=0,name=attr1,relation=foo,type=Int] +| +-AttributeReference[id=1,name=attr2,relation=foo,type=Long] +| +-AttributeReference[id=2,name=attr3,relation=foo,type=Float] +| +-AttributeReference[id=3,name=attr4,relation=foo,type=Double] +| +-AttributeReference[id=4,name=attr5,relation=foo,type=Char(5)] +| +-AttributeReference[id=5,name=attr6,relation=foo,type=VarChar(4)] ++-output_attributes= + +-AttributeReference[id=0,name=attr1,relation=foo,type=Int] + +-AttributeReference[id=1,name=attr2,relation=foo,type=Long] + +-AttributeReference[id=2,name=attr3,relation=foo,type=Float] + +-AttributeReference[id=3,name=attr4,relation=foo,type=Double] + +-AttributeReference[id=4,name=attr5,relation=foo,type=Char(5)] + +-AttributeReference[id=5,name=attr6,relation=foo,type=VarChar(4)] +[Physical Plan] +TopLevelPlan ++-plan=CreateTable[relation=foo] +| +-attributes= +| +-AttributeReference[id=0,name=attr1,relation=foo,type=Int] +| +-AttributeReference[id=1,name=attr2,relation=foo,type=Long] +| +-AttributeReference[id=2,name=attr3,relation=foo,type=Float] +| +-AttributeReference[id=3,name=attr4,relation=foo,type=Double] +| +-AttributeReference[id=4,name=attr5,relation=foo,type=Char(5)] +| +-AttributeReference[id=5,name=attr6,relation=foo,type=VarChar(4)] ++-output_attributes= + +-AttributeReference[id=0,name=attr1,relation=foo,type=Int] + +-AttributeReference[id=1,name=attr2,relation=foo,type=Long] + +-AttributeReference[id=2,name=attr3,relation=foo,type=Float] + +-AttributeReference[id=3,name=attr4,relation=foo,type=Double] + +-AttributeReference[id=4,name=attr5,relation=foo,type=Char(5)] + +-AttributeReference[id=5,name=attr6,relation=foo,type=VarChar(4)] http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/05b47b5a/query_optimizer/tests/resolver/Create.test ---------------------------------------------------------------------- diff --git a/query_optimizer/tests/resolver/Create.test b/query_optimizer/tests/resolver/Create.test index 28bd4f5..1372cf4 100644 --- a/query_optimizer/tests/resolver/Create.test +++ b/query_optimizer/tests/resolver/Create.test @@ -239,3 +239,24 @@ TopLevelPlan | +-AttributeReference[id=0,name=attr,relation=foo,type=Int] +-output_attributes= +-AttributeReference[id=0,name=attr,relation=foo,type=Int] +== + +CREATE TABLE foo (attr1 INT, attr2 LONG, attr3 FLOAT, attr4 DOUBLE, attr5 CHAR(5), attr6 VARCHAR(4)) +PARTITION BY HASH(attr1, attr2, attr3, attr4, attr5, attr6) PARTITIONS 4; +-- +TopLevelPlan ++-plan=CreateTable[relation=foo] +| +-attributes= +| +-AttributeReference[id=0,name=attr1,relation=foo,type=Int] +| +-AttributeReference[id=1,name=attr2,relation=foo,type=Long] +| +-AttributeReference[id=2,name=attr3,relation=foo,type=Float] +| +-AttributeReference[id=3,name=attr4,relation=foo,type=Double] +| +-AttributeReference[id=4,name=attr5,relation=foo,type=Char(5)] +| +-AttributeReference[id=5,name=attr6,relation=foo,type=VarChar(4)] ++-output_attributes= + +-AttributeReference[id=0,name=attr1,relation=foo,type=Int] + +-AttributeReference[id=1,name=attr2,relation=foo,type=Long] + +-AttributeReference[id=2,name=attr3,relation=foo,type=Float] + +-AttributeReference[id=3,name=attr4,relation=foo,type=Double] + +-AttributeReference[id=4,name=attr5,relation=foo,type=Char(5)] + +-AttributeReference[id=5,name=attr6,relation=foo,type=VarChar(4)] http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/05b47b5a/relational_operators/tests/HashJoinOperator_unittest.cpp ---------------------------------------------------------------------- diff --git a/relational_operators/tests/HashJoinOperator_unittest.cpp b/relational_operators/tests/HashJoinOperator_unittest.cpp index 03350d4..8338872 100644 --- a/relational_operators/tests/HashJoinOperator_unittest.cpp +++ b/relational_operators/tests/HashJoinOperator_unittest.cpp @@ -269,11 +269,11 @@ class HashJoinOperatorTest : public ::testing::TestWithParam<HashTableImplType> void insertTuplesWithSingleAttributePartitions() { // Set PartitionScheme. dim_part_scheme_ = new PartitionScheme( - new HashPartitionSchemeHeader(kMultiplePartitions, dim_table_->getAttributeByName("long")->getID())); + new HashPartitionSchemeHeader(kMultiplePartitions, { dim_table_->getAttributeByName("long")->getID() })); dim_table_->setPartitionScheme(dim_part_scheme_); fact_part_scheme_ = new PartitionScheme( - new HashPartitionSchemeHeader(kMultiplePartitions, fact_table_->getAttributeByName("long")->getID())); + new HashPartitionSchemeHeader(kMultiplePartitions, { fact_table_->getAttributeByName("long")->getID() })); fact_table_->setPartitionScheme(fact_part_scheme_); // Create StorageLayout http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/05b47b5a/storage/CMakeLists.txt ---------------------------------------------------------------------- diff --git a/storage/CMakeLists.txt b/storage/CMakeLists.txt index 6fe6436..cb1f098 100644 --- a/storage/CMakeLists.txt +++ b/storage/CMakeLists.txt @@ -779,6 +779,7 @@ target_link_libraries(quickstep_storage_InsertDestination tmb) target_link_libraries(quickstep_storage_InsertDestinationInterface quickstep_catalog_CatalogTypedefs + quickstep_catalog_PartitionSchemeHeader quickstep_types_containers_Tuple) target_link_libraries(quickstep_storage_InsertDestination_proto quickstep_catalog_Catalog_proto http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/05b47b5a/storage/InsertDestination.cpp ---------------------------------------------------------------------- diff --git a/storage/InsertDestination.cpp b/storage/InsertDestination.cpp index 75e1217..b6a9e3a 100644 --- a/storage/InsertDestination.cpp +++ b/storage/InsertDestination.cpp @@ -560,14 +560,16 @@ const std::vector<block_id>& PartitionAwareInsertDestination::getTouchedBlocksIn return done_block_ids_[part_id]; } -attribute_id PartitionAwareInsertDestination::getPartitioningAttribute() const { - return partition_scheme_header_->getPartitionAttributeId(); +PartitionSchemeHeader::PartitionAttributeIds PartitionAwareInsertDestination::getPartitioningAttributes() const { + return partition_scheme_header_->getPartitionAttributeIds(); } void PartitionAwareInsertDestination::insertTuple(const Tuple &tuple) { - const partition_id part_id = - partition_scheme_header_->getPartitionId( - tuple.getAttributeValue(partition_scheme_header_->getPartitionAttributeId())); + PartitionSchemeHeader::PartitionValues values; + for (const attribute_id attr_id : partition_scheme_header_->getPartitionAttributeIds()) { + values.push_back(tuple.getAttributeValue(attr_id)); + } + const partition_id part_id = partition_scheme_header_->getPartitionId(values); MutableBlockReference output_block = getBlockForInsertionInPartition(part_id); @@ -586,9 +588,11 @@ void PartitionAwareInsertDestination::insertTuple(const Tuple &tuple) { } void PartitionAwareInsertDestination::insertTupleInBatch(const Tuple &tuple) { - const partition_id part_id = - partition_scheme_header_->getPartitionId( - tuple.getAttributeValue(partition_scheme_header_->getPartitionAttributeId())); + PartitionSchemeHeader::PartitionValues values; + for (const attribute_id attr_id : partition_scheme_header_->getPartitionAttributeIds()) { + values.push_back(tuple.getAttributeValue(attr_id)); + } + const partition_id part_id = partition_scheme_header_->getPartitionId(values); MutableBlockReference output_block = getBlockForInsertionInPartition(part_id); @@ -608,12 +612,10 @@ void PartitionAwareInsertDestination::insertTupleInBatch(const Tuple &tuple) { void PartitionAwareInsertDestination::bulkInsertTuples(ValueAccessor *accessor, bool always_mark_full) { const std::size_t num_partitions = partition_scheme_header_->getNumPartitions(); - const attribute_id partition_attribute_id = partition_scheme_header_->getPartitionAttributeId(); InvokeOnAnyValueAccessor( accessor, [this, - &partition_attribute_id, &always_mark_full, &num_partitions](auto *accessor) -> void { // NOLINT(build/c++11) std::vector<std::unique_ptr<TupleIdSequence>> partition_membership; @@ -627,8 +629,11 @@ void PartitionAwareInsertDestination::bulkInsertTuples(ValueAccessor *accessor, // set a bit in the appropriate TupleIdSequence. accessor->beginIteration(); while (accessor->next()) { - TypedValue attr_val = accessor->getTypedValue(partition_attribute_id); - partition_membership[partition_scheme_header_->getPartitionId(attr_val)] + PartitionSchemeHeader::PartitionValues values; + for (const attribute_id attr_id : partition_scheme_header_->getPartitionAttributeIds()) { + values.push_back(accessor->getTypedValue(attr_id)); + } + partition_membership[partition_scheme_header_->getPartitionId(values)] ->set(accessor->getCurrentPosition(), true); } @@ -662,12 +667,10 @@ void PartitionAwareInsertDestination::bulkInsertTuples(ValueAccessor *accessor, void PartitionAwareInsertDestination::bulkInsertTuplesWithRemappedAttributes( const std::vector<attribute_id> &attribute_map, ValueAccessor *accessor, bool always_mark_full) { const std::size_t num_partitions = partition_scheme_header_->getNumPartitions(); - const attribute_id partition_attribute_id = partition_scheme_header_->getPartitionAttributeId(); InvokeOnAnyValueAccessor( accessor, [this, - &partition_attribute_id, &attribute_map, &always_mark_full, &num_partitions](auto *accessor) -> void { // NOLINT(build/c++11) @@ -682,8 +685,11 @@ void PartitionAwareInsertDestination::bulkInsertTuplesWithRemappedAttributes( // set a bit in the appropriate TupleIdSequence. accessor->beginIteration(); while (accessor->next()) { - TypedValue attr_val = accessor->getTypedValue(attribute_map[partition_attribute_id]); - partition_membership[partition_scheme_header_->getPartitionId(attr_val)] + PartitionSchemeHeader::PartitionValues values; + for (const attribute_id attr_id : partition_scheme_header_->getPartitionAttributeIds()) { + values.push_back(accessor->getTypedValue(attr_id)); + } + partition_membership[partition_scheme_header_->getPartitionId(values)] ->set(accessor->getCurrentPosition(), true); } @@ -720,10 +726,14 @@ void PartitionAwareInsertDestination::insertTuplesFromVector(std::vector<Tuple>: return; } - const attribute_id partition_attribute_id = partition_scheme_header_->getPartitionAttributeId(); for (; begin != end; ++begin) { - const partition_id part_id = - partition_scheme_header_->getPartitionId(begin->getAttributeValue(partition_attribute_id)); + PartitionSchemeHeader::PartitionValues values; + for (const attribute_id attr_id : partition_scheme_header_->getPartitionAttributeIds()) { + values.push_back(begin->getAttributeValue(attr_id)); + } + + const partition_id part_id = partition_scheme_header_->getPartitionId(values); + MutableBlockReference dest_block = getBlockForInsertionInPartition(part_id); // FIXME(chasseur): Deal with TupleTooLargeForBlock exception. while (!dest_block->insertTupleInBatch(*begin)) { http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/05b47b5a/storage/InsertDestination.hpp ---------------------------------------------------------------------- diff --git a/storage/InsertDestination.hpp b/storage/InsertDestination.hpp index e9335ce..dc5a093 100644 --- a/storage/InsertDestination.hpp +++ b/storage/InsertDestination.hpp @@ -137,8 +137,8 @@ class InsertDestination : public InsertDestinationInterface { return relation_; } - attribute_id getPartitioningAttribute() const override { - return -1; + PartitionSchemeHeader::PartitionAttributeIds getPartitioningAttributes() const override { + return {}; } void insertTuple(const Tuple &tuple) override; @@ -515,7 +515,7 @@ class PartitionAwareInsertDestination : public InsertDestination { available_block_refs_[part_id].clear(); } - attribute_id getPartitioningAttribute() const override; + PartitionSchemeHeader::PartitionAttributeIds getPartitioningAttributes() const override; void insertTuple(const Tuple &tuple) override; http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/05b47b5a/storage/InsertDestinationInterface.hpp ---------------------------------------------------------------------- diff --git a/storage/InsertDestinationInterface.hpp b/storage/InsertDestinationInterface.hpp index be6b0c2..b8c584b 100644 --- a/storage/InsertDestinationInterface.hpp +++ b/storage/InsertDestinationInterface.hpp @@ -24,6 +24,7 @@ #include <vector> #include "catalog/CatalogTypedefs.hpp" +#include "catalog/PartitionSchemeHeader.hpp" #include "types/containers/Tuple.hpp" namespace quickstep { @@ -58,15 +59,15 @@ class InsertDestinationInterface { virtual const CatalogRelationSchema& getRelation() const = 0; /** - * @brief Get the attribute ID used for partitioning, if any. + * @brief Get the attribute IDs used for partitioning, if any. * @note This is intended only for use by StorageBlock::update(), to * determine whether it is safe to do in-place updates or whether all * updated Tuples must be relocated to land in the correct partition. * - * @return The ID of the attribute used for partitioning, or -1 if there is - * no partitioning. + * @return The IDs of the attribute used for partitioning, or empty if there + * is no partitioning. **/ - virtual attribute_id getPartitioningAttribute() const = 0; + virtual PartitionSchemeHeader::PartitionAttributeIds getPartitioningAttributes() const = 0; /** * @brief Insert a single tuple into a block managed by this http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/05b47b5a/storage/StorageBlock.cpp ---------------------------------------------------------------------- diff --git a/storage/StorageBlock.cpp b/storage/StorageBlock.cpp index 0cc7735..e91c1ac 100644 --- a/storage/StorageBlock.cpp +++ b/storage/StorageBlock.cpp @@ -429,8 +429,7 @@ StorageBlock::UpdateResult StorageBlock::update( // To be safe, relocate ALL tuples if the relation is partitioned and we are // updating the partitioning attribute. - const bool relocate_all = - assignments.find(relocation_destination->getPartitioningAttribute()) != assignments.end(); + const bool relocate_all = !relocation_destination->getPartitioningAttributes().empty(); // IDs of tuples which should be re-added to indices. TupleIdSequence in_place_ids(tuple_store_->getMaxTupleID() + 1);