Adds support for PartialBulkInserts in StorageBlocks - Enables use of PartialBulkInserts in StorageBlocks. - Value accessor changes to allow use of 2 insert destinations. - Enables PartialInserts for SplitRow. - Changes the HashJoin operator so that it can take advantage of the PartialInserts code. - This also cleans up code from the previous commit.
Project: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/commit/2d11ec58 Tree: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/tree/2d11ec58 Diff: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/diff/2d11ec58 Branch: refs/heads/min-max-stats Commit: 2d11ec588e7e8d7a7a1a8adfc28fb30e5fe0852e Parents: 172b51b Author: navsan <nav...@gmail.com> Authored: Mon Nov 7 14:35:01 2016 -0600 Committer: cramja <marc.spehlm...@gmail.com> Committed: Mon Nov 21 14:28:02 2016 -0600 ---------------------------------------------------------------------- relational_operators/HashJoinOperator.cpp | 150 ++++++++++++++++--- storage/InsertDestination.cpp | 84 +++++++++++ storage/InsertDestination.hpp | 16 ++ storage/InsertDestinationInterface.hpp | 22 +++ storage/SplitRowStoreTupleStorageSubBlock.hpp | 4 +- storage/StorageBlock.cpp | 24 +++ storage/StorageBlock.hpp | 44 ++++++ storage/TupleStorageSubBlock.hpp | 50 +++++++ types/containers/ColumnVectorsValueAccessor.hpp | 4 + 9 files changed, 373 insertions(+), 25 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/2d11ec58/relational_operators/HashJoinOperator.cpp ---------------------------------------------------------------------- diff --git a/relational_operators/HashJoinOperator.cpp b/relational_operators/HashJoinOperator.cpp index 4a91f86..2028046 100644 --- a/relational_operators/HashJoinOperator.cpp +++ b/relational_operators/HashJoinOperator.cpp @@ -65,10 +65,11 @@ namespace { // Functor passed to HashTable::getAllFromValueAccessor() to collect matching // tuples from the inner relation. It stores matching tuple ID pairs -// in an unordered_map keyed by inner block ID. -class MapBasedJoinedTupleCollector { +// in an unordered_map keyed by inner block ID and a vector of +// pairs of (build-tupleID, probe-tuple-ID). 
+class VectorsOfPairsJoinedTuplesCollector { public: - MapBasedJoinedTupleCollector() { + VectorsOfPairsJoinedTuplesCollector() { } template <typename ValueAccessorT> @@ -95,6 +96,34 @@ class MapBasedJoinedTupleCollector { std::unordered_map<block_id, std::vector<std::pair<tuple_id, tuple_id>>> joined_tuples_; }; +// Another collector using an unordered_map keyed on inner block just like above, +// except that it uses of a pair of (build-tupleIDs-vector, probe-tuple-IDs-vector). +class PairsOfVectorsJoinedTuplesCollector { + public: + PairsOfVectorsJoinedTuplesCollector() { + } + + template <typename ValueAccessorT> + inline void operator()(const ValueAccessorT &accessor, + const TupleReference &tref) { + joined_tuples_[tref.block].first.push_back(tref.tuple); + joined_tuples_[tref.block].second.push_back(accessor.getCurrentPosition()); + } + + // Get a mutable pointer to the collected map of joined tuple ID pairs. The + // key is inner block_id, value is a pair consisting of + // inner block tuple IDs (first) and outer block tuple IDs (second). 
+ inline std::unordered_map< block_id, std::pair<std::vector<tuple_id>, std::vector<tuple_id>>>* + getJoinedTuples() { + return &joined_tuples_; + } + + private: + std::unordered_map< + block_id, + std::pair<std::vector<tuple_id>, std::vector<tuple_id>>> joined_tuples_; +}; + class SemiAntiJoinTupleCollector { public: explicit SemiAntiJoinTupleCollector(TupleIdSequence *filter) @@ -432,7 +461,7 @@ void HashInnerJoinWorkOrder::execute() { base_accessor->createSharedTupleIdSequenceAdapterVirtual(*existence_map)); } - MapBasedJoinedTupleCollector collector; + PairsOfVectorsJoinedTuplesCollector collector; if (join_key_attributes_.size() == 1) { hash_table_.getAllFromValueAccessor( probe_accessor.get(), @@ -450,12 +479,14 @@ void HashInnerJoinWorkOrder::execute() { const relation_id build_relation_id = build_relation_.getID(); const relation_id probe_relation_id = probe_relation_.getID(); - for (std::pair<const block_id, std::vector<std::pair<tuple_id, tuple_id>>> + for (std::pair<const block_id, std::pair<std::vector<tuple_id>, std::vector<tuple_id>>> &build_block_entry : *collector.getJoinedTuples()) { BlockReference build_block = storage_manager_->getBlock(build_block_entry.first, build_relation_); const TupleStorageSubBlock &build_store = build_block->getTupleStorageSubBlock(); std::unique_ptr<ValueAccessor> build_accessor(build_store.createValueAccessor()); + const std::vector<tuple_id> &build_tids = build_block_entry.second.first; + const std::vector<tuple_id> &probe_tids = build_block_entry.second.second; // Evaluate '*residual_predicate_', if any. // @@ -468,17 +499,16 @@ void HashInnerJoinWorkOrder::execute() { // hash join is below a reasonable threshold so that we don't blow up // temporary memory requirements to an unreasonable degree. 
if (residual_predicate_ != nullptr) { - std::vector<std::pair<tuple_id, tuple_id>> filtered_matches; - - for (const std::pair<tuple_id, tuple_id> &hash_match - : build_block_entry.second) { + std::pair<std::vector<tuple_id>, std::vector<tuple_id>> filtered_matches; + for (std::size_t i = 0; i < build_tids.size(); ++i) { if (residual_predicate_->matchesForJoinedTuples(*build_accessor, build_relation_id, - hash_match.first, + build_tids[i], *probe_accessor, probe_relation_id, - hash_match.second)) { - filtered_matches.emplace_back(hash_match); + probe_tids[i])) { + filtered_matches.first.push_back(build_tids[i]); + filtered_matches.second.push_back(probe_tids[i]); } } @@ -501,22 +531,96 @@ void HashInnerJoinWorkOrder::execute() { // benefit (probably only a real performance win when there are very few // matching tuples in each individual inner block but very many inner // blocks with at least one match). + + // We now create ordered value accessors for both build and probe side, + // using the joined tuple TIDs. Note that we have to use this Lambda-based + // invocation method here because the accessors don't have a virtual + // function that creates such an OrderedTupleIdSequenceAdapterValueAccessor. 
+ std::unique_ptr<ValueAccessor> ordered_build_accessor, ordered_probe_accessor; + InvokeOnValueAccessorNotAdapter( + build_accessor.get(), + [&](auto *accessor) -> void { // NOLINT(build/c++11) + ordered_build_accessor.reset( + accessor->createSharedOrderedTupleIdSequenceAdapter(build_tids)); + }); + + if (probe_accessor->isTupleIdSequenceAdapter()) { + InvokeOnTupleIdSequenceAdapterValueAccessor( + probe_accessor.get(), + [&](auto *accessor) -> void { // NOLINT(build/c++11) + ordered_probe_accessor.reset( + accessor->createSharedOrderedTupleIdSequenceAdapter(probe_tids)); + }); + } else { + InvokeOnValueAccessorNotAdapter( + probe_accessor.get(), + [&](auto *accessor) -> void { // NOLINT(build/c++11) + ordered_probe_accessor.reset( + accessor->createSharedOrderedTupleIdSequenceAdapter(probe_tids)); + }); + } + + + // We also need a temp value accessor to store results of any scalar expressions. ColumnVectorsValueAccessor temp_result; - for (vector<unique_ptr<const Scalar>>::const_iterator selection_cit = selection_.begin(); - selection_cit != selection_.end(); - ++selection_cit) { - temp_result.addColumn((*selection_cit)->getAllValuesForJoin(build_relation_id, - build_accessor.get(), - probe_relation_id, - probe_accessor.get(), - build_block_entry.second)); + + // Create a map of ValueAccessors and what attributes we want to pick from them + std::vector<std::pair<ValueAccessor *, std::vector<attribute_id>>> accessor_attribute_map; + const std::vector<ValueAccessor *> accessors{ + ordered_build_accessor.get(), ordered_probe_accessor.get(), &temp_result}; + const unsigned int build_index = 0, probe_index = 1, temp_index = 2; + for (auto &accessor : accessors) { + accessor_attribute_map.push_back(std::make_pair( + accessor, + std::vector<attribute_id>(selection_.size(), kInvalidCatalogId))); + } + + attribute_id dest_attr = 0; + std::vector<std::pair<tuple_id, tuple_id>> zipped_joined_tuple_ids; + + for (auto &selection_cit : selection_) { + // If the Scalar 
(column) is not an attribute in build/probe blocks, then + // insert it into a ColumnVectorsValueAccessor. + if (selection_cit->getDataSource() != Scalar::ScalarDataSource::kAttribute) { + // Current destination attribute maps to the column we'll create now. + accessor_attribute_map[temp_index].second[dest_attr] = temp_result.getNumColumns(); + + if (temp_result.getNumColumns() == 0) { + // The getAllValuesForJoin function below needs joined tuple IDs as + // a vector of pair of (build-tuple-ID, probe-tuple-ID), and we have + // a pair of (build-tuple-IDs-vector, probe-tuple-IDs-vector). So + // we'll have to zip our two vectors together. We do this inside + // the loop because most queries don't exercise this code since + // they don't have scalar expressions with attributes from both + // build and probe relations (other expressions would have been + // pushed down to before the join). + zipped_joined_tuple_ids.reserve(build_tids.size()); + for (std::size_t i = 0; i < build_tids.size(); ++i) { + zipped_joined_tuple_ids.push_back(std::make_pair(build_tids[i], probe_tids[i])); + } + } + temp_result.addColumn( + selection_cit + ->getAllValuesForJoin(build_relation_id, build_accessor.get(), + probe_relation_id, probe_accessor.get(), + zipped_joined_tuple_ids)); + } else { + auto scalar_attr = static_cast<const ScalarAttribute *>(selection_cit.get()); + const attribute_id attr_id = scalar_attr->getAttribute().getID(); + if (scalar_attr->getAttribute().getParent().getID() == build_relation_id) { + accessor_attribute_map[build_index].second[dest_attr] = attr_id; + } else { + accessor_attribute_map[probe_index].second[dest_attr] = attr_id; + } + } + ++dest_attr; } // NOTE(chasseur): calling the bulk-insert method of InsertDestination once // for each pair of joined blocks incurs some extra overhead that could be // avoided by keeping checked-out MutableBlockReferences across iterations // of this loop, but that would get messy when combined with partitioning. 
- output_destination_->bulkInsertTuples(&temp_result); + output_destination_->bulkInsertTuplesFromValueAccessors(accessor_attribute_map); } } @@ -550,7 +654,7 @@ void HashSemiJoinWorkOrder::executeWithResidualPredicate() { // We collect all the matching probe relation tuples, as there's a residual // preidcate that needs to be applied after collecting these matches. - MapBasedJoinedTupleCollector collector; + VectorsOfPairsJoinedTuplesCollector collector; if (join_key_attributes_.size() == 1) { hash_table_.getAllFromValueAccessor( probe_accessor.get(), @@ -759,7 +863,7 @@ void HashAntiJoinWorkOrder::executeWithResidualPredicate() { base_accessor->createSharedTupleIdSequenceAdapterVirtual(*existence_map)); } - MapBasedJoinedTupleCollector collector; + VectorsOfPairsJoinedTuplesCollector collector; // We probe the hash table and get all the matches. Unlike // executeWithoutResidualPredicate(), we have to collect all the matching // tuples, because after this step we still have to evalute the residual http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/2d11ec58/storage/InsertDestination.cpp ---------------------------------------------------------------------- diff --git a/storage/InsertDestination.cpp b/storage/InsertDestination.cpp index 5e83453..067edf6 100644 --- a/storage/InsertDestination.cpp +++ b/storage/InsertDestination.cpp @@ -247,6 +247,90 @@ void InsertDestination::bulkInsertTuplesWithRemappedAttributes( }); } +// A common case that we can optimize away is when the attribute_map +// for an accessor only contains gaps. e.g. This happens for a join when +// there are no attributes selected from one side. 
+void removeGapOnlyAccessors( + const std::vector<std::pair<ValueAccessor *, std::vector<attribute_id>>>* accessor_attribute_map, + std::vector<std::pair<ValueAccessor *, const std::vector<attribute_id>>>* reduced_accessor_attribute_map) { + for (std::size_t i = 0; i < accessor_attribute_map->size(); ++i) { + bool all_gaps = true; + for (const auto &attr : (*accessor_attribute_map)[i].second) + if (attr != kInvalidCatalogId) { + all_gaps = false; + break; + } + if (all_gaps) + continue; + reduced_accessor_attribute_map->push_back((*accessor_attribute_map)[i]); + (*accessor_attribute_map)[i].first->beginIterationVirtual(); + } +} + +void InsertDestination::bulkInsertTuplesFromValueAccessors( + const std::vector<std::pair<ValueAccessor *, std::vector<attribute_id>>> &accessor_attribute_map, + bool always_mark_full) { + // Handle pathological corner case where there are no accessors + if (accessor_attribute_map.size() == 0) + return; + + std::vector<std::pair<ValueAccessor *, const std::vector<attribute_id>>> reduced_accessor_attribute_map; + removeGapOnlyAccessors(&accessor_attribute_map, &reduced_accessor_attribute_map); + + // We assume that all input accessors have the same number of tuples, so + // the iterations finish together. Therefore, we can just check the first one. + auto first_accessor = reduced_accessor_attribute_map[0].first; + while (!first_accessor->iterationFinishedVirtual()) { + tuple_id num_tuples_to_insert = kCatalogMaxID; + tuple_id num_tuples_inserted = 0; + MutableBlockReference output_block = this->getBlockForInsertion(); + + // Now iterate through all the accessors and do one round of bulk-insertion + // of partial tuples into the selected output_block. + // While inserting from the first ValueAccessor, space is reserved for + // all the columns including those coming from other ValueAccessors. + // Thereafter, in a given round, we only insert the remaining columns of the + // same tuples from the other ValueAccessors. 
+ for (auto &p : reduced_accessor_attribute_map) { + ValueAccessor *accessor = p.first; + std::vector<attribute_id> attribute_map = p.second; + + + InvokeOnAnyValueAccessor( + accessor, + [&](auto *accessor) -> void { // NOLINT(build/c++11) + num_tuples_inserted = output_block->bulkInsertPartialTuples( + attribute_map, accessor, num_tuples_to_insert); + }); + + if (accessor == first_accessor) { + // Now we know how many full tuples can be inserted into this + // output_block (viz. number of tuples inserted from first ValueAccessor). + // We should only insert that many tuples from the remaining + // ValueAccessors as well. + num_tuples_to_insert = num_tuples_inserted; + } else { + // Since the bulk insertion of the first ValueAccessor should already + // have reserved the space for all the other ValueAccessors' columns, + // we must have been able to insert all the tuples we asked to insert. + DCHECK(num_tuples_inserted == num_tuples_to_insert); + } + } + + // After one round of insertions, we have successfully inserted as many + // tuples as possible into the output_block. Strictly speaking, it's + // possible that there is more space for insertions because the size + // estimation of variable length columns is conservative. But we will ignore + // that case and proceed assuming that this output_block is full. + + // Update the header for output_block and then return it. 
+ output_block->bulkInsertPartialTuplesFinalize(num_tuples_inserted); + const bool mark_full = always_mark_full + || !first_accessor->iterationFinishedVirtual(); + this->returnBlock(std::move(output_block), mark_full); + } +} + void InsertDestination::insertTuplesFromVector(std::vector<Tuple>::const_iterator begin, std::vector<Tuple>::const_iterator end) { if (begin == end) { http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/2d11ec58/storage/InsertDestination.hpp ---------------------------------------------------------------------- diff --git a/storage/InsertDestination.hpp b/storage/InsertDestination.hpp index 408e76b..3487638 100644 --- a/storage/InsertDestination.hpp +++ b/storage/InsertDestination.hpp @@ -152,6 +152,10 @@ class InsertDestination : public InsertDestinationInterface { ValueAccessor *accessor, bool always_mark_full = false) override; + void bulkInsertTuplesFromValueAccessors( + const std::vector<std::pair<ValueAccessor *, std::vector<attribute_id>>> &accessor_attribute_map, + bool always_mark_full = false) override; + void insertTuplesFromVector(std::vector<Tuple>::const_iterator begin, std::vector<Tuple>::const_iterator end) override; @@ -313,6 +317,12 @@ class AlwaysCreateBlockInsertDestination : public InsertDestination { ~AlwaysCreateBlockInsertDestination() override { } + void bulkInsertTuplesFromValueAccessors( + const std::vector<std::pair<ValueAccessor *, std::vector<attribute_id>>> &accessor_attribute_map, + bool always_mark_full = false) override { + LOG(FATAL) << "bulkInsertTuplesFromValueAccessors is not implemented for AlwaysCreateBlockInsertDestination"; + } + protected: MutableBlockReference getBlockForInsertion() override; @@ -517,6 +527,12 @@ class PartitionAwareInsertDestination : public InsertDestination { ValueAccessor *accessor, bool always_mark_full = false) override; + void bulkInsertTuplesFromValueAccessors( + const std::vector<std::pair<ValueAccessor *, std::vector<attribute_id>>> &accessor_attribute_map, + 
bool always_mark_full = false) override { + LOG(FATAL) << "bulkInsertTuplesFromValueAccessors is not implemented for PartitionAwareInsertDestination"; + } + void insertTuplesFromVector(std::vector<Tuple>::const_iterator begin, std::vector<Tuple>::const_iterator end) override; http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/2d11ec58/storage/InsertDestinationInterface.hpp ---------------------------------------------------------------------- diff --git a/storage/InsertDestinationInterface.hpp b/storage/InsertDestinationInterface.hpp index 423dff1..b62d3e5 100644 --- a/storage/InsertDestinationInterface.hpp +++ b/storage/InsertDestinationInterface.hpp @@ -20,6 +20,7 @@ #ifndef QUICKSTEP_STORAGE_INSERT_DESTINATION_INTERFACE_HPP_ #define QUICKSTEP_STORAGE_INSERT_DESTINATION_INTERFACE_HPP_ +#include <utility> #include <vector> #include "catalog/CatalogTypedefs.hpp" @@ -122,6 +123,27 @@ class InsertDestinationInterface { bool always_mark_full = false) = 0; /** + * @brief Bulk-insert tuples from one or more ValueAccessors + * into blocks managed by this InsertDestination. + * + * @warning It is implicitly assumed that all the input ValueAccessors have + * the same number of tuples in them. + * + * @param accessor_attribute_map A vector of pairs of ValueAccessor and + * corresponding attribute map + * The i-th attribute ID in the attr map for a value accessor is "n" + * if the attribute_id "i" in the output relation + * is the attribute_id "n" in corresponding input value accessor. + * Set the i-th element to kInvalidCatalogId if it doesn't come from + * the corresponding value accessor. + * @param always_mark_full If \c true, always mark the blocks full after + * insertion from ValueAccessor even when partially full. 
+ **/ + virtual void bulkInsertTuplesFromValueAccessors( + const std::vector<std::pair<ValueAccessor *, std::vector<attribute_id>>> &accessor_attribute_map, + bool always_mark_full = false) = 0; + + /** * @brief Insert tuples from a range of Tuples in a vector. * @warning Unlike bulkInsertTuples(), this is not well-optimized and not * intended for general use. It should only be used by http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/2d11ec58/storage/SplitRowStoreTupleStorageSubBlock.hpp ---------------------------------------------------------------------- diff --git a/storage/SplitRowStoreTupleStorageSubBlock.hpp b/storage/SplitRowStoreTupleStorageSubBlock.hpp index 681001e..89c756d 100644 --- a/storage/SplitRowStoreTupleStorageSubBlock.hpp +++ b/storage/SplitRowStoreTupleStorageSubBlock.hpp @@ -304,9 +304,9 @@ class SplitRowStoreTupleStorageSubBlock: public TupleStorageSubBlock { tuple_id bulkInsertPartialTuples( const std::vector<attribute_id> &attribute_map, ValueAccessor *accessor, - const tuple_id max_num_tuples_to_insert); + const tuple_id max_num_tuples_to_insert) override; - void bulkInsertPartialTuplesFinalize(const tuple_id num_tuples_inserted); + void bulkInsertPartialTuplesFinalize(const tuple_id num_tuples_inserted) override; const void* getAttributeValue(const tuple_id tuple, const attribute_id attr) const override; http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/2d11ec58/storage/StorageBlock.cpp ---------------------------------------------------------------------- diff --git a/storage/StorageBlock.cpp b/storage/StorageBlock.cpp index ea74ee6..6267d6b 100644 --- a/storage/StorageBlock.cpp +++ b/storage/StorageBlock.cpp @@ -284,6 +284,30 @@ tuple_id StorageBlock::bulkInsertTuplesWithRemappedAttributes( return num_inserted; } +tuple_id StorageBlock::bulkInsertPartialTuples( + const std::vector<attribute_id> &attribute_map, + ValueAccessor *accessor, + const tuple_id max_num_tuples_to_insert) { + const tuple_id 
num_inserted + = tuple_store_->bulkInsertPartialTuples(attribute_map, + accessor, + max_num_tuples_to_insert); + if (num_inserted != 0) { + invalidateAllIndexes(); + dirty_ = true; + } else if (tuple_store_->isEmpty()) { + if (!accessor->iterationFinishedVirtual()) { + throw TupleTooLargeForBlock(0); + } + } + return num_inserted; +} + +void StorageBlock::bulkInsertPartialTuplesFinalize( + const tuple_id num_tuples_inserted) { + tuple_store_->bulkInsertPartialTuplesFinalize(num_tuples_inserted); +} + void StorageBlock::sample(const bool is_block_sample, const int percentage, InsertDestinationInterface *destination) const { http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/2d11ec58/storage/StorageBlock.hpp ---------------------------------------------------------------------- diff --git a/storage/StorageBlock.hpp b/storage/StorageBlock.hpp index 56b3bdc..ed252c5 100644 --- a/storage/StorageBlock.hpp +++ b/storage/StorageBlock.hpp @@ -307,6 +307,7 @@ class StorageBlock : public StorageBlockBase { * iteration will be advanced to the first non-inserted tuple or, if * all accessible tuples were inserted in this block, to the end * position. + * @param max_tuples_to_insert Insert at most these many tuples * @return The number of tuples inserted from accessor. **/ tuple_id bulkInsertTuplesWithRemappedAttributes( @@ -314,6 +315,49 @@ class StorageBlock : public StorageBlockBase { ValueAccessor *accessor); /** + * @brief Insert up to max_num_tuples_to_insert tuples from a ValueAccessor + * as a single batch, using the attribute_map to project and reorder + * columns from the input ValueAccessor. Does not update header. + * + * @note Typical usage is where you want to bulk-insert columns from two + * or more value accessors. 
Instead of writing out the columns into + * one or more column vector value accessors, you can simply use this + * function with the appropriate attribute_map for each value + * accessor (InsertDestination::bulkInsertTuplesFromValueAccessors + * handles all the details) to insert tuples without an extra temp copy. + * + * @warning Must call bulkInsertPartialTuplesFinalize() to update the header, + * until which point, the insertion is not visible to others. + * @warning The inserted tuples may be placed in sub-optimal locations in this + * TupleStorageSubBlock. + * + * @param attribute_map A vector which maps the attributes of this + * TupleStorageSubBlock's relation (gaps indicated with kInvalidCatalogId) + * to the corresponding attributes which should be read from accessor. + * @param accessor A ValueAccessor to insert tuples from. The accessor's + * iteration will be advanced to the first non-inserted tuple or, if + * all accessible tuples were inserted in this sub-block, to the end + * position. + * @return The number of tuples inserted from accessor. + **/ + tuple_id bulkInsertPartialTuples( + const std::vector<attribute_id> &attribute_map, + ValueAccessor *accessor, + const tuple_id max_num_tuples_to_insert); + + /** + * @brief Update header after a bulkInsertPartialTuples. + * + * @warning Only call this after a bulkInsertPartialTuples, passing in the + * number of tuples that were inserted (return value of that function). + * + * @param num_tuples_inserted Number of tuples inserted (i.e., how much to + * advance the header.num_tuples by). Should be equal to the return + * value of bulkInsertPartialTuples. + **/ + void bulkInsertPartialTuplesFinalize(tuple_id num_tuples_inserted); + + /** * @brief Get the IDs of tuples in this StorageBlock which match a given Predicate. * * @param predicate The predicate to match. 
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/2d11ec58/storage/TupleStorageSubBlock.hpp ---------------------------------------------------------------------- diff --git a/storage/TupleStorageSubBlock.hpp b/storage/TupleStorageSubBlock.hpp index aed6eea..26e8027 100644 --- a/storage/TupleStorageSubBlock.hpp +++ b/storage/TupleStorageSubBlock.hpp @@ -272,6 +272,56 @@ class TupleStorageSubBlock { ValueAccessor *accessor) = 0; /** + * @brief Insert up to max_num_tuples_to_insert tuples from a ValueAccessor + * as a single batch, using the attribute_map to project and reorder + * columns from the input ValueAccessor. Does not update header. + * + * @note Typical usage is where you want to bulk-insert columns from two + * or more value accessors. Instead of writing out the columns into + * one or more column vector value accessors, you can simply use this + * function with the appropriate attribute_map for each value + * accessor (InsertDestination::bulkInsertTuplesFromValueAccessors + * handles all the details) to insert tuples without an extra temp copy. + * + * @warning Must call bulkInsertPartialTuplesFinalize() to update the header, + * until which point, the insertion is not visible to others. + * @warning The inserted tuples may be placed in a suboptimal position in the + * block. + * + * @param attribute_map A vector which maps the attributes of this + * TupleStorageSubBlock's relation (gaps indicated with kInvalidCatalogId) + * to the corresponding attributes which should be read from accessor. + * @param accessor A ValueAccessor to insert tuples from. The accessor's + * iteration will be advanced to the first non-inserted tuple or, if + * all accessible tuples were inserted in this sub-block, to the end + * position. + * @return The number of tuples inserted from accessor. 
+ **/ + virtual tuple_id bulkInsertPartialTuples( + const std::vector<attribute_id> &attribute_map, + ValueAccessor *accessor, + const tuple_id max_num_tuples_to_insert) { + LOG(FATAL) << "Partial bulk insert is not supported for this TupleStorageBlock type (" + << getTupleStorageSubBlockType() << ")."; + } + + /** + * @brief Update header after a bulkInsertPartialTuples. + * + * @warning Only call this after a bulkInsertPartialTuples, passing in the + * number of tuples that were inserted (return value of that function). + * + * @param num_tuples_inserted Number of tuples inserted (i.e., how much to + * advance the header.num_tuples by). Should be equal to the return + * value of bulkInsertPartialTuples. + **/ + virtual void bulkInsertPartialTuplesFinalize( + const tuple_id num_tuples_inserted) { + LOG(FATAL) << "Partial bulk insert is not supported for this TupleStorageBlock type (" + << getTupleStorageSubBlockType() << ")."; + } + + /** * @brief Get the (untyped) value of an attribute in a tuple in this buffer. * @warning This method may not be supported for all implementations of * TupleStorageSubBlock. supportsUntypedGetAttributeValue() MUST be http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/2d11ec58/types/containers/ColumnVectorsValueAccessor.hpp ---------------------------------------------------------------------- diff --git a/types/containers/ColumnVectorsValueAccessor.hpp b/types/containers/ColumnVectorsValueAccessor.hpp index fe413a0..fbbdc1b 100644 --- a/types/containers/ColumnVectorsValueAccessor.hpp +++ b/types/containers/ColumnVectorsValueAccessor.hpp @@ -139,6 +139,10 @@ class ColumnVectorsValueAccessor : public ValueAccessor { return nullptr; } + inline std::size_t getNumColumns() const { + return columns_.size(); + } + template <bool check_null = true> inline const void* getUntypedValue(const attribute_id attr_id) const { return getUntypedValueAtAbsolutePosition<check_null>(attr_id, current_position_);