[24/39] incubator-quickstep git commit: Adds support for PartialBulkInserts in StorageBlocks

jianqiao Wed, 14 Dec 2016 19:53:15 -0800

Adds support for PartialBulkInserts in StorageBlocks

- Enables use of PartialBulkInserts in StorageBlocks
- Value accessor changes to allow use of 2 insert destinations
- Enables PartialInserts for SplitRow
- Changes HashJoin operator so that it can take advantage of the
PartialInserts code.
- This also cleans up code from Previous commit.



Project: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/repo
Commit: 
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/commit/2d11ec58
Tree: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/tree/2d11ec58
Diff: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/diff/2d11ec58

Branch: refs/heads/min-max-stats
Commit: 2d11ec588e7e8d7a7a1a8adfc28fb30e5fe0852e
Parents: 172b51b
Author: navsan <nav...@gmail.com>
Authored: Mon Nov 7 14:35:01 2016 -0600
Committer: cramja <marc.spehlm...@gmail.com>
Committed: Mon Nov 21 14:28:02 2016 -0600

----------------------------------------------------------------------
 relational_operators/HashJoinOperator.cpp       | 150 ++++++++++++++++---
 storage/InsertDestination.cpp                   |  84 +++++++++++
 storage/InsertDestination.hpp                   |  16 ++
 storage/InsertDestinationInterface.hpp          |  22 +++
 storage/SplitRowStoreTupleStorageSubBlock.hpp   |   4 +-
 storage/StorageBlock.cpp                        |  24 +++
 storage/StorageBlock.hpp                        |  44 ++++++
 storage/TupleStorageSubBlock.hpp                |  50 +++++++
 types/containers/ColumnVectorsValueAccessor.hpp |   4 +
 9 files changed, 373 insertions(+), 25 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/2d11ec58/relational_operators/HashJoinOperator.cpp
----------------------------------------------------------------------
diff --git a/relational_operators/HashJoinOperator.cpp 
b/relational_operators/HashJoinOperator.cpp
index 4a91f86..2028046 100644
--- a/relational_operators/HashJoinOperator.cpp
+++ b/relational_operators/HashJoinOperator.cpp
@@ -65,10 +65,11 @@ namespace {
 
 // Functor passed to HashTable::getAllFromValueAccessor() to collect matching
 // tuples from the inner relation. It stores matching tuple ID pairs
-// in an unordered_map keyed by inner block ID.
-class MapBasedJoinedTupleCollector {
+// in an unordered_map keyed by inner block ID and a vector of
+// pairs of (build-tupleID, probe-tuple-ID).
+class VectorsOfPairsJoinedTuplesCollector {
  public:
-  MapBasedJoinedTupleCollector() {
+  VectorsOfPairsJoinedTuplesCollector() {
   }
 
   template <typename ValueAccessorT>
@@ -95,6 +96,34 @@ class MapBasedJoinedTupleCollector {
   std::unordered_map<block_id, std::vector<std::pair<tuple_id, tuple_id>>> 
joined_tuples_;
 };
 
+// Another collector that, like the one above, uses an unordered_map keyed on
+// the inner block ID. The difference is that each mapped value is a pair of
+// (build-tuple-IDs-vector, probe-tuple-IDs-vector) rather than a vector of
+// (build-tuple-ID, probe-tuple-ID) pairs.
+class PairsOfVectorsJoinedTuplesCollector {
+ public:
+  PairsOfVectorsJoinedTuplesCollector() {
+  }
+
+  // Records one match: 'tref' identifies the matching tuple (block and tuple
+  // ID) found in the hash table, and the accessor's current position is
+  // recorded as the other side of the match.
+  template <typename ValueAccessorT>
+  inline void operator()(const ValueAccessorT &accessor,
+                         const TupleReference &tref) {
+    // Look up (or create) the entry for this block exactly once instead of
+    // hashing the block ID separately for each of the two vectors.
+    auto &block_matches = joined_tuples_[tref.block];
+    block_matches.first.push_back(tref.tuple);
+    block_matches.second.push_back(accessor.getCurrentPosition());
+  }
+
+  // Get a mutable pointer to the collected map of joined tuple IDs. The
+  // key is inner block_id, value is a pair consisting of
+  // inner block tuple IDs (first) and outer block tuple IDs (second). The two
+  // vectors are appended to together, so they always have equal length and
+  // the i-th elements of each form one joined pair.
+  inline std::unordered_map<block_id, std::pair<std::vector<tuple_id>, std::vector<tuple_id>>>*
+      getJoinedTuples() {
+    return &joined_tuples_;
+  }
+
+ private:
+  std::unordered_map<
+    block_id,
+    std::pair<std::vector<tuple_id>, std::vector<tuple_id>>> joined_tuples_;
+};
+
 class SemiAntiJoinTupleCollector {
  public:
   explicit SemiAntiJoinTupleCollector(TupleIdSequence *filter)
@@ -432,7 +461,7 @@ void HashInnerJoinWorkOrder::execute() {
         
base_accessor->createSharedTupleIdSequenceAdapterVirtual(*existence_map));
   }
 
-  MapBasedJoinedTupleCollector collector;
+  PairsOfVectorsJoinedTuplesCollector collector;
   if (join_key_attributes_.size() == 1) {
     hash_table_.getAllFromValueAccessor(
         probe_accessor.get(),
@@ -450,12 +479,14 @@ void HashInnerJoinWorkOrder::execute() {
   const relation_id build_relation_id = build_relation_.getID();
   const relation_id probe_relation_id = probe_relation_.getID();
 
-  for (std::pair<const block_id, std::vector<std::pair<tuple_id, tuple_id>>>
+  for (std::pair<const block_id, std::pair<std::vector<tuple_id>, 
std::vector<tuple_id>>>
            &build_block_entry : *collector.getJoinedTuples()) {
     BlockReference build_block =
         storage_manager_->getBlock(build_block_entry.first, build_relation_);
     const TupleStorageSubBlock &build_store = 
build_block->getTupleStorageSubBlock();
     std::unique_ptr<ValueAccessor> 
build_accessor(build_store.createValueAccessor());
+    const std::vector<tuple_id> &build_tids = build_block_entry.second.first;
+    const std::vector<tuple_id> &probe_tids = build_block_entry.second.second;
 
     // Evaluate '*residual_predicate_', if any.
     //
@@ -468,17 +499,16 @@ void HashInnerJoinWorkOrder::execute() {
     // hash join is below a reasonable threshold so that we don't blow up
     // temporary memory requirements to an unreasonable degree.
     if (residual_predicate_ != nullptr) {
-      std::vector<std::pair<tuple_id, tuple_id>> filtered_matches;
-
-      for (const std::pair<tuple_id, tuple_id> &hash_match
-           : build_block_entry.second) {
+      std::pair<std::vector<tuple_id>, std::vector<tuple_id>> filtered_matches;
+      for (std::size_t i = 0; i < build_tids.size(); ++i) {
         if (residual_predicate_->matchesForJoinedTuples(*build_accessor,
                                                         build_relation_id,
-                                                        hash_match.first,
+                                                        build_tids[i],
                                                         *probe_accessor,
                                                         probe_relation_id,
-                                                        hash_match.second)) {
-          filtered_matches.emplace_back(hash_match);
+                                                        probe_tids[i])) {
+          filtered_matches.first.push_back(build_tids[i]);
+          filtered_matches.second.push_back(probe_tids[i]);
         }
       }
 
@@ -501,22 +531,96 @@ void HashInnerJoinWorkOrder::execute() {
     // benefit (probably only a real performance win when there are very few
     // matching tuples in each individual inner block but very many inner
     // blocks with at least one match).
+
+    // We now create ordered value accessors for both build and probe side,
+    // using the joined tuple TIDs. Note that we have to use this Lambda-based
+    // invocation method here because the accessors don't have a virtual
+    // function that creates such an 
OrderedTupleIdSequenceAdapterValueAccessor.
+    std::unique_ptr<ValueAccessor> ordered_build_accessor, 
ordered_probe_accessor;
+    InvokeOnValueAccessorNotAdapter(
+        build_accessor.get(),
+        [&](auto *accessor) -> void {  // NOLINT(build/c++11)
+          ordered_build_accessor.reset(
+              accessor->createSharedOrderedTupleIdSequenceAdapter(build_tids));
+        });
+
+    if (probe_accessor->isTupleIdSequenceAdapter()) {
+      InvokeOnTupleIdSequenceAdapterValueAccessor(
+        probe_accessor.get(),
+        [&](auto *accessor) -> void {  // NOLINT(build/c++11)
+          ordered_probe_accessor.reset(
+            accessor->createSharedOrderedTupleIdSequenceAdapter(probe_tids));
+        });
+    } else {
+      InvokeOnValueAccessorNotAdapter(
+        probe_accessor.get(),
+        [&](auto *accessor) -> void {  // NOLINT(build/c++11)
+          ordered_probe_accessor.reset(
+            accessor->createSharedOrderedTupleIdSequenceAdapter(probe_tids));
+        });
+    }
+
+
+    // We also need a temp value accessor to store results of any scalar 
expressions.
     ColumnVectorsValueAccessor temp_result;
-    for (vector<unique_ptr<const Scalar>>::const_iterator selection_cit = 
selection_.begin();
-         selection_cit != selection_.end();
-         ++selection_cit) {
-      
temp_result.addColumn((*selection_cit)->getAllValuesForJoin(build_relation_id,
-                                                                  
build_accessor.get(),
-                                                                  
probe_relation_id,
-                                                                  
probe_accessor.get(),
-                                                                  
build_block_entry.second));
+
+    // Create a map of ValueAccessors and what attributes we want to pick from 
them
+    std::vector<std::pair<ValueAccessor *, std::vector<attribute_id>>> 
accessor_attribute_map;
+    const std::vector<ValueAccessor *> accessors{
+        ordered_build_accessor.get(), ordered_probe_accessor.get(), 
&temp_result};
+    const unsigned int build_index = 0, probe_index = 1, temp_index = 2;
+    for (auto &accessor : accessors) {
+      accessor_attribute_map.push_back(std::make_pair(
+          accessor,
+          std::vector<attribute_id>(selection_.size(), kInvalidCatalogId)));
+    }
+
+    attribute_id dest_attr = 0;
+    std::vector<std::pair<tuple_id, tuple_id>> zipped_joined_tuple_ids;
+
+    for (auto &selection_cit : selection_) {
+      // If the Scalar (column) is not an attribute in build/probe blocks, then
+      // insert it into a ColumnVectorsValueAccessor.
+      if (selection_cit->getDataSource() != 
Scalar::ScalarDataSource::kAttribute) {
+        // Current destination attribute maps to the column we'll create now.
+        accessor_attribute_map[temp_index].second[dest_attr] = 
temp_result.getNumColumns();
+
+        if (temp_result.getNumColumns() == 0) {
+          // The getAllValuesForJoin function below needs joined tuple IDs as
+          // a vector of pair of (build-tuple-ID, probe-tuple-ID), and we have
+          // a pair of (build-tuple-IDs-vector, probe-tuple-IDs-vector). So
+          // we'll have to zip our two vectors together. We do this inside
+          // the loop because most queries don't exercise this code since
+          // they don't have scalar expressions with attributes from both
+          // build and probe relations (other expressions would have been
+          // pushed down to before the join).
+          zipped_joined_tuple_ids.reserve(build_tids.size());
+          for (std::size_t i = 0; i < build_tids.size(); ++i) {
+            zipped_joined_tuple_ids.push_back(std::make_pair(build_tids[i], 
probe_tids[i]));
+          }
+        }
+        temp_result.addColumn(
+            selection_cit
+                ->getAllValuesForJoin(build_relation_id, build_accessor.get(),
+                                      probe_relation_id, probe_accessor.get(),
+                                      zipped_joined_tuple_ids));
+      } else {
+        auto scalar_attr = static_cast<const ScalarAttribute 
*>(selection_cit.get());
+        const attribute_id attr_id = scalar_attr->getAttribute().getID();
+        if (scalar_attr->getAttribute().getParent().getID() == 
build_relation_id) {
+          accessor_attribute_map[build_index].second[dest_attr] = attr_id;
+        } else {
+          accessor_attribute_map[probe_index].second[dest_attr] = attr_id;
+        }
+      }
+      ++dest_attr;
     }
 
     // NOTE(chasseur): calling the bulk-insert method of InsertDestination once
     // for each pair of joined blocks incurs some extra overhead that could be
     // avoided by keeping checked-out MutableBlockReferences across iterations
     // of this loop, but that would get messy when combined with partitioning.
-    output_destination_->bulkInsertTuples(&temp_result);
+    
output_destination_->bulkInsertTuplesFromValueAccessors(accessor_attribute_map);
   }
 }
 
@@ -550,7 +654,7 @@ void HashSemiJoinWorkOrder::executeWithResidualPredicate() {
 
   // We collect all the matching probe relation tuples, as there's a residual
   // preidcate that needs to be applied after collecting these matches.
-  MapBasedJoinedTupleCollector collector;
+  VectorsOfPairsJoinedTuplesCollector collector;
   if (join_key_attributes_.size() == 1) {
     hash_table_.getAllFromValueAccessor(
         probe_accessor.get(),
@@ -759,7 +863,7 @@ void HashAntiJoinWorkOrder::executeWithResidualPredicate() {
         
base_accessor->createSharedTupleIdSequenceAdapterVirtual(*existence_map));
   }
 
-  MapBasedJoinedTupleCollector collector;
+  VectorsOfPairsJoinedTuplesCollector collector;
   // We probe the hash table and get all the matches. Unlike
   // executeWithoutResidualPredicate(), we have to collect all the matching
   // tuples, because after this step we still have to evalute the residual

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/2d11ec58/storage/InsertDestination.cpp
----------------------------------------------------------------------
diff --git a/storage/InsertDestination.cpp b/storage/InsertDestination.cpp
index 5e83453..067edf6 100644
--- a/storage/InsertDestination.cpp
+++ b/storage/InsertDestination.cpp
@@ -247,6 +247,90 @@ void 
InsertDestination::bulkInsertTuplesWithRemappedAttributes(
   });
 }
 
+// Filters out accessors whose attribute_map consists solely of gaps
+// (kInvalidCatalogId), i.e. accessors that contribute no column to the
+// output. This commonly happens for a join when no attributes are selected
+// from one side.
+//
+// Every accessor that is kept is appended (with a copy of its attribute map)
+// to '*reduced_accessor_attribute_map', and iteration is started on it via
+// beginIterationVirtual(). Accessors that are dropped are not touched.
+void removeGapOnlyAccessors(
+  const std::vector<std::pair<ValueAccessor *, std::vector<attribute_id>>>* accessor_attribute_map,
+  std::vector<std::pair<ValueAccessor *, const std::vector<attribute_id>>>* reduced_accessor_attribute_map) {
+  for (const auto &entry : *accessor_attribute_map) {
+    // Scan the map for at least one attribute that is actually read from
+    // this accessor.
+    bool selects_any_attribute = false;
+    for (const attribute_id source_attr : entry.second) {
+      if (source_attr != kInvalidCatalogId) {
+        selects_any_attribute = true;
+        break;
+      }
+    }
+
+    if (selects_any_attribute) {
+      reduced_accessor_attribute_map->push_back(entry);
+      entry.first->beginIterationVirtual();
+    }
+  }
+}
+
+// Bulk-inserts tuples whose columns are spread over several ValueAccessors.
+//
+// Each element of 'accessor_attribute_map' pairs an accessor with an
+// attribute map; a kInvalidCatalogId entry in the map marks an output
+// attribute that does not come from that accessor. Output blocks are obtained
+// with getBlockForInsertion() and handed back with returnBlock(); a block is
+// marked full when 'always_mark_full' is set or when there are still tuples
+// left to insert after it has been filled.
+void InsertDestination::bulkInsertTuplesFromValueAccessors(
+    const std::vector<std::pair<ValueAccessor *, std::vector<attribute_id>>> &accessor_attribute_map,
+    bool always_mark_full) {
+  // Handle pathological corner case where there are no accessors
+  if (accessor_attribute_map.empty()) {
+    return;
+  }
+
+  std::vector<std::pair<ValueAccessor *, const std::vector<attribute_id>>> reduced_accessor_attribute_map;
+  removeGapOnlyAccessors(&accessor_attribute_map, &reduced_accessor_attribute_map);
+
+  // If every accessor was gap-only, there is no accessor left to drive the
+  // insertion loop. Indexing element 0 below would be undefined behavior, so
+  // treat this like the "no accessors" corner case above.
+  if (reduced_accessor_attribute_map.empty()) {
+    return;
+  }
+
+  // We assume that all input accessors have the same number of tuples, so
+  // the iterations finish together. Therefore, we can just check the first one.
+  ValueAccessor *first_accessor = reduced_accessor_attribute_map[0].first;
+  while (!first_accessor->iterationFinishedVirtual()) {
+    tuple_id num_tuples_to_insert = kCatalogMaxID;
+    tuple_id num_tuples_inserted = 0;
+    MutableBlockReference output_block = this->getBlockForInsertion();
+
+    // Now iterate through all the accessors and do one round of bulk-insertion
+    // of partial tuples into the selected output_block.
+    // While inserting from the first ValueAccessor, space is reserved for
+    // all the columns including those coming from other ValueAccessors.
+    // Thereafter, in a given round, we only insert the remaining columns of the
+    // same tuples from the other ValueAccessors.
+    for (const auto &p : reduced_accessor_attribute_map) {
+      ValueAccessor *accessor = p.first;
+      // Bind by reference: the map is only read here, so there is no need to
+      // copy the vector once per accessor for every output block.
+      const std::vector<attribute_id> &attribute_map = p.second;
+
+      InvokeOnAnyValueAccessor(
+          accessor,
+          [&](auto *typed_accessor) -> void {  // NOLINT(build/c++11)
+            num_tuples_inserted = output_block->bulkInsertPartialTuples(
+                attribute_map, typed_accessor, num_tuples_to_insert);
+      });
+
+      if (accessor == first_accessor) {
+        // Now we know how many full tuples can be inserted into this
+        // output_block (viz. number of tuples inserted from first ValueAccessor).
+        // We should only insert that many tuples from the remaining
+        // ValueAccessors as well.
+        num_tuples_to_insert = num_tuples_inserted;
+      } else {
+        // Since the bulk insertion of the first ValueAccessor should already
+        // have reserved the space for all the other ValueAccessors' columns,
+        // we must have been able to insert all the tuples we asked to insert.
+        DCHECK(num_tuples_inserted == num_tuples_to_insert);
+      }
+    }
+
+    // After one round of insertions, we have successfully inserted as many
+    // tuples as possible into the output_block. Strictly speaking, it's
+    // possible that there is more space for insertions because the size
+    // estimation of variable length columns is conservative. But we will ignore
+    // that case and proceed assuming that this output_block is full.
+
+    // Update the header for output_block and then return it.
+    output_block->bulkInsertPartialTuplesFinalize(num_tuples_inserted);
+    const bool mark_full = always_mark_full
+                           || !first_accessor->iterationFinishedVirtual();
+    this->returnBlock(std::move(output_block), mark_full);
+  }
+}
+
 void 
InsertDestination::insertTuplesFromVector(std::vector<Tuple>::const_iterator 
begin,
                                                
std::vector<Tuple>::const_iterator end) {
   if (begin == end) {

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/2d11ec58/storage/InsertDestination.hpp
----------------------------------------------------------------------
diff --git a/storage/InsertDestination.hpp b/storage/InsertDestination.hpp
index 408e76b..3487638 100644
--- a/storage/InsertDestination.hpp
+++ b/storage/InsertDestination.hpp
@@ -152,6 +152,10 @@ class InsertDestination : public 
InsertDestinationInterface {
       ValueAccessor *accessor,
       bool always_mark_full = false) override;
 
+  void bulkInsertTuplesFromValueAccessors(
+      const std::vector<std::pair<ValueAccessor *, std::vector<attribute_id>>> 
&accessor_attribute_map,
+      bool always_mark_full = false) override;
+
   void insertTuplesFromVector(std::vector<Tuple>::const_iterator begin,
                               std::vector<Tuple>::const_iterator end) override;
 
@@ -313,6 +317,12 @@ class AlwaysCreateBlockInsertDestination : public 
InsertDestination {
   ~AlwaysCreateBlockInsertDestination() override {
   }
 
+  // Multi-accessor bulk insertion is not available for this destination
+  // type: any call aborts through LOG(FATAL) without using its arguments.
+  void bulkInsertTuplesFromValueAccessors(
+      const std::vector<std::pair<ValueAccessor *, std::vector<attribute_id>>> &accessor_attribute_map,
+      bool always_mark_full = false) override {
+    LOG(FATAL) << "bulkInsertTuplesFromValueAccessors is not implemented for AlwaysCreateBlockInsertDestination";
+  }
+
  protected:
   MutableBlockReference getBlockForInsertion() override;
 
@@ -517,6 +527,12 @@ class PartitionAwareInsertDestination : public 
InsertDestination {
       ValueAccessor *accessor,
       bool always_mark_full = false) override;
 
+  // Multi-accessor bulk insertion is not available for this destination
+  // type: any call aborts through LOG(FATAL) without using its arguments.
+  void bulkInsertTuplesFromValueAccessors(
+      const std::vector<std::pair<ValueAccessor *, std::vector<attribute_id>>> &accessor_attribute_map,
+      bool always_mark_full = false) override {
+    LOG(FATAL) << "bulkInsertTuplesFromValueAccessors is not implemented for PartitionAwareInsertDestination";
+  }
+
   void insertTuplesFromVector(std::vector<Tuple>::const_iterator begin,
                               std::vector<Tuple>::const_iterator end) override;
 

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/2d11ec58/storage/InsertDestinationInterface.hpp
----------------------------------------------------------------------
diff --git a/storage/InsertDestinationInterface.hpp 
b/storage/InsertDestinationInterface.hpp
index 423dff1..b62d3e5 100644
--- a/storage/InsertDestinationInterface.hpp
+++ b/storage/InsertDestinationInterface.hpp
@@ -20,6 +20,7 @@
 #ifndef QUICKSTEP_STORAGE_INSERT_DESTINATION_INTERFACE_HPP_
 #define QUICKSTEP_STORAGE_INSERT_DESTINATION_INTERFACE_HPP_
 
+#include <utility>
 #include <vector>
 
 #include "catalog/CatalogTypedefs.hpp"
@@ -122,6 +123,27 @@ class InsertDestinationInterface {
       bool always_mark_full = false) = 0;
 
   /**
+   * @brief Bulk-insert tuples from one or more ValueAccessors
+   *        into blocks managed by this InsertDestination.
+   *
+   * @warning It is implicitly assumed that all the input ValueAccessors have
+   *          the same number of tuples in them.
+   *
+   * @param accessor_attribute_map A vector of pairs of ValueAccessor and
+   *        corresponding attribute map
+   *        The i-th attribute ID in the attr map for a value accessor is "n" 
+   *        if the attribute_id "i" in the output relation
+   *        is the attribute_id "n" in corresponding input value accessor.
+   *        Set the i-th element to kInvalidCatalogId if it doesn't come from
+   *        the corresponding value accessor.
+   * @param always_mark_full If \c true, always mark the blocks full after
+   *        insertion from ValueAccessor even when partially full.
+   **/
+  virtual void bulkInsertTuplesFromValueAccessors(
+      const std::vector<std::pair<ValueAccessor *, std::vector<attribute_id>>> 
&accessor_attribute_map,
+      bool always_mark_full = false) = 0;
+
+  /**
    * @brief Insert tuples from a range of Tuples in a vector.
    * @warning Unlike bulkInsertTuples(), this is not well-optimized and not
    *          intended for general use. It should only be used by

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/2d11ec58/storage/SplitRowStoreTupleStorageSubBlock.hpp
----------------------------------------------------------------------
diff --git a/storage/SplitRowStoreTupleStorageSubBlock.hpp 
b/storage/SplitRowStoreTupleStorageSubBlock.hpp
index 681001e..89c756d 100644
--- a/storage/SplitRowStoreTupleStorageSubBlock.hpp
+++ b/storage/SplitRowStoreTupleStorageSubBlock.hpp
@@ -304,9 +304,9 @@ class SplitRowStoreTupleStorageSubBlock: public 
TupleStorageSubBlock {
   tuple_id bulkInsertPartialTuples(
     const std::vector<attribute_id> &attribute_map,
     ValueAccessor *accessor,
-    const tuple_id max_num_tuples_to_insert);
+    const tuple_id max_num_tuples_to_insert) override;
 
-  void bulkInsertPartialTuplesFinalize(const tuple_id num_tuples_inserted);
+  void bulkInsertPartialTuplesFinalize(const tuple_id num_tuples_inserted) 
override;
 
   const void* getAttributeValue(const tuple_id tuple,
                                 const attribute_id attr) const override;

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/2d11ec58/storage/StorageBlock.cpp
----------------------------------------------------------------------
diff --git a/storage/StorageBlock.cpp b/storage/StorageBlock.cpp
index ea74ee6..6267d6b 100644
--- a/storage/StorageBlock.cpp
+++ b/storage/StorageBlock.cpp
@@ -284,6 +284,30 @@ tuple_id 
StorageBlock::bulkInsertTuplesWithRemappedAttributes(
   return num_inserted;
 }
 
+// Forwards a partial bulk insert to the tuple store and performs the
+// block-level bookkeeping: when at least one tuple went in, all indexes are
+// invalidated and the block is flagged dirty. When nothing went in, the tuple
+// store reports itself empty, and the accessor still has tuples left,
+// TupleTooLargeForBlock is thrown.
+tuple_id StorageBlock::bulkInsertPartialTuples(
+    const std::vector<attribute_id> &attribute_map,
+    ValueAccessor *accessor,
+    const tuple_id max_num_tuples_to_insert) {
+  const tuple_id inserted_count = tuple_store_->bulkInsertPartialTuples(
+      attribute_map, accessor, max_num_tuples_to_insert);
+
+  if (inserted_count != 0) {
+    invalidateAllIndexes();
+    dirty_ = true;
+    return inserted_count;
+  }
+
+  // Nothing was inserted. That is only an error when the tuple store is
+  // empty and the accessor has not been exhausted.
+  if (tuple_store_->isEmpty() && !accessor->iterationFinishedVirtual()) {
+    throw TupleTooLargeForBlock(0);
+  }
+  return inserted_count;
+}
+
+// Completes a partial bulk insert by delegating to the tuple store's
+// bulkInsertPartialTuplesFinalize() with the given tuple count.
+void StorageBlock::bulkInsertPartialTuplesFinalize(const tuple_id num_tuples_inserted) {
+  tuple_store_->bulkInsertPartialTuplesFinalize(num_tuples_inserted);
+}
+
 void StorageBlock::sample(const bool is_block_sample,
                           const int percentage,
                           InsertDestinationInterface *destination) const {

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/2d11ec58/storage/StorageBlock.hpp
----------------------------------------------------------------------
diff --git a/storage/StorageBlock.hpp b/storage/StorageBlock.hpp
index 56b3bdc..ed252c5 100644
--- a/storage/StorageBlock.hpp
+++ b/storage/StorageBlock.hpp
@@ -307,6 +307,7 @@ class StorageBlock : public StorageBlockBase {
    *        iteration will be advanced to the first non-inserted tuple or, if
    *        all accessible tuples were inserted in this block, to the end
    *        position.
+   * @param max_tuples_to_insert Insert at most these many tuples
    * @return The number of tuples inserted from accessor.
    **/
   tuple_id bulkInsertTuplesWithRemappedAttributes(
@@ -314,6 +315,49 @@ class StorageBlock : public StorageBlockBase {
       ValueAccessor *accessor);
 
   /**
+   * @brief Insert up to max_num_tuples_to_insert tuples from a ValueAccessor
+   *        as a single batch, using the attribute_map to project and reorder
+   *        columns from the input ValueAccessor. Does not update header.
+   *
+   * @note Typical usage is where you want to bulk-insert columns from two
+   *       or more value accessors. Instead of writing out the columns into
+   *       one or more column vector value accessors, you can simply use this
+   *       function with the appropriate attribute_map for each value
+   *       accessor (InsertDestination::bulkInsertTuplesFromValueAccessors
+   *       handles all the details) to insert tuples without an extra temp 
copy.
+   * 
+   * @warning Must call bulkInsertPartialTuplesFinalize() to update the header,
+   *          until which point, the insertion is not visible to others.
+   * @warning The inserted tuples may be placed in sub-optimal locations in 
this
+   *          TupleStorageSubBlock.
+   *
+   * @param attribute_map A vector which maps the attributes of this
+   *        TupleStorageSubBlock's relation (gaps indicated with 
kInvalidCatalogId)
+   *         to the corresponding attributes which should be read from 
accessor.
+   * @param accessor A ValueAccessor to insert tuples from. The accessor's
+   *        iteration will be advanced to the first non-inserted tuple or, if
+   *        all accessible tuples were inserted in this block, to the end
+   *        position.
+   * @param max_num_tuples_to_insert Insert at most this many tuples.
+   * @return The number of tuples inserted from accessor.
+   **/
+  tuple_id bulkInsertPartialTuples(
+    const std::vector<attribute_id> &attribute_map,
+    ValueAccessor *accessor,
+    const tuple_id max_num_tuples_to_insert);
+
+  /**
+   * @brief Update header after a bulkInsertPartialTuples.
+   *
+   * @warning Only call this after a bulkInsertPartialTuples, passing in the
+   *          number of tuples that were inserted (return value of that 
function).
+   *
+   * @param num_tuples_inserted Number of tuples inserted (i.e., how much to
+   *        advance the header.num_tuples by). Should be equal to the return
+   *        value of bulkInsertPartialTuples.
+   **/
+  void bulkInsertPartialTuplesFinalize(tuple_id num_tuples_inserted);
+
+  /**
    * @brief Get the IDs of tuples in this StorageBlock which match a given 
Predicate.
    *
    * @param predicate The predicate to match.

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/2d11ec58/storage/TupleStorageSubBlock.hpp
----------------------------------------------------------------------
diff --git a/storage/TupleStorageSubBlock.hpp b/storage/TupleStorageSubBlock.hpp
index aed6eea..26e8027 100644
--- a/storage/TupleStorageSubBlock.hpp
+++ b/storage/TupleStorageSubBlock.hpp
@@ -272,6 +272,56 @@ class TupleStorageSubBlock {
       ValueAccessor *accessor) = 0;
 
   /**
+   * @brief Insert up to max_num_tuples_to_insert tuples from a ValueAccessor
+   *        as a single batch, using the attribute_map to project and reorder
+   *        columns from the input ValueAccessor. Does not update header.
+   *
+   * @note Typical usage is where you want to bulk-insert columns from two
+   *       or more value accessors. Instead of writing out the columns into
+   *       one or more column vector value accessors, you can simply use this
+   *       function with the appropriate attribute_map for each value
+   *       accessor (InsertDestination::bulkInsertTuplesFromValueAccessors
+   *       handles all the details) to insert tuples without an extra temp 
copy.
+   * 
+   * @warning Must call bulkInsertPartialTuplesFinalize() to update the header,
+   *          until which point, the insertion is not visible to others.
+   * @warning The inserted tuples may be placed in a suboptimal position in the
+   *          block.
+   *
+   * @param attribute_map A vector which maps the attributes of this
+   *        TupleStorageSubBlock's relation (gaps indicated with 
kInvalidCatalogId)
+   *         to the corresponding attributes which should be read from 
accessor.
+   * @param accessor A ValueAccessor to insert tuples from. The accessor's
+   *        iteration will be advanced to the first non-inserted tuple or, if
+   *        all accessible tuples were inserted in this sub-block, to the end
+   *        position.
+   * @param max_num_tuples_to_insert Insert at most this many tuples.
+   * @return The number of tuples inserted from accessor.
+   **/
+  virtual tuple_id bulkInsertPartialTuples(
+      const std::vector<attribute_id> &attribute_map,
+      ValueAccessor *accessor,
+      const tuple_id max_num_tuples_to_insert) {
+    // Default for sub-block types that do not override this method: abort
+    // and report the sub-block type in the message.
+    LOG(FATAL) << "Partial bulk insert is not supported for this TupleStorageBlock type ("
+               << getTupleStorageSubBlockType()
+               << ").";
+  }
+
+  /**
+   * @brief Update header after a bulkInsertPartialTuples.
+   *
+   * @warning Only call this after a bulkInsertPartialTuples, passing in the
+   *          number of tuples that were inserted (return value of that 
function).
+   *
+   * @param num_tuples_inserted Number of tuples inserted (i.e., how much to
+   *        advance the header.num_tuples by). Should be equal to the return
+   *        value of bulkInsertPartialTuples.
+   **/
+  virtual void bulkInsertPartialTuplesFinalize(const tuple_id num_tuples_inserted) {
+    // Default for sub-block types that do not override this method: abort
+    // and report the sub-block type in the message.
+    LOG(FATAL) << "Partial bulk insert is not supported for this TupleStorageBlock type ("
+               << getTupleStorageSubBlockType()
+               << ").";
+  }
+
+  /**
    * @brief Get the (untyped) value of an attribute in a tuple in this buffer.
    * @warning This method may not be supported for all implementations of
    *          TupleStorageSubBlock. supportsUntypedGetAttributeValue() MUST be

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/2d11ec58/types/containers/ColumnVectorsValueAccessor.hpp
----------------------------------------------------------------------
diff --git a/types/containers/ColumnVectorsValueAccessor.hpp 
b/types/containers/ColumnVectorsValueAccessor.hpp
index fe413a0..fbbdc1b 100644
--- a/types/containers/ColumnVectorsValueAccessor.hpp
+++ b/types/containers/ColumnVectorsValueAccessor.hpp
@@ -139,6 +139,10 @@ class ColumnVectorsValueAccessor : public ValueAccessor {
     return nullptr;
   }
 
+  // Returns how many columns are currently held in columns_.
+  inline std::size_t getNumColumns() const {
+    const std::size_t column_count = columns_.size();
+    return column_count;
+  }
+
   template <bool check_null = true>
   inline const void* getUntypedValue(const attribute_id attr_id) const {
     return getUntypedValueAtAbsolutePosition<check_null>(attr_id, 
current_position_);

[24/39] incubator-quickstep git commit: Adds support for PartialBulkInserts in StorageBlocks

Reply via email to