This is an automated email from the ASF dual-hosted git repository.

adar pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git

commit b27db958c5bd99b778f739171d0bfe4d66ec9715
Author: Adar Dembo <[email protected]>
AuthorDate: Thu Apr 11 20:37:46 2019 -0700

    rowblock: add copying functionality
    
    This patch adds RowBlock::CopyTo, a function that enables copying of row
    data between RowBlocks. It's a building block for the "whole block copy"
    MergeIterator optimization, wherein part of a (or an entire) sub-iterator
    RowBlock is copied to the client's RowBlock.
    
    Change-Id: I735796f11e3a388ffc66e3d92f8c2097cdec3a91
    Reviewed-on: http://gerrit.cloudera.org:8080/13008
    Reviewed-by: Mike Percy <[email protected]>
    Tested-by: Adar Dembo <[email protected]>
---
 src/kudu/common/CMakeLists.txt      |   1 +
 src/kudu/common/columnblock-test.cc | 110 ++++++++++++++++++++++++++++++++++++
 src/kudu/common/columnblock.cc      |  60 ++++++++++++++++++++
 src/kudu/common/columnblock.h       |  31 ++++++++--
 src/kudu/common/rowblock.cc         |  17 ++++--
 src/kudu/common/rowblock.h          |  51 +++++++++++++++++
 6 files changed, 262 insertions(+), 8 deletions(-)

diff --git a/src/kudu/common/CMakeLists.txt b/src/kudu/common/CMakeLists.txt
index fd8d64f..eb6a783 100644
--- a/src/kudu/common/CMakeLists.txt
+++ b/src/kudu/common/CMakeLists.txt
@@ -40,6 +40,7 @@ ADD_EXPORTABLE_LIBRARY(wire_protocol_proto
   NONLINK_DEPS ${WIRE_PROTOCOL_PROTO_TGTS})
 
 set(COMMON_SRCS
+  columnblock.cc
   column_predicate.cc
   encoded_key.cc
   generic_iterators.cc
diff --git a/src/kudu/common/columnblock-test.cc b/src/kudu/common/columnblock-test.cc
index 9ded126..5bed5d4 100644
--- a/src/kudu/common/columnblock-test.cc
+++ b/src/kudu/common/columnblock-test.cc
@@ -17,9 +17,23 @@
 
 #include "kudu/common/columnblock.h"
 
+#include <string>
+
 #include <gtest/gtest.h>
 
 #include "kudu/common/common.pb.h"
+#include "kudu/common/rowblock.h"
+#include "kudu/common/types.h"
+#include "kudu/gutil/strings/substitute.h"
+#include "kudu/util/memory/arena.h"
+#include "kudu/util/test_macros.h"
+
+namespace kudu {
+class Slice;
+} // namespace kudu
+
+using std::string;
+using strings::Substitute;
 
 namespace kudu {
 
@@ -56,4 +70,100 @@ TEST(TestColumnBlock, TestEquals) {
   ASSERT_EQ(scb5, scb6);
 }
 
+TEST(TestColumnBlock, TestCopyTo) {
+  ScopedColumnBlock<UINT32> src(8, /*allow_nulls=*/false);
+  ScopedColumnBlock<UINT32> dst(8, /*allow_nulls=*/false);
+
+  for (int i = 0; i < src.nrows(); i++) {
+    src[i] = i;
+  }
+  for (int i = 0; i < dst.nrows(); i++) {
+    dst[i] = 100;
+  }
+
+  SelectionVector sv(src.nrows());
+  sv.SetAllTrue();
+
+  // src: 0   1   2   3   4   5   6   7
+  // dst: 100 100 100 100 100 100 100 100
+  // ------------------------------------
+  // dst: 100 100 100 100 100 3   4   5
+  ASSERT_OK(src.CopyTo(sv, &dst, 3, 5, 3));
+
+  for (int i = 0; i < dst.nrows(); i++) {
+    int expected_val = i < 5 ? 100 : i - 2;
+    ASSERT_EQ(expected_val, dst[i]);
+  }
+}
+
+TEST(TestColumnBlock, TestCopyToIndirectData) {
+  ScopedColumnBlock<STRING> src(8, /*allow_nulls=*/false);
+  ScopedColumnBlock<STRING> dst(8, /*allow_nulls=*/false);
+
+  // Ignore idx 3, and poke a corresponding hole in the selection vector.
+  Slice* next_cell = reinterpret_cast<Slice*>(src.data());
+  for (int i = 0; i < src.nrows(); i++, next_cell++) {
+    if (i == 3) continue;
+    ASSERT_TRUE(src.arena()->RelocateSlice(Substitute("h$0", i), next_cell));
+  }
+  next_cell = reinterpret_cast<Slice*>(dst.data());
+  for (int i = 0; i < dst.nrows(); i++, next_cell++) {
+    ASSERT_TRUE(dst.arena()->RelocateSlice("", next_cell));
+  }
+
+  SelectionVector sv(src.nrows());
+  sv.SetAllTrue();
+  sv.SetRowUnselected(3);
+
+  // src: h0 h1 h2 ?? h4 h5 h6 h7
+  // dst: "" "" "" "" "" "" "" ""
+  // ----------------------------
+  // dst: "" "" "" "" "" "" h4 h5
+  ASSERT_OK(src.CopyTo(sv, &dst, 3, 5, 3));
+
+  for (int i = 0; i < dst.nrows(); i++) {
+    string expected_val = i < 6 ? "" : Substitute("h$0", i - 2);
+    ASSERT_EQ(expected_val, dst[i].ToString());
+  }
+}
+
+TEST(TestColumnBlock, TestCopyToNulls) {
+  ScopedColumnBlock<UINT32> src(8);
+  ScopedColumnBlock<UINT32> dst(8);
+
+  // Initialize idx 3 to null in both 'src' and 'dst'.
+  for (int i = 0; i < src.nrows(); i++) {
+    src.SetCellIsNull(i, i == 3);
+    if (i != 3) {
+      src[i] = i;
+    }
+  }
+  for (int i = 0; i < dst.nrows(); i++) {
+    dst.SetCellIsNull(i, i == 3);
+    if (i != 3) {
+      dst[i] = 100;
+    }
+  }
+
+  SelectionVector sv(src.nrows());
+  sv.SetAllTrue();
+
+  // src: 0   1   2   null 4   5    6   7
+  // dst: 100 100 100 null 100 100  100 100
+  // --------------------------------------
+  // dst: 100 100 100 null 100 null 4   5
+  ASSERT_OK(src.CopyTo(sv, &dst, 3, 5, 3));
+
+  for (int i = 0; i < dst.nrows(); i++) {
+    SCOPED_TRACE(i);
+    if (i == 3 || i == 5) {
+      ASSERT_TRUE(dst.is_null(i));
+    } else {
+      ASSERT_FALSE(dst.is_null(i));
+      int expected_val = i < 6 ? 100 : i - 2;
+      ASSERT_EQ(expected_val, dst[i]);
+    }
+  }
+}
+
 } // namespace kudu
diff --git a/src/kudu/common/columnblock.cc b/src/kudu/common/columnblock.cc
new file mode 100644
index 0000000..68ce18c
--- /dev/null
+++ b/src/kudu/common/columnblock.cc
@@ -0,0 +1,60 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "kudu/common/columnblock.h"
+
+#include <cstring>
+
+#include "kudu/common/row.h"
+#include "kudu/common/rowblock.h"
+
+namespace kudu {
+
+Status ColumnBlock::CopyTo(const SelectionVector& sel_vec,
+                           ColumnBlock* dst, size_t src_cell_off,
+                           size_t dst_cell_off, size_t num_cells) const {
+  DCHECK_EQ(type_, dst->type_);
+  DCHECK_EQ(is_nullable(), dst->is_nullable());
+  DCHECK_GE(nrows_, src_cell_off + num_cells);
+  DCHECK_GE(dst->nrows_, dst_cell_off + num_cells);
+
+  // Columns with indirect data need to be copied cell-by-cell in order to
+  // perform arena relocation. Deselected cells must be skipped; the source
+  // content could be garbage so it'd be unsafe to access it as indirect data.
+  if (type_->physical_type() == BINARY) {
+    for (size_t cell_idx = 0; cell_idx < num_cells; cell_idx++) {
+      if (sel_vec.IsRowSelected(src_cell_off + cell_idx)) {
+        Cell s(cell(src_cell_off + cell_idx));
+        Cell d(dst->cell(dst_cell_off + cell_idx));
+        RETURN_NOT_OK(CopyCell(s, &d, dst->arena())); // Also copies nullability.
+      }
+    }
+  } else {
+    memcpy(dst->data_ + (dst_cell_off * type_->size()),
+           data_ + (src_cell_off * type_->size()),
+           num_cells * type_->size());
+    if (null_bitmap_) {
+      BitmapCopy(dst->null_bitmap_, dst_cell_off,
+                 null_bitmap_, src_cell_off,
+                 num_cells);
+    }
+  }
+
+  return Status::OK();
+}
+
+} // namespace kudu
diff --git a/src/kudu/common/columnblock.h b/src/kudu/common/columnblock.h
index fe23093..45d0d2a 100644
--- a/src/kudu/common/columnblock.h
+++ b/src/kudu/common/columnblock.h
@@ -14,14 +14,21 @@
 // KIND, either express or implied.  See the License for the
 // specific language governing permissions and limitations
 // under the License.
-#ifndef KUDU_COMMON_COLUMNBLOCK_H
-#define KUDU_COMMON_COLUMNBLOCK_H
 
+#pragma once
+
+#include <cstddef>
+#include <cstdint>
+#include <ostream>
 #include <string>
 
-#include "kudu/common/row.h"
+#include <glog/logging.h>
+
+#include "kudu/common/common.pb.h"
 #include "kudu/common/types.h"
 #include "kudu/gutil/gscoped_ptr.h"
+#include "kudu/gutil/strings/fastmem.h"
+#include "kudu/gutil/strings/stringpiece.h"
 #include "kudu/util/bitmap.h"
 #include "kudu/util/memory/arena.h"
 #include "kudu/util/memory/overwrite.h"
@@ -30,6 +37,7 @@
 namespace kudu {
 
 class ColumnBlockCell;
+class SelectionVector;
 
 // A block of data all belonging to a single column.
 // This is simply a view into a buffer - it does not have any associated
@@ -121,6 +129,22 @@ class ColumnBlock {
     return s;
   }
 
+  // Copies a range of cells between two ColumnBlocks.
+  //
+  // The extent of the range is designated by 'src_cell_off' and 'num_cells'. It
+  // is copied to 'dst' at 'dst_cell_off'.
+  //
+  // Note: The inclusion of 'sel_vec' in this function is an admission that
+  // ColumnBlocks are always used via RowBlocks, and a requirement for safe
+  // handling of types with indirect data (i.e. deselected cells are not
+  // relocated because doing so would be unsafe).
+  //
+  // TODO(adar): for columns with indirect data, existing arena allocations
+  // belonging to cells in 'dst' that are overwritten will NOT be deallocated.
+  Status CopyTo(const SelectionVector& sel_vec,
+                ColumnBlock* dst, size_t src_cell_off,
+                size_t dst_cell_off, size_t num_cells) const;
+
  private:
   friend class ColumnBlockCell;
   friend class ColumnDataView;
@@ -295,4 +319,3 @@ class ScopedColumnBlock : public ColumnBlock {
 };
 
 } // namespace kudu
-#endif
diff --git a/src/kudu/common/rowblock.cc b/src/kudu/common/rowblock.cc
index 32cce4b..8c4a160 100644
--- a/src/kudu/common/rowblock.cc
+++ b/src/kudu/common/rowblock.cc
@@ -19,6 +19,7 @@
 #include <glog/logging.h>
 
 #include "kudu/gutil/bits.h"
+#include "kudu/gutil/port.h"
 #include "kudu/util/bitmap.h"
 
 namespace kudu {
@@ -32,6 +33,10 @@ SelectionVector::SelectionVector(size_t row_capacity)
 }
 
 void SelectionVector::Resize(size_t n_rows) {
+  if (PREDICT_FALSE(n_rows == n_rows_)) {
+    return;
+  }
+
   size_t new_bytes = BitmapSize(n_rows);
   CHECK_LE(new_bytes, bytes_capacity_);
   n_rows_ = n_rows;
@@ -143,10 +148,14 @@ RowBlock::~RowBlock() {
   }
 }
 
-void RowBlock::Resize(size_t new_size) {
-  CHECK_LE(new_size, row_capacity_);
-  nrows_ = new_size;
-  sel_vec_.Resize(new_size);
+void RowBlock::Resize(size_t n_rows) {
+  if (PREDICT_FALSE(n_rows == nrows_)) {
+    return;
+  }
+
+  CHECK_LE(n_rows, row_capacity_);
+  nrows_ = n_rows;
+  sel_vec_.Resize(n_rows);
 }
 
 } // namespace kudu
diff --git a/src/kudu/common/rowblock.h b/src/kudu/common/rowblock.h
index d2cfc82..aee55a1 100644
--- a/src/kudu/common/rowblock.h
+++ b/src/kudu/common/rowblock.h
@@ -31,6 +31,7 @@
 #include "kudu/gutil/macros.h"
 #include "kudu/gutil/strings/stringpiece.h"
 #include "kudu/util/bitmap.h"
+#include "kudu/util/status.h"
 
 namespace kudu {
 
@@ -128,6 +129,27 @@ class SelectionVector {
 
   size_t nrows() const { return n_rows_; }
 
+  // Copies a range of bits between two SelectionVectors.
+  //
+  // The extent of the range is designated by 'src_row_off' and 'num_rows'. It
+  // is copied to 'dst' at 'dst_row_off'.
+  //
+  // Note: 'dst' will be resized if the copy causes it to grow (though this is
+  // just a "logical" resize; no reallocation takes place).
+  void CopyTo(SelectionVector* dst, size_t src_row_off,
+              size_t dst_row_off, size_t num_rows) const {
+    DCHECK_GE(n_rows_, src_row_off + num_rows);
+
+    size_t new_num_rows = dst_row_off + num_rows;
+    if (new_num_rows > dst->nrows()) {
+      // This will crash if 'dst' lacks adequate capacity.
+      dst->Resize(new_num_rows);
+    }
+
+    BitmapCopy(dst->mutable_bitmap(), dst_row_off,
+               bitmap_.get(), src_row_off, num_rows);
+  }
+
  private:
   // The number of allocated bytes in bitmap_
   size_t bytes_capacity_;
@@ -277,6 +299,35 @@ class RowBlock {
     return &sel_vec_;
   }
 
+  // Copies a range of rows between two RowBlocks.
+  //
+  // The extent of the range is designated by 'src_row_off' and 'num_rows'. It
+  // is copied to 'dst' at 'dst_row_off'.
+  //
+  // Note: 'dst' will be resized if the copy causes it to grow (though this is
+  // just a "logical" resize; no reallocation takes place).
+  Status CopyTo(RowBlock* dst, size_t src_row_off,
+                size_t dst_row_off, size_t num_rows) const {
+    DCHECK_SCHEMA_EQ(*schema_, *dst->schema());
+    DCHECK_GE(nrows_, src_row_off + num_rows);
+
+    size_t new_num_rows = dst_row_off + num_rows;
+    if (new_num_rows > dst->nrows()) {
+      // This will crash if 'dst' lacks adequate capacity.
+      dst->Resize(new_num_rows);
+    }
+
+    for (size_t col_idx = 0; col_idx < schema_->num_columns(); col_idx++) {
+      ColumnBlock src_cb(column_block(col_idx));
+      ColumnBlock dst_cb(dst->column_block(col_idx));
+      RETURN_NOT_OK(src_cb.CopyTo(sel_vec_, &dst_cb,
+                                  src_row_off, dst_row_off, num_rows));
+    }
+
+    sel_vec_.CopyTo(&dst->sel_vec_, src_row_off, dst_row_off, num_rows);
+    return Status::OK();
+  }
+
  private:
   DISALLOW_COPY_AND_ASSIGN(RowBlock);
 

Reply via email to