This is an automated email from the ASF dual-hosted git repository. adar pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/kudu.git
commit b27db958c5bd99b778f739171d0bfe4d66ec9715 Author: Adar Dembo <[email protected]> AuthorDate: Thu Apr 11 20:37:46 2019 -0700 rowblock: add copying functionality This patch adds RowBlock::CopyTo, a function that enables copying of row data between RowBlocks. It's a building block for the "whole block copy" MergeIterator optimization, wherein part of a (or an entire) sub-iterator RowBlock is copied to the client's RowBlock. Change-Id: I735796f11e3a388ffc66e3d92f8c2097cdec3a91 Reviewed-on: http://gerrit.cloudera.org:8080/13008 Reviewed-by: Mike Percy <[email protected]> Tested-by: Adar Dembo <[email protected]> --- src/kudu/common/CMakeLists.txt | 1 + src/kudu/common/columnblock-test.cc | 110 ++++++++++++++++++++++++++++++++++++ src/kudu/common/columnblock.cc | 60 ++++++++++++++++++++ src/kudu/common/columnblock.h | 31 ++++++++-- src/kudu/common/rowblock.cc | 17 ++++-- src/kudu/common/rowblock.h | 51 +++++++++++++++++ 6 files changed, 262 insertions(+), 8 deletions(-) diff --git a/src/kudu/common/CMakeLists.txt b/src/kudu/common/CMakeLists.txt index fd8d64f..eb6a783 100644 --- a/src/kudu/common/CMakeLists.txt +++ b/src/kudu/common/CMakeLists.txt @@ -40,6 +40,7 @@ ADD_EXPORTABLE_LIBRARY(wire_protocol_proto NONLINK_DEPS ${WIRE_PROTOCOL_PROTO_TGTS}) set(COMMON_SRCS + columnblock.cc column_predicate.cc encoded_key.cc generic_iterators.cc diff --git a/src/kudu/common/columnblock-test.cc b/src/kudu/common/columnblock-test.cc index 9ded126..5bed5d4 100644 --- a/src/kudu/common/columnblock-test.cc +++ b/src/kudu/common/columnblock-test.cc @@ -17,9 +17,23 @@ #include "kudu/common/columnblock.h" +#include <string> + #include <gtest/gtest.h> #include "kudu/common/common.pb.h" +#include "kudu/common/rowblock.h" +#include "kudu/common/types.h" +#include "kudu/gutil/strings/substitute.h" +#include "kudu/util/memory/arena.h" +#include "kudu/util/test_macros.h" + +namespace kudu { +class Slice; +} // namespace kudu + +using std::string; +using strings::Substitute; namespace kudu { @@ -56,4 +70,100 @@ TEST(TestColumnBlock, TestEquals) { ASSERT_EQ(scb5, scb6); } +TEST(TestColumnBlock, TestCopyTo) { + ScopedColumnBlock<UINT32> src(8, /*allow_nulls=*/false); + ScopedColumnBlock<UINT32> dst(8, /*allow_nulls=*/false); + + for (int i = 0; i < src.nrows(); i++) { + src[i] = i; + } + for (int i = 0; i < dst.nrows(); i++) { + dst[i] = 100; + } + + SelectionVector sv(src.nrows()); + sv.SetAllTrue(); + + // src: 0 1 2 3 4 5 6 7 + // dst: 100 100 100 100 100 100 100 100 + // ------------------------------------ + // dst: 100 100 100 100 100 3 4 5 + ASSERT_OK(src.CopyTo(sv, &dst, 3, 5, 3)); + + for (int i = 0; i < dst.nrows(); i++) { + int expected_val = i < 5 ? 100 : i - 2; + ASSERT_EQ(expected_val, dst[i]); + } +} + +TEST(TestColumnBlock, TestCopyToIndirectData) { + ScopedColumnBlock<STRING> src(8, /*allow_nulls=*/false); + ScopedColumnBlock<STRING> dst(8, /*allow_nulls=*/false); + + // Ignore idx 3, and poke a corresponding hole in the selection vector. + Slice* next_cell = reinterpret_cast<Slice*>(src.data()); + for (int i = 0; i < src.nrows(); i++, next_cell++) { + if (i == 3) continue; + ASSERT_TRUE(src.arena()->RelocateSlice(Substitute("h$0", i), next_cell)); + } + next_cell = reinterpret_cast<Slice*>(dst.data()); + for (int i = 0; i < dst.nrows(); i++, next_cell++) { + ASSERT_TRUE(dst.arena()->RelocateSlice("", next_cell)); + } + + SelectionVector sv(src.nrows()); + sv.SetAllTrue(); + sv.SetRowUnselected(3); + + // src: h0 h1 h2 ?? h4 h5 h6 h7 + // dst: "" "" "" "" "" "" "" "" + // ---------------------------- + // dst: "" "" "" "" "" "" h4 h5 + ASSERT_OK(src.CopyTo(sv, &dst, 3, 5, 3)); + + for (int i = 0; i < dst.nrows(); i++) { + string expected_val = i < 6 ? "" : Substitute("h$0", i - 2); + ASSERT_EQ(expected_val, dst[i].ToString()); + } +} + +TEST(TestColumnBlock, TestCopyToNulls) { + ScopedColumnBlock<UINT32> src(8); + ScopedColumnBlock<UINT32> dst(8); + + // Initialize idx 3 to null in both 'src' and 'dst'. + for (int i = 0; i < src.nrows(); i++) { + src.SetCellIsNull(i, i == 3); + if (i != 3) { + src[i] = i; + } + } + for (int i = 0; i < dst.nrows(); i++) { + dst.SetCellIsNull(i, i == 3); + if (i != 3) { + dst[i] = 100; + } + } + + SelectionVector sv(src.nrows()); + sv.SetAllTrue(); + + // src: 0 1 2 null 4 5 6 7 + // dst: 100 100 100 null 100 100 100 100 + // -------------------------------------- + // dst: 100 100 100 null 100 null 4 5 + ASSERT_OK(src.CopyTo(sv, &dst, 3, 5, 3)); + + for (int i = 0; i < dst.nrows(); i++) { + SCOPED_TRACE(i); + if (i == 3 || i == 5) { + ASSERT_TRUE(dst.is_null(i)); + } else { + ASSERT_FALSE(dst.is_null(i)); + int expected_val = i < 6 ? 100 : i - 2; + ASSERT_EQ(expected_val, dst[i]); + } + } +} + } // namespace kudu diff --git a/src/kudu/common/columnblock.cc b/src/kudu/common/columnblock.cc new file mode 100644 index 0000000..68ce18c --- /dev/null +++ b/src/kudu/common/columnblock.cc @@ -0,0 +1,60 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "kudu/common/columnblock.h" + +#include <cstring> + +#include "kudu/common/row.h" +#include "kudu/common/rowblock.h" + +namespace kudu { + +Status ColumnBlock::CopyTo(const SelectionVector& sel_vec, + ColumnBlock* dst, size_t src_cell_off, + size_t dst_cell_off, size_t num_cells) const { + DCHECK_EQ(type_, dst->type_); + DCHECK_EQ(is_nullable(), dst->is_nullable()); + DCHECK_GE(nrows_, src_cell_off + num_cells); + DCHECK_GE(dst->nrows_, dst_cell_off + num_cells); + + // Columns with indirect data need to be copied cell-by-cell in order to + // perform arena relocation. Deselected cells must be skipped; the source + // content could be garbage so it'd be unsafe to access it as indirect data. + if (type_->physical_type() == BINARY) { + for (size_t cell_idx = 0; cell_idx < num_cells; cell_idx++) { + if (sel_vec.IsRowSelected(src_cell_off + cell_idx)) { + Cell s(cell(src_cell_off + cell_idx)); + Cell d(dst->cell(dst_cell_off + cell_idx)); + RETURN_NOT_OK(CopyCell(s, &d, dst->arena())); // Also copies nullability. + } + } + } else { + memcpy(dst->data_ + (dst_cell_off * type_->size()), + data_ + (src_cell_off * type_->size()), + num_cells * type_->size()); + if (null_bitmap_) { + BitmapCopy(dst->null_bitmap_, dst_cell_off, + null_bitmap_, src_cell_off, + num_cells); + } +} + + return Status::OK(); +} + +} // namespace kudu diff --git a/src/kudu/common/columnblock.h b/src/kudu/common/columnblock.h index fe23093..45d0d2a 100644 --- a/src/kudu/common/columnblock.h +++ b/src/kudu/common/columnblock.h @@ -14,14 +14,21 @@ // KIND, either express or implied. See the License for the // specific language governing permissions and limitations // under the License. -#ifndef KUDU_COMMON_COLUMNBLOCK_H -#define KUDU_COMMON_COLUMNBLOCK_H +#pragma once + +#include <cstddef> +#include <cstdint> +#include <ostream> #include <string> -#include "kudu/common/row.h" +#include <glog/logging.h> + +#include "kudu/common/common.pb.h" #include "kudu/common/types.h" #include "kudu/gutil/gscoped_ptr.h" +#include "kudu/gutil/strings/fastmem.h" +#include "kudu/gutil/strings/stringpiece.h" #include "kudu/util/bitmap.h" #include "kudu/util/memory/arena.h" #include "kudu/util/memory/overwrite.h" @@ -30,6 +37,7 @@ namespace kudu { class ColumnBlockCell; +class SelectionVector; // A block of data all belonging to a single column. // This is simply a view into a buffer - it does not have any associated @@ -121,6 +129,22 @@ class ColumnBlock { return s; } + // Copies a range of cells between two ColumnBlocks. + // + // The extent of the range is designated by 'src_cell_off' and 'num_cells'. It + // is copied to 'dst' at 'dst_cell_off'. + // + // Note: The inclusion of 'sel_vec' in this function is an admission that + // ColumnBlocks are always used via RowBlocks, and a requirement for safe + // handling of types with indirect data (i.e. deselected cells are not + // relocated because doing so would be unsafe). + // + // TODO(adar): for columns with indirect data, existing arena allocations + // belonging to cells in 'dst' that are overwritten will NOT be deallocated. + Status CopyTo(const SelectionVector& sel_vec, + ColumnBlock* dst, size_t src_cell_off, + size_t dst_cell_off, size_t num_cells) const; + private: friend class ColumnBlockCell; friend class ColumnDataView; @@ -295,4 +319,3 @@ class ScopedColumnBlock : public ColumnBlock { }; } // namespace kudu -#endif diff --git a/src/kudu/common/rowblock.cc b/src/kudu/common/rowblock.cc index 32cce4b..8c4a160 100644 --- a/src/kudu/common/rowblock.cc +++ b/src/kudu/common/rowblock.cc @@ -19,6 +19,7 @@ #include <glog/logging.h> #include "kudu/gutil/bits.h" +#include "kudu/gutil/port.h" #include "kudu/util/bitmap.h" namespace kudu { @@ -32,6 +33,10 @@ SelectionVector::SelectionVector(size_t row_capacity) } void SelectionVector::Resize(size_t n_rows) { + if (PREDICT_FALSE(n_rows == n_rows_)) { + return; + } + size_t new_bytes = BitmapSize(n_rows); CHECK_LE(new_bytes, bytes_capacity_); n_rows_ = n_rows; @@ -143,10 +148,14 @@ RowBlock::~RowBlock() { } } -void RowBlock::Resize(size_t new_size) { - CHECK_LE(new_size, row_capacity_); - nrows_ = new_size; - sel_vec_.Resize(new_size); +void RowBlock::Resize(size_t n_rows) { + if (PREDICT_FALSE(n_rows == nrows_)) { + return; + } + + CHECK_LE(n_rows, row_capacity_); + nrows_ = n_rows; + sel_vec_.Resize(n_rows); } } // namespace kudu diff --git a/src/kudu/common/rowblock.h b/src/kudu/common/rowblock.h index d2cfc82..aee55a1 100644 --- a/src/kudu/common/rowblock.h +++ b/src/kudu/common/rowblock.h @@ -31,6 +31,7 @@ #include "kudu/gutil/macros.h" #include "kudu/gutil/strings/stringpiece.h" #include "kudu/util/bitmap.h" +#include "kudu/util/status.h" namespace kudu { @@ -128,6 +129,27 @@ class SelectionVector { size_t nrows() const { return n_rows_; } + // Copies a range of bits between two SelectionVectors. + // + // The extent of the range is designated by 'src_row_off' and 'num_rows'. It + // is copied to 'dst' at 'dst_row_off'. + // + // Note: 'dst' will be resized if the copy causes it to grow (though this is + // just a "logical" resize; no reallocation takes place). + void CopyTo(SelectionVector* dst, size_t src_row_off, + size_t dst_row_off, size_t num_rows) const { + DCHECK_GE(n_rows_, src_row_off + num_rows); + + size_t new_num_rows = dst_row_off + num_rows; + if (new_num_rows > dst->nrows()) { + // This will crash if 'dst' lacks adequate capacity. + dst->Resize(new_num_rows); + } + + BitmapCopy(dst->mutable_bitmap(), dst_row_off, + bitmap_.get(), src_row_off, num_rows); + } + private: // The number of allocated bytes in bitmap_ size_t bytes_capacity_; @@ -277,6 +299,35 @@ class RowBlock { return &sel_vec_; } + // Copies a range of rows between two RowBlocks. + // + // The extent of the range is designated by 'src_row_off' and 'num_rows'. It + // is copied to 'dst' at 'dst_row_off'. + // + // Note: 'dst' will be resized if the copy causes it to grow (though this is + // just a "logical" resize; no reallocation takes place). + Status CopyTo(RowBlock* dst, size_t src_row_off, + size_t dst_row_off, size_t num_rows) const { + DCHECK_SCHEMA_EQ(*schema_, *dst->schema()); + DCHECK_GE(nrows_, src_row_off + num_rows); + + size_t new_num_rows = dst_row_off + num_rows; + if (new_num_rows > dst->nrows()) { + // This will crash if 'dst' lacks adequate capacity. + dst->Resize(new_num_rows); + } + + for (size_t col_idx = 0; col_idx < schema_->num_columns(); col_idx++) { + ColumnBlock src_cb(column_block(col_idx)); + ColumnBlock dst_cb(dst->column_block(col_idx)); + RETURN_NOT_OK(src_cb.CopyTo(sel_vec_, &dst_cb, + src_row_off, dst_row_off, num_rows)); + } + + sel_vec_.CopyTo(&dst->sel_vec_, src_row_off, dst_row_off, num_rows); + return Status::OK(); + } + private: DISALLOW_COPY_AND_ASSIGN(RowBlock);
