This is an automated email from the ASF dual-hosted git repository.
alexey pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git
The following commit(s) were added to refs/heads/master by this push:
new 4258f83e2 [common] small ColumnBlock cleanup
4258f83e2 is described below
commit 4258f83e24b2c28136c152e1c29c70db315d2b94
Author: Alexey Serbin <[email protected]>
AuthorDate: Wed Aug 28 18:38:13 2024 -0700
[common] small ColumnBlock cleanup
Since I was planning to change the code in ColumnBlock a bit,
I separated non-functional modifications in this separate patch.
This patch fixes const-correctness of a few methods, updates the code
to be style-compliant, and addresses other nits in the related code.
This patch doesn't contain any functional modifications.
Change-Id: I3bc2a58be0ff6791ca6d596173624176a20fcaf0
Reviewed-on: http://gerrit.cloudera.org:8080/21734
Reviewed-by: Abhishek Chennaka <[email protected]>
Tested-by: Alexey Serbin <[email protected]>
Reviewed-by: Yifan Zhang <[email protected]>
---
src/kudu/common/columnar_serialization-test.cc | 9 +++--
src/kudu/common/columnar_serialization.cc | 23 ++++++------
src/kudu/common/columnar_serialization.h | 4 +--
src/kudu/common/columnblock-test-util.h | 23 ++++++------
src/kudu/common/columnblock.cc | 7 ++--
src/kudu/common/columnblock.h | 49 +++++++++++++++-----------
6 files changed, 61 insertions(+), 54 deletions(-)
diff --git a/src/kudu/common/columnar_serialization-test.cc
b/src/kudu/common/columnar_serialization-test.cc
index 9cff4255c..25ca43052 100644
--- a/src/kudu/common/columnar_serialization-test.cc
+++ b/src/kudu/common/columnar_serialization-test.cc
@@ -20,7 +20,6 @@
#include <cstddef>
#include <cstdint>
#include <ostream>
-#include <string>
#include <utility>
#include <vector>
@@ -94,8 +93,8 @@ TEST_F(ColumnarSerializationTest, TestZeroNullValues) {
internal::ZeroNullValues(
kTypeSize, /* dst_idx= */0,
data.vals.size(),
- reinterpret_cast<uint8_t*>(data.vals.data()),
- data.non_nulls.data());
+ data.non_nulls.data(),
+ reinterpret_cast<uint8_t*>(data.vals.data()));
ASSERT_NO_FATAL_FAILURE(data.VerifyNullsAreZeroed());
}
@@ -110,8 +109,8 @@ TEST_F(ColumnarSerializationTest,
TestZeroNullValuesWithOffset) {
auto n = rng_.Uniform(rem) + 1;
internal::ZeroNullValues(
kTypeSize, dst_idx, n,
- reinterpret_cast<uint8_t*>(data.vals.data()),
- data.non_nulls.data());
+ data.non_nulls.data(),
+ reinterpret_cast<uint8_t*>(data.vals.data()));
dst_idx += n;
}
ASSERT_NO_FATAL_FAILURE(data.VerifyNullsAreZeroed());
diff --git a/src/kudu/common/columnar_serialization.cc
b/src/kudu/common/columnar_serialization.cc
index 3685e31de..0dbb9d10b 100644
--- a/src/kudu/common/columnar_serialization.cc
+++ b/src/kudu/common/columnar_serialization.cc
@@ -71,7 +71,7 @@ struct BitWriter {
}
~BitWriter() {
- CHECK(flushed_) << "must flush";
+ DCHECK(flushed_) << "must flush";
}
void Put(uint64_t v, int num_bits) {
@@ -92,7 +92,7 @@ struct BitWriter {
}
void Flush() {
- CHECK(!flushed_) << "must only flush once";
+ DCHECK(!flushed_) << "must only flush once";
while (num_buffered_bits_ > 0) {
*dst_++ = buffered_values_ & 0xff;
buffered_values_ >>= 8;
@@ -126,8 +126,8 @@ template<int sizeof_type>
ATTRIBUTE_NOINLINE
void ZeroNullValuesImpl(int dst_idx,
int n_rows,
- uint8_t* __restrict__ dst_values_buf,
- uint8_t* __restrict__ non_null_bitmap) {
+ const uint8_t* __restrict__ non_null_bitmap,
+ uint8_t* __restrict__ dst_values_buf) {
int aligned_dst_idx = KUDU_ALIGN_DOWN(dst_idx, 8);
int aligned_n_sel = n_rows + (dst_idx - aligned_dst_idx);
@@ -156,14 +156,14 @@ void ZeroNullValuesImpl(int dst_idx,
void ZeroNullValues(int sizeof_type,
int dst_idx,
int n_rows,
- uint8_t* dst_values_buf,
- uint8_t* dst_non_null_bitmap) {
+ const uint8_t* dst_non_null_bitmap,
+ uint8_t* dst_values_buf) {
// Delegate to specialized implementations for each type size.
// This changes variable-length memsets into inlinable single instructions.
switch (sizeof_type) {
#define CASE(size) \
case size: \
- ZeroNullValuesImpl<size>(dst_idx, n_rows, dst_values_buf,
dst_non_null_bitmap); \
+ ZeroNullValuesImpl<size>(dst_idx, n_rows, dst_non_null_bitmap,
dst_values_buf); \
break;
CASE(1);
CASE(2);
@@ -504,7 +504,7 @@ void CopySelectedCellsFromColumn(const ColumnBlock& cblock,
initial_rows, cblock.nrows(),
dst->non_null_bitmap->data());
ZeroNullValues(sizeof_type, initial_rows, n_sel,
- dst->data.data(), dst->non_null_bitmap->data());
+ dst->non_null_bitmap->data(), dst->data.data());
}
}
@@ -560,7 +560,7 @@ void CopySelectedVarlenCellsFromColumn(const ColumnBlock&
cblock,
// If this is the first call, append a '0' entry for the offset of the first
string.
if (dst->data.size() == 0) {
- CHECK_EQ(dst->varlen_data->size(), 0);
+ DCHECK_EQ(dst->varlen_data->size(), 0);
offset_type zero_offset = 0;
dst->data.append(&zero_offset, sizeof(zero_offset));
}
@@ -580,7 +580,8 @@ void CopySelectedVarlenCellsFromColumn(const ColumnBlock&
cblock,
initial_rows, cblock.nrows(),
dst->non_null_bitmap->data());
ZeroNullValues(sizeof(Slice), 0, cblock.nrows(),
- const_cast<ColumnBlock&>(cblock).data(),
cblock.non_null_bitmap());
+ cblock.non_null_bitmap(),
+ const_cast<ColumnBlock&>(cblock).data());
}
dst->data.resize_with_extra_capacity(sizeof(offset_type) * new_offset_count);
offset_type* dst_offset = reinterpret_cast<offset_type*>(dst->data.data()) +
initial_offset_count;
@@ -603,7 +604,7 @@ ColumnarSerializedBatch::ColumnarSerializedBatch(const
Schema& rowblock_schema,
auto& col = columns_.back();
col.rowblock_schema_col_idx =
rowblock_schema.find_column(schema_col.name());
- CHECK_NE(col.rowblock_schema_col_idx, -1);
+ DCHECK_NE(col.rowblock_schema_col_idx, Schema::kColumnNotFound);
// Size the initial buffer based on the percentage of the total row that
this column
// takes up. This isn't fully accurate because of costs like the null
bitmap or varlen
diff --git a/src/kudu/common/columnar_serialization.h
b/src/kudu/common/columnar_serialization.h
index e08807355..e372bc53f 100644
--- a/src/kudu/common/columnar_serialization.h
+++ b/src/kudu/common/columnar_serialization.h
@@ -87,8 +87,8 @@ namespace internal {
void ZeroNullValues(int sizeof_type,
int dst_idx,
int n_rows,
- uint8_t* dst_values_buf,
- uint8_t* non_null_bitmap);
+ const uint8_t* non_null_bitmap,
+ uint8_t* dst_values_buf);
void CopyNonNullBitmap(const uint8_t* non_null_bitmap,
const uint8_t* sel_bitmap,
diff --git a/src/kudu/common/columnblock-test-util.h
b/src/kudu/common/columnblock-test-util.h
index db3764b24..bd8e7a104 100644
--- a/src/kudu/common/columnblock-test-util.h
+++ b/src/kudu/common/columnblock-test-util.h
@@ -40,28 +40,27 @@ class ScopedColumnBlock : public ColumnBlock {
new cpp_type[n_rows],
n_rows,
new RowBlockMemory()),
- non_null_bitmap_(non_null_bitmap()),
- data_(reinterpret_cast<cpp_type *>(data())),
- memory_(memory()) {
+ non_null_bitmap_buf_(non_null_bitmap_),
+ data_buf_(reinterpret_cast<cpp_type*>(data_)),
+ memory_buf_(memory_) {
if (allow_nulls) {
// All rows begin null.
- BitmapChangeBits(non_null_bitmap(), /*offset=*/ 0, n_rows, /*value=*/
false);
+ BitmapChangeBits(non_null_bitmap_, /*offset=*/ 0, n_rows, /*value=*/
false);
}
}
- const cpp_type &operator[](size_t idx) const {
- return data_[idx];
+ const cpp_type& operator[](size_t idx) const {
+ return data_buf_[idx];
}
- cpp_type &operator[](size_t idx) {
- return data_[idx];
+ cpp_type& operator[](size_t idx) {
+ return data_buf_[idx];
}
private:
- std::unique_ptr<uint8_t[]> non_null_bitmap_;
- std::unique_ptr<cpp_type[]> data_;
- std::unique_ptr<RowBlockMemory> memory_;
-
+ std::unique_ptr<uint8_t[]> non_null_bitmap_buf_;
+ std::unique_ptr<cpp_type[]> data_buf_;
+ std::unique_ptr<RowBlockMemory> memory_buf_;
};
} // namespace kudu
diff --git a/src/kudu/common/columnblock.cc b/src/kudu/common/columnblock.cc
index 2f5bf8dbb..83a62265a 100644
--- a/src/kudu/common/columnblock.cc
+++ b/src/kudu/common/columnblock.cc
@@ -22,13 +22,14 @@
#include "kudu/common/common.pb.h"
#include "kudu/common/row.h"
#include "kudu/common/rowblock.h"
-#include "kudu/util/memory/arena.h"
namespace kudu {
Status ColumnBlock::CopyTo(const SelectionVector& sel_vec,
- ColumnBlock* dst, size_t src_cell_off,
- size_t dst_cell_off, size_t num_cells) const {
+ ColumnBlock* dst,
+ size_t src_cell_off,
+ size_t dst_cell_off,
+ size_t num_cells) const {
DCHECK_EQ(type_, dst->type_);
DCHECK_EQ(is_nullable(), dst->is_nullable());
DCHECK_GE(nrows_, src_cell_off + num_cells);
diff --git a/src/kudu/common/columnblock.h b/src/kudu/common/columnblock.h
index a5c046158..4b030d0f4 100644
--- a/src/kudu/common/columnblock.h
+++ b/src/kudu/common/columnblock.h
@@ -63,26 +63,26 @@ class ColumnBlock {
BitmapChange(non_null_bitmap_, idx, !is_null);
}
- void SetCellValue(size_t idx, const void *new_val) {
+ void SetCellValue(size_t idx, const void* new_val) {
strings::memcpy_inlined(mutable_cell_ptr(idx), new_val, type_->size());
}
#ifndef NDEBUG
void OverwriteWithPattern(size_t idx, StringPiece pattern) {
- char *col_data = reinterpret_cast<char *>(mutable_cell_ptr(idx));
+ char* col_data = reinterpret_cast<char*>(mutable_cell_ptr(idx));
kudu::OverwriteWithPattern(col_data, type_->size(), pattern);
}
#endif
// Return a pointer to the given cell.
- const uint8_t *cell_ptr(size_t idx) const {
+ const uint8_t* cell_ptr(size_t idx) const {
DCHECK_LT(idx, nrows_);
return data_ + type_->size() * idx;
}
// Returns a pointer to the given cell or NULL.
- const uint8_t *nullable_cell_ptr(size_t idx) const {
- return is_null(idx) ? NULL : cell_ptr(idx);
+ const uint8_t* nullable_cell_ptr(size_t idx) const {
+ return is_null(idx) ? nullptr : cell_ptr(idx);
}
Cell cell(size_t idx) const;
@@ -91,7 +91,11 @@ class ColumnBlock {
// A set bit indicates non-NULL.
//
// This returns nullptr for a non-nullable column.
- uint8_t *non_null_bitmap() const {
+ const uint8_t* non_null_bitmap() const {
+ return non_null_bitmap_;
+ }
+
+ uint8_t* mutable_non_null_bitmap() {
return non_null_bitmap_;
}
@@ -145,24 +149,26 @@ class ColumnBlock {
// TODO(adar): for columns with indirect data, existing arena allocations
// belonging to cells in 'dst' that are overwritten will NOT be deallocated.
Status CopyTo(const SelectionVector& sel_vec,
- ColumnBlock* dst, size_t src_cell_off,
- size_t dst_cell_off, size_t num_cells) const;
+ ColumnBlock* dst,
+ size_t src_cell_off,
+ size_t dst_cell_off,
+ size_t num_cells) const;
- private:
+ protected:
friend class ColumnBlockCell;
friend class ColumnDataView;
// Return a pointer to the given cell.
- uint8_t *mutable_cell_ptr(size_t idx) {
+ uint8_t* mutable_cell_ptr(size_t idx) {
DCHECK_LT(idx, nrows_);
return data_ + type_->size() * idx;
}
- const TypeInfo *type_;
- uint8_t *non_null_bitmap_;
+ const TypeInfo* const type_;
+ uint8_t* non_null_bitmap_;
- uint8_t *data_;
- size_t nrows_;
+ uint8_t* data_;
+ const size_t nrows_;
RowBlockMemory* memory_;
};
@@ -231,8 +237,9 @@ inline ColumnBlockCell ColumnBlock::cell(size_t idx) const {
// Used by the reader and block encoders to read/write raw data.
class ColumnDataView {
public:
- explicit ColumnDataView(ColumnBlock *column_block, size_t first_row_idx = 0)
- : column_block_(column_block), row_offset_(0) {
+ explicit ColumnDataView(ColumnBlock* column_block, size_t first_row_idx = 0)
+ : column_block_(column_block),
+ row_offset_(0) {
Advance(first_row_idx);
}
@@ -250,14 +257,14 @@ class ColumnDataView {
// Set 'nrows' bits of the the null-bitmap to "value"
// true if not null, false if null.
void SetNullBits(size_t nrows, bool value) {
- BitmapChangeBits(column_block_->non_null_bitmap(), row_offset_, nrows,
value);
+ BitmapChangeBits(column_block_->mutable_non_null_bitmap(), row_offset_,
nrows, value);
}
- uint8_t *data() {
+ uint8_t* data() {
return column_block_->mutable_cell_ptr(row_offset_);
}
- const uint8_t *data() const {
+ const uint8_t* data() const {
return column_block_->cell_ptr(row_offset_);
}
@@ -269,7 +276,7 @@ class ColumnDataView {
return column_block_->nrows() - row_offset_;
}
- const size_t stride() const {
+ size_t stride() const {
return column_block_->stride();
}
@@ -278,7 +285,7 @@ class ColumnDataView {
}
private:
- ColumnBlock *column_block_;
+ ColumnBlock* column_block_;
size_t row_offset_;
};