This is an automated email from the ASF dual-hosted git repository.

alexey pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git


The following commit(s) were added to refs/heads/master by this push:
     new 9ae2421ae [cfile] 'null bitmap' --> 'non-null bitmap'
9ae2421ae is described below

commit 9ae2421aeb2bf208240dd3802783e87be95db39f
Author: Alexey Serbin <[email protected]>
AuthorDate: Sun Nov 17 14:15:48 2024 -0800

    [cfile] 'null bitmap' --> 'non-null bitmap'
    
    The corresponding bitmaps have always had the semantics that a set bit
    indicated a non-null entry.  Thus naming any of those as a 'null bitmap'
    is quite misleading.  Also, since changelist [1] renamed a subset of
    entities already, now it's even more confusing.
    
    I think it's never too late to add more clarity, so this patch renames
    the rest of the entities in the code and also updates the documentation.
    
    This patch doesn't contain any functional modifications.
    
    This is a follow-up to [1].
    
    [1] https://github.com/apache/kudu/commit/ffef96b26
    
    Change-Id: Ib298b5b6bca786dc69928b960c535f95f177d604
    Reviewed-on: http://gerrit.cloudera.org:8080/22075
    Tested-by: Kudu Jenkins
    Reviewed-by: Yifan Zhang <[email protected]>
---
 docs/design-docs/cfile.md                 | 17 ++++++++++-------
 src/kudu/cfile/cfile_reader.cc            |  6 +++---
 src/kudu/cfile/cfile_util.cc              |  8 ++++++--
 src/kudu/cfile/cfile_writer.cc            |  6 +++---
 src/kudu/cfile/cfile_writer.h             |  4 ++--
 src/kudu/common/columnar_serialization.cc |  4 ++--
 src/kudu/common/columnblock.h             |  4 ++--
 src/kudu/common/row_operations.cc         | 29 ++++++++++++++---------------
 src/kudu/common/row_operations.h          |  4 ++--
 src/kudu/common/rowblock.h                |  4 ++--
 10 files changed, 46 insertions(+), 40 deletions(-)

diff --git a/docs/design-docs/cfile.md b/docs/design-docs/cfile.md
index bc600f622..e2b037028 100644
--- a/docs/design-docs/cfile.md
+++ b/docs/design-docs/cfile.md
@@ -95,20 +95,23 @@ compressed before the checksum is appended.
 
 ### Nullable Data Block Layout
 
-Columns marked as nullable in the schema are stored in a nullable block, which
-includes a bitmap which tracks which rows (ordinal offsets) are null and not
-null.
+A column marked as nullable in the schema is stored in a nullable data block.
+Since null cells aren't stored in the encoded data, it's necessary to preserve
+information on nullity of the cells elsewhere. To do so, such a block includes
+an additional bitmap to track that. Bit positions correspond to ordinal offsets
+of the cells in their rowset. Non-null cells are marked with ones,
+and null cells are marked with zero bits correspondingly.
 
 | field | width (bytes) | notes |
 | --- | --- | --- |
 | value count | variable | [LEB128] encoded count of values |
-| bitmap length | variable | [LEB128] encoded length of the following null 
bitmap |
-| null bitmap | variable | [RLE] encoded bitmap |
+| bitmap length | variable | [LEB128] encoded length of the following non-null 
bitmap |
+| non-null bitmap | variable | [RLE] encoded bitmap |
 | data | variable | encoded non-null data values |
-| checksum | 4 | optional CRC-32 checksum of the value count, bitmap length, 
null bitmap, and data |
+| checksum | 4 | optional CRC-32 checksum of the value count, bitmap length, 
non-null bitmap, and data |
 
 If the CFile is configured to use compression then the value count, bitmap
-length, null bitmap, and encoded data are compressed before the checksum is
+length, non-null bitmap, and encoded data are compressed before the checksum is
 appended.
 
 ### Checksums
diff --git a/src/kudu/cfile/cfile_reader.cc b/src/kudu/cfile/cfile_reader.cc
index b2b8e1eef..1ad1ac569 100644
--- a/src/kudu/cfile/cfile_reader.cc
+++ b/src/kudu/cfile/cfile_reader.cc
@@ -653,7 +653,7 @@ Status 
DefaultColumnValueIterator::Scan(ColumnMaterializationContext* ctx) {
   ColumnBlock* dst = ctx->block();
   if (dst->is_nullable()) {
     ColumnDataView dst_view(dst);
-    dst_view.SetNullBits(dst->nrows(), value_ != nullptr);
+    dst_view.SetNonNullBits(dst->nrows(), value_ != nullptr);
   }
   // Cases where the selection vector can be cleared:
   // 1. There is a read_default and it does not satisfy the predicate
@@ -1185,7 +1185,7 @@ Status CFileIterator::Scan(ColumnMaterializationContext* 
ctx) {
         }
 
         // Set the ColumnBlock bitmap
-        remaining_dst.SetNullBits(this_batch, not_null);
+        remaining_dst.SetNonNullBits(this_batch, not_null);
 
         rem -= this_batch;
         count -= this_batch;
@@ -1207,7 +1207,7 @@ Status CFileIterator::Scan(ColumnMaterializationContext* 
ctx) {
 
       // If the column is nullable, set all bits to true
       if (ctx->block()->is_nullable()) {
-        remaining_dst.SetNullBits(this_batch, true);
+        remaining_dst.SetNonNullBits(this_batch, true);
       }
 
       rem -= this_batch;
diff --git a/src/kudu/cfile/cfile_util.cc b/src/kudu/cfile/cfile_util.cc
index 95e131133..fa04f8c33 100644
--- a/src/kudu/cfile/cfile_util.cc
+++ b/src/kudu/cfile/cfile_util.cc
@@ -124,8 +124,12 @@ Status DumpIterator(const CFileReader& reader,
   uint8_t buf[kBufSize];
   const TypeInfo* type = reader.type_info();
   size_t max_rows = kBufSize/type->size();
-  uint8_t nulls[BitmapSize(max_rows)];
-  ColumnBlock cb(type, reader.is_nullable() ? nulls : nullptr, buf, max_rows, 
&mem);
+  uint8_t non_null_bitmap[BitmapSize(max_rows)];
+  ColumnBlock cb(type,
+                 reader.is_nullable() ? non_null_bitmap : nullptr,
+                 buf,
+                 max_rows,
+                 &mem);
   SelectionVector sel(max_rows);
   ColumnMaterializationContext ctx(0, nullptr, &cb, &sel);
   string strbuf;
diff --git a/src/kudu/cfile/cfile_writer.cc b/src/kudu/cfile/cfile_writer.cc
index 8bf2e9011..620bee394 100644
--- a/src/kudu/cfile/cfile_writer.cc
+++ b/src/kudu/cfile/cfile_writer.cc
@@ -82,9 +82,9 @@ const size_t kChecksumSize = sizeof(uint32_t);
 
 static const size_t kMinBlockSize = 512;
 
-class NullBitmapBuilder {
+class NonNullBitmapBuilder {
  public:
-  explicit NullBitmapBuilder(size_t initial_row_capacity)
+  explicit NonNullBitmapBuilder(size_t initial_row_capacity)
       : nitems_(0),
         bitmap_(BitmapSize(initial_row_capacity)),
         rle_encoder_(&bitmap_, 1) {
@@ -218,7 +218,7 @@ Status CFileWriter::Start() {
   if (is_nullable_) {
     size_t nrows = ((options_.storage_attributes.cfile_block_size + 
typeinfo_->size() - 1) /
                     typeinfo_->size());
-    non_null_bitmap_builder_.reset(new NullBitmapBuilder(nrows * 8));
+    non_null_bitmap_builder_.reset(new NonNullBitmapBuilder(nrows * 8));
   }
 
   state_ = kWriterWriting;
diff --git a/src/kudu/cfile/cfile_writer.h b/src/kudu/cfile/cfile_writer.h
index 3c36efb03..f96ec27cf 100644
--- a/src/kudu/cfile/cfile_writer.h
+++ b/src/kudu/cfile/cfile_writer.h
@@ -52,7 +52,7 @@ extern const char kMagicStringV2[];
 extern const int kMagicLength;
 extern const size_t kChecksumSize;
 
-class NullBitmapBuilder;
+class NonNullBitmapBuilder;
 
 // Main class used to write a CFile.
 class CFileWriter final {
@@ -188,7 +188,7 @@ class CFileWriter final {
   std::unique_ptr<BlockBuilder> data_block_;
   std::unique_ptr<IndexTreeBuilder> posidx_builder_;
   std::unique_ptr<IndexTreeBuilder> validx_builder_;
-  std::unique_ptr<NullBitmapBuilder> non_null_bitmap_builder_;
+  std::unique_ptr<NonNullBitmapBuilder> non_null_bitmap_builder_;
   std::unique_ptr<CompressedBlockBuilder> block_compressor_;
 
   enum State {
diff --git a/src/kudu/common/columnar_serialization.cc 
b/src/kudu/common/columnar_serialization.cc
index 0dbb9d10b..3085709c3 100644
--- a/src/kudu/common/columnar_serialization.cc
+++ b/src/kudu/common/columnar_serialization.cc
@@ -481,7 +481,7 @@ void CopySelectedCellsFromColumn(const ColumnBlock& cblock,
   size_t sizeof_type = cblock.type_info()->size();
   int n_sel = sel_rows.num_selected();
 
-  // Number of initial rows in the dst values and null_bitmap.
+  // Number of initial rows in the dst values and non-null bitmap.
   DCHECK_EQ(dst->data.size() % sizeof_type, 0);
   size_t initial_rows = div_sizeof_type(dst->data.size(), sizeof_type);
   size_t new_num_rows = initial_rows + n_sel;
@@ -565,7 +565,7 @@ void CopySelectedVarlenCellsFromColumn(const ColumnBlock& 
cblock,
     dst->data.append(&zero_offset, sizeof(zero_offset));
   }
 
-  // Number of initial rows in the dst values and null_bitmap.
+  // Number of initial rows in the dst values and non-null bitmap.
   DCHECK_EQ(dst->data.size() % sizeof(offset_type), 0);
   size_t initial_offset_count = div_sizeof_type(dst->data.size(), 
sizeof(offset_type));
   size_t initial_rows = initial_offset_count - 1;
diff --git a/src/kudu/common/columnblock.h b/src/kudu/common/columnblock.h
index c3263f340..51881cb11 100644
--- a/src/kudu/common/columnblock.h
+++ b/src/kudu/common/columnblock.h
@@ -254,9 +254,9 @@ class ColumnDataView {
     return row_offset_;
   }
 
-  // Set 'nrows' bits of the the null-bitmap to "value"
+  // Set 'nrows' bits of the non-null bitmap to 'value';
   // true if not null, false if null.
-  void SetNullBits(size_t nrows, bool value) {
+  void SetNonNullBits(size_t nrows, bool value) {
     BitmapChangeBits(column_block_->mutable_non_null_bitmap(), row_offset_, 
nrows, value);
   }
 
diff --git a/src/kudu/common/row_operations.cc 
b/src/kudu/common/row_operations.cc
index b1cf61362..f79c6a02e 100644
--- a/src/kudu/common/row_operations.cc
+++ b/src/kudu/common/row_operations.cc
@@ -248,12 +248,12 @@ Status RowOperationsPBDecoder::ReadIssetBitmap(const 
uint8_t** bitmap) {
   return Status::OK();
 }
 
-Status RowOperationsPBDecoder::ReadNullBitmap(const uint8_t** null_bm) {
+Status RowOperationsPBDecoder::ReadNonNullBitmap(const uint8_t** bitmap) {
   if (PREDICT_FALSE(src_.size() < bm_size_)) {
-    *null_bm = nullptr;
+    *bitmap = nullptr;
     return Status::Corruption("Cannot find null bitmap");
   }
-  *null_bm = src_.data();
+  *bitmap = src_.data();
   src_.remove_prefix(bm_size_);
   return Status::OK();
 }
@@ -423,12 +423,12 @@ Status RowOperationsPBDecoder::DecodeInsertOrUpsert(const 
uint8_t* prototype_row
                                                     DecodedRowOperation* op,
                                                     int64_t* 
auto_incrementing_counter) {
   const uint8_t* client_isset_map = nullptr;
-  const uint8_t* client_null_map = nullptr;
+  const uint8_t* client_non_null_map = nullptr;
 
   // Read the null and isset bitmaps for the client-provided row.
   RETURN_NOT_OK(ReadIssetBitmap(&client_isset_map));
   if (client_schema_->has_nullables()) {
-    RETURN_NOT_OK(ReadNullBitmap(&client_null_map));
+    RETURN_NOT_OK(ReadNonNullBitmap(&client_non_null_map));
   }
 
   // Allocate a row with the tablet's layout.
@@ -471,7 +471,7 @@ Status RowOperationsPBDecoder::DecodeInsertOrUpsert(const 
uint8_t* prototype_row
       // If the client provided a value for this column, copy it.
       // Copy null-ness, if the server side column is nullable.
       const bool client_set_to_null = client_schema_->has_nullables() &&
-          BitmapTest(client_null_map, client_col_idx);
+          BitmapTest(client_non_null_map, client_col_idx);
       if (col.is_nullable()) {
         tablet_row.set_null(tablet_col_idx, client_set_to_null);
       }
@@ -572,12 +572,12 @@ Status RowOperationsPBDecoder::DecodeInsertOrUpsert(const 
uint8_t* prototype_row
 Status RowOperationsPBDecoder::DecodeUpdateOrDelete(const ClientServerMapping& 
mapping,
                                                     DecodedRowOperation* op) {
   const uint8_t* client_isset_map = nullptr;
-  const uint8_t* client_null_map = nullptr;
+  const uint8_t* client_non_null_map = nullptr;
 
   // Read the null and isset bitmaps for the client-provided row.
   RETURN_NOT_OK(ReadIssetBitmap(&client_isset_map));
   if (client_schema_->has_nullables()) {
-    RETURN_NOT_OK(ReadNullBitmap(&client_null_map));
+    RETURN_NOT_OK(ReadNonNullBitmap(&client_non_null_map));
   }
 
   // Allocate space for the row key.
@@ -612,7 +612,7 @@ Status RowOperationsPBDecoder::DecodeUpdateOrDelete(const 
ClientServerMapping& m
     }
 
     bool client_set_to_null = client_schema_->has_nullables() &&
-        BitmapTest(client_null_map, client_col_idx);
+        BitmapTest(client_non_null_map, client_col_idx);
     if (PREDICT_FALSE(client_set_to_null)) {
       op->SetFailureStatusOnce(Status::InvalidArgument("NULL values not 
allowed for key column",
                                                        col.ToString()));
@@ -641,7 +641,7 @@ Status RowOperationsPBDecoder::DecodeUpdateOrDelete(const 
ClientServerMapping& m
 
       if (BitmapTest(client_isset_map, client_col_idx)) {
         bool client_set_to_null = client_schema_->has_nullables() &&
-          BitmapTest(client_null_map, client_col_idx);
+          BitmapTest(client_non_null_map, client_col_idx);
 
         if (col.is_immutable()) {
           if (op->type == RowOperationsPB::UPDATE) {
@@ -703,7 +703,7 @@ Status RowOperationsPBDecoder::DecodeUpdateOrDelete(const 
ClientServerMapping& m
             "DELETE should not have a value for column", col.ToString()));
 
         bool client_set_to_null = client_schema_->has_nullables() &&
-            BitmapTest(client_null_map, client_col_idx);
+            BitmapTest(client_non_null_map, client_col_idx);
         if (!client_set_to_null || !col.is_nullable()) {
           RETURN_NOT_OK(ReadColumnAndDiscard(col));
         }
@@ -723,13 +723,12 @@ Status RowOperationsPBDecoder::DecodeSplitRow(const 
ClientServerMapping& mapping
                                               DecodedRowOperation* op) {
   op->split_row = std::make_shared<KuduPartialRow>(tablet_schema_);
 
-  const uint8_t* client_isset_map;
-  const uint8_t* client_null_map;
-
   // Read the null and isset bitmaps for the client-provided row.
+  const uint8_t* client_isset_map;
   RETURN_NOT_OK(ReadIssetBitmap(&client_isset_map));
   if (client_schema_->has_nullables()) {
-    RETURN_NOT_OK(ReadNullBitmap(&client_null_map));
+    const uint8_t* client_non_null_map;
+    RETURN_NOT_OK(ReadNonNullBitmap(&client_non_null_map));
   }
 
   // Now handle each of the columns passed by the user.
diff --git a/src/kudu/common/row_operations.h b/src/kudu/common/row_operations.h
index f27a4afc6..5c4c6d23f 100644
--- a/src/kudu/common/row_operations.h
+++ b/src/kudu/common/row_operations.h
@@ -128,7 +128,7 @@ class RowOperationsPBDecoder {
  private:
   Status ReadOpType(RowOperationsPB::Type* type);
   Status ReadIssetBitmap(const uint8_t** bitmap);
-  Status ReadNullBitmap(const uint8_t** null_bm);
+  Status ReadNonNullBitmap(const uint8_t** bitmap);
   // Read one row's column data from 'src_', read result is stored in 'slice'.
   // Return bad Status if data is corrupt.
   // NOTE: If 'row_status' is not nullptr, column data validate will be 
performed,
@@ -178,7 +178,7 @@ class RowOperationsPBDecoder {
   const Schema* const tablet_schema_;
   Arena* const dst_arena_;
 
-  const int bm_size_;
+  const size_t bm_size_;
   const size_t tablet_row_size_;
   Slice src_;
 
diff --git a/src/kudu/common/rowblock.h b/src/kudu/common/rowblock.h
index a6a77019d..d1f3f9178 100644
--- a/src/kudu/common/rowblock.h
+++ b/src/kudu/common/rowblock.h
@@ -340,9 +340,9 @@ class RowBlock {
 
     const ColumnSchema& col_schema = schema_->column(col_idx);
     uint8_t* col_data = columns_data_[col_idx];
-    uint8_t* nulls_bitmap = column_non_null_bitmaps_[col_idx];
+    uint8_t* non_null_bitmap = column_non_null_bitmaps_[col_idx];
 
-    return ColumnBlock(col_schema.type_info(), nulls_bitmap, col_data, nrows, 
memory_);
+    return ColumnBlock(col_schema.type_info(), non_null_bitmap, col_data, 
nrows, memory_);
   }
 
   // Return the base pointer for the given column's data.

Reply via email to