This is an automated email from the ASF dual-hosted git repository.

laiyingchun pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git

commit eedce87cafbf9ecf17a213060595a39fcc79efba
Author: Alexey Serbin <[email protected]>
AuthorDate: Thu Jun 20 15:19:33 2024 -0700

    [cfile] allocate CFileWriter field on the stack when possible
    
    This patch updates BloomFileWriter and DeltaFileWriter classes to avoid
    allocating their CFileWriter member field on the heap, and also contains
    other minor clean-up.  It makes the code cleaner and results in less
    calls to new/tcmalloc.
    
    This patch doesn't contain any functional modifications.
    
    Change-Id: I910c8dfac88cb1018d76ead20034fdae355a1d4c
    Reviewed-on: http://gerrit.cloudera.org:8080/21543
    Reviewed-by: Yingchun Lai <[email protected]>
    Tested-by: Marton Greber <[email protected]>
    Reviewed-by: Marton Greber <[email protected]>
---
 src/kudu/cfile/bloomfile.cc    | 36 ++++++++++----------
 src/kudu/cfile/bloomfile.h     |  2 +-
 src/kudu/cfile/cfile_util.cc   | 77 ++++++++++++++++++++++++++++++++++++++----
 src/kudu/cfile/cfile_util.h    | 25 ++++++++++++++
 src/kudu/cfile/cfile_writer.cc |  4 +--
 src/kudu/cfile/cfile_writer.h  | 10 +++---
 src/kudu/common/schema.h       | 16 +++++----
 src/kudu/tablet/deltafile.cc   | 73 +++++++++++++++++++--------------------
 src/kudu/tablet/deltafile.h    |  6 ++--
 src/kudu/util/bloom_filter.h   |  4 +--
 10 files changed, 172 insertions(+), 81 deletions(-)

diff --git a/src/kudu/cfile/bloomfile.cc b/src/kudu/cfile/bloomfile.cc
index 97f795e69..42bd3dec4 100644
--- a/src/kudu/cfile/bloomfile.cc
+++ b/src/kudu/cfile/bloomfile.cc
@@ -33,7 +33,6 @@
 #include "kudu/cfile/cfile_writer.h"
 #include "kudu/cfile/index_btree.h"
 #include "kudu/common/common.pb.h"
-#include "kudu/common/schema.h"
 #include "kudu/common/types.h"
 #include "kudu/fs/block_manager.h"
 #include "kudu/gutil/atomicops.h"
@@ -98,28 +97,27 @@ using BloomCacheTLC = ThreadLocalCache<uint64_t, 
BloomCacheItem>;
 // Writer
 ////////////////////////////////////////////////////////////
 
+// For bloom filters' storage, never use compression regardless of the default
+// settings: bloom filters are high-entropy data structures by their nature.
 BloomFileWriter::BloomFileWriter(unique_ptr<WritableBlock> block,
-                                 const BloomFilterSizing &sizing)
-  : bloom_builder_(sizing) {
-  cfile::WriterOptions opts;
-  opts.write_posidx = false;
-  opts.write_validx = true;
-  // Never use compression, regardless of the default settings, since
-  // bloom filters are high-entropy data structures by their nature.
-  opts.storage_attributes.encoding  = PLAIN_ENCODING;
-  opts.storage_attributes.compression = NO_COMPRESSION;
-  writer_.reset(new cfile::CFileWriter(std::move(opts),
-                                       GetTypeInfo(BINARY),
-                                       false,
-                                       std::move(block)));
+                                 const BloomFilterSizing& sizing)
+    : writer_(WriterOptionsBuilder()
+                  .write_posidx(false)
+                  .write_validx(true)
+                  .storage_attributes({ PLAIN_ENCODING, NO_COMPRESSION })
+                  .Build(),
+              GetTypeInfo(BINARY),
+              false,
+              std::move(block)),
+      bloom_builder_(sizing) {
 }
 
 Status BloomFileWriter::Start() {
-  return writer_->Start();
+  return writer_.Start();
 }
 
 Status BloomFileWriter::Finish() {
-  BlockManager* bm = writer_->block()->block_manager();
+  BlockManager* bm = writer_.block()->block_manager();
   unique_ptr<BlockCreationTransaction> transaction = 
bm->NewCreationTransaction();
   RETURN_NOT_OK(FinishAndReleaseBlock(transaction.get()));
   return transaction->CommitCreatedBlocks();
@@ -129,11 +127,11 @@ Status 
BloomFileWriter::FinishAndReleaseBlock(BlockCreationTransaction* transact
   if (bloom_builder_.count() > 0) {
     RETURN_NOT_OK(FinishCurrentBloomBlock());
   }
-  return writer_->FinishAndReleaseBlock(transaction);
+  return writer_.FinishAndReleaseBlock(transaction);
 }
 
 size_t BloomFileWriter::written_size() const {
-  return writer_->written_size();
+  return writer_.written_size();
 }
 
 Status BloomFileWriter::AppendKeys(
@@ -182,7 +180,7 @@ Status BloomFileWriter::FinishCurrentBloomBlock() {
   // Append to the file.
   Slice start_key(first_key_);
   Slice last_key(last_key_);
-  RETURN_NOT_OK(writer_->AppendRawBlock(
+  RETURN_NOT_OK(writer_.AppendRawBlock(
       std::move(slices), 0, &start_key, last_key, "bloom block"));
 
   bloom_builder_.Clear();
diff --git a/src/kudu/cfile/bloomfile.h b/src/kudu/cfile/bloomfile.h
index 63efdfd3f..366a9f6e9 100644
--- a/src/kudu/cfile/bloomfile.h
+++ b/src/kudu/cfile/bloomfile.h
@@ -68,7 +68,7 @@ class BloomFileWriter {
 
   Status FinishCurrentBloomBlock();
 
-  std::unique_ptr<cfile::CFileWriter> writer_;
+  cfile::CFileWriter writer_;
 
   BloomFilterBuilder bloom_builder_;
 
diff --git a/src/kudu/cfile/cfile_util.cc b/src/kudu/cfile/cfile_util.cc
index f654b776b..78ee67c25 100644
--- a/src/kudu/cfile/cfile_util.cc
+++ b/src/kudu/cfile/cfile_util.cc
@@ -41,13 +41,78 @@ using std::string;
 
 static const int kBufSize = 1024*1024;
 
+namespace {
+constexpr const size_t kIndexBlockSize = 32 * 1024;
+constexpr const int kBlockRestartInterval = 16;
+constexpr const bool kWritePosIdxEnabled = false;
+constexpr const bool kWriteValIdxEnabled = false;
+constexpr const bool kOptimizeIndexKeysEnabled = true;
+} // anonymous namespace
+
 WriterOptions::WriterOptions()
-  : index_block_size(32*1024),
-    block_restart_interval(16),
-    write_posidx(false),
-    write_validx(false),
-    optimize_index_keys(true),
-    validx_key_encoder(std::nullopt) {
+    : index_block_size(kIndexBlockSize),
+      block_restart_interval(kBlockRestartInterval),
+      write_posidx(kWritePosIdxEnabled),
+      write_validx(kWriteValIdxEnabled),
+      optimize_index_keys(kOptimizeIndexKeysEnabled),
+      validx_key_encoder(std::nullopt) {
+}
+
+WriterOptionsBuilder::WriterOptionsBuilder() noexcept
+    : index_block_size_(kIndexBlockSize),
+      block_restart_interval_(kBlockRestartInterval),
+      write_posidx_(kWritePosIdxEnabled),
+      write_validx_(kWriteValIdxEnabled),
+      optimize_index_keys_(kOptimizeIndexKeysEnabled),
+      validx_key_encoder_(std::nullopt) {
+}
+
+WriterOptionsBuilder& WriterOptionsBuilder::index_block_size(size_t size) 
noexcept {
+  index_block_size_ = size;
+  return *this;
+}
+
+WriterOptionsBuilder& WriterOptionsBuilder::block_restart_interval(int 
interval) noexcept {
+  block_restart_interval_ = interval;
+  return *this;
+}
+
+WriterOptionsBuilder& WriterOptionsBuilder::write_posidx(bool is_enabled) 
noexcept {
+  write_posidx_ = is_enabled;
+  return *this;
+}
+
+WriterOptionsBuilder& WriterOptionsBuilder::write_validx(bool is_enabled) 
noexcept {
+  write_validx_ = is_enabled;
+  return *this;
+}
+
+WriterOptionsBuilder& WriterOptionsBuilder::optimize_index_keys(bool 
is_enabled) noexcept {
+  optimize_index_keys_ = is_enabled;
+  return *this;
+}
+
+WriterOptionsBuilder& WriterOptionsBuilder::storage_attributes(
+    const ColumnStorageAttributes& attrs) noexcept {
+  storage_attributes_ = attrs;
+  return *this;
+}
+
+WriterOptionsBuilder& 
WriterOptionsBuilder::validx_key_encoder(ValidxKeyEncoder enc) noexcept {
+  validx_key_encoder_.emplace(std::move(enc));
+  return *this;
+}
+
+WriterOptions WriterOptionsBuilder::Build() noexcept {
+  WriterOptions opt;
+  opt.index_block_size = index_block_size_;
+  opt.block_restart_interval = block_restart_interval_;
+  opt.write_posidx = write_posidx_;
+  opt.write_validx = write_validx_;
+  opt.optimize_index_keys = optimize_index_keys_;
+  opt.storage_attributes = storage_attributes_;
+  opt.validx_key_encoder = validx_key_encoder_;
+  return opt;
 }
 
 Status DumpIterator(const CFileReader& reader,
diff --git a/src/kudu/cfile/cfile_util.h b/src/kudu/cfile/cfile_util.h
index 67c4ee0d7..74b6e8f7b 100644
--- a/src/kudu/cfile/cfile_util.h
+++ b/src/kudu/cfile/cfile_util.h
@@ -90,6 +90,31 @@ struct WriterOptions {
   WriterOptions();
 };
 
+// Builder API to construct WriterOptions objects with proper settings.
+class WriterOptionsBuilder final {
+ public:
+  WriterOptionsBuilder() noexcept;
+
+  WriterOptionsBuilder& index_block_size(size_t size) noexcept;
+  WriterOptionsBuilder& block_restart_interval(int interval) noexcept;
+  WriterOptionsBuilder& write_posidx(bool is_enabled) noexcept;
+  WriterOptionsBuilder& write_validx(bool is_enabled) noexcept;
+  WriterOptionsBuilder& optimize_index_keys(bool is_enabled) noexcept;
+  WriterOptionsBuilder& storage_attributes(const ColumnStorageAttributes& 
attrs) noexcept;
+  WriterOptionsBuilder& validx_key_encoder(ValidxKeyEncoder enc) noexcept;
+
+  WriterOptions Build() noexcept;
+
+ private:
+  size_t index_block_size_;
+  int block_restart_interval_;
+  bool write_posidx_;
+  bool write_validx_;
+  bool optimize_index_keys_;
+  ColumnStorageAttributes storage_attributes_;
+  std::optional<ValidxKeyEncoder> validx_key_encoder_;
+};
+
 struct ReaderOptions {
   ReaderOptions();
 
diff --git a/src/kudu/cfile/cfile_writer.cc b/src/kudu/cfile/cfile_writer.cc
index 7a7b448de..03c8d7b06 100644
--- a/src/kudu/cfile/cfile_writer.cc
+++ b/src/kudu/cfile/cfile_writer.cc
@@ -90,10 +90,10 @@ CFileWriter::CFileWriter(WriterOptions options,
                          const TypeInfo* typeinfo,
                          bool is_nullable,
                          unique_ptr<WritableBlock> block)
-  : block_(std::move(block)),
+  : options_(std::move(options)),
+    block_(std::move(block)),
     off_(0),
     value_count_(0),
-    options_(std::move(options)),
     is_nullable_(is_nullable),
     typeinfo_(typeinfo),
     state_(kWriterInitialized) {
diff --git a/src/kudu/cfile/cfile_writer.h b/src/kudu/cfile/cfile_writer.h
index 14b938f83..f9391a297 100644
--- a/src/kudu/cfile/cfile_writer.h
+++ b/src/kudu/cfile/cfile_writer.h
@@ -108,7 +108,7 @@ class CFileWriter {
   // it to 'transaction'.
   Status FinishAndReleaseBlock(fs::BlockCreationTransaction* transaction);
 
-  bool finished() {
+  bool finished() const noexcept {
     return state_ == kWriterFinished;
   }
 
@@ -176,8 +176,6 @@ class CFileWriter {
   }
 
  private:
-  DISALLOW_COPY_AND_ASSIGN(CFileWriter);
-
   friend class IndexTreeBuilder;
 
   // Append the given block into the file.
@@ -195,6 +193,8 @@ class CFileWriter {
   // field, clearing the buffer.
   void 
FlushMetadataToPB(google::protobuf::RepeatedPtrField<FileMetadataPairPB> 
*field);
 
+  WriterOptions options_;
+
   // Block being written.
   std::unique_ptr<fs::WritableBlock> block_;
 
@@ -204,8 +204,6 @@ class CFileWriter {
   // Current number of values that have been appended.
   rowid_t value_count_;
 
-  WriterOptions options_;
-
   // Type of data being written
   bool is_nullable_;
   CompressionType compression_;
@@ -234,6 +232,8 @@ class CFileWriter {
     kWriterFinished
   };
   State state_;
+
+  DISALLOW_COPY_AND_ASSIGN(CFileWriter);
 };
 
 
diff --git a/src/kudu/common/schema.h b/src/kudu/common/schema.h
index fae14cfcf..58251821d 100644
--- a/src/kudu/common/schema.h
+++ b/src/kudu/common/schema.h
@@ -145,15 +145,17 @@ struct ColumnTypeAttributes {
 struct ColumnStorageAttributes {
  public:
   ColumnStorageAttributes()
-    : encoding(AUTO_ENCODING),
-      compression(DEFAULT_COMPRESSION),
-      cfile_block_size(0) {
+      : encoding(AUTO_ENCODING),
+        compression(DEFAULT_COMPRESSION),
+        cfile_block_size(0) {
   }
 
-  ColumnStorageAttributes(EncodingType enc, CompressionType cmp)
-    : encoding(enc),
-      compression(cmp),
-      cfile_block_size(0) {
+  ColumnStorageAttributes(EncodingType enc,
+                          CompressionType cmp,
+                          int32_t block_size = 0)
+      : encoding(enc),
+        compression(cmp),
+        cfile_block_size(block_size) {
   }
 
   std::string ToString() const;
diff --git a/src/kudu/tablet/deltafile.cc b/src/kudu/tablet/deltafile.cc
index fbc91839d..a75f9047e 100644
--- a/src/kudu/tablet/deltafile.cc
+++ b/src/kudu/tablet/deltafile.cc
@@ -17,7 +17,7 @@
 
 #include "kudu/tablet/deltafile.h"
 
-#include <functional>
+// IWYU pragma: no_include <functional>
 #include <memory>
 #include <ostream>
 #include <string>
@@ -88,60 +88,61 @@ namespace tablet {
 
 const char* const DeltaFileReader::kDeltaStatsEntryName = "deltafilestats";
 
-DeltaFileWriter::DeltaFileWriter(unique_ptr<WritableBlock> block)
-#ifndef NDEBUG
-   : has_appended_(false)
-#endif
-{ // NOLINT(*)
-  cfile::WriterOptions opts;
-  opts.write_validx = true;
-  opts.storage_attributes.cfile_block_size = 
FLAGS_deltafile_default_block_size;
-  opts.storage_attributes.encoding = PLAIN_ENCODING;
-  opts.storage_attributes.compression =
-      GetCompressionCodecType(FLAGS_deltafile_default_compression_codec);
-
-  // The CFile value index is 'compressed' by truncating delta values to only
-  // contain the delta key. The entire deltakey is required in order to support
-  // efficient seeks without deserializing the entire block. The generic value
-  // index optimization is disabled, since it could truncate portions of the
-  // deltakey.
-  //
-  // Note: The deltafile usage of the CFile value index is irregular, since it
-  // inserts values in non-sorted order (the timestamp portion of the deltakey
-  // in UNDO files is sorted in descending order). This doesn't appear to cause
-  // problems in practice.
-  opts.optimize_index_keys = false;
-  opts.validx_key_encoder = [] (const void* value, faststring* buffer) {
+namespace {
+
+constexpr auto kKeyEncoder = [] (const void* value, faststring* buffer) {
     buffer->clear();
     const Slice* s1 = static_cast<const Slice*>(value);
     Slice s2(*s1);
     DeltaKey key;
     CHECK_OK(key.DecodeFrom(&s2));
     key.EncodeTo(buffer);
-  };
+};
+
+} // anonymous namespace
 
-  writer_.reset(new cfile::CFileWriter(std::move(opts),
-                                       GetTypeInfo(BINARY),
-                                       false,
-                                       std::move(block)));
+// The CFile value index is 'compressed' by truncating delta values to only
+// contain the delta key. The entire deltakey is required in order to support
+// efficient seeks without deserializing the entire block. The generic value
+// index optimization is disabled, since it could truncate portions of the
+// deltakey.
+//
+// Note: The deltafile usage of the CFile value index is irregular, since it
+// inserts values in non-sorted order (the timestamp portion of the deltakey
+// in UNDO files is sorted in descending order). This doesn't appear to cause
+// problems in practice.
+DeltaFileWriter::DeltaFileWriter(unique_ptr<WritableBlock> block)
+    : writer_(cfile::WriterOptionsBuilder()
+                  .write_validx(true)
+                  .optimize_index_keys(false)
+                  .storage_attributes({
+                      PLAIN_ENCODING,
+                      
GetCompressionCodecType(FLAGS_deltafile_default_compression_codec),
+                      FLAGS_deltafile_default_block_size
+                  })
+                  .validx_key_encoder(kKeyEncoder)
+                  .Build(),
+              GetTypeInfo(BINARY),
+              false,
+              std::move(block)) {
 }
 
 Status DeltaFileWriter::Start() {
-  return writer_->Start();
+  return writer_.Start();
 }
 
 Status DeltaFileWriter::Finish() {
-  BlockManager* bm = writer_->block()->block_manager();
+  BlockManager* bm = writer_.block()->block_manager();
   unique_ptr<BlockCreationTransaction> transaction = 
bm->NewCreationTransaction();
   RETURN_NOT_OK(FinishAndReleaseBlock(transaction.get()));
   return transaction->CommitCreatedBlocks();
 }
 
 Status DeltaFileWriter::FinishAndReleaseBlock(BlockCreationTransaction* 
transaction) {
-  if (writer_->written_value_count() == 0) {
+  if (writer_.written_value_count() == 0) {
     return Status::Aborted("no deltas written");
   }
-  return writer_->FinishAndReleaseBlock(transaction);
+  return writer_.FinishAndReleaseBlock(transaction);
 }
 
 Status DeltaFileWriter::DoAppendDelta(const DeltaKey &key,
@@ -155,7 +156,7 @@ Status DeltaFileWriter::DoAppendDelta(const DeltaKey &key,
   tmp_buf_.append(delta_slice.data(), delta_slice.size());
   Slice tmp_buf_slice(tmp_buf_);
 
-  return writer_->AppendEntries(&tmp_buf_slice, 1);
+  return writer_.AppendEntries(&tmp_buf_slice, 1);
 }
 
 template<>
@@ -202,7 +203,7 @@ void 
DeltaFileWriter::WriteDeltaStats(std::unique_ptr<DeltaStats> stats) {
 
   faststring buf;
   pb_util::SerializeToString(delta_stats_pb, &buf);
-  writer_->AddMetadataPair(DeltaFileReader::kDeltaStatsEntryName, 
buf.ToString());
+  writer_.AddMetadataPair(DeltaFileReader::kDeltaStatsEntryName, 
buf.ToString());
   delta_stats_ = std::move(stats);
 }
 
diff --git a/src/kudu/tablet/deltafile.h b/src/kudu/tablet/deltafile.h
index fb19ce1d5..44a089886 100644
--- a/src/kudu/tablet/deltafile.h
+++ b/src/kudu/tablet/deltafile.h
@@ -111,14 +111,14 @@ class DeltaFileWriter {
   }
 
   size_t written_size() const {
-    return writer_->written_size();
+    return writer_.written_size();
   }
 
  private:
   Status DoAppendDelta(const DeltaKey &key, const RowChangeList &delta);
 
+  cfile::CFileWriter writer_;
   std::unique_ptr<DeltaStats> delta_stats_;
-  std::unique_ptr<cfile::CFileWriter> writer_;
 
   // Buffer used as a temporary for storing the serialized form
   // of the deltas
@@ -129,7 +129,7 @@ class DeltaFileWriter {
   // This is used in debug mode to make sure that rows are appended
   // in order.
   DeltaKey last_key_;
-  bool has_appended_;
+  bool has_appended_ = false;
   #endif
 
   DISALLOW_COPY_AND_ASSIGN(DeltaFileWriter);
diff --git a/src/kudu/util/bloom_filter.h b/src/kudu/util/bloom_filter.h
index 27c631f92..198e68fed 100644
--- a/src/kudu/util/bloom_filter.h
+++ b/src/kudu/util/bloom_filter.h
@@ -126,8 +126,8 @@ class BloomFilterSizing {
     expected_count_(expected_count)
   {}
 
-  size_t n_bytes_;
-  size_t expected_count_;
+  const size_t n_bytes_;
+  const size_t expected_count_;
 };
 
 

Reply via email to