This is an automated email from the ASF dual-hosted git repository.

hulk pushed a commit to branch unstable
in repository https://gitbox.apache.org/repos/asf/kvrocks.git


The following commit(s) were added to refs/heads/unstable by this push:
     new 5a4ad7f7 Use rocksdb::PinnableSlice to avoid memory copy in bitmap and 
bloomfilter (#1888)
5a4ad7f7 is described below

commit 5a4ad7f7907700f49dcbf732d17513ef6cf760ac
Author: Nuo Xu <[email protected]>
AuthorDate: Sat Nov 25 20:04:17 2023 -0800

    Use rocksdb::PinnableSlice to avoid memory copy in bitmap and bloomfilter 
(#1888)
---
 src/storage/storage.cc         | 13 +++++++++++++
 src/storage/storage.h          |  4 ++++
 src/types/redis_bitmap.cc      | 23 ++++++++++++-----------
 src/types/redis_bloom_chain.cc | 37 +++++++++++++++++++++++--------------
 src/types/redis_bloom_chain.h  |  5 +++--
 5 files changed, 55 insertions(+), 27 deletions(-)

diff --git a/src/storage/storage.cc b/src/storage/storage.cc
index 77d0ab27..a0e53c0b 100644
--- a/src/storage/storage.cc
+++ b/src/storage/storage.cc
@@ -531,6 +531,19 @@ rocksdb::Status Storage::Get(const rocksdb::ReadOptions 
&options, rocksdb::Colum
   return db_->Get(options, column_family, key, value);
 }
 
+rocksdb::Status Storage::Get(const rocksdb::ReadOptions &options, const 
rocksdb::Slice &key,
+                             rocksdb::PinnableSlice *value) {
+  return Get(options, db_->DefaultColumnFamily(), key, value);
+}
+
+rocksdb::Status Storage::Get(const rocksdb::ReadOptions &options, 
rocksdb::ColumnFamilyHandle *column_family,
+                             const rocksdb::Slice &key, rocksdb::PinnableSlice 
*value) {
+  if (is_txn_mode_ && txn_write_batch_->GetWriteBatch()->Count() > 0) {
+    return txn_write_batch_->GetFromBatchAndDB(db_.get(), options, 
column_family, key, value);
+  }
+  return db_->Get(options, column_family, key, value);
+}
+
 rocksdb::Iterator *Storage::NewIterator(const rocksdb::ReadOptions &options) {
   return NewIterator(options, db_->DefaultColumnFamily());
 }
diff --git a/src/storage/storage.h b/src/storage/storage.h
index e8d6562a..43c3cf1c 100644
--- a/src/storage/storage.h
+++ b/src/storage/storage.h
@@ -107,6 +107,10 @@ class Storage {
   [[nodiscard]] rocksdb::Status Get(const rocksdb::ReadOptions &options, const 
rocksdb::Slice &key, std::string *value);
   [[nodiscard]] rocksdb::Status Get(const rocksdb::ReadOptions &options, 
rocksdb::ColumnFamilyHandle *column_family,
                                     const rocksdb::Slice &key, std::string 
*value);
+  [[nodiscard]] rocksdb::Status Get(const rocksdb::ReadOptions &options, const 
rocksdb::Slice &key,
+                                    rocksdb::PinnableSlice *value);
+  [[nodiscard]] rocksdb::Status Get(const rocksdb::ReadOptions &options, 
rocksdb::ColumnFamilyHandle *column_family,
+                                    const rocksdb::Slice &key, 
rocksdb::PinnableSlice *value);
   void MultiGet(const rocksdb::ReadOptions &options, 
rocksdb::ColumnFamilyHandle *column_family, size_t num_keys,
                 const rocksdb::Slice *keys, rocksdb::PinnableSlice *values, 
rocksdb::Status *statuses);
   rocksdb::Iterator *NewIterator(const rocksdb::ReadOptions &options, 
rocksdb::ColumnFamilyHandle *column_family);
diff --git a/src/types/redis_bitmap.cc b/src/types/redis_bitmap.cc
index a7b73847..a4c71cd2 100644
--- a/src/types/redis_bitmap.cc
+++ b/src/types/redis_bitmap.cc
@@ -246,19 +246,19 @@ rocksdb::Status Bitmap::BitCount(const Slice &user_key, 
int64_t start, int64_t s
   uint32_t start_index = u_start / kBitmapSegmentBytes;
   uint32_t stop_index = u_stop / kBitmapSegmentBytes;
   // Don't use multi get to prevent large range query, and take too much memory
-  std::string value;
   for (uint32_t i = start_index; i <= stop_index; i++) {
+    rocksdb::PinnableSlice pin_value;
     std::string sub_key =
         InternalKey(ns_key, std::to_string(i * kBitmapSegmentBytes), 
metadata.version, storage_->IsSlotIdEncoded())
             .Encode();
-    s = storage_->Get(read_options, sub_key, &value);
+    s = storage_->Get(read_options, sub_key, &pin_value);
     if (!s.ok() && !s.IsNotFound()) return s;
     if (s.IsNotFound()) continue;
     size_t j = 0;
     if (i == start_index) j = u_start % kBitmapSegmentBytes;
-    auto k = static_cast<int64_t>(value.size());
+    auto k = static_cast<int64_t>(pin_value.size());
     if (i == stop_index) k = u_stop % kBitmapSegmentBytes + 1;
-    *cnt += BitmapString::RawPopcount(reinterpret_cast<const uint8_t 
*>(value.data()) + j, k);
+    *cnt += BitmapString::RawPopcount(reinterpret_cast<const uint8_t 
*>(pin_value.data()) + j, k);
   }
   return rocksdb::Status::OK();
 }
@@ -304,12 +304,12 @@ rocksdb::Status Bitmap::BitPos(const Slice &user_key, 
bool bit, int64_t start, i
   uint32_t start_index = u_start / kBitmapSegmentBytes;
   uint32_t stop_index = u_stop / kBitmapSegmentBytes;
   // Don't use multi get to prevent large range query, and take too much memory
-  std::string value;
+  rocksdb::PinnableSlice pin_value;
   for (uint32_t i = start_index; i <= stop_index; i++) {
     std::string sub_key =
         InternalKey(ns_key, std::to_string(i * kBitmapSegmentBytes), 
metadata.version, storage_->IsSlotIdEncoded())
             .Encode();
-    s = storage_->Get(read_options, sub_key, &value);
+    s = storage_->Get(read_options, sub_key, &pin_value);
     if (!s.ok() && !s.IsNotFound()) return s;
     if (s.IsNotFound()) {
       if (!bit) {
@@ -320,17 +320,18 @@ rocksdb::Status Bitmap::BitPos(const Slice &user_key, 
bool bit, int64_t start, i
     }
     size_t j = 0;
     if (i == start_index) j = u_start % kBitmapSegmentBytes;
-    for (; j < value.size(); j++) {
+    for (; j < pin_value.size(); j++) {
       if (i == stop_index && j > (u_stop % kBitmapSegmentBytes)) break;
-      if (bit_pos_in_byte(value[j], bit) != -1) {
-        *pos = static_cast<int64_t>(i * kBitmapSegmentBits + j * 8 + 
bit_pos_in_byte(value[j], bit));
+      if (bit_pos_in_byte(pin_value[j], bit) != -1) {
+        *pos = static_cast<int64_t>(i * kBitmapSegmentBits + j * 8 + 
bit_pos_in_byte(pin_value[j], bit));
         return rocksdb::Status::OK();
       }
     }
-    if (!bit && value.size() < kBitmapSegmentBytes) {
-      *pos = static_cast<int64_t>(i * kBitmapSegmentBits + value.size() * 8);
+    if (!bit && pin_value.size() < kBitmapSegmentBytes) {
+      *pos = static_cast<int64_t>(i * kBitmapSegmentBits + pin_value.size() * 
8);
       return rocksdb::Status::OK();
     }
+    pin_value.Reset();
   }
   // bit was not found
   *pos = bit ? -1 : static_cast<int64_t>(metadata.size * 8);
diff --git a/src/types/redis_bloom_chain.cc b/src/types/redis_bloom_chain.cc
index aa5131c7..4c4e32d0 100644
--- a/src/types/redis_bloom_chain.cc
+++ b/src/types/redis_bloom_chain.cc
@@ -45,17 +45,17 @@ void BloomChain::getBFKeyList(const Slice &ns_key, const 
BloomChainMetadata &met
 }
 
 rocksdb::Status BloomChain::getBFDataList(const std::vector<std::string> 
&bf_key_list,
-                                          std::vector<std::string> 
*bf_data_list) {
+                                          std::vector<rocksdb::PinnableSlice> 
*bf_data_list) {
   LatestSnapShot ss(storage_);
   rocksdb::ReadOptions read_options;
   read_options.snapshot = ss.GetSnapShot();
 
   bf_data_list->reserve(bf_key_list.size());
   for (const auto &bf_key : bf_key_list) {
-    std::string bf_data;
-    rocksdb::Status s = storage_->Get(read_options, bf_key, &bf_data);
+    rocksdb::PinnableSlice pin_value;
+    rocksdb::Status s = storage_->Get(read_options, bf_key, &pin_value);
     if (!s.ok()) return s;
-    bf_data_list->push_back(std::move(bf_data));
+    bf_data_list->push_back(std::move(pin_value));
   }
   return rocksdb::Status::OK();
 }
@@ -113,8 +113,9 @@ void BloomChain::bloomAdd(uint64_t item_hash, std::string 
&bf_data) {
   block_split_bloom_filter.InsertHash(item_hash);
 }
 
-bool BloomChain::bloomCheck(uint64_t item_hash, std::string &bf_data) {
-  const BlockSplitBloomFilter block_split_bloom_filter(bf_data);
+bool BloomChain::bloomCheck(uint64_t item_hash, std::string_view &bf_data) {
+  const BlockSplitBloomFilter block_split_bloom_filter(
+      nonstd::span<char>(const_cast<char *>(bf_data.data()), bf_data.size()));
   return block_split_bloom_filter.FindHash(item_hash);
 }
 
@@ -163,7 +164,7 @@ rocksdb::Status BloomChain::InsertCommon(const Slice 
&user_key, const std::vecto
   std::vector<std::string> bf_key_list;
   getBFKeyList(ns_key, metadata, &bf_key_list);
 
-  std::vector<std::string> bf_data_list;
+  std::vector<rocksdb::PinnableSlice> bf_data_list;
   s = getBFDataList(bf_key_list, &bf_data_list);
   if (!s.ok()) return s;
 
@@ -180,7 +181,8 @@ rocksdb::Status BloomChain::InsertCommon(const Slice 
&user_key, const std::vecto
     bool exist = false;
     // TODO: to test which direction for searching is better
     for (int ii = static_cast<int>(bf_data_list.size()) - 1; ii >= 0; --ii) {
-      exist = bloomCheck(item_hash_list[i], bf_data_list[ii]);
+      std::string_view data = bf_data_list[ii].ToStringView();
+      exist = bloomCheck(item_hash_list[i], data);
       if (exist) break;
     }
 
@@ -190,17 +192,23 @@ rocksdb::Status BloomChain::InsertCommon(const Slice 
&user_key, const std::vecto
     } else {
       if (metadata.size + 1 > metadata.GetCapacity()) {
         if (metadata.IsScaling()) {
-          batch->Put(bf_key_list.back(), bf_data_list.back());
+          batch->Put(bf_key_list.back(), bf_data_list.back().ToStringView());
           std::string bf_data;
           createBloomFilterInBatch(ns_key, &metadata, batch, &bf_data);
-          bf_data_list.push_back(std::move(bf_data));
+          rocksdb::PinnableSlice pin_slice;
+          *pin_slice.GetSelf() = std::move(bf_data);
+          pin_slice.PinSelf();
+          bf_data_list.push_back(std::move(pin_slice));
           bf_key_list.push_back(getBFKey(ns_key, metadata, metadata.n_filters 
- 1));
         } else {
           (*rets)[i] = BloomFilterAddResult::kFull;
           continue;
         }
       }
-      bloomAdd(item_hash_list[i], bf_data_list.back());
+      std::string data = bf_data_list.back().ToString();
+      bloomAdd(item_hash_list[i], data);
+      *bf_data_list.back().GetSelf() = std::move(data);
+      bf_data_list.back().PinSelf();
       (*rets)[i] = BloomFilterAddResult::kOk;
       metadata.size += 1;
     }
@@ -210,7 +218,7 @@ rocksdb::Status BloomChain::InsertCommon(const Slice 
&user_key, const std::vecto
     std::string bloom_chain_metadata_bytes;
     metadata.Encode(&bloom_chain_metadata_bytes);
     batch->Put(metadata_cf_handle_, ns_key, bloom_chain_metadata_bytes);
-    batch->Put(bf_key_list.back(), bf_data_list.back());
+    batch->Put(bf_key_list.back(), bf_data_list.back().ToStringView());
   }
   return storage_->Write(storage_->DefaultWriteOptions(), 
batch->GetWriteBatch());
 }
@@ -237,7 +245,7 @@ rocksdb::Status BloomChain::MExists(const Slice &user_key, 
const std::vector<std
   std::vector<std::string> bf_key_list;
   getBFKeyList(ns_key, metadata, &bf_key_list);
 
-  std::vector<std::string> bf_data_list;
+  std::vector<rocksdb::PinnableSlice> bf_data_list;
   s = getBFDataList(bf_key_list, &bf_data_list);
   if (!s.ok()) return s;
 
@@ -248,7 +256,8 @@ rocksdb::Status BloomChain::MExists(const Slice &user_key, 
const std::vector<std
     // check
     // TODO: to test which direction for searching is better
     for (int ii = static_cast<int>(bf_data_list.size()) - 1; ii >= 0; --ii) {
-      (*exists)[i] = bloomCheck(item_hash_list[i], bf_data_list[ii]);
+      std::string_view data = bf_data_list[ii].ToStringView();
+      (*exists)[i] = bloomCheck(item_hash_list[i], data);
       if ((*exists)[i]) break;
     }
   }
diff --git a/src/types/redis_bloom_chain.h b/src/types/redis_bloom_chain.h
index 7190a6ff..59f4b5b7 100644
--- a/src/types/redis_bloom_chain.h
+++ b/src/types/redis_bloom_chain.h
@@ -77,7 +77,8 @@ class BloomChain : public Database {
   rocksdb::Status getBloomChainMetadata(const Slice &ns_key, 
BloomChainMetadata *metadata);
   std::string getBFKey(const Slice &ns_key, const BloomChainMetadata 
&metadata, uint16_t filters_index);
   void getBFKeyList(const Slice &ns_key, const BloomChainMetadata &metadata, 
std::vector<std::string> *bf_key_list);
-  rocksdb::Status getBFDataList(const std::vector<std::string> &bf_key_list, 
std::vector<std::string> *bf_data_list);
+  rocksdb::Status getBFDataList(const std::vector<std::string> &bf_key_list,
+                                std::vector<rocksdb::PinnableSlice> 
*bf_data_list);
   static void getItemHashList(const std::vector<std::string> &items, 
std::vector<uint64_t> *item_hash_list);
 
   rocksdb::Status createBloomChain(const Slice &ns_key, double error_rate, 
uint32_t capacity, uint16_t expansion,
@@ -88,6 +89,6 @@ class BloomChain : public Database {
   /// bf_data: [in/out] The content string of bloomfilter.
   static void bloomAdd(uint64_t item_hash, std::string &bf_data);
 
-  static bool bloomCheck(uint64_t item_hash, std::string &bf_data);
+  static bool bloomCheck(uint64_t item_hash, std::string_view &bf_data);
 };
 }  // namespace redis

Reply via email to