This is an automated email from the ASF dual-hosted git repository.
hulk pushed a commit to branch unstable
in repository https://gitbox.apache.org/repos/asf/kvrocks.git
The following commit(s) were added to refs/heads/unstable by this push:
new 5a4ad7f7 Use rocksdb::PinnableSlice to avoid memory copy in bitmap and
bloomfilter (#1888)
5a4ad7f7 is described below
commit 5a4ad7f7907700f49dcbf732d17513ef6cf760ac
Author: Nuo Xu <[email protected]>
AuthorDate: Sat Nov 25 20:04:17 2023 -0800
Use rocksdb::PinnableSlice to avoid memory copy in bitmap and bloomfilter
(#1888)
---
src/storage/storage.cc | 13 +++++++++++++
src/storage/storage.h | 4 ++++
src/types/redis_bitmap.cc | 23 ++++++++++++-----------
src/types/redis_bloom_chain.cc | 37 +++++++++++++++++++++++--------------
src/types/redis_bloom_chain.h | 5 +++--
5 files changed, 55 insertions(+), 27 deletions(-)
diff --git a/src/storage/storage.cc b/src/storage/storage.cc
index 77d0ab27..a0e53c0b 100644
--- a/src/storage/storage.cc
+++ b/src/storage/storage.cc
@@ -531,6 +531,19 @@ rocksdb::Status Storage::Get(const rocksdb::ReadOptions
&options, rocksdb::Colum
return db_->Get(options, column_family, key, value);
}
+rocksdb::Status Storage::Get(const rocksdb::ReadOptions &options, const
rocksdb::Slice &key,
+ rocksdb::PinnableSlice *value) {
+ return Get(options, db_->DefaultColumnFamily(), key, value);
+}
+
+rocksdb::Status Storage::Get(const rocksdb::ReadOptions &options,
rocksdb::ColumnFamilyHandle *column_family,
+ const rocksdb::Slice &key, rocksdb::PinnableSlice
*value) {
+ if (is_txn_mode_ && txn_write_batch_->GetWriteBatch()->Count() > 0) {
+ return txn_write_batch_->GetFromBatchAndDB(db_.get(), options,
column_family, key, value);
+ }
+ return db_->Get(options, column_family, key, value);
+}
+
rocksdb::Iterator *Storage::NewIterator(const rocksdb::ReadOptions &options) {
return NewIterator(options, db_->DefaultColumnFamily());
}
diff --git a/src/storage/storage.h b/src/storage/storage.h
index e8d6562a..43c3cf1c 100644
--- a/src/storage/storage.h
+++ b/src/storage/storage.h
@@ -107,6 +107,10 @@ class Storage {
[[nodiscard]] rocksdb::Status Get(const rocksdb::ReadOptions &options, const
rocksdb::Slice &key, std::string *value);
[[nodiscard]] rocksdb::Status Get(const rocksdb::ReadOptions &options,
rocksdb::ColumnFamilyHandle *column_family,
const rocksdb::Slice &key, std::string
*value);
+ [[nodiscard]] rocksdb::Status Get(const rocksdb::ReadOptions &options, const
rocksdb::Slice &key,
+ rocksdb::PinnableSlice *value);
+ [[nodiscard]] rocksdb::Status Get(const rocksdb::ReadOptions &options,
rocksdb::ColumnFamilyHandle *column_family,
+ const rocksdb::Slice &key,
rocksdb::PinnableSlice *value);
void MultiGet(const rocksdb::ReadOptions &options,
rocksdb::ColumnFamilyHandle *column_family, size_t num_keys,
const rocksdb::Slice *keys, rocksdb::PinnableSlice *values,
rocksdb::Status *statuses);
rocksdb::Iterator *NewIterator(const rocksdb::ReadOptions &options,
rocksdb::ColumnFamilyHandle *column_family);
diff --git a/src/types/redis_bitmap.cc b/src/types/redis_bitmap.cc
index a7b73847..a4c71cd2 100644
--- a/src/types/redis_bitmap.cc
+++ b/src/types/redis_bitmap.cc
@@ -246,19 +246,19 @@ rocksdb::Status Bitmap::BitCount(const Slice &user_key,
int64_t start, int64_t s
uint32_t start_index = u_start / kBitmapSegmentBytes;
uint32_t stop_index = u_stop / kBitmapSegmentBytes;
// Don't use multi get to prevent large range query, and take too much memory
- std::string value;
for (uint32_t i = start_index; i <= stop_index; i++) {
+ rocksdb::PinnableSlice pin_value;
std::string sub_key =
InternalKey(ns_key, std::to_string(i * kBitmapSegmentBytes),
metadata.version, storage_->IsSlotIdEncoded())
.Encode();
- s = storage_->Get(read_options, sub_key, &value);
+ s = storage_->Get(read_options, sub_key, &pin_value);
if (!s.ok() && !s.IsNotFound()) return s;
if (s.IsNotFound()) continue;
size_t j = 0;
if (i == start_index) j = u_start % kBitmapSegmentBytes;
- auto k = static_cast<int64_t>(value.size());
+ auto k = static_cast<int64_t>(pin_value.size());
if (i == stop_index) k = u_stop % kBitmapSegmentBytes + 1;
- *cnt += BitmapString::RawPopcount(reinterpret_cast<const uint8_t
*>(value.data()) + j, k);
+ *cnt += BitmapString::RawPopcount(reinterpret_cast<const uint8_t
*>(pin_value.data()) + j, k);
}
return rocksdb::Status::OK();
}
@@ -304,12 +304,12 @@ rocksdb::Status Bitmap::BitPos(const Slice &user_key,
bool bit, int64_t start, i
uint32_t start_index = u_start / kBitmapSegmentBytes;
uint32_t stop_index = u_stop / kBitmapSegmentBytes;
// Don't use multi get to prevent large range query, and take too much memory
- std::string value;
+ rocksdb::PinnableSlice pin_value;
for (uint32_t i = start_index; i <= stop_index; i++) {
std::string sub_key =
InternalKey(ns_key, std::to_string(i * kBitmapSegmentBytes),
metadata.version, storage_->IsSlotIdEncoded())
.Encode();
- s = storage_->Get(read_options, sub_key, &value);
+ s = storage_->Get(read_options, sub_key, &pin_value);
if (!s.ok() && !s.IsNotFound()) return s;
if (s.IsNotFound()) {
if (!bit) {
@@ -320,17 +320,18 @@ rocksdb::Status Bitmap::BitPos(const Slice &user_key,
bool bit, int64_t start, i
}
size_t j = 0;
if (i == start_index) j = u_start % kBitmapSegmentBytes;
- for (; j < value.size(); j++) {
+ for (; j < pin_value.size(); j++) {
if (i == stop_index && j > (u_stop % kBitmapSegmentBytes)) break;
- if (bit_pos_in_byte(value[j], bit) != -1) {
- *pos = static_cast<int64_t>(i * kBitmapSegmentBits + j * 8 +
bit_pos_in_byte(value[j], bit));
+ if (bit_pos_in_byte(pin_value[j], bit) != -1) {
+ *pos = static_cast<int64_t>(i * kBitmapSegmentBits + j * 8 +
bit_pos_in_byte(pin_value[j], bit));
return rocksdb::Status::OK();
}
}
- if (!bit && value.size() < kBitmapSegmentBytes) {
- *pos = static_cast<int64_t>(i * kBitmapSegmentBits + value.size() * 8);
+ if (!bit && pin_value.size() < kBitmapSegmentBytes) {
+ *pos = static_cast<int64_t>(i * kBitmapSegmentBits + pin_value.size() *
8);
return rocksdb::Status::OK();
}
+ pin_value.Reset();
}
// bit was not found
*pos = bit ? -1 : static_cast<int64_t>(metadata.size * 8);
diff --git a/src/types/redis_bloom_chain.cc b/src/types/redis_bloom_chain.cc
index aa5131c7..4c4e32d0 100644
--- a/src/types/redis_bloom_chain.cc
+++ b/src/types/redis_bloom_chain.cc
@@ -45,17 +45,17 @@ void BloomChain::getBFKeyList(const Slice &ns_key, const
BloomChainMetadata &met
}
rocksdb::Status BloomChain::getBFDataList(const std::vector<std::string>
&bf_key_list,
- std::vector<std::string>
*bf_data_list) {
+ std::vector<rocksdb::PinnableSlice>
*bf_data_list) {
LatestSnapShot ss(storage_);
rocksdb::ReadOptions read_options;
read_options.snapshot = ss.GetSnapShot();
bf_data_list->reserve(bf_key_list.size());
for (const auto &bf_key : bf_key_list) {
- std::string bf_data;
- rocksdb::Status s = storage_->Get(read_options, bf_key, &bf_data);
+ rocksdb::PinnableSlice pin_value;
+ rocksdb::Status s = storage_->Get(read_options, bf_key, &pin_value);
if (!s.ok()) return s;
- bf_data_list->push_back(std::move(bf_data));
+ bf_data_list->push_back(std::move(pin_value));
}
return rocksdb::Status::OK();
}
@@ -113,8 +113,9 @@ void BloomChain::bloomAdd(uint64_t item_hash, std::string
&bf_data) {
block_split_bloom_filter.InsertHash(item_hash);
}
-bool BloomChain::bloomCheck(uint64_t item_hash, std::string &bf_data) {
- const BlockSplitBloomFilter block_split_bloom_filter(bf_data);
+bool BloomChain::bloomCheck(uint64_t item_hash, std::string_view &bf_data) {
+ const BlockSplitBloomFilter block_split_bloom_filter(
+ nonstd::span<char>(const_cast<char *>(bf_data.data()), bf_data.size()));
return block_split_bloom_filter.FindHash(item_hash);
}
@@ -163,7 +164,7 @@ rocksdb::Status BloomChain::InsertCommon(const Slice
&user_key, const std::vecto
std::vector<std::string> bf_key_list;
getBFKeyList(ns_key, metadata, &bf_key_list);
- std::vector<std::string> bf_data_list;
+ std::vector<rocksdb::PinnableSlice> bf_data_list;
s = getBFDataList(bf_key_list, &bf_data_list);
if (!s.ok()) return s;
@@ -180,7 +181,8 @@ rocksdb::Status BloomChain::InsertCommon(const Slice
&user_key, const std::vecto
bool exist = false;
// TODO: to test which direction for searching is better
for (int ii = static_cast<int>(bf_data_list.size()) - 1; ii >= 0; --ii) {
- exist = bloomCheck(item_hash_list[i], bf_data_list[ii]);
+ std::string_view data = bf_data_list[ii].ToStringView();
+ exist = bloomCheck(item_hash_list[i], data);
if (exist) break;
}
@@ -190,17 +192,23 @@ rocksdb::Status BloomChain::InsertCommon(const Slice
&user_key, const std::vecto
} else {
if (metadata.size + 1 > metadata.GetCapacity()) {
if (metadata.IsScaling()) {
- batch->Put(bf_key_list.back(), bf_data_list.back());
+ batch->Put(bf_key_list.back(), bf_data_list.back().ToStringView());
std::string bf_data;
createBloomFilterInBatch(ns_key, &metadata, batch, &bf_data);
- bf_data_list.push_back(std::move(bf_data));
+ rocksdb::PinnableSlice pin_slice;
+ *pin_slice.GetSelf() = std::move(bf_data);
+ pin_slice.PinSelf();
+ bf_data_list.push_back(std::move(pin_slice));
bf_key_list.push_back(getBFKey(ns_key, metadata, metadata.n_filters
- 1));
} else {
(*rets)[i] = BloomFilterAddResult::kFull;
continue;
}
}
- bloomAdd(item_hash_list[i], bf_data_list.back());
+ std::string data = bf_data_list.back().ToString();
+ bloomAdd(item_hash_list[i], data);
+ *bf_data_list.back().GetSelf() = std::move(data);
+ bf_data_list.back().PinSelf();
(*rets)[i] = BloomFilterAddResult::kOk;
metadata.size += 1;
}
@@ -210,7 +218,7 @@ rocksdb::Status BloomChain::InsertCommon(const Slice
&user_key, const std::vecto
std::string bloom_chain_metadata_bytes;
metadata.Encode(&bloom_chain_metadata_bytes);
batch->Put(metadata_cf_handle_, ns_key, bloom_chain_metadata_bytes);
- batch->Put(bf_key_list.back(), bf_data_list.back());
+ batch->Put(bf_key_list.back(), bf_data_list.back().ToStringView());
}
return storage_->Write(storage_->DefaultWriteOptions(),
batch->GetWriteBatch());
}
@@ -237,7 +245,7 @@ rocksdb::Status BloomChain::MExists(const Slice &user_key,
const std::vector<std
std::vector<std::string> bf_key_list;
getBFKeyList(ns_key, metadata, &bf_key_list);
- std::vector<std::string> bf_data_list;
+ std::vector<rocksdb::PinnableSlice> bf_data_list;
s = getBFDataList(bf_key_list, &bf_data_list);
if (!s.ok()) return s;
@@ -248,7 +256,8 @@ rocksdb::Status BloomChain::MExists(const Slice &user_key,
const std::vector<std
// check
// TODO: to test which direction for searching is better
for (int ii = static_cast<int>(bf_data_list.size()) - 1; ii >= 0; --ii) {
- (*exists)[i] = bloomCheck(item_hash_list[i], bf_data_list[ii]);
+ std::string_view data = bf_data_list[ii].ToStringView();
+ (*exists)[i] = bloomCheck(item_hash_list[i], data);
if ((*exists)[i]) break;
}
}
diff --git a/src/types/redis_bloom_chain.h b/src/types/redis_bloom_chain.h
index 7190a6ff..59f4b5b7 100644
--- a/src/types/redis_bloom_chain.h
+++ b/src/types/redis_bloom_chain.h
@@ -77,7 +77,8 @@ class BloomChain : public Database {
rocksdb::Status getBloomChainMetadata(const Slice &ns_key,
BloomChainMetadata *metadata);
std::string getBFKey(const Slice &ns_key, const BloomChainMetadata
&metadata, uint16_t filters_index);
void getBFKeyList(const Slice &ns_key, const BloomChainMetadata &metadata,
std::vector<std::string> *bf_key_list);
- rocksdb::Status getBFDataList(const std::vector<std::string> &bf_key_list,
std::vector<std::string> *bf_data_list);
+ rocksdb::Status getBFDataList(const std::vector<std::string> &bf_key_list,
+ std::vector<rocksdb::PinnableSlice>
*bf_data_list);
static void getItemHashList(const std::vector<std::string> &items,
std::vector<uint64_t> *item_hash_list);
rocksdb::Status createBloomChain(const Slice &ns_key, double error_rate,
uint32_t capacity, uint16_t expansion,
@@ -88,6 +89,6 @@ class BloomChain : public Database {
/// bf_data: [in/out] The content string of bloomfilter.
static void bloomAdd(uint64_t item_hash, std::string &bf_data);
- static bool bloomCheck(uint64_t item_hash, std::string &bf_data);
+ static bool bloomCheck(uint64_t item_hash, std::string_view &bf_data);
};
} // namespace redis