http://git-wip-us.apache.org/repos/asf/nifi-minifi-cpp/blob/48867732/thirdparty/rocksdb/db/table_properties_collector.cc
----------------------------------------------------------------------
diff --git a/thirdparty/rocksdb/db/table_properties_collector.cc b/thirdparty/rocksdb/db/table_properties_collector.cc
new file mode 100644
index 0000000..a1f4dba
--- /dev/null
+++ b/thirdparty/rocksdb/db/table_properties_collector.cc
@@ -0,0 +1,134 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+
+#include "db/table_properties_collector.h"
+
+#include "db/dbformat.h"
+#include "util/coding.h"
+#include "util/string_util.h"
+
+namespace rocksdb {
+
+Status InternalKeyPropertiesCollector::InternalAdd(const Slice& key,
+                                                   const Slice& value,
+                                                   uint64_t file_size) {
+  ParsedInternalKey ikey;
+  if (!ParseInternalKey(key, &ikey)) {
+    return Status::InvalidArgument("Invalid internal key");
+  }
+
+  // Note: We count both deletions and single deletions here.
+  if (ikey.type == ValueType::kTypeDeletion ||
+      ikey.type == ValueType::kTypeSingleDeletion) {
+    ++deleted_keys_;
+  } else if (ikey.type == ValueType::kTypeMerge) {
+    ++merge_operands_;
+  }
+
+  return Status::OK();
+}
+
+Status InternalKeyPropertiesCollector::Finish(
+    UserCollectedProperties* properties) {
+  assert(properties);
+  assert(properties->find(
+      InternalKeyTablePropertiesNames::kDeletedKeys) == properties->end());
+  assert(properties->find(InternalKeyTablePropertiesNames::kMergeOperands) ==
+         properties->end());
+
+  std::string val_deleted_keys;
+  PutVarint64(&val_deleted_keys, deleted_keys_);
+  properties->insert(
+      {InternalKeyTablePropertiesNames::kDeletedKeys, val_deleted_keys});
+
+  std::string val_merge_operands;
+  PutVarint64(&val_merge_operands, merge_operands_);
+  properties->insert(
+      {InternalKeyTablePropertiesNames::kMergeOperands, val_merge_operands});
+
+  return Status::OK();
+}
+
+UserCollectedProperties
+InternalKeyPropertiesCollector::GetReadableProperties() const {
+  return {{"kDeletedKeys", ToString(deleted_keys_)},
+          {"kMergeOperands", ToString(merge_operands_)}};
+}
+
+namespace {
+
+EntryType GetEntryType(ValueType value_type) {
+  switch (value_type) {
+    case kTypeValue:
+      return kEntryPut;
+    case kTypeDeletion:
+      return kEntryDelete;
+    case kTypeSingleDeletion:
+      return kEntrySingleDelete;
+    case kTypeMerge:
+      return kEntryMerge;
+    default:
+      return kEntryOther;
+  }
+}
+
+uint64_t GetUint64Property(const UserCollectedProperties& props,
+                           const std::string property_name,
+                           bool* property_present) {
+  auto pos = props.find(property_name);
+  if (pos == props.end()) {
+    *property_present = false;
+    return 0;
+  }
+  Slice raw = pos->second;
+  uint64_t val = 0;
+  *property_present = true;
+  return GetVarint64(&raw, &val) ?
val : 0; +} + +} // namespace + +Status UserKeyTablePropertiesCollector::InternalAdd(const Slice& key, + const Slice& value, + uint64_t file_size) { + ParsedInternalKey ikey; + if (!ParseInternalKey(key, &ikey)) { + return Status::InvalidArgument("Invalid internal key"); + } + + return collector_->AddUserKey(ikey.user_key, value, GetEntryType(ikey.type), + ikey.sequence, file_size); +} + +Status UserKeyTablePropertiesCollector::Finish( + UserCollectedProperties* properties) { + return collector_->Finish(properties); +} + +UserCollectedProperties +UserKeyTablePropertiesCollector::GetReadableProperties() const { + return collector_->GetReadableProperties(); +} + + +const std::string InternalKeyTablePropertiesNames::kDeletedKeys + = "rocksdb.deleted.keys"; +const std::string InternalKeyTablePropertiesNames::kMergeOperands = + "rocksdb.merge.operands"; + +uint64_t GetDeletedKeys( + const UserCollectedProperties& props) { + bool property_present_ignored; + return GetUint64Property(props, InternalKeyTablePropertiesNames::kDeletedKeys, + &property_present_ignored); +} + +uint64_t GetMergeOperands(const UserCollectedProperties& props, + bool* property_present) { + return GetUint64Property( + props, InternalKeyTablePropertiesNames::kMergeOperands, property_present); +} + +} // namespace rocksdb
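The collectors above feed the properties block that every SST file carries. As a minimal sketch of the user-facing side (hypothetical, not part of this commit; the class name PutCountCollector and the property key "example.put.count" are illustrative), a custom rocksdb::TablePropertiesCollector that counts Put entries might look like this; objects of this kind are what UserKeyTablePropertiesCollector, declared in the header below, wraps for internal use:

  #include <cstdint>
  #include <string>
  #include "rocksdb/table_properties.h"

  class PutCountCollector : public rocksdb::TablePropertiesCollector {
   public:
    rocksdb::Status AddUserKey(const rocksdb::Slice& /*key*/,
                               const rocksdb::Slice& /*value*/,
                               rocksdb::EntryType type,
                               rocksdb::SequenceNumber /*seq*/,
                               uint64_t /*file_size*/) override {
      // Called once per entry while the table is built.
      if (type == rocksdb::kEntryPut) {
        ++put_count_;
      }
      return rocksdb::Status::OK();
    }

    rocksdb::Status Finish(rocksdb::UserCollectedProperties* props) override {
      // Persisted into the SST file's properties block.
      props->insert({"example.put.count", std::to_string(put_count_)});
      return rocksdb::Status::OK();
    }

    rocksdb::UserCollectedProperties GetReadableProperties() const override {
      return {{"example.put.count", std::to_string(put_count_)}};
    }

    const char* Name() const override { return "PutCountCollector"; }

   private:
    uint64_t put_count_ = 0;
  };

The built-in deletion/merge counters collected above are read back with the GetDeletedKeys()/GetMergeOperands() helpers defined at the end of this file.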
http://git-wip-us.apache.org/repos/asf/nifi-minifi-cpp/blob/48867732/thirdparty/rocksdb/db/table_properties_collector.h
----------------------------------------------------------------------
diff --git a/thirdparty/rocksdb/db/table_properties_collector.h b/thirdparty/rocksdb/db/table_properties_collector.h
new file mode 100644
index 0000000..d8cd756
--- /dev/null
+++ b/thirdparty/rocksdb/db/table_properties_collector.h
@@ -0,0 +1,137 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+//
+// This file defines a collection of statistics collectors.
+#pragma once
+
+#include "rocksdb/table_properties.h"
+
+#include <memory>
+#include <string>
+#include <vector>
+
+namespace rocksdb {
+
+struct InternalKeyTablePropertiesNames {
+  static const std::string kDeletedKeys;
+  static const std::string kMergeOperands;
+};
+
+// Base class for internal table properties collectors.
+class IntTblPropCollector {
+ public:
+  virtual ~IntTblPropCollector() {}
+  virtual Status Finish(UserCollectedProperties* properties) = 0;
+
+  virtual const char* Name() const = 0;
+
+  // @params key the user key that is inserted into the table.
+  // @params value the value that is inserted into the table.
+  virtual Status InternalAdd(const Slice& key, const Slice& value,
+                             uint64_t file_size) = 0;
+
+  virtual UserCollectedProperties GetReadableProperties() const = 0;
+
+  virtual bool NeedCompact() const { return false; }
+};
+
+// Factory for internal table properties collectors.
+class IntTblPropCollectorFactory {
+ public:
+  virtual ~IntTblPropCollectorFactory() {}
+  // has to be thread-safe
+  virtual IntTblPropCollector* CreateIntTblPropCollector(
+      uint32_t column_family_id) = 0;
+
+  // The name of the properties collector can be used for debugging purposes.
+  virtual const char* Name() const = 0;
+};
+
+// Collects statistics for internal keys. Visible only to internal
+// RocksDB modules.
+class InternalKeyPropertiesCollector : public IntTblPropCollector {
+ public:
+  virtual Status InternalAdd(const Slice& key, const Slice& value,
+                             uint64_t file_size) override;
+
+  virtual Status Finish(UserCollectedProperties* properties) override;
+
+  virtual const char* Name() const override {
+    return "InternalKeyPropertiesCollector";
+  }
+
+  UserCollectedProperties GetReadableProperties() const override;
+
+ private:
+  uint64_t deleted_keys_ = 0;
+  uint64_t merge_operands_ = 0;
+};
+
+class InternalKeyPropertiesCollectorFactory
+    : public IntTblPropCollectorFactory {
+ public:
+  virtual IntTblPropCollector* CreateIntTblPropCollector(
+      uint32_t column_family_id) override {
+    return new InternalKeyPropertiesCollector();
+  }
+
+  virtual const char* Name() const override {
+    return "InternalKeyPropertiesCollectorFactory";
+  }
+};
+
+// When RocksDB creates a new table, it encodes all "user keys" into
+// "internal keys", which carry meta information about a given entry.
+//
+// This class extracts the user key from the encoded internal key when Add() is
+// invoked.
+class UserKeyTablePropertiesCollector : public IntTblPropCollector { + public: + // transfer of ownership + explicit UserKeyTablePropertiesCollector(TablePropertiesCollector* collector) + : collector_(collector) {} + + virtual ~UserKeyTablePropertiesCollector() {} + + virtual Status InternalAdd(const Slice& key, const Slice& value, + uint64_t file_size) override; + + virtual Status Finish(UserCollectedProperties* properties) override; + + virtual const char* Name() const override { return collector_->Name(); } + + UserCollectedProperties GetReadableProperties() const override; + + virtual bool NeedCompact() const override { + return collector_->NeedCompact(); + } + + protected: + std::unique_ptr<TablePropertiesCollector> collector_; +}; + +class UserKeyTablePropertiesCollectorFactory + : public IntTblPropCollectorFactory { + public: + explicit UserKeyTablePropertiesCollectorFactory( + std::shared_ptr<TablePropertiesCollectorFactory> user_collector_factory) + : user_collector_factory_(user_collector_factory) {} + virtual IntTblPropCollector* CreateIntTblPropCollector( + uint32_t column_family_id) override { + TablePropertiesCollectorFactory::Context context; + context.column_family_id = column_family_id; + return new UserKeyTablePropertiesCollector( + user_collector_factory_->CreateTablePropertiesCollector(context)); + } + + virtual const char* Name() const override { + return user_collector_factory_->Name(); + } + + private: + std::shared_ptr<TablePropertiesCollectorFactory> user_collector_factory_; +}; + +} // namespace rocksdb http://git-wip-us.apache.org/repos/asf/nifi-minifi-cpp/blob/48867732/thirdparty/rocksdb/db/transaction_log_impl.cc ---------------------------------------------------------------------- diff --git a/thirdparty/rocksdb/db/transaction_log_impl.cc b/thirdparty/rocksdb/db/transaction_log_impl.cc new file mode 100644 index 0000000..e22c0c4 --- /dev/null +++ b/thirdparty/rocksdb/db/transaction_log_impl.cc @@ -0,0 +1,272 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). 
+ +#ifndef ROCKSDB_LITE +#ifndef __STDC_FORMAT_MACROS +#define __STDC_FORMAT_MACROS +#endif + +#include "db/transaction_log_impl.h" +#include <inttypes.h> +#include "db/write_batch_internal.h" +#include "util/file_reader_writer.h" + +namespace rocksdb { + +TransactionLogIteratorImpl::TransactionLogIteratorImpl( + const std::string& dir, const ImmutableDBOptions* options, + const TransactionLogIterator::ReadOptions& read_options, + const EnvOptions& soptions, const SequenceNumber seq, + std::unique_ptr<VectorLogPtr> files, VersionSet const* const versions) + : dir_(dir), + options_(options), + read_options_(read_options), + soptions_(soptions), + startingSequenceNumber_(seq), + files_(std::move(files)), + started_(false), + isValid_(false), + currentFileIndex_(0), + currentBatchSeq_(0), + currentLastSeq_(0), + versions_(versions) { + assert(files_ != nullptr); + assert(versions_ != nullptr); + + reporter_.env = options_->env; + reporter_.info_log = options_->info_log.get(); + SeekToStartSequence(); // Seek till starting sequence +} + +Status TransactionLogIteratorImpl::OpenLogFile( + const LogFile* logFile, unique_ptr<SequentialFileReader>* file_reader) { + Env* env = options_->env; + unique_ptr<SequentialFile> file; + Status s; + EnvOptions optimized_env_options = env->OptimizeForLogRead(soptions_); + if (logFile->Type() == kArchivedLogFile) { + std::string fname = ArchivedLogFileName(dir_, logFile->LogNumber()); + s = env->NewSequentialFile(fname, &file, optimized_env_options); + } else { + std::string fname = LogFileName(dir_, logFile->LogNumber()); + s = env->NewSequentialFile(fname, &file, optimized_env_options); + if (!s.ok()) { + // If cannot open file in DB directory. + // Try the archive dir, as it could have moved in the meanwhile. + fname = ArchivedLogFileName(dir_, logFile->LogNumber()); + s = env->NewSequentialFile(fname, &file, optimized_env_options); + } + } + if (s.ok()) { + file_reader->reset(new SequentialFileReader(std::move(file))); + } + return s; +} + +BatchResult TransactionLogIteratorImpl::GetBatch() { + assert(isValid_); // cannot call in a non valid state. + BatchResult result; + result.sequence = currentBatchSeq_; + result.writeBatchPtr = std::move(currentBatch_); + return result; +} + +Status TransactionLogIteratorImpl::status() { + return currentStatus_; +} + +bool TransactionLogIteratorImpl::Valid() { + return started_ && isValid_; +} + +bool TransactionLogIteratorImpl::RestrictedRead( + Slice* record, + std::string* scratch) { + // Don't read if no more complete entries to read from logs + if (currentLastSeq_ >= versions_->LastSequence()) { + return false; + } + return currentLogReader_->ReadRecord(record, scratch); +} + +void TransactionLogIteratorImpl::SeekToStartSequence( + uint64_t startFileIndex, + bool strict) { + std::string scratch; + Slice record; + started_ = false; + isValid_ = false; + if (files_->size() <= startFileIndex) { + return; + } + Status s = OpenLogReader(files_->at(startFileIndex).get()); + if (!s.ok()) { + currentStatus_ = s; + reporter_.Info(currentStatus_.ToString().c_str()); + return; + } + while (RestrictedRead(&record, &scratch)) { + if (record.size() < WriteBatchInternal::kHeader) { + reporter_.Corruption( + record.size(), Status::Corruption("very small log record")); + continue; + } + UpdateCurrentWriteBatch(record); + if (currentLastSeq_ >= startingSequenceNumber_) { + if (strict && currentBatchSeq_ != startingSequenceNumber_) { + currentStatus_ = Status::Corruption("Gap in sequence number. 
Could not " + "seek to required sequence number"); + reporter_.Info(currentStatus_.ToString().c_str()); + return; + } else if (strict) { + reporter_.Info("Could seek required sequence number. Iterator will " + "continue."); + } + isValid_ = true; + started_ = true; // set started_ as we could seek till starting sequence + return; + } else { + isValid_ = false; + } + } + + // Could not find start sequence in first file. Normally this must be the + // only file. Otherwise log the error and let the iterator return next entry + // If strict is set, we want to seek exactly till the start sequence and it + // should have been present in the file we scanned above + if (strict) { + currentStatus_ = Status::Corruption("Gap in sequence number. Could not " + "seek to required sequence number"); + reporter_.Info(currentStatus_.ToString().c_str()); + } else if (files_->size() != 1) { + currentStatus_ = Status::Corruption("Start sequence was not found, " + "skipping to the next available"); + reporter_.Info(currentStatus_.ToString().c_str()); + // Let NextImpl find the next available entry. started_ remains false + // because we don't want to check for gaps while moving to start sequence + NextImpl(true); + } +} + +void TransactionLogIteratorImpl::Next() { + return NextImpl(false); +} + +void TransactionLogIteratorImpl::NextImpl(bool internal) { + std::string scratch; + Slice record; + isValid_ = false; + if (!internal && !started_) { + // Runs every time until we can seek to the start sequence + return SeekToStartSequence(); + } + while(true) { + assert(currentLogReader_); + if (currentLogReader_->IsEOF()) { + currentLogReader_->UnmarkEOF(); + } + while (RestrictedRead(&record, &scratch)) { + if (record.size() < WriteBatchInternal::kHeader) { + reporter_.Corruption( + record.size(), Status::Corruption("very small log record")); + continue; + } else { + // started_ should be true if called by application + assert(internal || started_); + // started_ should be false if called internally + assert(!internal || !started_); + UpdateCurrentWriteBatch(record); + if (internal && !started_) { + started_ = true; + } + return; + } + } + + // Open the next file + if (currentFileIndex_ < files_->size() - 1) { + ++currentFileIndex_; + Status s = OpenLogReader(files_->at(currentFileIndex_).get()); + if (!s.ok()) { + isValid_ = false; + currentStatus_ = s; + return; + } + } else { + isValid_ = false; + if (currentLastSeq_ == versions_->LastSequence()) { + currentStatus_ = Status::OK(); + } else { + currentStatus_ = Status::Corruption("NO MORE DATA LEFT"); + } + return; + } + } +} + +bool TransactionLogIteratorImpl::IsBatchExpected( + const WriteBatch* batch, + const SequenceNumber expectedSeq) { + assert(batch); + SequenceNumber batchSeq = WriteBatchInternal::Sequence(batch); + if (batchSeq != expectedSeq) { + char buf[200]; + snprintf(buf, sizeof(buf), + "Discontinuity in log records. 
Got seq=%" PRIu64 + ", Expected seq=%" PRIu64 ", Last flushed seq=%" PRIu64 + ".Log iterator will reseek the correct batch.", + batchSeq, expectedSeq, versions_->LastSequence()); + reporter_.Info(buf); + return false; + } + return true; +} + +void TransactionLogIteratorImpl::UpdateCurrentWriteBatch(const Slice& record) { + std::unique_ptr<WriteBatch> batch(new WriteBatch()); + WriteBatchInternal::SetContents(batch.get(), record); + + SequenceNumber expectedSeq = currentLastSeq_ + 1; + // If the iterator has started, then confirm that we get continuous batches + if (started_ && !IsBatchExpected(batch.get(), expectedSeq)) { + // Seek to the batch having expected sequence number + if (expectedSeq < files_->at(currentFileIndex_)->StartSequence()) { + // Expected batch must lie in the previous log file + // Avoid underflow. + if (currentFileIndex_ != 0) { + currentFileIndex_--; + } + } + startingSequenceNumber_ = expectedSeq; + // currentStatus_ will be set to Ok if reseek succeeds + currentStatus_ = Status::NotFound("Gap in sequence numbers"); + return SeekToStartSequence(currentFileIndex_, true); + } + + currentBatchSeq_ = WriteBatchInternal::Sequence(batch.get()); + currentLastSeq_ = currentBatchSeq_ + + WriteBatchInternal::Count(batch.get()) - 1; + // currentBatchSeq_ can only change here + assert(currentLastSeq_ <= versions_->LastSequence()); + + currentBatch_ = std::move(batch); + isValid_ = true; + currentStatus_ = Status::OK(); +} + +Status TransactionLogIteratorImpl::OpenLogReader(const LogFile* logFile) { + unique_ptr<SequentialFileReader> file; + Status s = OpenLogFile(logFile, &file); + if (!s.ok()) { + return s; + } + assert(file); + currentLogReader_.reset(new log::Reader( + options_->info_log, std::move(file), &reporter_, + read_options_.verify_checksums_, 0, logFile->LogNumber())); + return Status::OK(); +} +} // namespace rocksdb +#endif // ROCKSDB_LITE http://git-wip-us.apache.org/repos/asf/nifi-minifi-cpp/blob/48867732/thirdparty/rocksdb/db/transaction_log_impl.h ---------------------------------------------------------------------- diff --git a/thirdparty/rocksdb/db/transaction_log_impl.h b/thirdparty/rocksdb/db/transaction_log_impl.h new file mode 100644 index 0000000..769d833 --- /dev/null +++ b/thirdparty/rocksdb/db/transaction_log_impl.h @@ -0,0 +1,124 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). 
+#pragma once
+
+#ifndef ROCKSDB_LITE
+#include <vector>
+
+#include "db/log_reader.h"
+#include "db/version_set.h"
+#include "options/db_options.h"
+#include "port/port.h"
+#include "rocksdb/env.h"
+#include "rocksdb/options.h"
+#include "rocksdb/transaction_log.h"
+#include "rocksdb/types.h"
+#include "util/filename.h"
+
+namespace rocksdb {
+
+class LogFileImpl : public LogFile {
+ public:
+  LogFileImpl(uint64_t logNum, WalFileType logType, SequenceNumber startSeq,
+              uint64_t sizeBytes) :
+    logNumber_(logNum),
+    type_(logType),
+    startSequence_(startSeq),
+    sizeFileBytes_(sizeBytes) {
+  }
+
+  std::string PathName() const override {
+    if (type_ == kArchivedLogFile) {
+      return ArchivedLogFileName("", logNumber_);
+    }
+    return LogFileName("", logNumber_);
+  }
+
+  uint64_t LogNumber() const override { return logNumber_; }
+
+  WalFileType Type() const override { return type_; }
+
+  SequenceNumber StartSequence() const override { return startSequence_; }
+
+  uint64_t SizeFileBytes() const override { return sizeFileBytes_; }
+
+  bool operator < (const LogFile& that) const {
+    return LogNumber() < that.LogNumber();
+  }
+
+ private:
+  uint64_t logNumber_;
+  WalFileType type_;
+  SequenceNumber startSequence_;
+  uint64_t sizeFileBytes_;
+
+};
+
+class TransactionLogIteratorImpl : public TransactionLogIterator {
+ public:
+  TransactionLogIteratorImpl(
+      const std::string& dir, const ImmutableDBOptions* options,
+      const TransactionLogIterator::ReadOptions& read_options,
+      const EnvOptions& soptions, const SequenceNumber seqNum,
+      std::unique_ptr<VectorLogPtr> files, VersionSet const* const versions);
+
+  virtual bool Valid() override;
+
+  virtual void Next() override;
+
+  virtual Status status() override;
+
+  virtual BatchResult GetBatch() override;
+
+ private:
+  const std::string& dir_;
+  const ImmutableDBOptions* options_;
+  const TransactionLogIterator::ReadOptions read_options_;
+  const EnvOptions& soptions_;
+  SequenceNumber startingSequenceNumber_;
+  std::unique_ptr<VectorLogPtr> files_;
+  bool started_;
+  bool isValid_;  // not valid when it starts off.
+  Status currentStatus_;
+  size_t currentFileIndex_;
+  std::unique_ptr<WriteBatch> currentBatch_;
+  unique_ptr<log::Reader> currentLogReader_;
+  Status OpenLogFile(const LogFile* logFile,
+                     unique_ptr<SequentialFileReader>* file);
+
+  struct LogReporter : public log::Reader::Reporter {
+    Env* env;
+    Logger* info_log;
+    virtual void Corruption(size_t bytes, const Status& s) override {
+      ROCKS_LOG_ERROR(info_log, "dropping %" ROCKSDB_PRIszt " bytes; %s", bytes,
+                      s.ToString().c_str());
+    }
+    virtual void Info(const char* s) { ROCKS_LOG_INFO(info_log, "%s", s); }
+  } reporter_;
+
+  SequenceNumber currentBatchSeq_;  // sequence number at start of current batch
+  SequenceNumber currentLastSeq_;   // last sequence in the current batch
+  // Used only to get latest seq. num
+  // TODO(icanadi) can this be just a callback?
+  VersionSet const* const versions_;
+
+  // Reads from transaction log only if the writebatch record has been written
+  bool RestrictedRead(Slice* record, std::string* scratch);
+  // Seeks to startingSequenceNumber reading from startFileIndex in files_.
+  // If strict is set, then we must get a batch starting with
+  // startingSequenceNumber.
+  void SeekToStartSequence(uint64_t startFileIndex = 0, bool strict = false);
+  // Implementation of Next.
SeekToStartSequence calls it internally with + // internal=true to let it find next entry even if it has to jump gaps because + // the iterator may start off from the first available entry but promises to + // be continuous after that + void NextImpl(bool internal = false); + // Check if batch is expected, else return false + bool IsBatchExpected(const WriteBatch* batch, SequenceNumber expectedSeq); + // Update current batch if a continuous batch is found, else return false + void UpdateCurrentWriteBatch(const Slice& record); + Status OpenLogReader(const LogFile* file); +}; +} // namespace rocksdb +#endif // ROCKSDB_LITE http://git-wip-us.apache.org/repos/asf/nifi-minifi-cpp/blob/48867732/thirdparty/rocksdb/db/version_builder.cc ---------------------------------------------------------------------- diff --git a/thirdparty/rocksdb/db/version_builder.cc b/thirdparty/rocksdb/db/version_builder.cc new file mode 100644 index 0000000..bab8d11 --- /dev/null +++ b/thirdparty/rocksdb/db/version_builder.cc @@ -0,0 +1,412 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. + +#include "db/version_builder.h" + +#ifndef __STDC_FORMAT_MACROS +#define __STDC_FORMAT_MACROS +#endif + +#include <inttypes.h> +#include <algorithm> +#include <atomic> +#include <functional> +#include <set> +#include <thread> +#include <unordered_map> +#include <unordered_set> +#include <utility> +#include <vector> + +#include "db/dbformat.h" +#include "db/internal_stats.h" +#include "db/table_cache.h" +#include "db/version_set.h" +#include "port/port.h" +#include "table/table_reader.h" + +namespace rocksdb { + +bool NewestFirstBySeqNo(FileMetaData* a, FileMetaData* b) { + if (a->largest_seqno != b->largest_seqno) { + return a->largest_seqno > b->largest_seqno; + } + if (a->smallest_seqno != b->smallest_seqno) { + return a->smallest_seqno > b->smallest_seqno; + } + // Break ties by file number + return a->fd.GetNumber() > b->fd.GetNumber(); +} + +namespace { +bool BySmallestKey(FileMetaData* a, FileMetaData* b, + const InternalKeyComparator* cmp) { + int r = cmp->Compare(a->smallest, b->smallest); + if (r != 0) { + return (r < 0); + } + // Break ties by file number + return (a->fd.GetNumber() < b->fd.GetNumber()); +} +} // namespace + +class VersionBuilder::Rep { + private: + // Helper to sort files_ in v + // kLevel0 -- NewestFirstBySeqNo + // kLevelNon0 -- BySmallestKey + struct FileComparator { + enum SortMethod { kLevel0 = 0, kLevelNon0 = 1, } sort_method; + const InternalKeyComparator* internal_comparator; + + bool operator()(FileMetaData* f1, FileMetaData* f2) const { + switch (sort_method) { + case kLevel0: + return NewestFirstBySeqNo(f1, f2); + case kLevelNon0: + return BySmallestKey(f1, f2, internal_comparator); + } + assert(false); + return false; + } + }; + + struct LevelState { + std::unordered_set<uint64_t> deleted_files; + // Map from file number to file meta data. 
+    std::unordered_map<uint64_t, FileMetaData*> added_files;
+  };
+
+  const EnvOptions& env_options_;
+  Logger* info_log_;
+  TableCache* table_cache_;
+  VersionStorageInfo* base_vstorage_;
+  LevelState* levels_;
+  FileComparator level_zero_cmp_;
+  FileComparator level_nonzero_cmp_;
+
+ public:
+  Rep(const EnvOptions& env_options, Logger* info_log, TableCache* table_cache,
+      VersionStorageInfo* base_vstorage)
+      : env_options_(env_options),
+        info_log_(info_log),
+        table_cache_(table_cache),
+        base_vstorage_(base_vstorage) {
+    levels_ = new LevelState[base_vstorage_->num_levels()];
+    level_zero_cmp_.sort_method = FileComparator::kLevel0;
+    level_nonzero_cmp_.sort_method = FileComparator::kLevelNon0;
+    level_nonzero_cmp_.internal_comparator =
+        base_vstorage_->InternalComparator();
+  }
+
+  ~Rep() {
+    for (int level = 0; level < base_vstorage_->num_levels(); level++) {
+      const auto& added = levels_[level].added_files;
+      for (auto& pair : added) {
+        UnrefFile(pair.second);
+      }
+    }
+
+    delete[] levels_;
+  }
+
+  void UnrefFile(FileMetaData* f) {
+    f->refs--;
+    if (f->refs <= 0) {
+      if (f->table_reader_handle) {
+        assert(table_cache_ != nullptr);
+        table_cache_->ReleaseHandle(f->table_reader_handle);
+        f->table_reader_handle = nullptr;
+      }
+      delete f;
+    }
+  }
+
+  void CheckConsistency(VersionStorageInfo* vstorage) {
+#ifdef NDEBUG
+    if (!vstorage->force_consistency_checks()) {
+      // Don't run consistency checks in release mode except if
+      // explicitly asked to
+      return;
+    }
+#endif
+    // make sure the files are sorted correctly
+    for (int level = 0; level < vstorage->num_levels(); level++) {
+      auto& level_files = vstorage->LevelFiles(level);
+      for (size_t i = 1; i < level_files.size(); i++) {
+        auto f1 = level_files[i - 1];
+        auto f2 = level_files[i];
+        if (level == 0) {
+          if (!level_zero_cmp_(f1, f2)) {
+            fprintf(stderr, "L0 files are not sorted properly");
+            abort();
+          }
+
+          if (f2->smallest_seqno == f2->largest_seqno) {
+            // This is an external file that we ingested
+            SequenceNumber external_file_seqno = f2->smallest_seqno;
+            if (!(external_file_seqno < f1->largest_seqno ||
+                  external_file_seqno == 0)) {
+              fprintf(stderr, "L0 file with seqno %" PRIu64 " %" PRIu64
+                              " vs. file with global_seqno %" PRIu64 "\n",
+                      f1->smallest_seqno, f1->largest_seqno,
+                      external_file_seqno);
+              abort();
+            }
+          } else if (f1->smallest_seqno <= f2->smallest_seqno) {
+            fprintf(stderr, "L0 files seqno %" PRIu64 " %" PRIu64
+                            " vs. %" PRIu64 " %" PRIu64 "\n",
+                    f1->smallest_seqno, f1->largest_seqno, f2->smallest_seqno,
+                    f2->largest_seqno);
+            abort();
+          }
+        } else {
+          if (!level_nonzero_cmp_(f1, f2)) {
+            fprintf(stderr, "L%d files are not sorted properly", level);
+            abort();
+          }
+
+          // Make sure there is no overlap in levels > 0
+          if (vstorage->InternalComparator()->Compare(f1->largest,
+                                                      f2->smallest) >= 0) {
+            fprintf(stderr, "L%d files have overlapping ranges %s vs. %s\n",
+                    level, (f1->largest).DebugString(true).c_str(),
+                    (f2->smallest).DebugString(true).c_str());
%s\n", level, + (f1->largest).DebugString(true).c_str(), + (f2->smallest).DebugString(true).c_str()); + abort(); + } + } + } + } + } + + void CheckConsistencyForDeletes(VersionEdit* edit, uint64_t number, + int level) { +#ifdef NDEBUG + if (!base_vstorage_->force_consistency_checks()) { + // Dont run consistency checks in release mode except if + // explicitly asked to + return; + } +#endif + // a file to be deleted better exist in the previous version + bool found = false; + for (int l = 0; !found && l < base_vstorage_->num_levels(); l++) { + const std::vector<FileMetaData*>& base_files = + base_vstorage_->LevelFiles(l); + for (size_t i = 0; i < base_files.size(); i++) { + FileMetaData* f = base_files[i]; + if (f->fd.GetNumber() == number) { + found = true; + break; + } + } + } + // if the file did not exist in the previous version, then it + // is possibly moved from lower level to higher level in current + // version + for (int l = level + 1; !found && l < base_vstorage_->num_levels(); l++) { + auto& level_added = levels_[l].added_files; + auto got = level_added.find(number); + if (got != level_added.end()) { + found = true; + break; + } + } + + // maybe this file was added in a previous edit that was Applied + if (!found) { + auto& level_added = levels_[level].added_files; + auto got = level_added.find(number); + if (got != level_added.end()) { + found = true; + } + } + if (!found) { + fprintf(stderr, "not found %" PRIu64 "\n", number); + abort(); + } + } + + // Apply all of the edits in *edit to the current state. + void Apply(VersionEdit* edit) { + CheckConsistency(base_vstorage_); + + // Delete files + const VersionEdit::DeletedFileSet& del = edit->GetDeletedFiles(); + for (const auto& del_file : del) { + const auto level = del_file.first; + const auto number = del_file.second; + levels_[level].deleted_files.insert(number); + CheckConsistencyForDeletes(edit, number, level); + + auto exising = levels_[level].added_files.find(number); + if (exising != levels_[level].added_files.end()) { + UnrefFile(exising->second); + levels_[level].added_files.erase(number); + } + } + + // Add new files + for (const auto& new_file : edit->GetNewFiles()) { + const int level = new_file.first; + FileMetaData* f = new FileMetaData(new_file.second); + f->refs = 1; + + assert(levels_[level].added_files.find(f->fd.GetNumber()) == + levels_[level].added_files.end()); + levels_[level].deleted_files.erase(f->fd.GetNumber()); + levels_[level].added_files[f->fd.GetNumber()] = f; + } + } + + // Save the current state in *v. + void SaveTo(VersionStorageInfo* vstorage) { + CheckConsistency(base_vstorage_); + CheckConsistency(vstorage); + + for (int level = 0; level < base_vstorage_->num_levels(); level++) { + const auto& cmp = (level == 0) ? level_zero_cmp_ : level_nonzero_cmp_; + // Merge the set of added files with the set of pre-existing files. + // Drop any deleted files. Store the result in *v. + const auto& base_files = base_vstorage_->LevelFiles(level); + auto base_iter = base_files.begin(); + auto base_end = base_files.end(); + const auto& unordered_added_files = levels_[level].added_files; + vstorage->Reserve(level, + base_files.size() + unordered_added_files.size()); + + // Sort added files for the level. 
+      std::vector<FileMetaData*> added_files;
+      added_files.reserve(unordered_added_files.size());
+      for (const auto& pair : unordered_added_files) {
+        added_files.push_back(pair.second);
+      }
+      std::sort(added_files.begin(), added_files.end(), cmp);
+
+#ifndef NDEBUG
+      FileMetaData* prev_file = nullptr;
+#endif
+
+      for (const auto& added : added_files) {
+#ifndef NDEBUG
+        if (level > 0 && prev_file != nullptr) {
+          assert(base_vstorage_->InternalComparator()->Compare(
+                     prev_file->smallest, added->smallest) <= 0);
+        }
+        prev_file = added;
+#endif
+
+        // Add all smaller files listed in base_
+        for (auto bpos = std::upper_bound(base_iter, base_end, added, cmp);
+             base_iter != bpos; ++base_iter) {
+          MaybeAddFile(vstorage, level, *base_iter);
+        }
+
+        MaybeAddFile(vstorage, level, added);
+      }
+
+      // Add remaining base files
+      for (; base_iter != base_end; ++base_iter) {
+        MaybeAddFile(vstorage, level, *base_iter);
+      }
+    }
+
+    CheckConsistency(vstorage);
+  }
+
+  void LoadTableHandlers(InternalStats* internal_stats, int max_threads,
+                         bool prefetch_index_and_filter_in_cache) {
+    assert(table_cache_ != nullptr);
+    // <file metadata, level>
+    std::vector<std::pair<FileMetaData*, int>> files_meta;
+    for (int level = 0; level < base_vstorage_->num_levels(); level++) {
+      for (auto& file_meta_pair : levels_[level].added_files) {
+        auto* file_meta = file_meta_pair.second;
+        assert(!file_meta->table_reader_handle);
+        files_meta.emplace_back(file_meta, level);
+      }
+    }
+
+    std::atomic<size_t> next_file_meta_idx(0);
+    std::function<void()> load_handlers_func = [&]() {
+      while (true) {
+        size_t file_idx = next_file_meta_idx.fetch_add(1);
+        if (file_idx >= files_meta.size()) {
+          break;
+        }
+
+        auto* file_meta = files_meta[file_idx].first;
+        int level = files_meta[file_idx].second;
+        table_cache_->FindTable(env_options_,
+                                *(base_vstorage_->InternalComparator()),
+                                file_meta->fd, &file_meta->table_reader_handle,
+                                false /* no_io */, true /* record_read_stats */,
+                                internal_stats->GetFileReadHist(level), false,
+                                level, prefetch_index_and_filter_in_cache);
+        if (file_meta->table_reader_handle != nullptr) {
+          // Load table_reader
+          file_meta->fd.table_reader = table_cache_->GetTableReaderFromHandle(
+              file_meta->table_reader_handle);
+        }
+      }
+    };
+
+    if (max_threads <= 1) {
+      load_handlers_func();
+    } else {
+      std::vector<port::Thread> threads;
+      for (int i = 0; i < max_threads; i++) {
+        threads.emplace_back(load_handlers_func);
+      }
+
+      for (auto& t : threads) {
+        t.join();
+      }
+    }
+  }
+
+  void MaybeAddFile(VersionStorageInfo* vstorage, int level, FileMetaData* f) {
+    if (levels_[level].deleted_files.count(f->fd.GetNumber()) > 0) {
+      // f is a to-be-deleted table file
+      vstorage->RemoveCurrentStats(f);
+    } else {
+      vstorage->AddFile(level, f, info_log_);
+    }
+  }
+};
+
+VersionBuilder::VersionBuilder(const EnvOptions& env_options,
+                               TableCache* table_cache,
+                               VersionStorageInfo* base_vstorage,
+                               Logger* info_log)
+    : rep_(new Rep(env_options, info_log, table_cache, base_vstorage)) {}
+VersionBuilder::~VersionBuilder() { delete rep_; }
+void VersionBuilder::CheckConsistency(VersionStorageInfo* vstorage) {
+  rep_->CheckConsistency(vstorage);
+}
+void VersionBuilder::CheckConsistencyForDeletes(VersionEdit* edit,
+                                                uint64_t number, int level) {
+  rep_->CheckConsistencyForDeletes(edit, number, level);
+}
+void VersionBuilder::Apply(VersionEdit* edit) { rep_->Apply(edit); }
+void VersionBuilder::SaveTo(VersionStorageInfo* vstorage) {
+  rep_->SaveTo(vstorage);
+}
+void VersionBuilder::LoadTableHandlers(
+    InternalStats*
internal_stats, int max_threads, + bool prefetch_index_and_filter_in_cache) { + rep_->LoadTableHandlers(internal_stats, max_threads, + prefetch_index_and_filter_in_cache); +} +void VersionBuilder::MaybeAddFile(VersionStorageInfo* vstorage, int level, + FileMetaData* f) { + rep_->MaybeAddFile(vstorage, level, f); +} + +} // namespace rocksdb http://git-wip-us.apache.org/repos/asf/nifi-minifi-cpp/blob/48867732/thirdparty/rocksdb/db/version_builder.h ---------------------------------------------------------------------- diff --git a/thirdparty/rocksdb/db/version_builder.h b/thirdparty/rocksdb/db/version_builder.h new file mode 100644 index 0000000..235f79d --- /dev/null +++ b/thirdparty/rocksdb/db/version_builder.h @@ -0,0 +1,44 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. +// +#pragma once +#include "rocksdb/env.h" + +namespace rocksdb { + +class TableCache; +class VersionStorageInfo; +class VersionEdit; +struct FileMetaData; +class InternalStats; + +// A helper class so we can efficiently apply a whole sequence +// of edits to a particular state without creating intermediate +// Versions that contain full copies of the intermediate state. +class VersionBuilder { + public: + VersionBuilder(const EnvOptions& env_options, TableCache* table_cache, + VersionStorageInfo* base_vstorage, Logger* info_log = nullptr); + ~VersionBuilder(); + void CheckConsistency(VersionStorageInfo* vstorage); + void CheckConsistencyForDeletes(VersionEdit* edit, uint64_t number, + int level); + void Apply(VersionEdit* edit); + void SaveTo(VersionStorageInfo* vstorage); + void LoadTableHandlers(InternalStats* internal_stats, int max_threads, + bool prefetch_index_and_filter_in_cache); + void MaybeAddFile(VersionStorageInfo* vstorage, int level, FileMetaData* f); + + private: + class Rep; + Rep* rep_; +}; + +extern bool NewestFirstBySeqNo(FileMetaData* a, FileMetaData* b); +} // namespace rocksdb http://git-wip-us.apache.org/repos/asf/nifi-minifi-cpp/blob/48867732/thirdparty/rocksdb/db/version_edit.cc ---------------------------------------------------------------------- diff --git a/thirdparty/rocksdb/db/version_edit.cc b/thirdparty/rocksdb/db/version_edit.cc new file mode 100644 index 0000000..b01f7bb --- /dev/null +++ b/thirdparty/rocksdb/db/version_edit.cc @@ -0,0 +1,591 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. + +#include "db/version_edit.h" + +#include "db/version_set.h" +#include "rocksdb/slice.h" +#include "util/coding.h" +#include "util/event_logger.h" +#include "util/string_util.h" +#include "util/sync_point.h" + +namespace rocksdb { + +// Tag numbers for serialized VersionEdit. 
These numbers are written to +// disk and should not be changed. +enum Tag { + kComparator = 1, + kLogNumber = 2, + kNextFileNumber = 3, + kLastSequence = 4, + kCompactPointer = 5, + kDeletedFile = 6, + kNewFile = 7, + // 8 was used for large value refs + kPrevLogNumber = 9, + + // these are new formats divergent from open source leveldb + kNewFile2 = 100, + kNewFile3 = 102, + kNewFile4 = 103, // 4th (the latest) format version of adding files + kColumnFamily = 200, // specify column family for version edit + kColumnFamilyAdd = 201, + kColumnFamilyDrop = 202, + kMaxColumnFamily = 203, +}; + +enum CustomTag { + kTerminate = 1, // The end of customized fields + kNeedCompaction = 2, + kPathId = 65, +}; +// If this bit for the custom tag is set, opening DB should fail if +// we don't know this field. +uint32_t kCustomTagNonSafeIgnoreMask = 1 << 6; + +uint64_t PackFileNumberAndPathId(uint64_t number, uint64_t path_id) { + assert(number <= kFileNumberMask); + return number | (path_id * (kFileNumberMask + 1)); +} + +void VersionEdit::Clear() { + comparator_.clear(); + max_level_ = 0; + log_number_ = 0; + prev_log_number_ = 0; + last_sequence_ = 0; + next_file_number_ = 0; + max_column_family_ = 0; + has_comparator_ = false; + has_log_number_ = false; + has_prev_log_number_ = false; + has_next_file_number_ = false; + has_last_sequence_ = false; + has_max_column_family_ = false; + deleted_files_.clear(); + new_files_.clear(); + column_family_ = 0; + is_column_family_add_ = 0; + is_column_family_drop_ = 0; + column_family_name_.clear(); +} + +bool VersionEdit::EncodeTo(std::string* dst) const { + if (has_comparator_) { + PutVarint32(dst, kComparator); + PutLengthPrefixedSlice(dst, comparator_); + } + if (has_log_number_) { + PutVarint32Varint64(dst, kLogNumber, log_number_); + } + if (has_prev_log_number_) { + PutVarint32Varint64(dst, kPrevLogNumber, prev_log_number_); + } + if (has_next_file_number_) { + PutVarint32Varint64(dst, kNextFileNumber, next_file_number_); + } + if (has_last_sequence_) { + PutVarint32Varint64(dst, kLastSequence, last_sequence_); + } + if (has_max_column_family_) { + PutVarint32Varint32(dst, kMaxColumnFamily, max_column_family_); + } + + for (const auto& deleted : deleted_files_) { + PutVarint32Varint32Varint64(dst, kDeletedFile, deleted.first /* level */, + deleted.second /* file number */); + } + + for (size_t i = 0; i < new_files_.size(); i++) { + const FileMetaData& f = new_files_[i].second; + if (!f.smallest.Valid() || !f.largest.Valid()) { + return false; + } + bool has_customized_fields = false; + if (f.marked_for_compaction) { + PutVarint32(dst, kNewFile4); + has_customized_fields = true; + } else if (f.fd.GetPathId() == 0) { + // Use older format to make sure user can roll back the build if they + // don't config multiple DB paths. 
+      PutVarint32(dst, kNewFile2);
+    } else {
+      PutVarint32(dst, kNewFile3);
+    }
+    PutVarint32Varint64(dst, new_files_[i].first /* level */, f.fd.GetNumber());
+    if (f.fd.GetPathId() != 0 && !has_customized_fields) {
+      // kNewFile3
+      PutVarint32(dst, f.fd.GetPathId());
+    }
+    PutVarint64(dst, f.fd.GetFileSize());
+    PutLengthPrefixedSlice(dst, f.smallest.Encode());
+    PutLengthPrefixedSlice(dst, f.largest.Encode());
+    PutVarint64Varint64(dst, f.smallest_seqno, f.largest_seqno);
+    if (has_customized_fields) {
+      // Customized fields' format:
+      // +-----------------------------+
+      // | 1st field's tag (varint32)  |
+      // +-----------------------------+
+      // | 1st field's size (varint32) |
+      // +-----------------------------+
+      // |    bytes for 1st field      |
+      // |  (based on size decoded)    |
+      // +-----------------------------+
+      // |                             |
+      // |          ......             |
+      // |                             |
+      // +-----------------------------+
+      // | last field's size (varint32)|
+      // +-----------------------------+
+      // |    bytes for last field     |
+      // |  (based on size decoded)    |
+      // +-----------------------------+
+      // | terminating tag (varint32)  |
+      // +-----------------------------+
+      //
+      // Customized encoding for fields:
+      //   tag kPathId: 1 byte as path_id
+      //   tag kNeedCompaction:
+      //        currently can only take the one-char value 1, indicating
+      //        need-compaction
+      //
+      if (f.fd.GetPathId() != 0) {
+        PutVarint32(dst, CustomTag::kPathId);
+        char p = static_cast<char>(f.fd.GetPathId());
+        PutLengthPrefixedSlice(dst, Slice(&p, 1));
+      }
+      if (f.marked_for_compaction) {
+        PutVarint32(dst, CustomTag::kNeedCompaction);
+        char p = static_cast<char>(1);
+        PutLengthPrefixedSlice(dst, Slice(&p, 1));
+      }
+      TEST_SYNC_POINT_CALLBACK("VersionEdit::EncodeTo:NewFile4:CustomizeFields",
+                               dst);
+
+      PutVarint32(dst, CustomTag::kTerminate);
+    }
+  }
+
+  // 0 is default and does not need to be explicitly written
+  if (column_family_ != 0) {
+    PutVarint32Varint32(dst, kColumnFamily, column_family_);
+  }
+
+  if (is_column_family_add_) {
+    PutVarint32(dst, kColumnFamilyAdd);
+    PutLengthPrefixedSlice(dst, Slice(column_family_name_));
+  }
+
+  if (is_column_family_drop_) {
+    PutVarint32(dst, kColumnFamilyDrop);
+  }
+  return true;
+}
+
+static bool GetInternalKey(Slice* input, InternalKey* dst) {
+  Slice str;
+  if (GetLengthPrefixedSlice(input, &str)) {
+    dst->DecodeFrom(str);
+    return dst->Valid();
+  } else {
+    return false;
+  }
+}
+
+bool VersionEdit::GetLevel(Slice* input, int* level, const char** msg) {
+  uint32_t v;
+  if (GetVarint32(input, &v)) {
+    *level = v;
+    if (max_level_ < *level) {
+      max_level_ = *level;
+    }
+    return true;
+  } else {
+    return false;
+  }
+}
+
+const char* VersionEdit::DecodeNewFile4From(Slice* input) {
+  const char* msg = nullptr;
+  int level;
+  FileMetaData f;
+  uint64_t number;
+  uint32_t path_id = 0;
+  uint64_t file_size;
+  if (GetLevel(input, &level, &msg) && GetVarint64(input, &number) &&
+      GetVarint64(input, &file_size) && GetInternalKey(input, &f.smallest) &&
+      GetInternalKey(input, &f.largest) &&
+      GetVarint64(input, &f.smallest_seqno) &&
+      GetVarint64(input, &f.largest_seqno)) {
+    // See comments in VersionEdit::EncodeTo() for format of customized fields
+    while (true) {
+      uint32_t custom_tag;
+      Slice field;
+      if (!GetVarint32(input, &custom_tag)) {
+        return "new-file4 custom field";
+      }
+      if (custom_tag == kTerminate) {
+        break;
+      }
+      if (!GetLengthPrefixedSlice(input, &field)) {
+        return "new-file4 custom field length prefixed slice error";
+      }
+      switch (custom_tag) {
+        case kPathId:
+          if (field.size() != 1) {
+            return "path_id field wrong size";
+          }
+          path_id = field[0];
+          if (path_id > 3) {
+            return "path_id wrong value";
+          }
+          break;
+        case kNeedCompaction:
+          if (field.size() != 1) {
+            return "need_compaction field wrong size";
+          }
+          f.marked_for_compaction = (field[0] == 1);
+          break;
+        default:
+          if ((custom_tag & kCustomTagNonSafeIgnoreMask) != 0) {
+            // Should not proceed if we cannot understand it
+            return "new-file4 custom field not supported";
+          }
+          break;
+      }
+    }
+  } else {
+    return "new-file4 entry";
+  }
+  f.fd = FileDescriptor(number, path_id, file_size);
+  new_files_.push_back(std::make_pair(level, f));
+  return nullptr;
+}
+
+Status VersionEdit::DecodeFrom(const Slice& src) {
+  Clear();
+  Slice input = src;
+  const char* msg = nullptr;
+  uint32_t tag;
+
+  // Temporary storage for parsing
+  int level;
+  FileMetaData f;
+  Slice str;
+  InternalKey key;
+
+  while (msg == nullptr && GetVarint32(&input, &tag)) {
+    switch (tag) {
+      case kComparator:
+        if (GetLengthPrefixedSlice(&input, &str)) {
+          comparator_ = str.ToString();
+          has_comparator_ = true;
+        } else {
+          msg = "comparator name";
+        }
+        break;
+
+      case kLogNumber:
+        if (GetVarint64(&input, &log_number_)) {
+          has_log_number_ = true;
+        } else {
+          msg = "log number";
+        }
+        break;
+
+      case kPrevLogNumber:
+        if (GetVarint64(&input, &prev_log_number_)) {
+          has_prev_log_number_ = true;
+        } else {
+          msg = "previous log number";
+        }
+        break;
+
+      case kNextFileNumber:
+        if (GetVarint64(&input, &next_file_number_)) {
+          has_next_file_number_ = true;
+        } else {
+          msg = "next file number";
+        }
+        break;
+
+      case kLastSequence:
+        if (GetVarint64(&input, &last_sequence_)) {
+          has_last_sequence_ = true;
+        } else {
+          msg = "last sequence number";
+        }
+        break;
+
+      case kMaxColumnFamily:
+        if (GetVarint32(&input, &max_column_family_)) {
+          has_max_column_family_ = true;
+        } else {
+          msg = "max column family";
+        }
+        break;
+
+      case kCompactPointer:
+        if (GetLevel(&input, &level, &msg) &&
+            GetInternalKey(&input, &key)) {
+          // we don't use compact pointers anymore,
+          // but we should not fail if they are still
+          // in the manifest
+        } else {
+          if (!msg) {
+            msg = "compaction pointer";
+          }
+        }
+        break;
+
+      case kDeletedFile: {
+        uint64_t number;
+        if (GetLevel(&input, &level, &msg) && GetVarint64(&input, &number)) {
+          deleted_files_.insert(std::make_pair(level, number));
+        } else {
+          if (!msg) {
+            msg = "deleted file";
+          }
+        }
+        break;
+      }
+
+      case kNewFile: {
+        uint64_t number;
+        uint64_t file_size;
+        if (GetLevel(&input, &level, &msg) && GetVarint64(&input, &number) &&
+            GetVarint64(&input, &file_size) &&
+            GetInternalKey(&input, &f.smallest) &&
+            GetInternalKey(&input, &f.largest)) {
+          f.fd = FileDescriptor(number, 0, file_size);
+          new_files_.push_back(std::make_pair(level, f));
+        } else {
+          if (!msg) {
+            msg = "new-file entry";
+          }
+        }
+        break;
+      }
+      case kNewFile2: {
+        uint64_t number;
+        uint64_t file_size;
+        if (GetLevel(&input, &level, &msg) && GetVarint64(&input, &number) &&
+            GetVarint64(&input, &file_size) &&
+            GetInternalKey(&input, &f.smallest) &&
+            GetInternalKey(&input, &f.largest) &&
+            GetVarint64(&input, &f.smallest_seqno) &&
+            GetVarint64(&input, &f.largest_seqno)) {
+          f.fd = FileDescriptor(number, 0, file_size);
+          new_files_.push_back(std::make_pair(level, f));
+        } else {
+          if (!msg) {
+            msg = "new-file2 entry";
+          }
+        }
+        break;
+      }
+
+      case kNewFile3: {
+        uint64_t number;
+        uint32_t path_id;
+        uint64_t file_size;
+        if (GetLevel(&input, &level, &msg) && GetVarint64(&input, &number) &&
+            GetVarint32(&input, &path_id) && GetVarint64(&input, &file_size) &&
+            GetInternalKey(&input,
&f.smallest) && + GetInternalKey(&input, &f.largest) && + GetVarint64(&input, &f.smallest_seqno) && + GetVarint64(&input, &f.largest_seqno)) { + f.fd = FileDescriptor(number, path_id, file_size); + new_files_.push_back(std::make_pair(level, f)); + } else { + if (!msg) { + msg = "new-file3 entry"; + } + } + break; + } + + case kNewFile4: { + msg = DecodeNewFile4From(&input); + break; + } + + case kColumnFamily: + if (!GetVarint32(&input, &column_family_)) { + if (!msg) { + msg = "set column family id"; + } + } + break; + + case kColumnFamilyAdd: + if (GetLengthPrefixedSlice(&input, &str)) { + is_column_family_add_ = true; + column_family_name_ = str.ToString(); + } else { + if (!msg) { + msg = "column family add"; + } + } + break; + + case kColumnFamilyDrop: + is_column_family_drop_ = true; + break; + + default: + msg = "unknown tag"; + break; + } + } + + if (msg == nullptr && !input.empty()) { + msg = "invalid tag"; + } + + Status result; + if (msg != nullptr) { + result = Status::Corruption("VersionEdit", msg); + } + return result; +} + +std::string VersionEdit::DebugString(bool hex_key) const { + std::string r; + r.append("VersionEdit {"); + if (has_comparator_) { + r.append("\n Comparator: "); + r.append(comparator_); + } + if (has_log_number_) { + r.append("\n LogNumber: "); + AppendNumberTo(&r, log_number_); + } + if (has_prev_log_number_) { + r.append("\n PrevLogNumber: "); + AppendNumberTo(&r, prev_log_number_); + } + if (has_next_file_number_) { + r.append("\n NextFileNumber: "); + AppendNumberTo(&r, next_file_number_); + } + if (has_last_sequence_) { + r.append("\n LastSeq: "); + AppendNumberTo(&r, last_sequence_); + } + for (DeletedFileSet::const_iterator iter = deleted_files_.begin(); + iter != deleted_files_.end(); + ++iter) { + r.append("\n DeleteFile: "); + AppendNumberTo(&r, iter->first); + r.append(" "); + AppendNumberTo(&r, iter->second); + } + for (size_t i = 0; i < new_files_.size(); i++) { + const FileMetaData& f = new_files_[i].second; + r.append("\n AddFile: "); + AppendNumberTo(&r, new_files_[i].first); + r.append(" "); + AppendNumberTo(&r, f.fd.GetNumber()); + r.append(" "); + AppendNumberTo(&r, f.fd.GetFileSize()); + r.append(" "); + r.append(f.smallest.DebugString(hex_key)); + r.append(" .. 
"); + r.append(f.largest.DebugString(hex_key)); + } + r.append("\n ColumnFamily: "); + AppendNumberTo(&r, column_family_); + if (is_column_family_add_) { + r.append("\n ColumnFamilyAdd: "); + r.append(column_family_name_); + } + if (is_column_family_drop_) { + r.append("\n ColumnFamilyDrop"); + } + if (has_max_column_family_) { + r.append("\n MaxColumnFamily: "); + AppendNumberTo(&r, max_column_family_); + } + r.append("\n}\n"); + return r; +} + +std::string VersionEdit::DebugJSON(int edit_num, bool hex_key) const { + JSONWriter jw; + jw << "EditNumber" << edit_num; + + if (has_comparator_) { + jw << "Comparator" << comparator_; + } + if (has_log_number_) { + jw << "LogNumber" << log_number_; + } + if (has_prev_log_number_) { + jw << "PrevLogNumber" << prev_log_number_; + } + if (has_next_file_number_) { + jw << "NextFileNumber" << next_file_number_; + } + if (has_last_sequence_) { + jw << "LastSeq" << last_sequence_; + } + + if (!deleted_files_.empty()) { + jw << "DeletedFiles"; + jw.StartArray(); + + for (DeletedFileSet::const_iterator iter = deleted_files_.begin(); + iter != deleted_files_.end(); + ++iter) { + jw.StartArrayedObject(); + jw << "Level" << iter->first; + jw << "FileNumber" << iter->second; + jw.EndArrayedObject(); + } + + jw.EndArray(); + } + + if (!new_files_.empty()) { + jw << "AddedFiles"; + jw.StartArray(); + + for (size_t i = 0; i < new_files_.size(); i++) { + jw.StartArrayedObject(); + jw << "Level" << new_files_[i].first; + const FileMetaData& f = new_files_[i].second; + jw << "FileNumber" << f.fd.GetNumber(); + jw << "FileSize" << f.fd.GetFileSize(); + jw << "SmallestIKey" << f.smallest.DebugString(hex_key); + jw << "LargestIKey" << f.largest.DebugString(hex_key); + jw.EndArrayedObject(); + } + + jw.EndArray(); + } + + jw << "ColumnFamily" << column_family_; + + if (is_column_family_add_) { + jw << "ColumnFamilyAdd" << column_family_name_; + } + if (is_column_family_drop_) { + jw << "ColumnFamilyDrop" << column_family_name_; + } + if (has_max_column_family_) { + jw << "MaxColumnFamily" << max_column_family_; + } + + jw.EndObject(); + + return jw.Get(); +} + +} // namespace rocksdb http://git-wip-us.apache.org/repos/asf/nifi-minifi-cpp/blob/48867732/thirdparty/rocksdb/db/version_edit.h ---------------------------------------------------------------------- diff --git a/thirdparty/rocksdb/db/version_edit.h b/thirdparty/rocksdb/db/version_edit.h new file mode 100644 index 0000000..47ebf5b --- /dev/null +++ b/thirdparty/rocksdb/db/version_edit.h @@ -0,0 +1,309 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. + +#pragma once +#include <algorithm> +#include <set> +#include <utility> +#include <vector> +#include <string> +#include "rocksdb/cache.h" +#include "db/dbformat.h" +#include "util/arena.h" +#include "util/autovector.h" + +namespace rocksdb { + +class VersionSet; + +const uint64_t kFileNumberMask = 0x3FFFFFFFFFFFFFFF; + +extern uint64_t PackFileNumberAndPathId(uint64_t number, uint64_t path_id); + +// A copyable structure contains information needed to read data from an SST +// file. 
It can contain a pointer to a table reader opened for the file, or
+// the file number and size, which can be used to create a new table reader
+// for it. The behavior is undefined when a copy of the structure is used when
+// the file is not in any live version any more.
+struct FileDescriptor {
+  // Table reader in table_reader_handle
+  TableReader* table_reader;
+  uint64_t packed_number_and_path_id;
+  uint64_t file_size;  // File size in bytes
+
+  FileDescriptor() : FileDescriptor(0, 0, 0) {}
+
+  FileDescriptor(uint64_t number, uint32_t path_id, uint64_t _file_size)
+      : table_reader(nullptr),
+        packed_number_and_path_id(PackFileNumberAndPathId(number, path_id)),
+        file_size(_file_size) {}
+
+  FileDescriptor& operator=(const FileDescriptor& fd) {
+    table_reader = fd.table_reader;
+    packed_number_and_path_id = fd.packed_number_and_path_id;
+    file_size = fd.file_size;
+    return *this;
+  }
+
+  uint64_t GetNumber() const {
+    return packed_number_and_path_id & kFileNumberMask;
+  }
+  uint32_t GetPathId() const {
+    return static_cast<uint32_t>(
+        packed_number_and_path_id / (kFileNumberMask + 1));
+  }
+  uint64_t GetFileSize() const { return file_size; }
+};
+
+struct FileSampledStats {
+  FileSampledStats() : num_reads_sampled(0) {}
+  FileSampledStats(const FileSampledStats& other) { *this = other; }
+  FileSampledStats& operator=(const FileSampledStats& other) {
+    num_reads_sampled = other.num_reads_sampled.load();
+    return *this;
+  }
+
+  // number of user reads to this file.
+  mutable std::atomic<uint64_t> num_reads_sampled;
+};
+
+struct FileMetaData {
+  FileDescriptor fd;
+  InternalKey smallest;           // Smallest internal key served by table
+  InternalKey largest;            // Largest internal key served by table
+  SequenceNumber smallest_seqno;  // The smallest seqno in this file
+  SequenceNumber largest_seqno;   // The largest seqno in this file
+
+  // Needs to be disposed when refs becomes 0.
+  Cache::Handle* table_reader_handle;
+
+  FileSampledStats stats;
+
+  // Stats for compensating deletion entries during compaction
+
+  // File size compensated by deletion entry.
+  // This is updated in Version::UpdateAccumulatedStats() the first time the
+  // file is created or loaded. After it is updated (!= 0), it is immutable.
+  uint64_t compensated_file_size;
+  // These values can mutate, but they can only be read or written from
+  // the single-threaded LogAndApply thread
+  uint64_t num_entries;     // the number of entries.
+  uint64_t num_deletions;   // the number of deletion entries.
+  uint64_t raw_key_size;    // total uncompressed key size.
+  uint64_t raw_value_size;  // total uncompressed value size.
+
+  int refs;  // Reference count
+
+  bool being_compacted;        // Is this file undergoing compaction?
+  bool init_stats_from_file;   // true if the data-entry stats of this file
+                               // have been initialized from the file.
+
+  bool marked_for_compaction;  // True if client asked us nicely to compact
+                               // this file.
+
+  FileMetaData()
+      : smallest_seqno(kMaxSequenceNumber),
+        largest_seqno(0),
+        table_reader_handle(nullptr),
+        compensated_file_size(0),
+        num_entries(0),
+        num_deletions(0),
+        raw_key_size(0),
+        raw_value_size(0),
+        refs(0),
+        being_compacted(false),
+        init_stats_from_file(false),
+        marked_for_compaction(false) {}
+
+  // REQUIRED: Keys must be given to the function in sorted order (it expects
+  // the last key to be the largest).
+  void UpdateBoundaries(const Slice& key, SequenceNumber seqno) {
+    if (smallest.size() == 0) {
+      smallest.DecodeFrom(key);
+    }
+    largest.DecodeFrom(key);
+    smallest_seqno = std::min(smallest_seqno, seqno);
+    largest_seqno = std::max(largest_seqno, seqno);
+  }
+};
+
+// A compressed copy of file meta data that contains just the minimum data
+// needed to serve read operations, while still keeping a pointer to the full
+// metadata of the file in case it is needed.
+struct FdWithKeyRange {
+  FileDescriptor fd;
+  FileMetaData* file_metadata;  // Points to all metadata
+  Slice smallest_key;           // slice that contains the smallest key
+  Slice largest_key;            // slice that contains the largest key
+
+  FdWithKeyRange()
+      : fd(),
+        smallest_key(),
+        largest_key() {
+  }
+
+  FdWithKeyRange(FileDescriptor _fd, Slice _smallest_key, Slice _largest_key,
+                 FileMetaData* _file_metadata)
+      : fd(_fd),
+        file_metadata(_file_metadata),
+        smallest_key(_smallest_key),
+        largest_key(_largest_key) {}
+};
+
+// Data structure to store an array of FdWithKeyRange in one level
+// Actual data is guaranteed to be stored contiguously
+struct LevelFilesBrief {
+  size_t num_files;
+  FdWithKeyRange* files;
+  LevelFilesBrief() {
+    num_files = 0;
+    files = nullptr;
+  }
+};
+
+class VersionEdit {
+ public:
+  VersionEdit() { Clear(); }
+  ~VersionEdit() { }
+
+  void Clear();
+
+  void SetComparatorName(const Slice& name) {
+    has_comparator_ = true;
+    comparator_ = name.ToString();
+  }
+  void SetLogNumber(uint64_t num) {
+    has_log_number_ = true;
+    log_number_ = num;
+  }
+  void SetPrevLogNumber(uint64_t num) {
+    has_prev_log_number_ = true;
+    prev_log_number_ = num;
+  }
+  void SetNextFile(uint64_t num) {
+    has_next_file_number_ = true;
+    next_file_number_ = num;
+  }
+  void SetLastSequence(SequenceNumber seq) {
+    has_last_sequence_ = true;
+    last_sequence_ = seq;
+  }
+  void SetMaxColumnFamily(uint32_t max_column_family) {
+    has_max_column_family_ = true;
+    max_column_family_ = max_column_family;
+  }
+
+  // Add the specified file at the specified number.
+  // REQUIRES: This version has not been saved (see VersionSet::SaveTo)
+  // REQUIRES: "smallest" and "largest" are the smallest and largest keys in
+  // the file
+  void AddFile(int level, uint64_t file, uint32_t file_path_id,
+               uint64_t file_size, const InternalKey& smallest,
+               const InternalKey& largest, const SequenceNumber& smallest_seqno,
+               const SequenceNumber& largest_seqno,
+               bool marked_for_compaction) {
+    assert(smallest_seqno <= largest_seqno);
+    FileMetaData f;
+    f.fd = FileDescriptor(file, file_path_id, file_size);
+    f.smallest = smallest;
+    f.largest = largest;
+    f.smallest_seqno = smallest_seqno;
+    f.largest_seqno = largest_seqno;
+    f.marked_for_compaction = marked_for_compaction;
+    new_files_.emplace_back(level, std::move(f));
+  }
+
+  void AddFile(int level, const FileMetaData& f) {
+    assert(f.smallest_seqno <= f.largest_seqno);
+    new_files_.emplace_back(level, f);
+  }
+
+  // Delete the specified "file" from the specified "level".
+ void DeleteFile(int level, uint64_t file) { + deleted_files_.insert({level, file}); + } + + // Number of edits + size_t NumEntries() { return new_files_.size() + deleted_files_.size(); } + + bool IsColumnFamilyManipulation() { + return is_column_family_add_ || is_column_family_drop_; + } + + void SetColumnFamily(uint32_t column_family_id) { + column_family_ = column_family_id; + } + + // set column family ID by calling SetColumnFamily() + void AddColumnFamily(const std::string& name) { + assert(!is_column_family_drop_); + assert(!is_column_family_add_); + assert(NumEntries() == 0); + is_column_family_add_ = true; + column_family_name_ = name; + } + + // set column family ID by calling SetColumnFamily() + void DropColumnFamily() { + assert(!is_column_family_drop_); + assert(!is_column_family_add_); + assert(NumEntries() == 0); + is_column_family_drop_ = true; + } + + // return true on success. + bool EncodeTo(std::string* dst) const; + Status DecodeFrom(const Slice& src); + + const char* DecodeNewFile4From(Slice* input); + + typedef std::set<std::pair<int, uint64_t>> DeletedFileSet; + + const DeletedFileSet& GetDeletedFiles() { return deleted_files_; } + const std::vector<std::pair<int, FileMetaData>>& GetNewFiles() { + return new_files_; + } + + std::string DebugString(bool hex_key = false) const; + std::string DebugJSON(int edit_num, bool hex_key = false) const; + + private: + friend class VersionSet; + friend class Version; + + bool GetLevel(Slice* input, int* level, const char** msg); + + int max_level_; + std::string comparator_; + uint64_t log_number_; + uint64_t prev_log_number_; + uint64_t next_file_number_; + uint32_t max_column_family_; + SequenceNumber last_sequence_; + bool has_comparator_; + bool has_log_number_; + bool has_prev_log_number_; + bool has_next_file_number_; + bool has_last_sequence_; + bool has_max_column_family_; + + DeletedFileSet deleted_files_; + std::vector<std::pair<int, FileMetaData>> new_files_; + + // Each version edit record should have column_family_id set + // If it's not set, it is default (0) + uint32_t column_family_; + // a version edit can be either column_family add or + // column_family drop. If it's column family add, + // it also includes column family name. + bool is_column_family_drop_; + bool is_column_family_add_; + std::string column_family_name_; +}; + +} // namespace rocksdb
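To make the manifest encoding above concrete, here is a minimal round-trip sketch (illustrative only; VersionEdit is an internal class, so code like this would normally live in RocksDB's own tests, and the function name VersionEditRoundTrip is made up):

  #include <cassert>
  #include <string>
  #include "db/version_edit.h"

  void VersionEditRoundTrip() {
    rocksdb::VersionEdit edit;
    edit.SetLogNumber(7);
    edit.DeleteFile(/*level=*/1, /*file=*/42);
    edit.AddFile(/*level=*/2, /*file=*/43, /*file_path_id=*/0,
                 /*file_size=*/4096,
                 rocksdb::InternalKey("a", 100, rocksdb::kTypeValue),
                 rocksdb::InternalKey("z", 90, rocksdb::kTypeValue),
                 /*smallest_seqno=*/90, /*largest_seqno=*/100,
                 /*marked_for_compaction=*/false);

    std::string encoded;
    bool ok = edit.EncodeTo(&encoded);  // returns true on success
    assert(ok);

    rocksdb::VersionEdit decoded;
    rocksdb::Status s = decoded.DecodeFrom(rocksdb::Slice(encoded));
    assert(s.ok());
    // Both edits should describe the same manifest record.
    assert(edit.DebugString() == decoded.DebugString());
  }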
