This is an automated email from the ASF dual-hosted git repository.

adebreceni pushed a commit to branch minifi-api-reduced
in repository https://gitbox.apache.org/repos/asf/nifi-minifi-cpp.git
commit bf1134e5a1e762211fe99ec33b425abfcce32cfb
Author: Adam Debreceni <[email protected]>
AuthorDate: Tue Jun 24 11:45:55 2025 +0200

    Review changes, upgrade concurrentqueue
---
 core-framework/include/core/ProcessorImpl.h | 6 +-
 core-framework/src/core/ProcessorImpl.cpp | 4 +-
 core-framework/src/io/OutputStream.cpp | 12 +-
 extensions/aws/processors/AwsProcessor.h | 3 +-
 extensions/aws/processors/DeleteS3Object.h | 2 +-
 extensions/aws/processors/FetchS3Object.h | 2 +-
 extensions/aws/processors/ListS3.h | 2 +-
 extensions/aws/processors/PutKinesisStream.h | 2 +-
 extensions/aws/processors/PutS3Object.h | 2 +-
 extensions/aws/processors/S3Processor.cpp | 2 +-
 extensions/aws/processors/S3Processor.h | 1 +
 extensions/aws/tests/PutKinesisStreamTests.cpp | 2 +-
 extensions/python/ExecutePythonProcessor.cpp | 1 -
 thirdparty/concurrentqueue/concurrentqueue.h | 1370 +++++++++++++-----
 14 files changed, 764 insertions(+), 647 deletions(-)

diff --git a/core-framework/include/core/ProcessorImpl.h b/core-framework/include/core/ProcessorImpl.h
index 6d51b70ff..6b14e5e3d 100644
--- a/core-framework/include/core/ProcessorImpl.h
+++ b/core-framework/include/core/ProcessorImpl.h
@@ -77,8 +77,10 @@ class ProcessorImpl : public virtual ProcessorApi {
 public:
  explicit ProcessorImpl(ProcessorMetadata info);
- ProcessorImpl(const ProcessorImpl& parent) = delete;
- ProcessorImpl& operator=(const ProcessorImpl& parent) = delete;
+ ProcessorImpl(const ProcessorImpl&) = delete;
+ ProcessorImpl(ProcessorImpl&&) = delete;
+ ProcessorImpl& operator=(const ProcessorImpl&) = delete;
+ ProcessorImpl& operator=(ProcessorImpl&&) = delete;
  ~ProcessorImpl() override;

diff --git a/core-framework/src/core/ProcessorImpl.cpp b/core-framework/src/core/ProcessorImpl.cpp
index 460ba47ff..68f5885bd 100644
--- a/core-framework/src/core/ProcessorImpl.cpp
+++ b/core-framework/src/core/ProcessorImpl.cpp
@@ -45,10 +45,10 @@ using namespace std::literals::chrono_literals;
 namespace org::apache::nifi::minifi::core {
 ProcessorImpl::ProcessorImpl(ProcessorMetadata info)
-    : info_(info),
+    : info_(std::move(info)),
      trigger_when_empty_(false),
      metrics_(std::make_shared<ProcessorMetricsImpl>(*this)),
-    logger_(info.logger) {
+    logger_(info_.logger) {
  logger_->log_debug("Processor {} created with uuid {}", getName(), getUUIDStr());
 }

diff --git a/core-framework/src/io/OutputStream.cpp b/core-framework/src/io/OutputStream.cpp
index c19fff602..1b48912c8 100644
--- a/core-framework/src/io/OutputStream.cpp
+++ b/core-framework/src/io/OutputStream.cpp
@@ -24,11 +24,7 @@
 #include "io/OutputStream.h"
 #include "utils/gsl.h"
-namespace org {
-namespace apache {
-namespace nifi {
-namespace minifi {
-namespace io {
+namespace org::apache::nifi::minifi::io {
 size_t OutputStream::write(const std::vector<uint8_t>& buffer, size_t len) {
   if (buffer.size() < len) {
@@ -77,8 +73,4 @@ size_t OutputStream::write_str(const char* str, uint32_t len, bool widen) {
   return ret + write(reinterpret_cast<const uint8_t *>(str), len);
 }
-} /* namespace io */
-} /* namespace minifi */
-} /* namespace nifi */
-} /* namespace apache */
-} /* namespace org */
+} // namespace org::apache::nifi::minifi::io

diff --git a/extensions/aws/processors/AwsProcessor.h b/extensions/aws/processors/AwsProcessor.h
index 542d0c75b..f8ed99608 100644
--- a/extensions/aws/processors/AwsProcessor.h
+++ b/extensions/aws/processors/AwsProcessor.h
@@ -94,7 +94,7 @@ struct CommonProperties {
   std::string endpoint_override_url;
 };
-class AwsProcessor : public core::ProcessorImpl {
+class AwsProcessor :
public core::ProcessorImpl { // NOLINT(cppcoreguidelines-special-member-functions) public: EXTENSIONAPI static constexpr auto AccessKey = core::PropertyDefinitionBuilder<>::createProperty("Access Key") .withDescription("AWS account access key") @@ -170,6 +170,7 @@ class AwsProcessor : public core::ProcessorImpl { }); using ProcessorImpl::ProcessorImpl; + ~AwsProcessor() override = default; void onSchedule(core::ProcessContext& context, core::ProcessSessionFactory& session_factory) override; diff --git a/extensions/aws/processors/DeleteS3Object.h b/extensions/aws/processors/DeleteS3Object.h index 6a2047755..f723c58fd 100644 --- a/extensions/aws/processors/DeleteS3Object.h +++ b/extensions/aws/processors/DeleteS3Object.h @@ -35,7 +35,7 @@ class FlowProcessorS3TestsFixture; namespace org::apache::nifi::minifi::aws::processors { -class DeleteS3Object : public S3Processor { +class DeleteS3Object : public S3Processor { // NOLINT(cppcoreguidelines-special-member-functions) public: EXTENSIONAPI static constexpr const char* Description = "Deletes FlowFiles on an Amazon S3 Bucket. If attempting to delete a file that does not exist, FlowFile is routed to success."; diff --git a/extensions/aws/processors/FetchS3Object.h b/extensions/aws/processors/FetchS3Object.h index fa5fd182f..19d9529bf 100644 --- a/extensions/aws/processors/FetchS3Object.h +++ b/extensions/aws/processors/FetchS3Object.h @@ -37,7 +37,7 @@ class FlowProcessorS3TestsFixture; namespace org::apache::nifi::minifi::aws::processors { -class FetchS3Object : public S3Processor { +class FetchS3Object : public S3Processor { // NOLINT(cppcoreguidelines-special-member-functions) public: EXTENSIONAPI static constexpr const char* Description = "This Processor retrieves the contents of an S3 Object and writes it to the content of a FlowFile."; diff --git a/extensions/aws/processors/ListS3.h b/extensions/aws/processors/ListS3.h index b3e457952..4c9d64fb1 100644 --- a/extensions/aws/processors/ListS3.h +++ b/extensions/aws/processors/ListS3.h @@ -32,7 +32,7 @@ namespace org::apache::nifi::minifi::aws::processors { -class ListS3 : public S3Processor { +class ListS3 : public S3Processor { // NOLINT(cppcoreguidelines-special-member-functions) public: EXTENSIONAPI static constexpr const char* Description = "This Processor retrieves a listing of objects from an Amazon S3 bucket."; diff --git a/extensions/aws/processors/PutKinesisStream.h b/extensions/aws/processors/PutKinesisStream.h index 73d308dcf..458d52907 100644 --- a/extensions/aws/processors/PutKinesisStream.h +++ b/extensions/aws/processors/PutKinesisStream.h @@ -33,7 +33,7 @@ namespace org::apache::nifi::minifi::aws::processors { -class PutKinesisStream : public AwsProcessor { +class PutKinesisStream : public AwsProcessor { // NOLINT(cppcoreguidelines-special-member-functions) public: EXTENSIONAPI static constexpr const char* Description = "Sends the contents to a specified Amazon Kinesis. 
In order to send data to Kinesis, the stream name has to be specified."; diff --git a/extensions/aws/processors/PutS3Object.h b/extensions/aws/processors/PutS3Object.h index 0a5cbfe4e..03647b5da 100644 --- a/extensions/aws/processors/PutS3Object.h +++ b/extensions/aws/processors/PutS3Object.h @@ -43,7 +43,7 @@ class FlowProcessorS3TestsFixture; namespace org::apache::nifi::minifi::aws::processors { -class PutS3Object : public S3Processor { +class PutS3Object : public S3Processor { // NOLINT(cppcoreguidelines-special-member-functions) public: static constexpr auto CANNED_ACLS = minifi::utils::getKeys(minifi::aws::s3::CANNED_ACL_MAP); static constexpr auto STORAGE_CLASSES = minifi::utils::getKeys(minifi::aws::s3::STORAGE_CLASS_MAP); diff --git a/extensions/aws/processors/S3Processor.cpp b/extensions/aws/processors/S3Processor.cpp index 6bb6402f4..532bb6012 100644 --- a/extensions/aws/processors/S3Processor.cpp +++ b/extensions/aws/processors/S3Processor.cpp @@ -33,7 +33,7 @@ namespace org::apache::nifi::minifi::aws::processors { S3Processor::S3Processor(core::ProcessorMetadata info, std::unique_ptr<aws::s3::S3RequestSender> s3_request_sender) - : AwsProcessor(info), + : AwsProcessor(std::move(info)), s3_wrapper_(std::move(s3_request_sender)) { } diff --git a/extensions/aws/processors/S3Processor.h b/extensions/aws/processors/S3Processor.h index cdcee4c4c..a5f966b5d 100644 --- a/extensions/aws/processors/S3Processor.h +++ b/extensions/aws/processors/S3Processor.h @@ -51,6 +51,7 @@ class S3Processor : public AwsProcessor { EXTENSIONAPI static constexpr auto Properties = minifi::utils::array_cat(AwsProcessor::Properties, std::to_array<core::PropertyReference>({Bucket})); using AwsProcessor::AwsProcessor; + ~S3Processor() override = default; void onSchedule(core::ProcessContext& context, core::ProcessSessionFactory& session_factory) override; diff --git a/extensions/aws/tests/PutKinesisStreamTests.cpp b/extensions/aws/tests/PutKinesisStreamTests.cpp index b6532b5b3..3a3d4c105 100644 --- a/extensions/aws/tests/PutKinesisStreamTests.cpp +++ b/extensions/aws/tests/PutKinesisStreamTests.cpp @@ -83,7 +83,7 @@ class MockKinesisClient final : public Aws::Kinesis::KinesisClient { mutable uint32_t sequence_number_ = 0; }; -class PutKinesisStreamMocked final : public aws::processors::PutKinesisStream { +class PutKinesisStreamMocked final : public aws::processors::PutKinesisStream { // NOLINT(cppcoreguidelines-special-member-functions) public: static constexpr const char* Description = "PutKinesisStreamMocked"; diff --git a/extensions/python/ExecutePythonProcessor.cpp b/extensions/python/ExecutePythonProcessor.cpp index 7daec15c1..48a58ad6f 100644 --- a/extensions/python/ExecutePythonProcessor.cpp +++ b/extensions/python/ExecutePythonProcessor.cpp @@ -142,7 +142,6 @@ void ExecutePythonProcessor::loadScript() { } loadScriptFromFile(); last_script_write_time_ = utils::file::last_write_time(script_file_path_); - return; } } diff --git a/thirdparty/concurrentqueue/concurrentqueue.h b/thirdparty/concurrentqueue/concurrentqueue.h index db852d12c..b9c0bc7a8 100644 --- a/thirdparty/concurrentqueue/concurrentqueue.h +++ b/thirdparty/concurrentqueue/concurrentqueue.h @@ -5,7 +5,7 @@ // http://moodycamel.com/blog/2014/detailed-design-of-a-lock-free-queue // Simplified BSD license: -// Copyright (c) 2013-2016, Cameron Desrochers. +// Copyright (c) 2013-2020, Cameron Desrochers. // All rights reserved. 
// // Redistribution and use in source and binary forms, with or without modification, @@ -27,9 +27,11 @@ // TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, // EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// Also dual-licensed under the Boost Software License (see LICENSE.md) + #pragma once -#if defined(__GNUC__) +#if defined(__GNUC__) && !defined(__INTEL_COMPILER) // Disable -Wconversion warnings (spuriously triggered when Traits::size_t and // Traits::index_t are set to < 32 bits, causing integer promotion, causing warnings // upon assigning any computed values) @@ -41,6 +43,13 @@ #endif #endif +#if defined(_MSC_VER) && (!defined(_HAS_CXX17) || !_HAS_CXX17) +// VS2019 with /W4 warns about constant conditional expressions but unless /std=c++17 or higher +// does not support `if constexpr`, so we have no choice but to simply disable the warning +#pragma warning(push) +#pragma warning(disable: 4127) // conditional expression is constant +#endif + #if defined(__APPLE__) #include "TargetConditionals.h" #endif @@ -68,6 +77,7 @@ #include <climits> // for CHAR_BIT #include <array> #include <thread> // partly for __WINPTHREADS_VERSION if on MinGW-w64 w/ POSIX threading +#include <mutex> // used for thread exit synchronization // Platform-specific definitions of a numeric thread ID type and an invalid value namespace moodycamel { namespace details { @@ -95,10 +105,10 @@ namespace moodycamel { namespace details { static const thread_id_t invalid_thread_id2 = 0xFFFFFFFFU; // Not technically guaranteed to be invalid, but is never used in practice. Note that all Win32 thread IDs are presently multiples of 4. static inline thread_id_t thread_id() { return static_cast<thread_id_t>(::GetCurrentThreadId()); } } } -#elif defined(__arm__) || defined(_M_ARM) || defined(__aarch64__) || (defined(__APPLE__) && TARGET_OS_IPHONE) +#elif defined(__arm__) || defined(_M_ARM) || defined(__aarch64__) || (defined(__APPLE__) && TARGET_OS_IPHONE) || defined(__MVS__) || defined(MOODYCAMEL_NO_THREAD_LOCAL) namespace moodycamel { namespace details { static_assert(sizeof(std::thread::id) == 4 || sizeof(std::thread::id) == 8, "std::thread::id is expected to be either 4 or 8 bytes"); - + typedef std::thread::id thread_id_t; static const thread_id_t invalid_thread_id; // Default ctor creates invalid ID @@ -145,10 +155,21 @@ namespace moodycamel { namespace details { typedef std::uintptr_t thread_id_t; static const thread_id_t invalid_thread_id = 0; // Address can't be nullptr static const thread_id_t invalid_thread_id2 = 1; // Member accesses off a null pointer are also generally invalid. Plus it's not aligned. 
- static inline thread_id_t thread_id() { static MOODYCAMEL_THREADLOCAL int x; return reinterpret_cast<thread_id_t>(&x); } + inline thread_id_t thread_id() { static MOODYCAMEL_THREADLOCAL int x; return reinterpret_cast<thread_id_t>(&x); } } } #endif +// Constexpr if +#ifndef MOODYCAMEL_CONSTEXPR_IF +#if (defined(_MSC_VER) && defined(_HAS_CXX17) && _HAS_CXX17) || __cplusplus > 201402L +#define MOODYCAMEL_CONSTEXPR_IF if constexpr +#define MOODYCAMEL_MAYBE_UNUSED [[maybe_unused]] +#else +#define MOODYCAMEL_CONSTEXPR_IF if +#define MOODYCAMEL_MAYBE_UNUSED +#endif +#endif + // Exceptions #ifndef MOODYCAMEL_EXCEPTIONS_ENABLED #if (defined(_MSC_VER) && defined(_CPPUNWIND)) || (defined(__GNUC__) && defined(__EXCEPTIONS)) || (!defined(_MSC_VER) && !defined(__GNUC__)) @@ -161,8 +182,8 @@ namespace moodycamel { namespace details { #define MOODYCAMEL_RETHROW throw #define MOODYCAMEL_THROW(expr) throw (expr) #else -#define MOODYCAMEL_TRY if (true) -#define MOODYCAMEL_CATCH(...) else if (false) +#define MOODYCAMEL_TRY MOODYCAMEL_CONSTEXPR_IF (true) +#define MOODYCAMEL_CATCH(...) else MOODYCAMEL_CONSTEXPR_IF (false) #define MOODYCAMEL_RETHROW #define MOODYCAMEL_THROW(expr) #endif @@ -196,14 +217,14 @@ namespace moodycamel { namespace details { // VS2013 doesn't support `thread_local`, and MinGW-w64 w/ POSIX threading has a crippling bug: http://sourceforge.net/p/mingw-w64/bugs/445 // g++ <=4.7 doesn't support thread_local either. // Finally, iOS/ARM doesn't have support for it either, and g++/ARM allows it to compile but it's unconfirmed to actually work -#if (!defined(_MSC_VER) || _MSC_VER >= 1900) && (!defined(__MINGW32__) && !defined(__MINGW64__) || !defined(__WINPTHREADS_VERSION)) && (!defined(__GNUC__) || __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8)) && (!defined(__APPLE__) || !TARGET_OS_IPHONE) && !defined(__arm__) && !defined(_M_ARM) && !defined(__aarch64__) +#if (!defined(_MSC_VER) || _MSC_VER >= 1900) && (!defined(__MINGW32__) && !defined(__MINGW64__) || !defined(__WINPTHREADS_VERSION)) && (!defined(__GNUC__) || __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8)) && (!defined(__APPLE__) || !TARGET_OS_IPHONE) && !defined(__arm__) && !defined(_M_ARM) && !defined(__aarch64__) && !defined(__MVS__) // Assume `thread_local` is fully supported in all other C++11 compilers/platforms -//#define MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED // always disabled for now since several users report having problems with it on +#define MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED // tentatively enabled for now; years ago several users report having problems with it on #endif #endif #endif -// VS2012 doesn't support deleted functions. +// VS2012 doesn't support deleted functions. // In this case, we declare the function normally but don't define it. A link error will be generated if the function is called. 
#ifndef MOODYCAMEL_DELETE_FUNCTION #if defined(_MSC_VER) && _MSC_VER < 1800 @@ -213,14 +234,52 @@ namespace moodycamel { namespace details { #endif #endif +namespace moodycamel { namespace details { +#ifndef MOODYCAMEL_ALIGNAS +// VS2013 doesn't support alignas or alignof, and align() requires a constant literal +#if defined(_MSC_VER) && _MSC_VER <= 1800 +#define MOODYCAMEL_ALIGNAS(alignment) __declspec(align(alignment)) +#define MOODYCAMEL_ALIGNOF(obj) __alignof(obj) +#define MOODYCAMEL_ALIGNED_TYPE_LIKE(T, obj) typename details::Vs2013Aligned<std::alignment_of<obj>::value, T>::type + template<int Align, typename T> struct Vs2013Aligned { }; // default, unsupported alignment + template<typename T> struct Vs2013Aligned<1, T> { typedef __declspec(align(1)) T type; }; + template<typename T> struct Vs2013Aligned<2, T> { typedef __declspec(align(2)) T type; }; + template<typename T> struct Vs2013Aligned<4, T> { typedef __declspec(align(4)) T type; }; + template<typename T> struct Vs2013Aligned<8, T> { typedef __declspec(align(8)) T type; }; + template<typename T> struct Vs2013Aligned<16, T> { typedef __declspec(align(16)) T type; }; + template<typename T> struct Vs2013Aligned<32, T> { typedef __declspec(align(32)) T type; }; + template<typename T> struct Vs2013Aligned<64, T> { typedef __declspec(align(64)) T type; }; + template<typename T> struct Vs2013Aligned<128, T> { typedef __declspec(align(128)) T type; }; + template<typename T> struct Vs2013Aligned<256, T> { typedef __declspec(align(256)) T type; }; +#else + template<typename T> struct identity { typedef T type; }; +#define MOODYCAMEL_ALIGNAS(alignment) alignas(alignment) +#define MOODYCAMEL_ALIGNOF(obj) alignof(obj) +#define MOODYCAMEL_ALIGNED_TYPE_LIKE(T, obj) alignas(alignof(obj)) typename details::identity<T>::type +#endif +#endif +} } + + +// TSAN can false report races in lock-free code. To enable TSAN to be used from projects that use this one, +// we can apply per-function compile-time suppression. +// See https://clang.llvm.org/docs/ThreadSanitizer.html#has-feature-thread-sanitizer +#define MOODYCAMEL_NO_TSAN +#if defined(__has_feature) + #if __has_feature(thread_sanitizer) + #undef MOODYCAMEL_NO_TSAN + #define MOODYCAMEL_NO_TSAN __attribute__((no_sanitize("thread"))) + #endif // TSAN +#endif // TSAN + // Compiler-specific likely/unlikely hints namespace moodycamel { namespace details { #if defined(__GNUC__) - inline bool likely(bool x) { return __builtin_expect((x), true); } - inline bool unlikely(bool x) { return __builtin_expect((x), false); } + static inline bool (likely)(bool x) { return __builtin_expect((x), true); } + static inline bool (unlikely)(bool x) { return __builtin_expect((x), false); } #else - inline bool likely(bool x) { return x; } - inline bool unlikely(bool x) { return x; } + static inline bool (likely)(bool x) { return x; } + static inline bool (unlikely)(bool x) { return x; } #endif } } @@ -238,11 +297,19 @@ namespace details { : static_cast<T>(-1); }; -#if defined(__GNUC__) && !defined( __clang__ ) - typedef ::max_align_t max_align_t; // GCC forgot to add it to std:: for a while +#if defined(__GLIBCXX__) + typedef ::max_align_t std_max_align_t; // libstdc++ forgot to add it to std:: for a while #else - typedef std::max_align_t max_align_t; // Others (e.g. MSVC) insist it can *only* be accessed via std:: + typedef std::max_align_t std_max_align_t; // Others (e.g. 
MSVC) insist it can *only* be accessed via std:: #endif + + // Some platforms have incorrectly set max_align_t to a type with <8 bytes alignment even while supporting + // 8-byte aligned scalar values (*cough* 32-bit iOS). Work around this with our own union. See issue #64. + typedef union { + std_max_align_t x; + long long y; + void* z; + } max_align_t; } // Default traits for the ConcurrentQueue. To change some of the @@ -255,7 +322,7 @@ struct ConcurrentQueueDefaultTraits { // General-purpose size type. std::size_t is strongly recommended. typedef std::size_t size_t; - + // The type used for the enqueue and dequeue indices. Must be at least as // large as size_t. Should be significantly larger than the number of elements // you expect to hold at once, especially if you have a high turnover rate; @@ -267,47 +334,60 @@ struct ConcurrentQueueDefaultTraits // whether the queue is lock-free with a 64-int type depends on the whether // std::atomic<std::uint64_t> is lock-free, which is platform-specific. typedef std::size_t index_t; - + // Internally, all elements are enqueued and dequeued from multi-element // blocks; this is the smallest controllable unit. If you expect few elements // but many producers, a smaller block size should be favoured. For few producers // and/or many elements, a larger block size is preferred. A sane default // is provided. Must be a power of 2. static const size_t BLOCK_SIZE = 32; - + // For explicit producers (i.e. when using a producer token), the block is // checked for being empty by iterating through a list of flags, one per element. // For large block sizes, this is too inefficient, and switching to an atomic // counter-based approach is faster. The switch is made for block sizes strictly // larger than this threshold. static const size_t EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD = 32; - + // How many full blocks can be expected for a single explicit producer? This should // reflect that number's maximum for optimal performance. Must be a power of 2. static const size_t EXPLICIT_INITIAL_INDEX_SIZE = 32; - + // How many full blocks can be expected for a single implicit producer? This should // reflect that number's maximum for optimal performance. Must be a power of 2. static const size_t IMPLICIT_INITIAL_INDEX_SIZE = 32; - + // The initial size of the hash table mapping thread IDs to implicit producers. // Note that the hash is resized every time it becomes half full. // Must be a power of two, and either 0 or at least 1. If 0, implicit production // (using the enqueue methods without an explicit producer token) is disabled. static const size_t INITIAL_IMPLICIT_PRODUCER_HASH_SIZE = 32; - + // Controls the number of items that an explicit consumer (i.e. one with a token) // must consume before it causes all consumers to rotate and move on to the next // internal queue. static const std::uint32_t EXPLICIT_CONSUMER_CONSUMPTION_QUOTA_BEFORE_ROTATE = 256; - + // The maximum number of elements (inclusive) that can be enqueued to a sub-queue. // Enqueue operations that would cause this limit to be surpassed will fail. Note // that this limit is enforced at the block level (for performance reasons), i.e. // it's rounded up to the nearest block size. static const size_t MAX_SUBQUEUE_SIZE = details::const_numeric_max<size_t>::value; - - + + // The number of times to spin before sleeping when waiting on a semaphore. + // Recommended values are on the order of 1000-10000 unless the number of + // consumer threads exceeds the number of idle cores (in which case try 0-100). 
+ // Only affects instances of the BlockingConcurrentQueue. + static const int MAX_SEMA_SPINS = 10000; + + // Whether to recycle dynamically-allocated blocks into an internal free list or + // not. If false, only pre-allocated blocks (controlled by the constructor + // arguments) will be recycled, and all others will be `free`d back to the heap. + // Note that blocks consumed by explicit producers are only freed on destruction + // of the queue (not following destruction of the token) regardless of this trait. + static const bool RECYCLE_ALLOCATED_BLOCKS = false; + + #ifndef MCDBGQ_USE_RELACY // Memory allocation can be customized if needed. // malloc should return nullptr on failure, and handle alignment like std::malloc. @@ -353,13 +433,13 @@ namespace details ConcurrentQueueProducerTypelessBase* next; std::atomic<bool> inactive; ProducerToken* token; - + ConcurrentQueueProducerTypelessBase() : next(nullptr), inactive(false), token(nullptr) { } }; - + template<bool use32> struct _hash_32_or_64 { static inline std::uint32_t hash(std::uint32_t h) { @@ -385,28 +465,23 @@ namespace details } }; template<std::size_t size> struct hash_32_or_64 : public _hash_32_or_64<(size > 4)> { }; - + static inline size_t hash_thread_id(thread_id_t id) { static_assert(sizeof(thread_id_t) <= 8, "Expected a platform where thread IDs are at most 64-bit values"); return static_cast<size_t>(hash_32_or_64<sizeof(thread_id_converter<thread_id_t>::thread_id_hash_t)>::hash( thread_id_converter<thread_id_t>::prehash(id))); } - + template<typename T> static inline bool circular_less_than(T a, T b) { -#ifdef _MSC_VER -#pragma warning(push) -#pragma warning(disable: 4554) -#endif static_assert(std::is_integral<T>::value && !std::numeric_limits<T>::is_signed, "circular_less_than is intended to be used only with unsigned integer types"); - return static_cast<T>(a - b) > static_cast<T>(static_cast<T>(1) << static_cast<T>(sizeof(T) * CHAR_BIT - 1)); -#ifdef _MSC_VER -#pragma warning(pop) -#endif + return static_cast<T>(a - b) > static_cast<T>(static_cast<T>(1) << (static_cast<T>(sizeof(T) * CHAR_BIT - 1))); + // Note: extra parens around rhs of operator<< is MSVC bug: https://developercommunity2.visualstudio.com/t/C4554-triggers-when-both-lhs-and-rhs-is/10034931 + // silencing the bug requires #pragma warning(disable: 4554) around the calling code and has no effect when done here. 
} - + template<typename U> static inline char* align_for(char* ptr) { @@ -430,7 +505,7 @@ namespace details ++x; return x; } - + template<typename T> static inline void swap_relaxed(std::atomic<T>& left, std::atomic<T>& right) { @@ -438,13 +513,13 @@ namespace details left.store(std::move(right.load(std::memory_order_relaxed)), std::memory_order_relaxed); right.store(std::move(temp), std::memory_order_relaxed); } - + template<typename T> static inline T const& nomove(T const& x) { return x; } - + template<bool Enable> struct nomove_if { @@ -454,7 +529,7 @@ namespace details return x; } }; - + template<> struct nomove_if<false> { @@ -465,47 +540,56 @@ namespace details return std::forward<U>(x); } }; - + template<typename It> static inline auto deref_noexcept(It& it) MOODYCAMEL_NOEXCEPT -> decltype(*it) { return *it; } - + #if defined(__clang__) || !defined(__GNUC__) || __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8) template<typename T> struct is_trivially_destructible : std::is_trivially_destructible<T> { }; #else template<typename T> struct is_trivially_destructible : std::has_trivial_destructor<T> { }; #endif - + #ifdef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED #ifdef MCDBGQ_USE_RELACY typedef RelacyThreadExitListener ThreadExitListener; typedef RelacyThreadExitNotifier ThreadExitNotifier; #else + class ThreadExitNotifier; + struct ThreadExitListener { typedef void (*callback_t)(void*); callback_t callback; void* userData; - + ThreadExitListener* next; // reserved for use by the ThreadExitNotifier + ThreadExitNotifier* chain; // reserved for use by the ThreadExitNotifier }; - - + class ThreadExitNotifier { public: static void subscribe(ThreadExitListener* listener) { auto& tlsInst = instance(); + std::lock_guard<std::mutex> guard(mutex()); listener->next = tlsInst.tail; + listener->chain = &tlsInst; tlsInst.tail = listener; } - + static void unsubscribe(ThreadExitListener* listener) { - auto& tlsInst = instance(); + std::lock_guard<std::mutex> guard(mutex()); + if (!listener->chain) { + return; // race with ~ThreadExitNotifier + } + auto& tlsInst = *listener->chain; + listener->chain = nullptr; ThreadExitListener** prev = &tlsInst.tail; for (auto ptr = tlsInst.tail; ptr != nullptr; ptr = ptr->next) { if (ptr == listener) { @@ -515,34 +599,43 @@ namespace details prev = &ptr->next; } } - + private: ThreadExitNotifier() : tail(nullptr) { } ThreadExitNotifier(ThreadExitNotifier const&) MOODYCAMEL_DELETE_FUNCTION; ThreadExitNotifier& operator=(ThreadExitNotifier const&) MOODYCAMEL_DELETE_FUNCTION; - + ~ThreadExitNotifier() { // This thread is about to exit, let everyone know! assert(this == &instance() && "If this assert fails, you likely have a buggy compiler! 
Change the preprocessor conditions such that MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED is no longer defined."); + std::lock_guard<std::mutex> guard(mutex()); for (auto ptr = tail; ptr != nullptr; ptr = ptr->next) { + ptr->chain = nullptr; ptr->callback(ptr->userData); } } - + // Thread-local static inline ThreadExitNotifier& instance() { static thread_local ThreadExitNotifier notifier; return notifier; } - + + static inline std::mutex& mutex() + { + // Must be static because the ThreadExitNotifier could be destroyed while unsubscribe is called + static std::mutex mutex; + return mutex; + } + private: ThreadExitListener* tail; }; #endif #endif - + template<typename T> struct static_is_lock_free_num { enum { value = 0 }; }; template<> struct static_is_lock_free_num<signed char> { enum { value = ATOMIC_CHAR_LOCK_FREE }; }; template<> struct static_is_lock_free_num<short> { enum { value = ATOMIC_SHORT_LOCK_FREE }; }; @@ -559,10 +652,10 @@ struct ProducerToken { template<typename T, typename Traits> explicit ProducerToken(ConcurrentQueue<T, Traits>& queue); - + template<typename T, typename Traits> explicit ProducerToken(BlockingConcurrentQueue<T, Traits>& queue); - + ProducerToken(ProducerToken&& other) MOODYCAMEL_NOEXCEPT : producer(other.producer) { @@ -571,13 +664,13 @@ struct ProducerToken producer->token = this; } } - + inline ProducerToken& operator=(ProducerToken&& other) MOODYCAMEL_NOEXCEPT { swap(other); return *this; } - + void swap(ProducerToken& other) MOODYCAMEL_NOEXCEPT { std::swap(producer, other.producer); @@ -588,7 +681,7 @@ struct ProducerToken other.producer->token = &other; } } - + // A token is always valid unless: // 1) Memory allocation failed during construction // 2) It was moved via the move constructor @@ -598,7 +691,7 @@ struct ProducerToken // that the token is valid for use with a specific queue, // but not which one; that's up to the user to track. 
inline bool valid() const { return producer != nullptr; } - + ~ProducerToken() { if (producer != nullptr) { @@ -606,15 +699,15 @@ struct ProducerToken producer->inactive.store(true, std::memory_order_release); } } - + // Disable copying and assignment ProducerToken(ProducerToken const&) MOODYCAMEL_DELETE_FUNCTION; ProducerToken& operator=(ProducerToken const&) MOODYCAMEL_DELETE_FUNCTION; - + private: template<typename T, typename Traits> friend class ConcurrentQueue; friend class ConcurrentQueueTests; - + protected: details::ConcurrentQueueProducerTypelessBase* producer; }; @@ -624,21 +717,21 @@ struct ConsumerToken { template<typename T, typename Traits> explicit ConsumerToken(ConcurrentQueue<T, Traits>& q); - + template<typename T, typename Traits> explicit ConsumerToken(BlockingConcurrentQueue<T, Traits>& q); - + ConsumerToken(ConsumerToken&& other) MOODYCAMEL_NOEXCEPT : initialOffset(other.initialOffset), lastKnownGlobalOffset(other.lastKnownGlobalOffset), itemsConsumedFromCurrent(other.itemsConsumedFromCurrent), currentProducer(other.currentProducer), desiredProducer(other.desiredProducer) { } - + inline ConsumerToken& operator=(ConsumerToken&& other) MOODYCAMEL_NOEXCEPT { swap(other); return *this; } - + void swap(ConsumerToken& other) MOODYCAMEL_NOEXCEPT { std::swap(initialOffset, other.initialOffset); @@ -647,7 +740,7 @@ struct ConsumerToken std::swap(currentProducer, other.currentProducer); std::swap(desiredProducer, other.desiredProducer); } - + // Disable copying and assignment ConsumerToken(ConsumerToken const&) MOODYCAMEL_DELETE_FUNCTION; ConsumerToken& operator=(ConsumerToken const&) MOODYCAMEL_DELETE_FUNCTION; @@ -655,7 +748,7 @@ struct ConsumerToken private: template<typename T, typename Traits> friend class ConcurrentQueue; friend class ConcurrentQueueTests; - + private: // but shared with ConcurrentQueue std::uint32_t initialOffset; std::uint32_t lastKnownGlobalOffset; @@ -676,10 +769,10 @@ class ConcurrentQueue public: typedef ::moodycamel::ProducerToken producer_token_t; typedef ::moodycamel::ConsumerToken consumer_token_t; - + typedef typename Traits::index_t index_t; typedef typename Traits::size_t size_t; - + static const size_t BLOCK_SIZE = static_cast<size_t>(Traits::BLOCK_SIZE); static const size_t EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD = static_cast<size_t>(Traits::EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD); static const size_t EXPLICIT_INITIAL_INDEX_SIZE = static_cast<size_t>(Traits::EXPLICIT_INITIAL_INDEX_SIZE); @@ -717,7 +810,7 @@ public: // queue is fully constructed before it starts being used by other threads (this // includes making the memory effects of construction visible, possibly with a // memory barrier). - explicit ConcurrentQueue(size_t capacity = 6 * BLOCK_SIZE) + explicit ConcurrentQueue(size_t capacity = 32 * BLOCK_SIZE) : producerListTail(nullptr), producerCount(0), initialBlockPoolIndex(0), @@ -727,7 +820,7 @@ public: implicitProducerHashResizeInProgress.clear(std::memory_order_relaxed); populate_initial_implicit_producer_hash(); populate_initial_block_list(capacity / BLOCK_SIZE + ((capacity & (BLOCK_SIZE - 1)) == 0 ? 
0 : 1)); - + #ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG // Track all the producers using a fully-resolved typed list for // each kind; this makes it possible to debug them starting from @@ -737,7 +830,7 @@ public: implicitProducers.store(nullptr, std::memory_order_relaxed); #endif } - + // Computes the correct amount of pre-allocated blocks for you based // on the minimum number of elements you want available at any given // time, and the maximum concurrent number of each type of producer. @@ -752,13 +845,13 @@ public: populate_initial_implicit_producer_hash(); size_t blocks = (((minCapacity + BLOCK_SIZE - 1) / BLOCK_SIZE) - 1) * (maxExplicitProducers + 1) + 2 * (maxExplicitProducers + maxImplicitProducers); populate_initial_block_list(blocks); - + #ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG explicitProducers.store(nullptr, std::memory_order_relaxed); implicitProducers.store(nullptr, std::memory_order_relaxed); #endif } - + // Note: The queue should not be accessed concurrently while it's // being deleted. It's up to the user to synchronize this. // This method is not thread safe. @@ -774,9 +867,9 @@ public: destroy(ptr); ptr = next; } - + // Destroy implicit producer hash tables - if (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE != 0) { + MOODYCAMEL_CONSTEXPR_IF (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE != 0) { auto hash = implicitProducerHash.load(std::memory_order_relaxed); while (hash != nullptr) { auto prev = hash->prev; @@ -790,7 +883,7 @@ public: hash = prev; } } - + // Destroy global free list auto block = freeList.head_unsafe(); while (block != nullptr) { @@ -800,7 +893,7 @@ public: } block = next; } - + // Destroy initial free list destroy_array(initialBlockPool, initialBlockPoolSize); } @@ -808,7 +901,7 @@ public: // Disable copying and copy assignment ConcurrentQueue(ConcurrentQueue const&) MOODYCAMEL_DELETE_FUNCTION; ConcurrentQueue& operator=(ConcurrentQueue const&) MOODYCAMEL_DELETE_FUNCTION; - + // Moving is supported, but note that it is *not* a thread-safe operation. // Nobody can use the queue while it's being moved, and the memory effects // of that move must be propagated to other threads before they can use it. @@ -829,31 +922,31 @@ public: implicitProducerHashResizeInProgress.clear(std::memory_order_relaxed); populate_initial_implicit_producer_hash(); swap_implicit_producer_hashes(other); - + other.producerListTail.store(nullptr, std::memory_order_relaxed); other.producerCount.store(0, std::memory_order_relaxed); other.nextExplicitConsumerId.store(0, std::memory_order_relaxed); other.globalExplicitConsumerOffset.store(0, std::memory_order_relaxed); - + #ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG explicitProducers.store(other.explicitProducers.load(std::memory_order_relaxed), std::memory_order_relaxed); other.explicitProducers.store(nullptr, std::memory_order_relaxed); implicitProducers.store(other.implicitProducers.load(std::memory_order_relaxed), std::memory_order_relaxed); other.implicitProducers.store(nullptr, std::memory_order_relaxed); #endif - + other.initialBlockPoolIndex.store(0, std::memory_order_relaxed); other.initialBlockPoolSize = 0; other.initialBlockPool = nullptr; - + reown_producers(); } - + inline ConcurrentQueue& operator=(ConcurrentQueue&& other) MOODYCAMEL_NOEXCEPT { return swap_internal(other); } - + // Swaps this queue's state with the other's. Not thread-safe. 
// Swapping two queues does not invalidate their tokens, however // the tokens that were created for one queue must be used with @@ -863,14 +956,14 @@ public: { swap_internal(other); } - + private: ConcurrentQueue& swap_internal(ConcurrentQueue& other) { if (this == &other) { return *this; } - + details::swap_relaxed(producerListTail, other.producerListTail); details::swap_relaxed(producerCount, other.producerCount); details::swap_relaxed(initialBlockPoolIndex, other.initialBlockPoolIndex); @@ -879,20 +972,20 @@ private: freeList.swap(other.freeList); details::swap_relaxed(nextExplicitConsumerId, other.nextExplicitConsumerId); details::swap_relaxed(globalExplicitConsumerOffset, other.globalExplicitConsumerOffset); - + swap_implicit_producer_hashes(other); - + reown_producers(); other.reown_producers(); - + #ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG details::swap_relaxed(explicitProducers, other.explicitProducers); details::swap_relaxed(implicitProducers, other.implicitProducers); #endif - + return *this; } - + public: // Enqueues a single item (by copying it). // Allocates memory if required. Only fails if memory allocation fails (or implicit @@ -901,10 +994,10 @@ public: // Thread-safe. inline bool enqueue(T const& item) { - if (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) return false; - return inner_enqueue<CanAlloc>(item); + MOODYCAMEL_CONSTEXPR_IF (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) return false; + else return inner_enqueue<CanAlloc>(item); } - + // Enqueues a single item (by moving it, if possible). // Allocates memory if required. Only fails if memory allocation fails (or implicit // production is disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE is 0, @@ -912,10 +1005,10 @@ public: // Thread-safe. inline bool enqueue(T&& item) { - if (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) return false; - return inner_enqueue<CanAlloc>(std::move(item)); + MOODYCAMEL_CONSTEXPR_IF (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) return false; + else return inner_enqueue<CanAlloc>(std::move(item)); } - + // Enqueues a single item (by copying it) using an explicit producer token. // Allocates memory if required. Only fails if memory allocation fails (or // Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed). @@ -924,7 +1017,7 @@ public: { return inner_enqueue<CanAlloc>(token, item); } - + // Enqueues a single item (by moving it, if possible) using an explicit producer token. // Allocates memory if required. Only fails if memory allocation fails (or // Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed). @@ -933,7 +1026,7 @@ public: { return inner_enqueue<CanAlloc>(token, std::move(item)); } - + // Enqueues several items. // Allocates memory if required. Only fails if memory allocation fails (or // implicit production is disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE @@ -943,10 +1036,10 @@ public: template<typename It> bool enqueue_bulk(It itemFirst, size_t count) { - if (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) return false; - return inner_enqueue_bulk<CanAlloc>(itemFirst, count); + MOODYCAMEL_CONSTEXPR_IF (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) return false; + else return inner_enqueue_bulk<CanAlloc>(itemFirst, count); } - + // Enqueues several items using an explicit producer token. // Allocates memory if required. Only fails if memory allocation fails // (or Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed). 
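For orientation while reading these hunks: enqueue, try_enqueue and enqueue_bulk (with or without a ProducerToken) are the library's public producer API, and try_dequeue is its consumer counterpart. A minimal, self-contained usage sketch follows; it is illustrative only and assumes the header is included from this repository's thirdparty/concurrentqueue/ directory.

#include "concurrentqueue.h"  // vendored under thirdparty/concurrentqueue/ in this repo

#include <cstdio>

int main() {
  moodycamel::ConcurrentQueue<int> queue;

  // Implicit producer: only works if Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE != 0.
  queue.enqueue(1);

  // Explicit producer token: skips the implicit-producer hash lookup entirely.
  moodycamel::ProducerToken ptok(queue);
  int batch[3] = {2, 3, 4};
  queue.enqueue_bulk(ptok, batch, 3);

  // try_enqueue does not allocate new blocks (beyond a possible one-time implicit
  // producer); it fails instead when there is no room.
  queue.try_enqueue(5);

  int item = 0;
  while (queue.try_dequeue(item)) {
    std::printf("%d\n", item);  // ordering is FIFO only with respect to each producer
  }
  return 0;
}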
@@ -958,7 +1051,7 @@ public: { return inner_enqueue_bulk<CanAlloc>(token, itemFirst, count); } - + // Enqueues a single item (by copying it). // Does not allocate memory. Fails if not enough room to enqueue (or implicit // production is disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE @@ -966,10 +1059,10 @@ public: // Thread-safe. inline bool try_enqueue(T const& item) { - if (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) return false; - return inner_enqueue<CannotAlloc>(item); + MOODYCAMEL_CONSTEXPR_IF (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) return false; + else return inner_enqueue<CannotAlloc>(item); } - + // Enqueues a single item (by moving it, if possible). // Does not allocate memory (except for one-time implicit producer). // Fails if not enough room to enqueue (or implicit production is @@ -977,10 +1070,10 @@ public: // Thread-safe. inline bool try_enqueue(T&& item) { - if (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) return false; - return inner_enqueue<CannotAlloc>(std::move(item)); + MOODYCAMEL_CONSTEXPR_IF (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) return false; + else return inner_enqueue<CannotAlloc>(std::move(item)); } - + // Enqueues a single item (by copying it) using an explicit producer token. // Does not allocate memory. Fails if not enough room to enqueue. // Thread-safe. @@ -988,7 +1081,7 @@ public: { return inner_enqueue<CannotAlloc>(token, item); } - + // Enqueues a single item (by moving it, if possible) using an explicit producer token. // Does not allocate memory. Fails if not enough room to enqueue. // Thread-safe. @@ -996,7 +1089,7 @@ public: { return inner_enqueue<CannotAlloc>(token, std::move(item)); } - + // Enqueues several items. // Does not allocate memory (except for one-time implicit producer). // Fails if not enough room to enqueue (or implicit production is @@ -1007,10 +1100,10 @@ public: template<typename It> bool try_enqueue_bulk(It itemFirst, size_t count) { - if (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) return false; - return inner_enqueue_bulk<CannotAlloc>(itemFirst, count); + MOODYCAMEL_CONSTEXPR_IF (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) return false; + else return inner_enqueue_bulk<CannotAlloc>(itemFirst, count); } - + // Enqueues several items using an explicit producer token. // Does not allocate memory. Fails if not enough room to enqueue. // Note: Use std::make_move_iterator if the elements should be moved @@ -1021,9 +1114,9 @@ public: { return inner_enqueue_bulk<CannotAlloc>(token, itemFirst, count); } - - - + + + // Attempts to dequeue from the queue. // Returns false if all producer streams appeared empty at the time they // were checked (so, the queue is likely but not guaranteed to be empty). @@ -1046,11 +1139,11 @@ public: ++nonEmptyCount; } } - + // If there was at least one non-empty queue but it appears empty at the time // we try to dequeue from it, we need to make sure every queue's been tried if (nonEmptyCount > 0) { - if (details::likely(best->dequeue(item))) { + if ((details::likely)(best->dequeue(item))) { return true; } for (auto ptr = producerListTail.load(std::memory_order_acquire); ptr != nullptr; ptr = ptr->next_prod()) { @@ -1061,7 +1154,7 @@ public: } return false; } - + // Attempts to dequeue from the queue. // Returns false if all producer streams appeared empty at the time they // were checked (so, the queue is likely but not guaranteed to be empty). @@ -1081,7 +1174,7 @@ public: } return false; } - + // Attempts to dequeue from the queue using an explicit consumer token. 
// Returns false if all producer streams appeared empty at the time they // were checked (so, the queue is likely but not guaranteed to be empty). @@ -1094,13 +1187,13 @@ public: // If you see that the global offset has changed, you must reset your consumption counter and move to your designated place // If there's no items where you're supposed to be, keep moving until you find a producer with some items // If the global offset has not changed but you've run out of items to consume, move over from your current position until you find an producer with something in it - + if (token.desiredProducer == nullptr || token.lastKnownGlobalOffset != globalExplicitConsumerOffset.load(std::memory_order_relaxed)) { if (!update_current_producer_after_rotation(token)) { return false; } } - + // If there was at least one non-empty queue but it appears empty at the time // we try to dequeue from it, we need to make sure every queue's been tried if (static_cast<ProducerBase*>(token.currentProducer)->dequeue(item)) { @@ -1109,7 +1202,7 @@ public: } return true; } - + auto tail = producerListTail.load(std::memory_order_acquire); auto ptr = static_cast<ProducerBase*>(token.currentProducer)->next_prod(); if (ptr == nullptr) { @@ -1128,7 +1221,7 @@ public: } return false; } - + // Attempts to dequeue several elements from the queue. // Returns the number of items actually dequeued. // Returns 0 if all producer streams appeared empty at the time they @@ -1146,7 +1239,7 @@ public: } return count; } - + // Attempts to dequeue several elements from the queue using an explicit consumer token. // Returns the number of items actually dequeued. // Returns 0 if all producer streams appeared empty at the time they @@ -1160,7 +1253,7 @@ public: return 0; } } - + size_t count = static_cast<ProducerBase*>(token.currentProducer)->dequeue_bulk(itemFirst, max); if (count == max) { if ((token.itemsConsumedFromCurrent += static_cast<std::uint32_t>(max)) >= EXPLICIT_CONSUMER_CONSUMPTION_QUOTA_BEFORE_ROTATE) { @@ -1170,7 +1263,7 @@ public: } token.itemsConsumedFromCurrent += static_cast<std::uint32_t>(count); max -= count; - + auto tail = producerListTail.load(std::memory_order_acquire); auto ptr = static_cast<ProducerBase*>(token.currentProducer)->next_prod(); if (ptr == nullptr) { @@ -1194,9 +1287,9 @@ public: } return count; } - - - + + + // Attempts to dequeue from a specific producer's inner queue. // If you happen to know which producer you want to dequeue from, this // is significantly faster than using the general-case try_dequeue methods. @@ -1208,7 +1301,7 @@ public: { return static_cast<ExplicitProducer*>(producer.producer)->dequeue(item); } - + // Attempts to dequeue several elements from a specific producer's inner queue. // Returns the number of items actually dequeued. // If you happen to know which producer you want to dequeue from, this @@ -1221,8 +1314,8 @@ public: { return static_cast<ExplicitProducer*>(producer.producer)->dequeue_bulk(itemFirst, max); } - - + + // Returns an estimate of the total number of elements currently in the queue. This // estimate is only accurate if the queue has completely stabilized before it is called // (i.e. all enqueue and dequeue operations have completed and their memory effects are @@ -1237,12 +1330,12 @@ public: } return size; } - - + + // Returns true if the underlying atomic variables used by // the queue are lock-free (they should be on most platforms). // Thread-safe. 
- static bool is_lock_free() + static constexpr bool is_lock_free() { return details::static_is_lock_free<bool>::value == 2 && @@ -1257,42 +1350,45 @@ public: private: friend struct ProducerToken; friend struct ConsumerToken; + struct ExplicitProducer; friend struct ExplicitProducer; + struct ImplicitProducer; + friend struct ImplicitProducer; friend class ConcurrentQueueTests; - + enum AllocationMode { CanAlloc, CannotAlloc }; - - + + /////////////////////////////// // Queue methods /////////////////////////////// - + template<AllocationMode canAlloc, typename U> inline bool inner_enqueue(producer_token_t const& token, U&& element) { return static_cast<ExplicitProducer*>(token.producer)->ConcurrentQueue::ExplicitProducer::template enqueue<canAlloc>(std::forward<U>(element)); } - + template<AllocationMode canAlloc, typename U> inline bool inner_enqueue(U&& element) { auto producer = get_or_add_implicit_producer(); return producer == nullptr ? false : producer->ConcurrentQueue::ImplicitProducer::template enqueue<canAlloc>(std::forward<U>(element)); } - + template<AllocationMode canAlloc, typename It> inline bool inner_enqueue_bulk(producer_token_t const& token, It itemFirst, size_t count) { return static_cast<ExplicitProducer*>(token.producer)->ConcurrentQueue::ExplicitProducer::template enqueue_bulk<canAlloc>(itemFirst, count); } - + template<AllocationMode canAlloc, typename It> inline bool inner_enqueue_bulk(It itemFirst, size_t count) { auto producer = get_or_add_implicit_producer(); return producer == nullptr ? false : producer->ConcurrentQueue::ImplicitProducer::template enqueue_bulk<canAlloc>(itemFirst, count); } - + inline bool update_current_producer_after_rotation(consumer_token_t& token) { // Ah, there's been a rotation, figure out where we should be! @@ -1302,7 +1398,7 @@ private: } auto prodCount = producerCount.load(std::memory_order_relaxed); auto globalOffset = globalExplicitConsumerOffset.load(std::memory_order_relaxed); - if (details::unlikely(token.desiredProducer == nullptr)) { + if ((details::unlikely)(token.desiredProducer == nullptr)) { // Aha, first time we're dequeueing anything. // Figure out our local position // Note: offset is from start, not end, but we're traversing from end -- subtract from count first @@ -1315,7 +1411,7 @@ private: } } } - + std::uint32_t delta = globalOffset - token.lastKnownGlobalOffset; if (delta >= prodCount) { delta = delta % prodCount; @@ -1326,27 +1422,27 @@ private: token.desiredProducer = tail; } } - + token.lastKnownGlobalOffset = globalOffset; token.currentProducer = token.desiredProducer; token.itemsConsumedFromCurrent = 0; return true; } - - + + /////////////////////////// // Free list /////////////////////////// - + template <typename N> struct FreeListNode { FreeListNode() : freeListRefs(0), freeListNext(nullptr) { } - + std::atomic<std::uint32_t> freeListRefs; std::atomic<N*> freeListNext; }; - + // A simple CAS-based lock-free free list. Not the fastest thing in the world under heavy contention, but // simple and correct (assuming nodes are never freed until after the free list is destroyed), and fairly // speedy under low contention. 
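The comment above characterizes the free list as a simple CAS-based structure. As a rough sketch of that idea only (the real FreeList below additionally keeps per-node reference counts so that concurrent try_get calls remain safe), a lock-free push onto such a list can look like this:

#include <atomic>

struct Node { Node* next = nullptr; };

class NaiveFreeList {
 public:
  void add(Node* node) {
    Node* head = head_.load(std::memory_order_relaxed);
    do {
      node->next = head;  // link the node in front of the current head
    } while (!head_.compare_exchange_weak(head, node,
                                          std::memory_order_release,
                                          std::memory_order_relaxed));
  }

  Node* head_unsafe() const { return head_.load(std::memory_order_relaxed); }

 private:
  std::atomic<Node*> head_{nullptr};  // Treiber-stack style head pointer
};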
@@ -1356,15 +1452,15 @@ private: FreeList() : freeListHead(nullptr) { } FreeList(FreeList&& other) : freeListHead(other.freeListHead.load(std::memory_order_relaxed)) { other.freeListHead.store(nullptr, std::memory_order_relaxed); } void swap(FreeList& other) { details::swap_relaxed(freeListHead, other.freeListHead); } - + FreeList(FreeList const&) MOODYCAMEL_DELETE_FUNCTION; FreeList& operator=(FreeList const&) MOODYCAMEL_DELETE_FUNCTION; - + inline void add(N* node) { -#if MCDBGQ_NOLOCKFREE_FREELIST +#ifdef MCDBGQ_NOLOCKFREE_FREELIST debug::DebugLock lock(mutex); -#endif +#endif // We know that the should-be-on-freelist bit is 0 at this point, so it's safe to // set it using a fetch_add if (node->freeListRefs.fetch_add(SHOULD_BE_ON_FREELIST, std::memory_order_acq_rel) == 0) { @@ -1373,12 +1469,12 @@ private: add_knowing_refcount_is_zero(node); } } - + inline N* try_get() { -#if MCDBGQ_NOLOCKFREE_FREELIST +#ifdef MCDBGQ_NOLOCKFREE_FREELIST debug::DebugLock lock(mutex); -#endif +#endif auto head = freeListHead.load(std::memory_order_acquire); while (head != nullptr) { auto prevHead = head; @@ -1387,7 +1483,7 @@ private: head = freeListHead.load(std::memory_order_acquire); continue; } - + // Good, reference count has been incremented (it wasn't at zero), which means we can read the // next and not worry about it changing between now and the time we do the CAS auto next = head->freeListNext.load(std::memory_order_relaxed); @@ -1395,27 +1491,27 @@ private: // Yay, got the node. This means it was on the list, which means shouldBeOnFreeList must be false no // matter the refcount (because nobody else knows it's been taken off yet, it can't have been put back on). assert((head->freeListRefs.load(std::memory_order_relaxed) & SHOULD_BE_ON_FREELIST) == 0); - + // Decrease refcount twice, once for our ref, and once for the list's ref - head->freeListRefs.fetch_add(-2, std::memory_order_release); + head->freeListRefs.fetch_sub(2, std::memory_order_release); return head; } - + // OK, the head must have changed on us, but we still need to decrease the refcount we increased. // Note that we don't need to release any memory effects, but we do need to ensure that the reference // count decrement happens-after the CAS on the head. - refs = prevHead->freeListRefs.fetch_add(-1, std::memory_order_acq_rel); + refs = prevHead->freeListRefs.fetch_sub(1, std::memory_order_acq_rel); if (refs == SHOULD_BE_ON_FREELIST + 1) { add_knowing_refcount_is_zero(prevHead); } } - + return nullptr; } - + // Useful for traversing the list when there's no contention (e.g. 
to destroy remaining nodes) N* head_unsafe() const { return freeListHead.load(std::memory_order_relaxed); } - + private: inline void add_knowing_refcount_is_zero(N* node) { @@ -1440,47 +1536,47 @@ private: return; } } - + private: // Implemented like a stack, but where node order doesn't matter (nodes are inserted out of order under contention) std::atomic<N*> freeListHead; - + static const std::uint32_t REFS_MASK = 0x7FFFFFFF; static const std::uint32_t SHOULD_BE_ON_FREELIST = 0x80000000; - -#if MCDBGQ_NOLOCKFREE_FREELIST + +#ifdef MCDBGQ_NOLOCKFREE_FREELIST debug::DebugMutex mutex; #endif }; - - + + /////////////////////////// // Block /////////////////////////// - + enum InnerQueueContext { implicit_context = 0, explicit_context = 1 }; - + struct Block { Block() - : next(nullptr), elementsCompletelyDequeued(0), freeListRefs(0), freeListNext(nullptr), shouldBeOnFreeList(false), dynamicallyAllocated(true) + : next(nullptr), elementsCompletelyDequeued(0), freeListRefs(0), freeListNext(nullptr), dynamicallyAllocated(true) { -#if MCDBGQ_TRACKMEM +#ifdef MCDBGQ_TRACKMEM owner = nullptr; #endif } - + template<InnerQueueContext context> inline bool is_empty() const { - if (context == explicit_context && BLOCK_SIZE <= EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD) { + MOODYCAMEL_CONSTEXPR_IF (context == explicit_context && BLOCK_SIZE <= EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD) { // Check flags for (size_t i = 0; i < BLOCK_SIZE; ++i) { if (!emptyFlags[i].load(std::memory_order_relaxed)) { return false; } } - + // Aha, empty; make sure we have all other memory effects that happened before the empty flags were set std::atomic_thread_fence(std::memory_order_acquire); return true; @@ -1495,12 +1591,12 @@ private: return false; } } - + // Returns true if the block is now empty (does not apply in explicit context) template<InnerQueueContext context> - inline bool set_empty(index_t i) + inline bool set_empty(MOODYCAMEL_MAYBE_UNUSED index_t i) { - if (context == explicit_context && BLOCK_SIZE <= EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD) { + MOODYCAMEL_CONSTEXPR_IF (context == explicit_context && BLOCK_SIZE <= EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD) { // Set flag assert(!emptyFlags[BLOCK_SIZE - 1 - static_cast<size_t>(i & static_cast<index_t>(BLOCK_SIZE - 1))].load(std::memory_order_relaxed)); emptyFlags[BLOCK_SIZE - 1 - static_cast<size_t>(i & static_cast<index_t>(BLOCK_SIZE - 1))].store(true, std::memory_order_release); @@ -1513,13 +1609,13 @@ private: return prevVal == BLOCK_SIZE - 1; } } - + // Sets multiple contiguous item statuses to 'empty' (assumes no wrapping and count > 0). // Returns true if the block is now empty (does not apply in explicit context). 
template<InnerQueueContext context> - inline bool set_many_empty(index_t i, size_t count) + inline bool set_many_empty(MOODYCAMEL_MAYBE_UNUSED index_t i, size_t count) { - if (context == explicit_context && BLOCK_SIZE <= EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD) { + MOODYCAMEL_CONSTEXPR_IF (context == explicit_context && BLOCK_SIZE <= EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD) { // Set flags std::atomic_thread_fence(std::memory_order_release); i = BLOCK_SIZE - 1 - static_cast<size_t>(i & static_cast<index_t>(BLOCK_SIZE - 1)) - count + 1; @@ -1536,11 +1632,11 @@ private: return prevVal + count == BLOCK_SIZE; } } - + template<InnerQueueContext context> inline void set_all_empty() { - if (context == explicit_context && BLOCK_SIZE <= EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD) { + MOODYCAMEL_CONSTEXPR_IF (context == explicit_context && BLOCK_SIZE <= EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD) { // Set all flags for (size_t i = 0; i != BLOCK_SIZE; ++i) { emptyFlags[i].store(true, std::memory_order_relaxed); @@ -1551,11 +1647,11 @@ private: elementsCompletelyDequeued.store(BLOCK_SIZE, std::memory_order_relaxed); } } - + template<InnerQueueContext context> inline void reset_empty() { - if (context == explicit_context && BLOCK_SIZE <= EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD) { + MOODYCAMEL_CONSTEXPR_IF (context == explicit_context && BLOCK_SIZE <= EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD) { // Reset flags for (size_t i = 0; i != BLOCK_SIZE; ++i) { emptyFlags[i].store(false, std::memory_order_relaxed); @@ -1566,25 +1662,13 @@ private: elementsCompletelyDequeued.store(0, std::memory_order_relaxed); } } - + inline T* operator[](index_t idx) MOODYCAMEL_NOEXCEPT { return static_cast<T*>(static_cast<void*>(elements)) + static_cast<size_t>(idx & static_cast<index_t>(BLOCK_SIZE - 1)); } inline T const* operator[](index_t idx) const MOODYCAMEL_NOEXCEPT { return static_cast<T const*>(static_cast<void const*>(elements)) + static_cast<size_t>(idx & static_cast<index_t>(BLOCK_SIZE - 1)); } - + private: - // IMPORTANT: This must be the first member in Block, so that if T depends on the alignment of - // addresses returned by malloc, that alignment will be preserved. Apparently clang actually - // generates code that uses this assumption for AVX instructions in some cases. Ideally, we - // should also align Block to the alignment of T in case it's higher than malloc's 16-byte - // alignment, but this is hard to do in a cross-platform way. Assert for this case: - static_assert(std::alignment_of<T>::value <= std::alignment_of<details::max_align_t>::value, "The queue does not support super-aligned types at this time"); - // Additionally, we need the alignment of Block itself to be a multiple of max_align_t since - // otherwise the appropriate padding will not be added at the end of Block in order to make - // arrays of Blocks all be properly aligned (not just the first one). We use a union to force - // this. 
- union { - char elements[sizeof(T) * BLOCK_SIZE]; - details::max_align_t dummy; - }; + static_assert(std::alignment_of<T>::value <= sizeof(T), "The queue does not support types with an alignment greater than their size at this time"); + MOODYCAMEL_ALIGNED_TYPE_LIKE(char[sizeof(T) * BLOCK_SIZE], T) elements; public: Block* next; std::atomic<size_t> elementsCompletelyDequeued; @@ -1592,26 +1676,25 @@ private: public: std::atomic<std::uint32_t> freeListRefs; std::atomic<Block*> freeListNext; - std::atomic<bool> shouldBeOnFreeList; bool dynamicallyAllocated; // Perhaps a better name for this would be 'isNotPartOfInitialBlockPool' - -#if MCDBGQ_TRACKMEM + +#ifdef MCDBGQ_TRACKMEM void* owner; #endif }; - static_assert(std::alignment_of<Block>::value >= std::alignment_of<details::max_align_t>::value, "Internal error: Blocks must be at least as aligned as the type they are wrapping"); + static_assert(std::alignment_of<Block>::value >= std::alignment_of<T>::value, "Internal error: Blocks must be at least as aligned as the type they are wrapping"); -#if MCDBGQ_TRACKMEM +#ifdef MCDBGQ_TRACKMEM public: struct MemStats; private: #endif - + /////////////////////////// // Producer base /////////////////////////// - + struct ProducerBase : public details::ConcurrentQueueProducerTypelessBase { ProducerBase(ConcurrentQueue* parent_, bool isExplicit_) : @@ -1624,9 +1707,9 @@ private: parent(parent_) { } - - virtual ~ProducerBase() { }; - + + virtual ~ProducerBase() { } + template<typename U> inline bool dequeue(U& element) { @@ -1637,7 +1720,7 @@ private: return static_cast<ImplicitProducer*>(this)->dequeue(element); } } - + template<typename It> inline size_t dequeue_bulk(It& itemFirst, size_t max) { @@ -1648,45 +1731,45 @@ private: return static_cast<ImplicitProducer*>(this)->dequeue_bulk(itemFirst, max); } } - + inline ProducerBase* next_prod() const { return static_cast<ProducerBase*>(next); } - + inline size_t size_approx() const { auto tail = tailIndex.load(std::memory_order_relaxed); auto head = headIndex.load(std::memory_order_relaxed); return details::circular_less_than(head, tail) ? static_cast<size_t>(tail - head) : 0; } - + inline index_t getTail() const { return tailIndex.load(std::memory_order_relaxed); } protected: std::atomic<index_t> tailIndex; // Where to enqueue to next std::atomic<index_t> headIndex; // Where to dequeue from next - + std::atomic<index_t> dequeueOptimisticCount; std::atomic<index_t> dequeueOvercommit; - + Block* tailBlock; - + public: bool isExplicit; ConcurrentQueue* parent; - + protected: -#if MCDBGQ_TRACKMEM +#ifdef MCDBGQ_TRACKMEM friend struct MemStats; #endif }; - - + + /////////////////////////// // Explicit queue /////////////////////////// - + struct ExplicitProducer : public ProducerBase { - explicit ExplicitProducer(ConcurrentQueue* parent) : - ProducerBase(parent, true), + explicit ExplicitProducer(ConcurrentQueue* parent_) : + ProducerBase(parent_, true), blockIndex(nullptr), pr_blockIndexSlotsUsed(0), pr_blockIndexSize(EXPLICIT_INITIAL_INDEX_SIZE >> 1), @@ -1694,14 +1777,14 @@ private: pr_blockIndexEntries(nullptr), pr_blockIndexRaw(nullptr) { - size_t poolBasedIndexSize = details::ceil_to_pow_2(parent->initialBlockPoolSize) >> 1; + size_t poolBasedIndexSize = details::ceil_to_pow_2(parent_->initialBlockPoolSize) >> 1; if (poolBasedIndexSize > pr_blockIndexSize) { pr_blockIndexSize = poolBasedIndexSize; } - + new_block_index(0); // This creates an index with double the number of current entries, i.e. 
EXPLICIT_INITIAL_INDEX_SIZE } - + ~ExplicitProducer() { // Destruct any elements not yet dequeued. @@ -1720,7 +1803,7 @@ private: assert(details::circular_less_than<index_t>(pr_blockIndexEntries[i].base, this->headIndex.load(std::memory_order_relaxed))); halfDequeuedBlock = pr_blockIndexEntries[i].block; } - + // Start at the head block (note the first line in the loop gives us the head from the tail on the first iteration) auto block = this->tailBlock; do { @@ -1728,12 +1811,12 @@ private: if (block->ConcurrentQueue::Block::template is_empty<explicit_context>()) { continue; } - + size_t i = 0; // Offset into block if (block == halfDequeuedBlock) { i = static_cast<size_t>(this->headIndex.load(std::memory_order_relaxed) & static_cast<index_t>(BLOCK_SIZE - 1)); } - + // Walk through all the items in the block; if this is the tail block, we need to stop when we reach the tail index auto lastValidIndex = (this->tailIndex.load(std::memory_order_relaxed) & static_cast<index_t>(BLOCK_SIZE - 1)) == 0 ? BLOCK_SIZE : static_cast<size_t>(this->tailIndex.load(std::memory_order_relaxed) & static_cast<index_t>(BLOCK_SIZE - 1)); while (i != BLOCK_SIZE && (block != this->tailBlock || i != lastValidIndex)) { @@ -1741,22 +1824,17 @@ private: } } while (block != this->tailBlock); } - + // Destroy all blocks that we own if (this->tailBlock != nullptr) { auto block = this->tailBlock; do { auto nextBlock = block->next; - if (block->dynamicallyAllocated) { - destroy(block); - } - else { - this->parent->add_block_to_free_list(block); - } + this->parent->add_block_to_free_list(block); block = nextBlock; } while (block != this->tailBlock); } - + // Destroy the block indices auto header = static_cast<BlockIndexHeader*>(pr_blockIndexRaw); while (header != nullptr) { @@ -1766,7 +1844,7 @@ private: header = prev; } } - + template<AllocationMode allocMode, typename U> inline bool enqueue(U&& element) { @@ -1777,10 +1855,10 @@ private: auto startBlock = this->tailBlock; auto originalBlockIndexSlotsUsed = pr_blockIndexSlotsUsed; if (this->tailBlock != nullptr && this->tailBlock->next->ConcurrentQueue::Block::template is_empty<explicit_context>()) { - // We can re-use the block ahead of us, it's empty! + // We can re-use the block ahead of us, it's empty! this->tailBlock = this->tailBlock->next; this->tailBlock->ConcurrentQueue::Block::template reset_empty<explicit_context>(); - + // We'll put the block on the block index (guaranteed to be room since we're conceptually removing the // last block from it first -- except instead of removing then adding, we can just overwrite). // Note that there must be a valid block index here, since even if allocation failed in the ctor, @@ -1805,18 +1883,21 @@ private: // Hmm, the circular block index is already full -- we'll need // to allocate a new index. Note pr_blockIndexRaw can only be nullptr if // the initial allocation failed in the constructor. 
- - if (allocMode == CannotAlloc || !new_block_index(pr_blockIndexSlotsUsed)) { + + MOODYCAMEL_CONSTEXPR_IF (allocMode == CannotAlloc) { + return false; + } + else if (!new_block_index(pr_blockIndexSlotsUsed)) { return false; } } - + // Insert a new block in the circular linked list auto newBlock = this->parent->ConcurrentQueue::template requisition_block<allocMode>(); if (newBlock == nullptr) { return false; } -#if MCDBGQ_TRACKMEM +#ifdef MCDBGQ_TRACKMEM newBlock->owner = this; #endif newBlock->ConcurrentQueue::Block::template reset_empty<explicit_context>(); @@ -1830,8 +1911,8 @@ private: this->tailBlock = newBlock; ++pr_blockIndexSlotsUsed; } - - if (!MOODYCAMEL_NOEXCEPT_CTOR(T, U, new (nullptr) T(std::forward<U>(element)))) { + + MOODYCAMEL_CONSTEXPR_IF (!MOODYCAMEL_NOEXCEPT_CTOR(T, U, new (static_cast<T*>(nullptr)) T(std::forward<U>(element)))) { // The constructor may throw. We want the element not to appear in the queue in // that case (without corrupting the queue): MOODYCAMEL_TRY { @@ -1849,27 +1930,27 @@ private: (void)startBlock; (void)originalBlockIndexSlotsUsed; } - + // Add block to block index auto& entry = blockIndex.load(std::memory_order_relaxed)->entries[pr_blockIndexFront]; entry.base = currentTailIndex; entry.block = this->tailBlock; blockIndex.load(std::memory_order_relaxed)->front.store(pr_blockIndexFront, std::memory_order_release); pr_blockIndexFront = (pr_blockIndexFront + 1) & (pr_blockIndexSize - 1); - - if (!MOODYCAMEL_NOEXCEPT_CTOR(T, U, new (nullptr) T(std::forward<U>(element)))) { + + MOODYCAMEL_CONSTEXPR_IF (!MOODYCAMEL_NOEXCEPT_CTOR(T, U, new (static_cast<T*>(nullptr)) T(std::forward<U>(element)))) { this->tailIndex.store(newTailIndex, std::memory_order_release); return true; } } - + // Enqueue new ((*this->tailBlock)[currentTailIndex]) T(std::forward<U>(element)); - + this->tailIndex.store(newTailIndex, std::memory_order_release); return true; } - + template<typename U> bool dequeue(U& element) { @@ -1877,10 +1958,10 @@ private: auto overcommit = this->dequeueOvercommit.load(std::memory_order_relaxed); if (details::circular_less_than<index_t>(this->dequeueOptimisticCount.load(std::memory_order_relaxed) - overcommit, tail)) { // Might be something to dequeue, let's give it a try - + // Note that this if is purely for performance purposes in the common case when the queue is // empty and the values are eventually consistent -- we may enter here spuriously. - + // Note that whatever the values of overcommit and tail are, they are not going to change (unless we // change them) and must be the same value at this point (inside the if) as when the if condition was // evaluated. @@ -1893,23 +1974,24 @@ private: // unfortunately that can't be shown to be correct using only the C++11 standard. // See http://stackoverflow.com/questions/18223161/what-are-the-c11-memory-ordering-guarantees-in-this-corner-case std::atomic_thread_fence(std::memory_order_acquire); - + // Increment optimistic counter, then check if it went over the boundary auto myDequeueCount = this->dequeueOptimisticCount.fetch_add(1, std::memory_order_relaxed); - + // Note that since dequeueOvercommit must be <= dequeueOptimisticCount (because dequeueOvercommit is only ever // incremented after dequeueOptimisticCount -- this is enforced in the `else` block below), and since we now // have a version of dequeueOptimisticCount that is at least as recent as overcommit (due to the release upon // incrementing dequeueOvercommit and the acquire above that synchronizes with it), overcommit <= myDequeueCount. 
- assert(overcommit <= myDequeueCount); - + // However, we can't assert this since both dequeueOptimisticCount and dequeueOvercommit may (independently) + // overflow; in such a case, though, the logic still holds since the difference between the two is maintained. + // Note that we reload tail here in case it changed; it will be the same value as before or greater, since // this load is sequenced after (happens after) the earlier load above. This is supported by read-read // coherency (as defined in the standard), explained here: http://en.cppreference.com/w/cpp/atomic/memory_order tail = this->tailIndex.load(std::memory_order_acquire); - if (details::likely(details::circular_less_than<index_t>(myDequeueCount - overcommit, tail))) { + if ((details::likely)(details::circular_less_than<index_t>(myDequeueCount - overcommit, tail))) { // Guaranteed to be at least one element to dequeue! - + // Get the index. Note that since there's guaranteed to be at least one element, this // will never exceed tail. We need to do an acquire-release fence here since it's possible // that whatever condition got us to this point was for an earlier enqueued element (that @@ -1919,21 +2001,21 @@ private: // place with the more current condition (they must have acquired a tail that is at least // as recent). auto index = this->headIndex.fetch_add(1, std::memory_order_acq_rel); - - + + // Determine which block the element is in - + auto localBlockIndex = blockIndex.load(std::memory_order_acquire); auto localBlockIndexHead = localBlockIndex->front.load(std::memory_order_acquire); - + // We need to be careful here about subtracting and dividing because of index wrap-around. // When an index wraps, we need to preserve the sign of the offset when dividing it by the // block size (in order to get a correct signed block count offset in all cases): auto headBase = localBlockIndex->entries[localBlockIndexHead].base; auto blockBaseIndex = index & ~static_cast<index_t>(BLOCK_SIZE - 1); - auto offset = static_cast<size_t>(static_cast<typename std::make_signed<index_t>::type>(blockBaseIndex - headBase) / BLOCK_SIZE); + auto offset = static_cast<size_t>(static_cast<typename std::make_signed<index_t>::type>(blockBaseIndex - headBase) / static_cast<typename std::make_signed<index_t>::type>(BLOCK_SIZE)); auto block = localBlockIndex->entries[(localBlockIndexHead + offset) & (localBlockIndex->size - 1)].block; - + // Dequeue auto& el = *((*block)[index]); if (!MOODYCAMEL_NOEXCEPT_ASSIGN(T, T&&, element = std::move(el))) { @@ -1942,22 +2024,22 @@ private: struct Guard { Block* block; index_t index; - + ~Guard() { (*block)[index]->~T(); block->ConcurrentQueue::Block::template set_empty<explicit_context>(index); } } guard = { block, index }; - - element = std::move(el); + + element = std::move(el); // NOLINT } else { - element = std::move(el); - el.~T(); + element = std::move(el); // NOLINT + el.~T(); // NOLINT block->ConcurrentQueue::Block::template set_empty<explicit_context>(index); } - + return true; } else { @@ -1965,12 +2047,12 @@ private: this->dequeueOvercommit.fetch_add(1, std::memory_order_release); // Release so that the fetch_add on dequeueOptimisticCount is guaranteed to happen before this write } } - + return false; } - + template<AllocationMode allocMode, typename It> - bool enqueue_bulk(It itemFirst, size_t count) + bool MOODYCAMEL_NO_TSAN enqueue_bulk(It itemFirst, size_t count) { // First, we need to make sure we have enough room to enqueue all of the elements; // this means pre-allocating blocks and putting them in 
the block index (but only if @@ -1979,9 +2061,9 @@ private: auto startBlock = this->tailBlock; auto originalBlockIndexFront = pr_blockIndexFront; auto originalBlockIndexSlotsUsed = pr_blockIndexSlotsUsed; - + Block* firstAllocatedBlock = nullptr; - + // Figure out how many blocks we'll need to allocate, and do so size_t blockBaseDiff = ((startTailIndex + count - 1) & ~static_cast<index_t>(BLOCK_SIZE - 1)) - ((startTailIndex - 1) & ~static_cast<index_t>(BLOCK_SIZE - 1)); index_t currentTailIndex = (startTailIndex - 1) & ~static_cast<index_t>(BLOCK_SIZE - 1); @@ -1990,39 +2072,46 @@ private: while (blockBaseDiff > 0 && this->tailBlock != nullptr && this->tailBlock->next != firstAllocatedBlock && this->tailBlock->next->ConcurrentQueue::Block::template is_empty<explicit_context>()) { blockBaseDiff -= static_cast<index_t>(BLOCK_SIZE); currentTailIndex += static_cast<index_t>(BLOCK_SIZE); - + this->tailBlock = this->tailBlock->next; firstAllocatedBlock = firstAllocatedBlock == nullptr ? this->tailBlock : firstAllocatedBlock; - + auto& entry = blockIndex.load(std::memory_order_relaxed)->entries[pr_blockIndexFront]; entry.base = currentTailIndex; entry.block = this->tailBlock; pr_blockIndexFront = (pr_blockIndexFront + 1) & (pr_blockIndexSize - 1); } - + // Now allocate as many blocks as necessary from the block pool while (blockBaseDiff > 0) { blockBaseDiff -= static_cast<index_t>(BLOCK_SIZE); currentTailIndex += static_cast<index_t>(BLOCK_SIZE); - + auto head = this->headIndex.load(std::memory_order_relaxed); assert(!details::circular_less_than<index_t>(currentTailIndex, head)); bool full = !details::circular_less_than<index_t>(head, currentTailIndex + BLOCK_SIZE) || (MAX_SUBQUEUE_SIZE != details::const_numeric_max<size_t>::value && (MAX_SUBQUEUE_SIZE == 0 || MAX_SUBQUEUE_SIZE - BLOCK_SIZE < currentTailIndex - head)); if (pr_blockIndexRaw == nullptr || pr_blockIndexSlotsUsed == pr_blockIndexSize || full) { - if (allocMode == CannotAlloc || full || !new_block_index(originalBlockIndexSlotsUsed)) { + MOODYCAMEL_CONSTEXPR_IF (allocMode == CannotAlloc) { // Failed to allocate, undo changes (but keep injected blocks) pr_blockIndexFront = originalBlockIndexFront; pr_blockIndexSlotsUsed = originalBlockIndexSlotsUsed; this->tailBlock = startBlock == nullptr ? firstAllocatedBlock : startBlock; return false; } - + else if (full || !new_block_index(originalBlockIndexSlotsUsed)) { + // Failed to allocate, undo changes (but keep injected blocks) + pr_blockIndexFront = originalBlockIndexFront; + pr_blockIndexSlotsUsed = originalBlockIndexSlotsUsed; + this->tailBlock = startBlock == nullptr ? firstAllocatedBlock : startBlock; + return false; + } + // pr_blockIndexFront is updated inside new_block_index, so we need to // update our fallback value too (since we keep the new index even if we // later fail) originalBlockIndexFront = originalBlockIndexSlotsUsed; } - + // Insert a new block in the circular linked list auto newBlock = this->parent->ConcurrentQueue::template requisition_block<allocMode>(); if (newBlock == nullptr) { @@ -2031,8 +2120,8 @@ private: this->tailBlock = startBlock == nullptr ? firstAllocatedBlock : startBlock; return false; } - -#if MCDBGQ_TRACKMEM + +#ifdef MCDBGQ_TRACKMEM newBlock->owner = this; #endif newBlock->ConcurrentQueue::Block::template set_all_empty<explicit_context>(); @@ -2045,15 +2134,15 @@ private: } this->tailBlock = newBlock; firstAllocatedBlock = firstAllocatedBlock == nullptr ? 
this->tailBlock : firstAllocatedBlock; - + ++pr_blockIndexSlotsUsed; - + auto& entry = blockIndex.load(std::memory_order_relaxed)->entries[pr_blockIndexFront]; entry.base = currentTailIndex; entry.block = this->tailBlock; pr_blockIndexFront = (pr_blockIndexFront + 1) & (pr_blockIndexSize - 1); } - + // Excellent, all allocations succeeded. Reset each block's emptiness before we fill them up, and // publish the new block index front auto block = firstAllocatedBlock; @@ -2064,12 +2153,12 @@ private: } block = block->next; } - - if (MOODYCAMEL_NOEXCEPT_CTOR(T, decltype(*itemFirst), new (nullptr) T(details::deref_noexcept(itemFirst)))) { + + MOODYCAMEL_CONSTEXPR_IF (MOODYCAMEL_NOEXCEPT_CTOR(T, decltype(*itemFirst), new (static_cast<T*>(nullptr)) T(details::deref_noexcept(itemFirst)))) { blockIndex.load(std::memory_order_relaxed)->front.store((pr_blockIndexFront - 1) & (pr_blockIndexSize - 1), std::memory_order_release); } } - + // Enqueue, one block at a time index_t newTailIndex = startTailIndex + static_cast<index_t>(count); currentTailIndex = startTailIndex; @@ -2080,11 +2169,11 @@ private: this->tailBlock = firstAllocatedBlock; } while (true) { - auto stopIndex = (currentTailIndex & ~static_cast<index_t>(BLOCK_SIZE - 1)) + static_cast<index_t>(BLOCK_SIZE); + index_t stopIndex = (currentTailIndex & ~static_cast<index_t>(BLOCK_SIZE - 1)) + static_cast<index_t>(BLOCK_SIZE); if (details::circular_less_than<index_t>(newTailIndex, stopIndex)) { stopIndex = newTailIndex; } - if (MOODYCAMEL_NOEXCEPT_CTOR(T, decltype(*itemFirst), new (nullptr) T(details::deref_noexcept(itemFirst)))) { + MOODYCAMEL_CONSTEXPR_IF (MOODYCAMEL_NOEXCEPT_CTOR(T, decltype(*itemFirst), new (static_cast<T*>(nullptr)) T(details::deref_noexcept(itemFirst)))) { while (currentTailIndex != stopIndex) { new ((*this->tailBlock)[currentTailIndex++]) T(*itemFirst++); } @@ -2099,7 +2188,7 @@ private: // may only define a (noexcept) move constructor, and so calls to the // cctor will not compile, even if they are in an if branch that will never // be executed - new ((*this->tailBlock)[currentTailIndex]) T(details::nomove_if<(bool)!MOODYCAMEL_NOEXCEPT_CTOR(T, decltype(*itemFirst), new (nullptr) T(details::deref_noexcept(itemFirst)))>::eval(*itemFirst)); + new ((*this->tailBlock)[currentTailIndex]) T(details::nomove_if<!MOODYCAMEL_NOEXCEPT_CTOR(T, decltype(*itemFirst), new (static_cast<T*>(nullptr)) T(details::deref_noexcept(itemFirst)))>::eval(*itemFirst)); ++currentTailIndex; ++itemFirst; } @@ -2110,11 +2199,11 @@ private: // any allocated blocks in our linked list for later, though). auto constructedStopIndex = currentTailIndex; auto lastBlockEnqueued = this->tailBlock; - + pr_blockIndexFront = originalBlockIndexFront; pr_blockIndexSlotsUsed = originalBlockIndexSlotsUsed; this->tailBlock = startBlock == nullptr ? 
firstAllocatedBlock : startBlock; - + if (!details::is_trivially_destructible<T>::value) { auto block = startBlock; if ((startTailIndex & static_cast<index_t>(BLOCK_SIZE - 1)) == 0) { @@ -2138,22 +2227,23 @@ private: MOODYCAMEL_RETHROW; } } - + if (this->tailBlock == endBlock) { assert(currentTailIndex == newTailIndex); break; } this->tailBlock = this->tailBlock->next; } - - if (!MOODYCAMEL_NOEXCEPT_CTOR(T, decltype(*itemFirst), new (nullptr) T(details::deref_noexcept(itemFirst))) && firstAllocatedBlock != nullptr) { - blockIndex.load(std::memory_order_relaxed)->front.store((pr_blockIndexFront - 1) & (pr_blockIndexSize - 1), std::memory_order_release); + + MOODYCAMEL_CONSTEXPR_IF (!MOODYCAMEL_NOEXCEPT_CTOR(T, decltype(*itemFirst), new (static_cast<T*>(nullptr)) T(details::deref_noexcept(itemFirst)))) { + if (firstAllocatedBlock != nullptr) + blockIndex.load(std::memory_order_relaxed)->front.store((pr_blockIndexFront - 1) & (pr_blockIndexSize - 1), std::memory_order_release); } - + this->tailIndex.store(newTailIndex, std::memory_order_release); return true; } - + template<typename It> size_t dequeue_bulk(It& itemFirst, size_t max) { @@ -2163,10 +2253,9 @@ private: if (details::circular_less_than<size_t>(0, desiredCount)) { desiredCount = desiredCount < max ? desiredCount : max; std::atomic_thread_fence(std::memory_order_acquire); - + auto myDequeueCount = this->dequeueOptimisticCount.fetch_add(desiredCount, std::memory_order_relaxed); - assert(overcommit <= myDequeueCount); - + tail = this->tailIndex.load(std::memory_order_acquire); auto actualCount = static_cast<size_t>(tail - (myDequeueCount - overcommit)); if (details::circular_less_than<size_t>(0, actualCount)) { @@ -2174,25 +2263,25 @@ private: if (actualCount < desiredCount) { this->dequeueOvercommit.fetch_add(desiredCount - actualCount, std::memory_order_release); } - + // Get the first index. Note that since there's guaranteed to be at least actualCount elements, this // will never exceed tail. auto firstIndex = this->headIndex.fetch_add(actualCount, std::memory_order_acq_rel); - + // Determine which block the first element is in auto localBlockIndex = blockIndex.load(std::memory_order_acquire); auto localBlockIndexHead = localBlockIndex->front.load(std::memory_order_acquire); - + auto headBase = localBlockIndex->entries[localBlockIndexHead].base; auto firstBlockBaseIndex = firstIndex & ~static_cast<index_t>(BLOCK_SIZE - 1); - auto offset = static_cast<size_t>(static_cast<typename std::make_signed<index_t>::type>(firstBlockBaseIndex - headBase) / BLOCK_SIZE); + auto offset = static_cast<size_t>(static_cast<typename std::make_signed<index_t>::type>(firstBlockBaseIndex - headBase) / static_cast<typename std::make_signed<index_t>::type>(BLOCK_SIZE)); auto indexIndex = (localBlockIndexHead + offset) & (localBlockIndex->size - 1); - + // Iterate the blocks and dequeue auto index = firstIndex; do { auto firstIndexInBlock = index; - auto endIndex = (index & ~static_cast<index_t>(BLOCK_SIZE - 1)) + static_cast<index_t>(BLOCK_SIZE); + index_t endIndex = (index & ~static_cast<index_t>(BLOCK_SIZE - 1)) + static_cast<index_t>(BLOCK_SIZE); endIndex = details::circular_less_than<index_t>(firstIndex + static_cast<index_t>(actualCount), endIndex) ? 
firstIndex + static_cast<index_t>(actualCount) : endIndex; auto block = localBlockIndex->entries[indexIndex].block; if (MOODYCAMEL_NOEXCEPT_ASSIGN(T, T&&, details::deref_noexcept(itemFirst) = std::move((*(*block)[index])))) { @@ -2224,19 +2313,19 @@ private: } block->ConcurrentQueue::Block::template set_many_empty<explicit_context>(firstIndexInBlock, static_cast<size_t>(endIndex - firstIndexInBlock)); indexIndex = (indexIndex + 1) & (localBlockIndex->size - 1); - + firstIndexInBlock = index; endIndex = (index & ~static_cast<index_t>(BLOCK_SIZE - 1)) + static_cast<index_t>(BLOCK_SIZE); endIndex = details::circular_less_than<index_t>(firstIndex + static_cast<index_t>(actualCount), endIndex) ? firstIndex + static_cast<index_t>(actualCount) : endIndex; } while (index != firstIndex + actualCount); - + MOODYCAMEL_RETHROW; } } block->ConcurrentQueue::Block::template set_many_empty<explicit_context>(firstIndexInBlock, static_cast<size_t>(endIndex - firstIndexInBlock)); indexIndex = (indexIndex + 1) & (localBlockIndex->size - 1); } while (index != firstIndex + actualCount); - + return actualCount; } else { @@ -2244,17 +2333,17 @@ private: this->dequeueOvercommit.fetch_add(desiredCount, std::memory_order_release); } } - + return 0; } - + private: struct BlockIndexEntry { index_t base; Block* block; }; - + struct BlockIndexHeader { size_t size; @@ -2262,12 +2351,12 @@ private: BlockIndexEntry* entries; void* prev; }; - - + + bool new_block_index(size_t numberOfFilledSlotsToExpose) { auto prevBlockSizeMask = pr_blockIndexSize - 1; - + // Create the new block pr_blockIndexSize <<= 1; auto newRawPtr = static_cast<char*>((Traits::malloc)(sizeof(BlockIndexHeader) + std::alignment_of<BlockIndexEntry>::value - 1 + sizeof(BlockIndexEntry) * pr_blockIndexSize)); @@ -2275,9 +2364,9 @@ private: pr_blockIndexSize >>= 1; // Reset to allow graceful retry return false; } - + auto newBlockIndexEntries = reinterpret_cast<BlockIndexEntry*>(details::align_for<BlockIndexEntry>(newRawPtr + sizeof(BlockIndexHeader))); - + // Copy in all the old indices, if any size_t j = 0; if (pr_blockIndexSlotsUsed != 0) { @@ -2287,72 +2376,72 @@ private: i = (i + 1) & prevBlockSizeMask; } while (i != pr_blockIndexFront); } - + // Update everything auto header = new (newRawPtr) BlockIndexHeader; header->size = pr_blockIndexSize; header->front.store(numberOfFilledSlotsToExpose - 1, std::memory_order_relaxed); header->entries = newBlockIndexEntries; header->prev = pr_blockIndexRaw; // we link the new block to the old one so we can free it later - + pr_blockIndexFront = j; pr_blockIndexEntries = newBlockIndexEntries; pr_blockIndexRaw = newRawPtr; blockIndex.store(header, std::memory_order_release); - + return true; } - + private: std::atomic<BlockIndexHeader*> blockIndex; - + // To be used by producer only -- consumer must use the ones in referenced by blockIndex size_t pr_blockIndexSlotsUsed; size_t pr_blockIndexSize; size_t pr_blockIndexFront; // Next slot (not current) BlockIndexEntry* pr_blockIndexEntries; void* pr_blockIndexRaw; - + #ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG public: ExplicitProducer* nextExplicitProducer; private: #endif - -#if MCDBGQ_TRACKMEM + +#ifdef MCDBGQ_TRACKMEM friend struct MemStats; #endif }; - - + + ////////////////////////////////// // Implicit queue ////////////////////////////////// - + struct ImplicitProducer : public ProducerBase - { - ImplicitProducer(ConcurrentQueue* parent) : - ProducerBase(parent, false), + { + ImplicitProducer(ConcurrentQueue* parent_) : + ProducerBase(parent_, false), 
nextBlockIndexCapacity(IMPLICIT_INITIAL_INDEX_SIZE), blockIndex(nullptr) { new_block_index(); } - + ~ImplicitProducer() { // Note that since we're in the destructor we can assume that all enqueue/dequeue operations // completed already; this means that all undequeued elements are placed contiguously across // contiguous blocks, and that only the first and last remaining blocks can be only partially // empty (all other remaining blocks must be completely full). - + #ifdef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED // Unregister ourselves for thread termination notification if (!this->inactive.load(std::memory_order_relaxed)) { details::ThreadExitNotifier::unsubscribe(&threadExitListener); } #endif - + // Destroy all remaining elements! auto tail = this->tailIndex.load(std::memory_order_relaxed); auto index = this->headIndex.load(std::memory_order_relaxed); @@ -2365,10 +2454,10 @@ private: // Free the old block this->parent->add_block_to_free_list(block); } - + block = get_block_index_entry_for_index(index)->value.load(std::memory_order_relaxed); } - + ((*block)[index])->~T(); ++index; } @@ -2378,7 +2467,7 @@ private: if (this->tailBlock != nullptr && (forceFreeLastBlock || (tail & static_cast<index_t>(BLOCK_SIZE - 1)) != 0)) { this->parent->add_block_to_free_list(this->tailBlock); } - + // Destroy block index auto localBlockIndex = blockIndex.load(std::memory_order_relaxed); if (localBlockIndex != nullptr) { @@ -2393,7 +2482,7 @@ private: } while (localBlockIndex != nullptr); } } - + template<AllocationMode allocMode, typename U> inline bool enqueue(U&& element) { @@ -2406,7 +2495,7 @@ private: if (!details::circular_less_than<index_t>(head, currentTailIndex + BLOCK_SIZE) || (MAX_SUBQUEUE_SIZE != details::const_numeric_max<size_t>::value && (MAX_SUBQUEUE_SIZE == 0 || MAX_SUBQUEUE_SIZE - BLOCK_SIZE < currentTailIndex - head))) { return false; } -#if MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX +#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX debug::DebugLock lock(mutex); #endif // Find out where we'll be inserting this block in the block index @@ -2414,7 +2503,7 @@ private: if (!insert_block_index_entry<allocMode>(idxEntry, currentTailIndex)) { return false; } - + // Get ahold of a new block auto newBlock = this->parent->ConcurrentQueue::template requisition_block<allocMode>(); if (newBlock == nullptr) { @@ -2422,12 +2511,12 @@ private: idxEntry->value.store(nullptr, std::memory_order_relaxed); return false; } -#if MCDBGQ_TRACKMEM +#ifdef MCDBGQ_TRACKMEM newBlock->owner = this; #endif newBlock->ConcurrentQueue::Block::template reset_empty<implicit_context>(); - - if (!MOODYCAMEL_NOEXCEPT_CTOR(T, U, new (nullptr) T(std::forward<U>(element)))) { + + MOODYCAMEL_CONSTEXPR_IF (!MOODYCAMEL_NOEXCEPT_CTOR(T, U, new (static_cast<T*>(nullptr)) T(std::forward<U>(element)))) { // May throw, try to insert now before we publish the fact that we have this new block MOODYCAMEL_TRY { new ((*newBlock)[currentTailIndex]) T(std::forward<U>(element)); @@ -2439,25 +2528,25 @@ private: MOODYCAMEL_RETHROW; } } - + // Insert the new block into the index idxEntry->value.store(newBlock, std::memory_order_relaxed); - + this->tailBlock = newBlock; - - if (!MOODYCAMEL_NOEXCEPT_CTOR(T, U, new (nullptr) T(std::forward<U>(element)))) { + + MOODYCAMEL_CONSTEXPR_IF (!MOODYCAMEL_NOEXCEPT_CTOR(T, U, new (static_cast<T*>(nullptr)) T(std::forward<U>(element)))) { this->tailIndex.store(newTailIndex, std::memory_order_release); return true; } } - + // Enqueue new ((*this->tailBlock)[currentTailIndex]) T(std::forward<U>(element)); - + 
this->tailIndex.store(newTailIndex, std::memory_order_release); return true; } - + template<typename U> bool dequeue(U& element) { @@ -2466,22 +2555,21 @@ private: index_t overcommit = this->dequeueOvercommit.load(std::memory_order_relaxed); if (details::circular_less_than<index_t>(this->dequeueOptimisticCount.load(std::memory_order_relaxed) - overcommit, tail)) { std::atomic_thread_fence(std::memory_order_acquire); - + index_t myDequeueCount = this->dequeueOptimisticCount.fetch_add(1, std::memory_order_relaxed); - assert(overcommit <= myDequeueCount); tail = this->tailIndex.load(std::memory_order_acquire); - if (details::likely(details::circular_less_than<index_t>(myDequeueCount - overcommit, tail))) { + if ((details::likely)(details::circular_less_than<index_t>(myDequeueCount - overcommit, tail))) { index_t index = this->headIndex.fetch_add(1, std::memory_order_acq_rel); - + // Determine which block the element is in auto entry = get_block_index_entry_for_index(index); - + // Dequeue auto block = entry->value.load(std::memory_order_relaxed); auto& el = *((*block)[index]); - + if (!MOODYCAMEL_NOEXCEPT_ASSIGN(T, T&&, element = std::move(el))) { -#if MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX +#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX // Note: Acquiring the mutex with every dequeue instead of only when a block // is released is very sub-optimal, but it is, after all, purely debug code. debug::DebugLock lock(producer->mutex); @@ -2491,7 +2579,7 @@ private: index_t index; BlockIndexEntry* entry; ConcurrentQueue* parent; - + ~Guard() { (*block)[index]->~T(); @@ -2501,16 +2589,16 @@ private: } } } guard = { block, index, entry, this->parent }; - - element = std::move(el); + + element = std::move(el); // NOLINT } else { - element = std::move(el); - el.~T(); - + element = std::move(el); // NOLINT + el.~T(); // NOLINT + if (block->ConcurrentQueue::Block::template set_empty<implicit_context>(index)) { { -#if MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX +#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX debug::DebugLock lock(mutex); #endif // Add the block back into the global free pool (and remove from block index) @@ -2519,45 +2607,49 @@ private: this->parent->add_block_to_free_list(block); // releases the above store } } - + return true; } else { this->dequeueOvercommit.fetch_add(1, std::memory_order_release); } } - + return false; } - + +#ifdef _MSC_VER +#pragma warning(push) +#pragma warning(disable: 4706) // assignment within conditional expression +#endif template<AllocationMode allocMode, typename It> bool enqueue_bulk(It itemFirst, size_t count) { // First, we need to make sure we have enough room to enqueue all of the elements; // this means pre-allocating blocks and putting them in the block index (but only if // all the allocations succeeded). - + // Note that the tailBlock we start off with may not be owned by us any more; // this happens if it was filled up exactly to the top (setting tailIndex to // the first index of the next block which is not yet allocated), then dequeued // completely (putting it on the free list) before we enqueue again. 
- + index_t startTailIndex = this->tailIndex.load(std::memory_order_relaxed); auto startBlock = this->tailBlock; Block* firstAllocatedBlock = nullptr; auto endBlock = this->tailBlock; - + // Figure out how many blocks we'll need to allocate, and do so size_t blockBaseDiff = ((startTailIndex + count - 1) & ~static_cast<index_t>(BLOCK_SIZE - 1)) - ((startTailIndex - 1) & ~static_cast<index_t>(BLOCK_SIZE - 1)); index_t currentTailIndex = (startTailIndex - 1) & ~static_cast<index_t>(BLOCK_SIZE - 1); if (blockBaseDiff > 0) { -#if MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX +#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX debug::DebugLock lock(mutex); #endif do { blockBaseDiff -= static_cast<index_t>(BLOCK_SIZE); currentTailIndex += static_cast<index_t>(BLOCK_SIZE); - + // Find out where we'll be inserting this block in the block index BlockIndexEntry* idxEntry = nullptr; // initialization here unnecessary but compiler can't always tell Block* newBlock; @@ -2565,6 +2657,7 @@ private: auto head = this->headIndex.load(std::memory_order_relaxed); assert(!details::circular_less_than<index_t>(currentTailIndex, head)); bool full = !details::circular_less_than<index_t>(head, currentTailIndex + BLOCK_SIZE) || (MAX_SUBQUEUE_SIZE != details::const_numeric_max<size_t>::value && (MAX_SUBQUEUE_SIZE == 0 || MAX_SUBQUEUE_SIZE - BLOCK_SIZE < currentTailIndex - head)); + if (full || !(indexInserted = insert_block_index_entry<allocMode>(idxEntry, currentTailIndex)) || (newBlock = this->parent->ConcurrentQueue::template requisition_block<allocMode>()) == nullptr) { // Index allocation or block allocation failed; revert any other allocations // and index insertions done so far for this operation @@ -2581,19 +2674,19 @@ private: } this->parent->add_blocks_to_free_list(firstAllocatedBlock); this->tailBlock = startBlock; - + return false; } - -#if MCDBGQ_TRACKMEM + +#ifdef MCDBGQ_TRACKMEM newBlock->owner = this; #endif newBlock->ConcurrentQueue::Block::template reset_empty<implicit_context>(); newBlock->next = nullptr; - + // Insert the new block into the index idxEntry->value.store(newBlock, std::memory_order_relaxed); - + // Store the chain of blocks so that we can undo if later allocations fail, // and so that we can find the blocks when we do the actual enqueueing if ((startTailIndex & static_cast<index_t>(BLOCK_SIZE - 1)) != 0 || firstAllocatedBlock != nullptr) { @@ -2605,7 +2698,7 @@ private: firstAllocatedBlock = firstAllocatedBlock == nullptr ? 
newBlock : firstAllocatedBlock; } while (blockBaseDiff > 0); } - + // Enqueue, one block at a time index_t newTailIndex = startTailIndex + static_cast<index_t>(count); currentTailIndex = startTailIndex; @@ -2615,11 +2708,11 @@ private: this->tailBlock = firstAllocatedBlock; } while (true) { - auto stopIndex = (currentTailIndex & ~static_cast<index_t>(BLOCK_SIZE - 1)) + static_cast<index_t>(BLOCK_SIZE); + index_t stopIndex = (currentTailIndex & ~static_cast<index_t>(BLOCK_SIZE - 1)) + static_cast<index_t>(BLOCK_SIZE); if (details::circular_less_than<index_t>(newTailIndex, stopIndex)) { stopIndex = newTailIndex; } - if (MOODYCAMEL_NOEXCEPT_CTOR(T, decltype(*itemFirst), new (nullptr) T(details::deref_noexcept(itemFirst)))) { + MOODYCAMEL_CONSTEXPR_IF (MOODYCAMEL_NOEXCEPT_CTOR(T, decltype(*itemFirst), new (static_cast<T*>(nullptr)) T(details::deref_noexcept(itemFirst)))) { while (currentTailIndex != stopIndex) { new ((*this->tailBlock)[currentTailIndex++]) T(*itemFirst++); } @@ -2627,7 +2720,7 @@ private: else { MOODYCAMEL_TRY { while (currentTailIndex != stopIndex) { - new ((*this->tailBlock)[currentTailIndex]) T(details::nomove_if<(bool)!MOODYCAMEL_NOEXCEPT_CTOR(T, decltype(*itemFirst), new (nullptr) T(details::deref_noexcept(itemFirst)))>::eval(*itemFirst)); + new ((*this->tailBlock)[currentTailIndex]) T(details::nomove_if<!MOODYCAMEL_NOEXCEPT_CTOR(T, decltype(*itemFirst), new (static_cast<T*>(nullptr)) T(details::deref_noexcept(itemFirst)))>::eval(*itemFirst)); ++currentTailIndex; ++itemFirst; } @@ -2635,7 +2728,7 @@ private: MOODYCAMEL_CATCH (...) { auto constructedStopIndex = currentTailIndex; auto lastBlockEnqueued = this->tailBlock; - + if (!details::is_trivially_destructible<T>::value) { auto block = startBlock; if ((startTailIndex & static_cast<index_t>(BLOCK_SIZE - 1)) == 0) { @@ -2656,7 +2749,7 @@ private: block = block->next; } } - + currentTailIndex = (startTailIndex - 1) & ~static_cast<index_t>(BLOCK_SIZE - 1); for (auto block = firstAllocatedBlock; block != nullptr; block = block->next) { currentTailIndex += static_cast<index_t>(BLOCK_SIZE); @@ -2669,7 +2762,7 @@ private: MOODYCAMEL_RETHROW; } } - + if (this->tailBlock == endBlock) { assert(currentTailIndex == newTailIndex); break; @@ -2679,7 +2772,10 @@ private: this->tailIndex.store(newTailIndex, std::memory_order_release); return true; } - +#ifdef _MSC_VER +#pragma warning(pop) +#endif + template<typename It> size_t dequeue_bulk(It& itemFirst, size_t max) { @@ -2689,10 +2785,9 @@ private: if (details::circular_less_than<size_t>(0, desiredCount)) { desiredCount = desiredCount < max ? desiredCount : max; std::atomic_thread_fence(std::memory_order_acquire); - + auto myDequeueCount = this->dequeueOptimisticCount.fetch_add(desiredCount, std::memory_order_relaxed); - assert(overcommit <= myDequeueCount); - + tail = this->tailIndex.load(std::memory_order_acquire); auto actualCount = static_cast<size_t>(tail - (myDequeueCount - overcommit)); if (details::circular_less_than<size_t>(0, actualCount)) { @@ -2700,20 +2795,20 @@ private: if (actualCount < desiredCount) { this->dequeueOvercommit.fetch_add(desiredCount - actualCount, std::memory_order_release); } - + // Get the first index. Note that since there's guaranteed to be at least actualCount elements, this // will never exceed tail. 
auto firstIndex = this->headIndex.fetch_add(actualCount, std::memory_order_acq_rel); - + // Iterate the blocks and dequeue auto index = firstIndex; BlockIndexHeader* localBlockIndex; auto indexIndex = get_block_index_index_for_index(index, localBlockIndex); do { auto blockStartIndex = index; - auto endIndex = (index & ~static_cast<index_t>(BLOCK_SIZE - 1)) + static_cast<index_t>(BLOCK_SIZE); + index_t endIndex = (index & ~static_cast<index_t>(BLOCK_SIZE - 1)) + static_cast<index_t>(BLOCK_SIZE); endIndex = details::circular_less_than<index_t>(firstIndex + static_cast<index_t>(actualCount), endIndex) ? firstIndex + static_cast<index_t>(actualCount) : endIndex; - + auto entry = localBlockIndex->index[indexIndex]; auto block = entry->value.load(std::memory_order_relaxed); if (MOODYCAMEL_NOEXCEPT_ASSIGN(T, T&&, details::deref_noexcept(itemFirst) = std::move((*(*block)[index])))) { @@ -2741,27 +2836,27 @@ private: while (index != endIndex) { (*block)[index++]->~T(); } - + if (block->ConcurrentQueue::Block::template set_many_empty<implicit_context>(blockStartIndex, static_cast<size_t>(endIndex - blockStartIndex))) { -#if MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX +#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX debug::DebugLock lock(mutex); #endif entry->value.store(nullptr, std::memory_order_relaxed); this->parent->add_block_to_free_list(block); } indexIndex = (indexIndex + 1) & (localBlockIndex->capacity - 1); - + blockStartIndex = index; endIndex = (index & ~static_cast<index_t>(BLOCK_SIZE - 1)) + static_cast<index_t>(BLOCK_SIZE); endIndex = details::circular_less_than<index_t>(firstIndex + static_cast<index_t>(actualCount), endIndex) ? firstIndex + static_cast<index_t>(actualCount) : endIndex; } while (index != firstIndex + actualCount); - + MOODYCAMEL_RETHROW; } } if (block->ConcurrentQueue::Block::template set_many_empty<implicit_context>(blockStartIndex, static_cast<size_t>(endIndex - blockStartIndex))) { { -#if MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX +#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX debug::DebugLock lock(mutex); #endif // Note that the set_many_empty above did a release, meaning that anybody who acquires the block @@ -2772,27 +2867,27 @@ private: } indexIndex = (indexIndex + 1) & (localBlockIndex->capacity - 1); } while (index != firstIndex + actualCount); - + return actualCount; } else { this->dequeueOvercommit.fetch_add(desiredCount, std::memory_order_release); } } - + return 0; } - + private: // The block size must be > 1, so any number with the low bit set is an invalid block base index static const index_t INVALID_BLOCK_BASE = 1; - + struct BlockIndexEntry { std::atomic<index_t> key; std::atomic<Block*> value; }; - + struct BlockIndexHeader { size_t capacity; @@ -2801,7 +2896,7 @@ private: BlockIndexEntry** index; BlockIndexHeader* prev; }; - + template<AllocationMode allocMode> inline bool insert_block_index_entry(BlockIndexEntry*& idxEntry, index_t blockStartIndex) { @@ -2809,45 +2904,50 @@ private: if (localBlockIndex == nullptr) { return false; // this can happen if new_block_index failed in the constructor } - auto newTail = (localBlockIndex->tail.load(std::memory_order_relaxed) + 1) & (localBlockIndex->capacity - 1); + size_t newTail = (localBlockIndex->tail.load(std::memory_order_relaxed) + 1) & (localBlockIndex->capacity - 1); idxEntry = localBlockIndex->index[newTail]; if (idxEntry->key.load(std::memory_order_relaxed) == INVALID_BLOCK_BASE || idxEntry->value.load(std::memory_order_relaxed) == nullptr) { - + idxEntry->key.store(blockStartIndex, 
std::memory_order_relaxed); localBlockIndex->tail.store(newTail, std::memory_order_release); return true; } - + // No room in the old block index, try to allocate another one! - if (allocMode == CannotAlloc || !new_block_index()) { + MOODYCAMEL_CONSTEXPR_IF (allocMode == CannotAlloc) { return false; } - localBlockIndex = blockIndex.load(std::memory_order_relaxed); - newTail = (localBlockIndex->tail.load(std::memory_order_relaxed) + 1) & (localBlockIndex->capacity - 1); - idxEntry = localBlockIndex->index[newTail]; - assert(idxEntry->key.load(std::memory_order_relaxed) == INVALID_BLOCK_BASE); - idxEntry->key.store(blockStartIndex, std::memory_order_relaxed); - localBlockIndex->tail.store(newTail, std::memory_order_release); - return true; + else if (!new_block_index()) { + return false; + } + else { + localBlockIndex = blockIndex.load(std::memory_order_relaxed); + newTail = (localBlockIndex->tail.load(std::memory_order_relaxed) + 1) & (localBlockIndex->capacity - 1); + idxEntry = localBlockIndex->index[newTail]; + assert(idxEntry->key.load(std::memory_order_relaxed) == INVALID_BLOCK_BASE); + idxEntry->key.store(blockStartIndex, std::memory_order_relaxed); + localBlockIndex->tail.store(newTail, std::memory_order_release); + return true; + } } - + inline void rewind_block_index_tail() { auto localBlockIndex = blockIndex.load(std::memory_order_relaxed); localBlockIndex->tail.store((localBlockIndex->tail.load(std::memory_order_relaxed) - 1) & (localBlockIndex->capacity - 1), std::memory_order_relaxed); } - + inline BlockIndexEntry* get_block_index_entry_for_index(index_t index) const { BlockIndexHeader* localBlockIndex; auto idx = get_block_index_index_for_index(index, localBlockIndex); return localBlockIndex->index[idx]; } - + inline size_t get_block_index_index_for_index(index_t index, BlockIndexHeader*& localBlockIndex) const { -#if MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX +#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX debug::DebugLock lock(mutex); #endif index &= ~static_cast<index_t>(BLOCK_SIZE - 1); @@ -2857,12 +2957,12 @@ private: assert(tailBase != INVALID_BLOCK_BASE); // Note: Must use division instead of shift because the index may wrap around, causing a negative // offset, whose negativity we want to preserve - auto offset = static_cast<size_t>(static_cast<typename std::make_signed<index_t>::type>(index - tailBase) / BLOCK_SIZE); + auto offset = static_cast<size_t>(static_cast<typename std::make_signed<index_t>::type>(index - tailBase) / static_cast<typename std::make_signed<index_t>::type>(BLOCK_SIZE)); size_t idx = (tail + offset) & (localBlockIndex->capacity - 1); assert(localBlockIndex->index[idx]->key.load(std::memory_order_relaxed) == index && localBlockIndex->index[idx]->value.load(std::memory_order_relaxed) != nullptr); return idx; } - + bool new_block_index() { auto prev = blockIndex.load(std::memory_order_relaxed); @@ -2875,7 +2975,7 @@ private: if (raw == nullptr) { return false; } - + auto header = new (raw) BlockIndexHeader; auto entries = reinterpret_cast<BlockIndexEntry*>(details::align_for<BlockIndexEntry>(raw + sizeof(BlockIndexHeader))); auto index = reinterpret_cast<BlockIndexEntry**>(details::align_for<BlockIndexEntry*>(reinterpret_cast<char*>(entries) + sizeof(BlockIndexEntry) * entryCount)); @@ -2899,14 +2999,14 @@ private: header->index = index; header->capacity = nextBlockIndexCapacity; header->tail.store((prevCapacity - 1) & (nextBlockIndexCapacity - 1), std::memory_order_relaxed); - + blockIndex.store(header, std::memory_order_release); - + 
nextBlockIndexCapacity <<= 1; - + return true; } - + private: size_t nextBlockIndexCapacity; std::atomic<BlockIndexHeader*> blockIndex; @@ -2916,26 +3016,26 @@ private: details::ThreadExitListener threadExitListener; private: #endif - + #ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG public: ImplicitProducer* nextImplicitProducer; private: #endif -#if MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX +#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX mutable debug::DebugMutex mutex; #endif -#if MCDBGQ_TRACKMEM +#ifdef MCDBGQ_TRACKMEM friend struct MemStats; #endif }; - - + + ////////////////////////////////// // Block pool manipulation ////////////////////////////////// - + void populate_initial_block_list(size_t blockCount) { initialBlockPoolSize = blockCount; @@ -2943,7 +3043,7 @@ private: initialBlockPool = nullptr; return; } - + initialBlockPool = create_array<Block>(blockCount); if (initialBlockPool == nullptr) { initialBlockPoolSize = 0; @@ -2952,26 +3052,31 @@ private: initialBlockPool[i].dynamicallyAllocated = false; } } - + inline Block* try_get_block_from_initial_pool() { if (initialBlockPoolIndex.load(std::memory_order_relaxed) >= initialBlockPoolSize) { return nullptr; } - + auto index = initialBlockPoolIndex.fetch_add(1, std::memory_order_relaxed); - + return index < initialBlockPoolSize ? (initialBlockPool + index) : nullptr; } - + inline void add_block_to_free_list(Block* block) { -#if MCDBGQ_TRACKMEM +#ifdef MCDBGQ_TRACKMEM block->owner = nullptr; #endif - freeList.add(block); + if (!Traits::RECYCLE_ALLOCATED_BLOCKS && block->dynamicallyAllocated) { + destroy(block); + } + else { + freeList.add(block); + } } - + inline void add_blocks_to_free_list(Block* block) { while (block != nullptr) { @@ -2980,12 +3085,12 @@ private: block = next; } } - + inline Block* try_get_block_from_free_list() { return freeList.try_get(); } - + // Gets a free block from one of the memory pools, or allocates a new one (if applicable) template<AllocationMode canAlloc> Block* requisition_block() @@ -2994,21 +3099,22 @@ private: if (block != nullptr) { return block; } - + block = try_get_block_from_free_list(); if (block != nullptr) { return block; } - - if (canAlloc == CanAlloc) { + + MOODYCAMEL_CONSTEXPR_IF (canAlloc == CanAlloc) { return create<Block>(); } - - return nullptr; + else { + return nullptr; + } } - -#if MCDBGQ_TRACKMEM + +#ifdef MCDBGQ_TRACKMEM public: struct MemStats { size_t allocatedBlocks; @@ -3023,28 +3129,28 @@ private: size_t queueClassBytes; size_t implicitBlockIndexBytes; size_t explicitBlockIndexBytes; - + friend class ConcurrentQueue; - + private: static MemStats getFor(ConcurrentQueue* q) { MemStats stats = { 0 }; - + stats.elementsEnqueued = q->size_approx(); - + auto block = q->freeList.head_unsafe(); while (block != nullptr) { ++stats.allocatedBlocks; ++stats.freeBlocks; block = block->freeListNext.load(std::memory_order_relaxed); } - + for (auto ptr = q->producerListTail.load(std::memory_order_acquire); ptr != nullptr; ptr = ptr->next_prod()) { bool implicit = dynamic_cast<ImplicitProducer*>(ptr) != nullptr; stats.implicitProducers += implicit ? 1 : 0; stats.explicitProducers += implicit ? 0 : 1; - + if (implicit) { auto prod = static_cast<ImplicitProducer*>(ptr); stats.queueClassBytes += sizeof(ImplicitProducer); @@ -3092,18 +3198,18 @@ private: } } } - + auto freeOnInitialPool = q->initialBlockPoolIndex.load(std::memory_order_relaxed) >= q->initialBlockPoolSize ? 
0 : q->initialBlockPoolSize - q->initialBlockPoolIndex.load(std::memory_order_relaxed); stats.allocatedBlocks += freeOnInitialPool; stats.freeBlocks += freeOnInitialPool; - + stats.blockClassBytes = sizeof(Block) * stats.allocatedBlocks; stats.queueClassBytes += sizeof(ConcurrentQueue); - + return stats; } }; - + // For debugging only. Not thread-safe. MemStats getMemStats() { @@ -3112,21 +3218,15 @@ private: private: friend struct MemStats; #endif - - + + ////////////////////////////////// // Producer list manipulation - ////////////////////////////////// - + ////////////////////////////////// + ProducerBase* recycle_or_create_producer(bool isExplicit) { - bool recycled; - return recycle_or_create_producer(isExplicit, recycled); - } - - ProducerBase* recycle_or_create_producer(bool isExplicit, bool& recycled) - { -#if MCDBGQ_NOLOCKFREE_IMPLICITPRODHASH +#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODHASH debug::DebugLock lock(implicitProdMutex); #endif // Try to re-use one first @@ -3135,31 +3235,29 @@ private: bool expected = true; if (ptr->inactive.compare_exchange_strong(expected, /* desired */ false, std::memory_order_acquire, std::memory_order_relaxed)) { // We caught one! It's been marked as activated, the caller can have it - recycled = true; return ptr; } } } - - recycled = false; + return add_producer(isExplicit ? static_cast<ProducerBase*>(create<ExplicitProducer>(this)) : create<ImplicitProducer>(this)); } - + ProducerBase* add_producer(ProducerBase* producer) { // Handle failed memory allocation if (producer == nullptr) { return nullptr; } - + producerCount.fetch_add(1, std::memory_order_relaxed); - + // Add it to the lock-free list auto prevTail = producerListTail.load(std::memory_order_relaxed); do { producer->next = prevTail; } while (!producerListTail.compare_exchange_weak(prevTail, producer, std::memory_order_release, std::memory_order_relaxed)); - + #ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG if (producer->isExplicit) { auto prevTailExplicit = explicitProducers.load(std::memory_order_relaxed); @@ -3174,10 +3272,10 @@ private: } while (!implicitProducers.compare_exchange_weak(prevTailImplicit, static_cast<ImplicitProducer*>(producer), std::memory_order_release, std::memory_order_relaxed)); } #endif - + return producer; } - + void reown_producers() { // After another instance is moved-into/swapped-with this one, all the @@ -3187,31 +3285,31 @@ private: ptr->parent = this; } } - - + + ////////////////////////////////// // Implicit producer hash ////////////////////////////////// - + struct ImplicitProducerKVP { std::atomic<details::thread_id_t> key; ImplicitProducer* value; // No need for atomicity since it's only read by the thread that sets it in the first place - + ImplicitProducerKVP() : value(nullptr) { } - + ImplicitProducerKVP(ImplicitProducerKVP&& other) MOODYCAMEL_NOEXCEPT { key.store(other.key.load(std::memory_order_relaxed), std::memory_order_relaxed); value = other.value; } - + inline ImplicitProducerKVP& operator=(ImplicitProducerKVP&& other) MOODYCAMEL_NOEXCEPT { swap(other); return *this; } - + inline void swap(ImplicitProducerKVP& other) MOODYCAMEL_NOEXCEPT { if (this != &other) { @@ -3220,93 +3318,100 @@ private: } } }; - + template<typename XT, typename XTraits> friend void moodycamel::swap(typename ConcurrentQueue<XT, XTraits>::ImplicitProducerKVP&, typename ConcurrentQueue<XT, XTraits>::ImplicitProducerKVP&) MOODYCAMEL_NOEXCEPT; - + struct ImplicitProducerHash { size_t capacity; ImplicitProducerKVP* entries; ImplicitProducerHash* prev; }; - + inline void 
populate_initial_implicit_producer_hash() { - if (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) return; - - implicitProducerHashCount.store(0, std::memory_order_relaxed); - auto hash = &initialImplicitProducerHash; - hash->capacity = INITIAL_IMPLICIT_PRODUCER_HASH_SIZE; - hash->entries = &initialImplicitProducerHashEntries[0]; - for (size_t i = 0; i != INITIAL_IMPLICIT_PRODUCER_HASH_SIZE; ++i) { - initialImplicitProducerHashEntries[i].key.store(details::invalid_thread_id, std::memory_order_relaxed); + MOODYCAMEL_CONSTEXPR_IF (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) { + return; + } + else { + implicitProducerHashCount.store(0, std::memory_order_relaxed); + auto hash = &initialImplicitProducerHash; + hash->capacity = INITIAL_IMPLICIT_PRODUCER_HASH_SIZE; + hash->entries = &initialImplicitProducerHashEntries[0]; + for (size_t i = 0; i != INITIAL_IMPLICIT_PRODUCER_HASH_SIZE; ++i) { + initialImplicitProducerHashEntries[i].key.store(details::invalid_thread_id, std::memory_order_relaxed); + } + hash->prev = nullptr; + implicitProducerHash.store(hash, std::memory_order_relaxed); } - hash->prev = nullptr; - implicitProducerHash.store(hash, std::memory_order_relaxed); } - + void swap_implicit_producer_hashes(ConcurrentQueue& other) { - if (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) return; - - // Swap (assumes our implicit producer hash is initialized) - initialImplicitProducerHashEntries.swap(other.initialImplicitProducerHashEntries); - initialImplicitProducerHash.entries = &initialImplicitProducerHashEntries[0]; - other.initialImplicitProducerHash.entries = &other.initialImplicitProducerHashEntries[0]; - - details::swap_relaxed(implicitProducerHashCount, other.implicitProducerHashCount); - - details::swap_relaxed(implicitProducerHash, other.implicitProducerHash); - if (implicitProducerHash.load(std::memory_order_relaxed) == &other.initialImplicitProducerHash) { - implicitProducerHash.store(&initialImplicitProducerHash, std::memory_order_relaxed); + MOODYCAMEL_CONSTEXPR_IF (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) { + return; } else { - ImplicitProducerHash* hash; - for (hash = implicitProducerHash.load(std::memory_order_relaxed); hash->prev != &other.initialImplicitProducerHash; hash = hash->prev) { - continue; + // Swap (assumes our implicit producer hash is initialized) + initialImplicitProducerHashEntries.swap(other.initialImplicitProducerHashEntries); + initialImplicitProducerHash.entries = &initialImplicitProducerHashEntries[0]; + other.initialImplicitProducerHash.entries = &other.initialImplicitProducerHashEntries[0]; + + details::swap_relaxed(implicitProducerHashCount, other.implicitProducerHashCount); + + details::swap_relaxed(implicitProducerHash, other.implicitProducerHash); + if (implicitProducerHash.load(std::memory_order_relaxed) == &other.initialImplicitProducerHash) { + implicitProducerHash.store(&initialImplicitProducerHash, std::memory_order_relaxed); } - hash->prev = &initialImplicitProducerHash; - } - if (other.implicitProducerHash.load(std::memory_order_relaxed) == &initialImplicitProducerHash) { - other.implicitProducerHash.store(&other.initialImplicitProducerHash, std::memory_order_relaxed); - } - else { - ImplicitProducerHash* hash; - for (hash = other.implicitProducerHash.load(std::memory_order_relaxed); hash->prev != &initialImplicitProducerHash; hash = hash->prev) { - continue; + else { + ImplicitProducerHash* hash; + for (hash = implicitProducerHash.load(std::memory_order_relaxed); hash->prev != &other.initialImplicitProducerHash; hash = hash->prev) { + continue; + } + 
hash->prev = &initialImplicitProducerHash; + } + if (other.implicitProducerHash.load(std::memory_order_relaxed) == &initialImplicitProducerHash) { + other.implicitProducerHash.store(&other.initialImplicitProducerHash, std::memory_order_relaxed); + } + else { + ImplicitProducerHash* hash; + for (hash = other.implicitProducerHash.load(std::memory_order_relaxed); hash->prev != &initialImplicitProducerHash; hash = hash->prev) { + continue; + } + hash->prev = &other.initialImplicitProducerHash; } - hash->prev = &other.initialImplicitProducerHash; } } - + // Only fails (returns nullptr) if memory allocation fails ImplicitProducer* get_or_add_implicit_producer() { // Note that since the data is essentially thread-local (key is thread ID), // there's a reduced need for fences (memory ordering is already consistent // for any individual thread), except for the current table itself. - + // Start by looking for the thread ID in the current and all previous hash tables. // If it's not found, it must not be in there yet, since this same thread would // have added it previously to one of the tables that we traversed. - + // Code and algorithm adapted from http://preshing.com/20130605/the-worlds-simplest-lock-free-hash-table - -#if MCDBGQ_NOLOCKFREE_IMPLICITPRODHASH + +#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODHASH debug::DebugLock lock(implicitProdMutex); #endif - + auto id = details::thread_id(); auto hashedId = details::hash_thread_id(id); - + auto mainHash = implicitProducerHash.load(std::memory_order_acquire); + assert(mainHash != nullptr); // silence clang-tidy and MSVC warnings (hash cannot be null) for (auto hash = mainHash; hash != nullptr; hash = hash->prev) { // Look for the id in this hash auto index = hashedId; while (true) { // Not an infinite loop because at least one slot is free in the hash table - index &= hash->capacity - 1; - + index &= hash->capacity - 1u; + auto probedKey = hash->entries[index].key.load(std::memory_order_relaxed); if (probedKey == id) { // Found it! If we had to search several hashes deep, though, we should lazily add it @@ -3318,15 +3423,14 @@ private: if (hash != mainHash) { index = hashedId; while (true) { - index &= mainHash->capacity - 1; - probedKey = mainHash->entries[index].key.load(std::memory_order_relaxed); + index &= mainHash->capacity - 1u; auto empty = details::invalid_thread_id; #ifdef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED auto reusable = details::invalid_thread_id2; - if ((probedKey == empty && mainHash->entries[index].key.compare_exchange_strong(empty, id, std::memory_order_relaxed, std::memory_order_relaxed)) || - (probedKey == reusable && mainHash->entries[index].key.compare_exchange_strong(reusable, id, std::memory_order_acquire, std::memory_order_acquire))) { + if (mainHash->entries[index].key.compare_exchange_strong(empty, id, std::memory_order_seq_cst, std::memory_order_relaxed) || + mainHash->entries[index].key.compare_exchange_strong(reusable, id, std::memory_order_seq_cst, std::memory_order_relaxed)) { #else - if ((probedKey == empty && mainHash->entries[index].key.compare_exchange_strong(empty, id, std::memory_order_relaxed, std::memory_order_relaxed))) { + if (mainHash->entries[index].key.compare_exchange_strong(empty, id, std::memory_order_seq_cst, std::memory_order_relaxed)) { #endif mainHash->entries[index].value = value; break; @@ -3334,7 +3438,7 @@ private: ++index; } } - + return value; } if (probedKey == details::invalid_thread_id) { @@ -3343,10 +3447,11 @@ private: ++index; } } - + // Insert! 
auto newCount = 1 + implicitProducerHashCount.fetch_add(1, std::memory_order_relaxed); while (true) { + // NOLINTNEXTLINE(clang-analyzer-core.NullDereference) if (newCount >= (mainHash->capacity >> 1) && !implicitProducerHashResizeInProgress.test_and_set(std::memory_order_acquire)) { // We've acquired the resize lock, try to allocate a bigger hash table. // Note the acquire fence synchronizes with the release fence at the end of this block, and hence when @@ -3354,20 +3459,20 @@ private: // locked block). mainHash = implicitProducerHash.load(std::memory_order_acquire); if (newCount >= (mainHash->capacity >> 1)) { - auto newCapacity = mainHash->capacity << 1; + size_t newCapacity = mainHash->capacity << 1; while (newCount >= (newCapacity >> 1)) { newCapacity <<= 1; } auto raw = static_cast<char*>((Traits::malloc)(sizeof(ImplicitProducerHash) + std::alignment_of<ImplicitProducerKVP>::value - 1 + sizeof(ImplicitProducerKVP) * newCapacity)); if (raw == nullptr) { // Allocation failed - implicitProducerHashCount.fetch_add(-1, std::memory_order_relaxed); + implicitProducerHashCount.fetch_sub(1, std::memory_order_relaxed); implicitProducerHashResizeInProgress.clear(std::memory_order_relaxed); return nullptr; } - + auto newHash = new (raw) ImplicitProducerHash; - newHash->capacity = newCapacity; + newHash->capacity = static_cast<size_t>(newCapacity); newHash->entries = reinterpret_cast<ImplicitProducerKVP*>(details::align_for<ImplicitProducerKVP>(raw + sizeof(ImplicitProducerHash))); for (size_t i = 0; i != newCapacity; ++i) { new (newHash->entries + i) ImplicitProducerKVP; @@ -3382,40 +3487,36 @@ private: implicitProducerHashResizeInProgress.clear(std::memory_order_release); } } - + // If it's < three-quarters full, add to the old one anyway so that we don't have to wait for the next table // to finish being allocated by another thread (and if we just finished allocating above, the condition will // always be true) if (newCount < (mainHash->capacity >> 1) + (mainHash->capacity >> 2)) { - bool recycled; - auto producer = static_cast<ImplicitProducer*>(recycle_or_create_producer(false, recycled)); + auto producer = static_cast<ImplicitProducer*>(recycle_or_create_producer(false)); if (producer == nullptr) { - implicitProducerHashCount.fetch_add(-1, std::memory_order_relaxed); + implicitProducerHashCount.fetch_sub(1, std::memory_order_relaxed); return nullptr; } - if (recycled) { - implicitProducerHashCount.fetch_add(-1, std::memory_order_relaxed); - } - + #ifdef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED producer->threadExitListener.callback = &ConcurrentQueue::implicit_producer_thread_exited_callback; producer->threadExitListener.userData = producer; details::ThreadExitNotifier::subscribe(&producer->threadExitListener); #endif - + auto index = hashedId; while (true) { - index &= mainHash->capacity - 1; - auto probedKey = mainHash->entries[index].key.load(std::memory_order_relaxed); - + index &= mainHash->capacity - 1u; auto empty = details::invalid_thread_id; #ifdef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED auto reusable = details::invalid_thread_id2; - if ((probedKey == empty && mainHash->entries[index].key.compare_exchange_strong(empty, id, std::memory_order_relaxed, std::memory_order_relaxed)) || - (probedKey == reusable && mainHash->entries[index].key.compare_exchange_strong(reusable, id, std::memory_order_acquire, std::memory_order_acquire))) { -#else - if ((probedKey == empty && mainHash->entries[index].key.compare_exchange_strong(empty, id, std::memory_order_relaxed, std::memory_order_relaxed))) 
{ + if (mainHash->entries[index].key.compare_exchange_strong(reusable, id, std::memory_order_seq_cst, std::memory_order_relaxed)) { + implicitProducerHashCount.fetch_sub(1, std::memory_order_relaxed); // already counted as a used slot + mainHash->entries[index].value = producer; + break; + } #endif + if (mainHash->entries[index].key.compare_exchange_strong(empty, id, std::memory_order_seq_cst, std::memory_order_relaxed)) { mainHash->entries[index].value = producer; break; } @@ -3423,22 +3524,19 @@ private: } return producer; } - + // Hmm, the old hash is quite full and somebody else is busy allocating a new one. // We need to wait for the allocating thread to finish (if it succeeds, we add, if not, // we try to allocate ourselves). mainHash = implicitProducerHash.load(std::memory_order_acquire); } } - + #ifdef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED void implicit_producer_thread_exited(ImplicitProducer* producer) { - // Remove from thread exit listeners - details::ThreadExitNotifier::unsubscribe(&producer->threadExitListener); - // Remove from hash -#if MCDBGQ_NOLOCKFREE_IMPLICITPRODHASH +#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODHASH debug::DebugLock lock(implicitProdMutex); #endif auto hash = implicitProducerHash.load(std::memory_order_acquire); @@ -3446,26 +3544,25 @@ private: auto id = details::thread_id(); auto hashedId = details::hash_thread_id(id); details::thread_id_t probedKey; - + // We need to traverse all the hashes just in case other threads aren't on the current one yet and are // trying to add an entry thinking there's a free slot (because they reused a producer) for (; hash != nullptr; hash = hash->prev) { auto index = hashedId; do { - index &= hash->capacity - 1; - probedKey = hash->entries[index].key.load(std::memory_order_relaxed); - if (probedKey == id) { - hash->entries[index].key.store(details::invalid_thread_id2, std::memory_order_release); + index &= hash->capacity - 1u; + probedKey = id; + if (hash->entries[index].key.compare_exchange_strong(probedKey, details::invalid_thread_id2, std::memory_order_seq_cst, std::memory_order_relaxed)) { break; } ++index; } while (probedKey != details::invalid_thread_id); // Can happen if the hash has changed but we weren't put back in it yet, or if we weren't added to this hash in the first place } - + // Mark the queue as being recyclable producer->inactive.store(true, std::memory_order_release); } - + static void implicit_producer_thread_exited_callback(void* userData) { auto producer = static_cast<ImplicitProducer*>(userData); @@ -3473,88 +3570,109 @@ private: queue->implicit_producer_thread_exited(producer); } #endif - + ////////////////////////////////// // Utility functions ////////////////////////////////// - + + template<typename TAlign> + static inline void* aligned_malloc(size_t size) + { + MOODYCAMEL_CONSTEXPR_IF (std::alignment_of<TAlign>::value <= std::alignment_of<details::max_align_t>::value) + return (Traits::malloc)(size); + else { + size_t alignment = std::alignment_of<TAlign>::value; + void* raw = (Traits::malloc)(size + alignment - 1 + sizeof(void*)); + if (!raw) + return nullptr; + char* ptr = details::align_for<TAlign>(reinterpret_cast<char*>(raw) + sizeof(void*)); + *(reinterpret_cast<void**>(ptr) - 1) = raw; + return ptr; + } + } + + template<typename TAlign> + static inline void aligned_free(void* ptr) + { + MOODYCAMEL_CONSTEXPR_IF (std::alignment_of<TAlign>::value <= std::alignment_of<details::max_align_t>::value) + return (Traits::free)(ptr); + else + (Traits::free)(ptr ? 
*(reinterpret_cast<void**>(ptr) - 1) : nullptr); + } + template<typename U> static inline U* create_array(size_t count) { assert(count > 0); - auto p = static_cast<U*>((Traits::malloc)(sizeof(U) * count)); - if (p == nullptr) { + U* p = static_cast<U*>(aligned_malloc<U>(sizeof(U) * count)); + if (p == nullptr) return nullptr; - } - - for (size_t i = 0; i != count; ++i) { + + for (size_t i = 0; i != count; ++i) new (p + i) U(); - } return p; } - + template<typename U> static inline void destroy_array(U* p, size_t count) { if (p != nullptr) { assert(count > 0); - for (size_t i = count; i != 0; ) { + for (size_t i = count; i != 0; ) (p + --i)->~U(); - } - (Traits::free)(p); } + aligned_free<U>(p); } - + template<typename U> static inline U* create() { - auto p = (Traits::malloc)(sizeof(U)); + void* p = aligned_malloc<U>(sizeof(U)); return p != nullptr ? new (p) U : nullptr; } - + template<typename U, typename A1> static inline U* create(A1&& a1) { - auto p = (Traits::malloc)(sizeof(U)); + void* p = aligned_malloc<U>(sizeof(U)); return p != nullptr ? new (p) U(std::forward<A1>(a1)) : nullptr; } - + template<typename U> static inline void destroy(U* p) { - if (p != nullptr) { + if (p != nullptr) p->~U(); - } - (Traits::free)(p); + aligned_free<U>(p); } private: std::atomic<ProducerBase*> producerListTail; std::atomic<std::uint32_t> producerCount; - + std::atomic<size_t> initialBlockPoolIndex; Block* initialBlockPool; size_t initialBlockPoolSize; - -#if !MCDBGQ_USEDEBUGFREELIST + +#ifndef MCDBGQ_USEDEBUGFREELIST FreeList<Block> freeList; #else debug::DebugFreeList<Block> freeList; #endif - + std::atomic<ImplicitProducerHash*> implicitProducerHash; std::atomic<size_t> implicitProducerHashCount; // Number of slots logically used ImplicitProducerHash initialImplicitProducerHash; std::array<ImplicitProducerKVP, INITIAL_IMPLICIT_PRODUCER_HASH_SIZE> initialImplicitProducerHashEntries; std::atomic_flag implicitProducerHashResizeInProgress; - + std::atomic<std::uint32_t> nextExplicitConsumerId; std::atomic<std::uint32_t> globalExplicitConsumerOffset; - -#if MCDBGQ_NOLOCKFREE_IMPLICITPRODHASH + +#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODHASH debug::DebugMutex implicitProdMutex; #endif - + #ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG std::atomic<ExplicitProducer*> explicitProducers; std::atomic<ImplicitProducer*> implicitProducers; @@ -3585,7 +3703,7 @@ ConsumerToken::ConsumerToken(ConcurrentQueue<T, Traits>& queue) : itemsConsumedFromCurrent(0), currentProducer(nullptr), desiredProducer(nullptr) { initialOffset = queue.nextExplicitConsumerId.fetch_add(1, std::memory_order_release); - lastKnownGlobalOffset = -1; + lastKnownGlobalOffset = static_cast<std::uint32_t>(-1); } template<typename T, typename Traits> @@ -3593,7 +3711,7 @@ ConsumerToken::ConsumerToken(BlockingConcurrentQueue<T, Traits>& queue) : itemsConsumedFromCurrent(0), currentProducer(nullptr), desiredProducer(nullptr) { initialOffset = reinterpret_cast<ConcurrentQueue<T, Traits>*>(&queue)->nextExplicitConsumerId.fetch_add(1, std::memory_order_release); - lastKnownGlobalOffset = -1; + lastKnownGlobalOffset = static_cast<std::uint32_t>(-1); } template<typename T, typename Traits> @@ -3620,6 +3738,10 @@ inline void swap(typename ConcurrentQueue<T, Traits>::ImplicitProducerKVP& a, ty } -#if defined(__GNUC__) +#if defined(_MSC_VER) && (!defined(_HAS_CXX17) || !_HAS_CXX17) +#pragma warning(pop) +#endif + +#if defined(__GNUC__) && !defined(__INTEL_COMPILER) #pragma GCC diagnostic pop #endif
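
Note on the new aligned_malloc/aligned_free helpers introduced in this hunk: they use the usual over-allocation trick, i.e. request alignment - 1 + sizeof(void*) extra bytes, align the returned pointer, and stash the original allocation immediately in front of it so the matching free can recover it. A minimal standalone sketch of the same idea follows; it uses plain std::malloc/std::free rather than the queue's Traits hooks, and the function names are illustrative only, not part of the library.

    #include <cstddef>
    #include <cstdint>
    #include <cstdlib>

    // Returns `size` bytes aligned to `alignment`, or nullptr on failure.
    // Assumes `alignment` is a power of two and at least alignof(void*).
    // The original malloc'd pointer is stored in the void* slot just before
    // the returned address, so it can be recovered on free.
    void* over_aligned_malloc(std::size_t size, std::size_t alignment) {
      void* raw = std::malloc(size + alignment - 1 + sizeof(void*));
      if (raw == nullptr) return nullptr;
      auto base = reinterpret_cast<std::uintptr_t>(raw) + sizeof(void*);
      auto aligned = (base + alignment - 1) & ~(alignment - 1);
      reinterpret_cast<void**>(aligned)[-1] = raw;  // remember where the real block starts
      return reinterpret_cast<void*>(aligned);
    }

    void over_aligned_free(void* ptr) {
      if (ptr != nullptr) std::free(reinterpret_cast<void**>(ptr)[-1]);
    }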

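Similarly, the get_or_add_implicit_producer hunks above replace the earlier load-then-CAS pattern with a direct compare_exchange_strong when claiming a slot in the thread-id keyed open-addressing hash (the scheme adapted from the preshing.com article cited in the code). A rough, self-contained illustration of that find-or-claim probe is sketched below; the table size, hash constant, and names are hypothetical and not the queue's actual data structures, and the table is assumed to always have a free slot (the real code resizes before it becomes half full).

    #include <atomic>
    #include <cstddef>

    constexpr std::size_t kCapacity = 64;   // power of two, illustrative
    constexpr std::size_t kEmpty    = 0;    // stand-in for details::invalid_thread_id

    struct Slot { std::atomic<std::size_t> key{kEmpty}; void* value{nullptr}; };
    Slot table[kCapacity];

    // Returns the value slot already owned by `id`, or claims the first empty slot for it.
    void** find_or_claim(std::size_t id) {
      std::size_t index = static_cast<std::size_t>(id * 0x9E3779B97F4A7C15ull);  // cheap mixing hash
      while (true) {  // not infinite: at least one slot is kept free
        index &= kCapacity - 1u;
        std::size_t expected = kEmpty;
        if (table[index].key.load(std::memory_order_relaxed) == id ||
            table[index].key.compare_exchange_strong(expected, id, std::memory_order_seq_cst,
                                                     std::memory_order_relaxed)) {
          return &table[index].value;  // found our own entry, or just claimed this slot
        }
        ++index;  // slot belongs to another thread: keep probing linearly
      }
    }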