This is an automated email from the ASF dual-hosted git repository.

adebreceni pushed a commit to branch minifi-api-reduced
in repository https://gitbox.apache.org/repos/asf/nifi-minifi-cpp.git

commit bf1134e5a1e762211fe99ec33b425abfcce32cfb
Author: Adam Debreceni <[email protected]>
AuthorDate: Tue Jun 24 11:45:55 2025 +0200

    Review changes, upgrade concurrentqueue
---
 core-framework/include/core/ProcessorImpl.h    |    6 +-
 core-framework/src/core/ProcessorImpl.cpp      |    4 +-
 core-framework/src/io/OutputStream.cpp         |   12 +-
 extensions/aws/processors/AwsProcessor.h       |    3 +-
 extensions/aws/processors/DeleteS3Object.h     |    2 +-
 extensions/aws/processors/FetchS3Object.h      |    2 +-
 extensions/aws/processors/ListS3.h             |    2 +-
 extensions/aws/processors/PutKinesisStream.h   |    2 +-
 extensions/aws/processors/PutS3Object.h        |    2 +-
 extensions/aws/processors/S3Processor.cpp      |    2 +-
 extensions/aws/processors/S3Processor.h        |    1 +
 extensions/aws/tests/PutKinesisStreamTests.cpp |    2 +-
 extensions/python/ExecutePythonProcessor.cpp   |    1 -
 thirdparty/concurrentqueue/concurrentqueue.h   | 1370 +++++++++++++-----------
 14 files changed, 764 insertions(+), 647 deletions(-)

diff --git a/core-framework/include/core/ProcessorImpl.h b/core-framework/include/core/ProcessorImpl.h
index 6d51b70ff..6b14e5e3d 100644
--- a/core-framework/include/core/ProcessorImpl.h
+++ b/core-framework/include/core/ProcessorImpl.h
@@ -77,8 +77,10 @@ class ProcessorImpl : public virtual ProcessorApi {
  public:
   explicit ProcessorImpl(ProcessorMetadata info);
 
-  ProcessorImpl(const ProcessorImpl& parent) = delete;
-  ProcessorImpl& operator=(const ProcessorImpl& parent) = delete;
+  ProcessorImpl(const ProcessorImpl&) = delete;
+  ProcessorImpl(ProcessorImpl&&) = delete;
+  ProcessorImpl& operator=(const ProcessorImpl&) = delete;
+  ProcessorImpl& operator=(ProcessorImpl&&) = delete;
 
   ~ProcessorImpl() override;
 
diff --git a/core-framework/src/core/ProcessorImpl.cpp b/core-framework/src/core/ProcessorImpl.cpp
index 460ba47ff..68f5885bd 100644
--- a/core-framework/src/core/ProcessorImpl.cpp
+++ b/core-framework/src/core/ProcessorImpl.cpp
@@ -45,10 +45,10 @@ using namespace std::literals::chrono_literals;
 namespace org::apache::nifi::minifi::core {
 
 ProcessorImpl::ProcessorImpl(ProcessorMetadata info)
-    : info_(info),
+    : info_(std::move(info)),
       trigger_when_empty_(false),
       metrics_(std::make_shared<ProcessorMetricsImpl>(*this)),
-      logger_(info.logger) {
+      logger_(info_.logger) {
   logger_->log_debug("Processor {} created with uuid {}", getName(), getUUIDStr());
 }
 
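The constructor change above fixes a use-after-move: `info` is consumed by `std::move(info)` in the first initializer, so the logger must afterwards be read from the member `info_` rather than from the moved-from parameter. A minimal sketch of the pattern (the `Metadata` type and `logger` field are illustrative stand-ins, not the real MiNiFi classes):

    #include <memory>
    #include <string>
    #include <utility>

    struct Metadata {
      std::string name;
      std::shared_ptr<int> logger;  // stand-in for the real logger type
    };

    class Widget {
     public:
      explicit Widget(Metadata info)
          : info_(std::move(info)),   // 'info' is moved-from past this point
            logger_(info_.logger) {}  // so read the member, not the parameter
     private:
      Metadata info_;
      std::shared_ptr<int> logger_;
    };

This is safe because members are initialized in declaration order, so `info_` is fully constructed before `logger_` reads from it.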
diff --git a/core-framework/src/io/OutputStream.cpp b/core-framework/src/io/OutputStream.cpp
index c19fff602..1b48912c8 100644
--- a/core-framework/src/io/OutputStream.cpp
+++ b/core-framework/src/io/OutputStream.cpp
@@ -24,11 +24,7 @@
 #include "io/OutputStream.h"
 #include "utils/gsl.h"
 
-namespace org {
-namespace apache {
-namespace nifi {
-namespace minifi {
-namespace io {
+namespace org::apache::nifi::minifi::io {
 
 size_t OutputStream::write(const std::vector<uint8_t>& buffer, size_t len) {
   if (buffer.size() < len) {
@@ -77,8 +73,4 @@ size_t OutputStream::write_str(const char* str, uint32_t len, bool widen) {
   return ret + write(reinterpret_cast<const uint8_t *>(str), len);
 }
 
-} /* namespace io */
-} /* namespace minifi */
-} /* namespace nifi */
-} /* namespace apache */
-} /* namespace org */
+}  // namespace org::apache::nifi::minifi::io
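The namespace cleanup above relies on C++17 nested namespace definitions; both spellings below declare the same namespace, so the change is purely syntactic (sketch):

    // Pre-C++17 spelling:
    namespace org { namespace apache { namespace nifi { namespace minifi { namespace io {
    void from_old_style();
    }}}}}

    // C++17 spelling, as used in the hunk above:
    namespace org::apache::nifi::minifi::io {
    void from_new_style();
    }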
diff --git a/extensions/aws/processors/AwsProcessor.h b/extensions/aws/processors/AwsProcessor.h
index 542d0c75b..f8ed99608 100644
--- a/extensions/aws/processors/AwsProcessor.h
+++ b/extensions/aws/processors/AwsProcessor.h
@@ -94,7 +94,7 @@ struct CommonProperties {
   std::string endpoint_override_url;
 };
 
-class AwsProcessor : public core::ProcessorImpl {
+class AwsProcessor : public core::ProcessorImpl {  // NOLINT(cppcoreguidelines-special-member-functions)
  public:
   EXTENSIONAPI static constexpr auto AccessKey = core::PropertyDefinitionBuilder<>::createProperty("Access Key")
       .withDescription("AWS account access key")
@@ -170,6 +170,7 @@ class AwsProcessor : public core::ProcessorImpl {
   });
 
   using ProcessorImpl::ProcessorImpl;
+  ~AwsProcessor() override = default;
 
   void onSchedule(core::ProcessContext& context, core::ProcessSessionFactory& session_factory) override;
 
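The NOLINT plus the explicitly defaulted destructor above follow the usual rule-of-five shape for a polymorphic base whose copy and move operations are deleted. A condensed sketch of that shape (generic names, not the actual processor hierarchy):

    class Base {
     public:
      Base() = default;
      Base(const Base&) = delete;
      Base(Base&&) = delete;
      Base& operator=(const Base&) = delete;
      Base& operator=(Base&&) = delete;
      virtual ~Base() = default;  // virtual: deleting through Base* is safe
    };

    class Derived : public Base {
     public:
      ~Derived() override = default;  // mirrors '~AwsProcessor() override = default'
    };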
diff --git a/extensions/aws/processors/DeleteS3Object.h b/extensions/aws/processors/DeleteS3Object.h
index 6a2047755..f723c58fd 100644
--- a/extensions/aws/processors/DeleteS3Object.h
+++ b/extensions/aws/processors/DeleteS3Object.h
@@ -35,7 +35,7 @@ class FlowProcessorS3TestsFixture;
 
 namespace org::apache::nifi::minifi::aws::processors {
 
-class DeleteS3Object : public S3Processor {
+class DeleteS3Object : public S3Processor {  // NOLINT(cppcoreguidelines-special-member-functions)
  public:
   EXTENSIONAPI static constexpr const char* Description = "Deletes FlowFiles on an Amazon S3 Bucket. If attempting to delete a file that does not exist, FlowFile is routed to success.";
 
diff --git a/extensions/aws/processors/FetchS3Object.h b/extensions/aws/processors/FetchS3Object.h
index fa5fd182f..19d9529bf 100644
--- a/extensions/aws/processors/FetchS3Object.h
+++ b/extensions/aws/processors/FetchS3Object.h
@@ -37,7 +37,7 @@ class FlowProcessorS3TestsFixture;
 
 namespace org::apache::nifi::minifi::aws::processors {
 
-class FetchS3Object : public S3Processor {
+class FetchS3Object : public S3Processor {  // NOLINT(cppcoreguidelines-special-member-functions)
  public:
   EXTENSIONAPI static constexpr const char* Description = "This Processor retrieves the contents of an S3 Object and writes it to the content of a FlowFile.";
 
diff --git a/extensions/aws/processors/ListS3.h b/extensions/aws/processors/ListS3.h
index b3e457952..4c9d64fb1 100644
--- a/extensions/aws/processors/ListS3.h
+++ b/extensions/aws/processors/ListS3.h
@@ -32,7 +32,7 @@
 
 namespace org::apache::nifi::minifi::aws::processors {
 
-class ListS3 : public S3Processor {
+class ListS3 : public S3Processor {  // NOLINT(cppcoreguidelines-special-member-functions)
  public:
   EXTENSIONAPI static constexpr const char* Description = "This Processor retrieves a listing of objects from an Amazon S3 bucket.";
 
diff --git a/extensions/aws/processors/PutKinesisStream.h b/extensions/aws/processors/PutKinesisStream.h
index 73d308dcf..458d52907 100644
--- a/extensions/aws/processors/PutKinesisStream.h
+++ b/extensions/aws/processors/PutKinesisStream.h
@@ -33,7 +33,7 @@
 
 namespace org::apache::nifi::minifi::aws::processors {
 
-class PutKinesisStream : public AwsProcessor {
+class PutKinesisStream : public AwsProcessor {  // NOLINT(cppcoreguidelines-special-member-functions)
  public:
   EXTENSIONAPI static constexpr const char* Description = "Sends the contents to a specified Amazon Kinesis. In order to send data to Kinesis, the stream name has to be specified.";
 
diff --git a/extensions/aws/processors/PutS3Object.h b/extensions/aws/processors/PutS3Object.h
index 0a5cbfe4e..03647b5da 100644
--- a/extensions/aws/processors/PutS3Object.h
+++ b/extensions/aws/processors/PutS3Object.h
@@ -43,7 +43,7 @@ class FlowProcessorS3TestsFixture;
 
 namespace org::apache::nifi::minifi::aws::processors {
 
-class PutS3Object : public S3Processor {
+class PutS3Object : public S3Processor {  // NOLINT(cppcoreguidelines-special-member-functions)
  public:
   static constexpr auto CANNED_ACLS = minifi::utils::getKeys(minifi::aws::s3::CANNED_ACL_MAP);
   static constexpr auto STORAGE_CLASSES = minifi::utils::getKeys(minifi::aws::s3::STORAGE_CLASS_MAP);
diff --git a/extensions/aws/processors/S3Processor.cpp b/extensions/aws/processors/S3Processor.cpp
index 6bb6402f4..532bb6012 100644
--- a/extensions/aws/processors/S3Processor.cpp
+++ b/extensions/aws/processors/S3Processor.cpp
@@ -33,7 +33,7 @@
 namespace org::apache::nifi::minifi::aws::processors {
 
 S3Processor::S3Processor(core::ProcessorMetadata info, std::unique_ptr<aws::s3::S3RequestSender> s3_request_sender)
-  : AwsProcessor(info),
+  : AwsProcessor(std::move(info)),
     s3_wrapper_(std::move(s3_request_sender)) {
 }
 
diff --git a/extensions/aws/processors/S3Processor.h b/extensions/aws/processors/S3Processor.h
index cdcee4c4c..a5f966b5d 100644
--- a/extensions/aws/processors/S3Processor.h
+++ b/extensions/aws/processors/S3Processor.h
@@ -51,6 +51,7 @@ class S3Processor : public AwsProcessor {
   EXTENSIONAPI static constexpr auto Properties = minifi::utils::array_cat(AwsProcessor::Properties, std::to_array<core::PropertyReference>({Bucket}));
 
   using AwsProcessor::AwsProcessor;
+  ~S3Processor() override = default;
 
   void onSchedule(core::ProcessContext& context, core::ProcessSessionFactory& session_factory) override;
 
diff --git a/extensions/aws/tests/PutKinesisStreamTests.cpp b/extensions/aws/tests/PutKinesisStreamTests.cpp
index b6532b5b3..3a3d4c105 100644
--- a/extensions/aws/tests/PutKinesisStreamTests.cpp
+++ b/extensions/aws/tests/PutKinesisStreamTests.cpp
@@ -83,7 +83,7 @@ class MockKinesisClient final : public Aws::Kinesis::KinesisClient {
   mutable uint32_t sequence_number_ = 0;
 };
 
-class PutKinesisStreamMocked final : public aws::processors::PutKinesisStream {
+class PutKinesisStreamMocked final : public aws::processors::PutKinesisStream {  // NOLINT(cppcoreguidelines-special-member-functions)
  public:
   static constexpr const char* Description = "PutKinesisStreamMocked";
 
diff --git a/extensions/python/ExecutePythonProcessor.cpp b/extensions/python/ExecutePythonProcessor.cpp
index 7daec15c1..48a58ad6f 100644
--- a/extensions/python/ExecutePythonProcessor.cpp
+++ b/extensions/python/ExecutePythonProcessor.cpp
@@ -142,7 +142,6 @@ void ExecutePythonProcessor::loadScript() {
     }
     loadScriptFromFile();
     last_script_write_time_ = utils::file::last_write_time(script_file_path_);
-    return;
   }
 }
 
diff --git a/thirdparty/concurrentqueue/concurrentqueue.h b/thirdparty/concurrentqueue/concurrentqueue.h
index db852d12c..b9c0bc7a8 100644
--- a/thirdparty/concurrentqueue/concurrentqueue.h
+++ b/thirdparty/concurrentqueue/concurrentqueue.h
@@ -5,7 +5,7 @@
 //    http://moodycamel.com/blog/2014/detailed-design-of-a-lock-free-queue
 
 // Simplified BSD license:
-// Copyright (c) 2013-2016, Cameron Desrochers.
+// Copyright (c) 2013-2020, Cameron Desrochers.
 // All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without modification,
@@ -27,9 +27,11 @@
 // TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
 // EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
+// Also dual-licensed under the Boost Software License (see LICENSE.md)
+
 #pragma once
 
-#if defined(__GNUC__)
+#if defined(__GNUC__) && !defined(__INTEL_COMPILER)
 // Disable -Wconversion warnings (spuriously triggered when Traits::size_t and
 // Traits::index_t are set to < 32 bits, causing integer promotion, causing warnings
 // upon assigning any computed values)
@@ -41,6 +43,13 @@
 #endif
 #endif
 
+#if defined(_MSC_VER) && (!defined(_HAS_CXX17) || !_HAS_CXX17)
+// VS2019 with /W4 warns about constant conditional expressions but unless /std=c++17 or higher
+// does not support `if constexpr`, so we have no choice but to simply disable the warning
+#pragma warning(push)
+#pragma warning(disable: 4127)  // conditional expression is constant
+#endif
+
 #if defined(__APPLE__)
 #include "TargetConditionals.h"
 #endif
@@ -68,6 +77,7 @@
 #include <climits>             // for CHAR_BIT
 #include <array>
 #include <thread>              // partly for __WINPTHREADS_VERSION if on MinGW-w64 w/ POSIX threading
+#include <mutex>        // used for thread exit synchronization
 
 // Platform-specific definitions of a numeric thread ID type and an invalid value
 namespace moodycamel { namespace details {
@@ -95,10 +105,10 @@ namespace moodycamel { namespace details {
       static const thread_id_t invalid_thread_id2 = 0xFFFFFFFFU;      // Not technically guaranteed to be invalid, but is never used in practice. Note that all Win32 thread IDs are presently multiples of 4.
       static inline thread_id_t thread_id() { return static_cast<thread_id_t>(::GetCurrentThreadId()); }
 } }
-#elif defined(__arm__) || defined(_M_ARM) || defined(__aarch64__) || (defined(__APPLE__) && TARGET_OS_IPHONE)
+#elif defined(__arm__) || defined(_M_ARM) || defined(__aarch64__) || (defined(__APPLE__) && TARGET_OS_IPHONE) || defined(__MVS__) || defined(MOODYCAMEL_NO_THREAD_LOCAL)
 namespace moodycamel { namespace details {
       static_assert(sizeof(std::thread::id) == 4 || sizeof(std::thread::id) == 8, "std::thread::id is expected to be either 4 or 8 bytes");
-       
+
        typedef std::thread::id thread_id_t;
       static const thread_id_t invalid_thread_id;         // Default ctor creates invalid ID
 
@@ -145,10 +155,21 @@ namespace moodycamel { namespace details {
        typedef std::uintptr_t thread_id_t;
       static const thread_id_t invalid_thread_id  = 0;                // Address can't be nullptr
       static const thread_id_t invalid_thread_id2 = 1;                // Member accesses off a null pointer are also generally invalid. Plus it's not aligned.
-       static inline thread_id_t thread_id() { static MOODYCAMEL_THREADLOCAL int x; return reinterpret_cast<thread_id_t>(&x); }
+       inline thread_id_t thread_id() { static MOODYCAMEL_THREADLOCAL int x; return reinterpret_cast<thread_id_t>(&x); }
 } }
 #endif
 
+// Constexpr if
+#ifndef MOODYCAMEL_CONSTEXPR_IF
+#if (defined(_MSC_VER) && defined(_HAS_CXX17) && _HAS_CXX17) || __cplusplus > 201402L
+#define MOODYCAMEL_CONSTEXPR_IF if constexpr
+#define MOODYCAMEL_MAYBE_UNUSED [[maybe_unused]]
+#else
+#define MOODYCAMEL_CONSTEXPR_IF if
+#define MOODYCAMEL_MAYBE_UNUSED
+#endif
+#endif
+
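Under C++14 the macro degrades to a plain `if`, so both branches must still compile; under C++17 the false branch of `if constexpr` is discarded at compile time. A sketch of how such a macro is typically used (MY_CONSTEXPR_IF is a hypothetical stand-in so as not to collide with the real macro):

    #include <cstddef>
    #include <type_traits>

    #if __cplusplus > 201402L
    #define MY_CONSTEXPR_IF if constexpr
    #else
    #define MY_CONSTEXPR_IF if
    #endif

    template <typename T>
    std::size_t storage_bytes(const T& value) {
      MY_CONSTEXPR_IF (std::is_trivially_copyable<T>::value) {
        return sizeof(value);               // branch chosen at compile time in C++17
      } else {
        return sizeof(value) + alignof(T);  // fallback; must compile either way
      }
    }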
 // Exceptions
 #ifndef MOODYCAMEL_EXCEPTIONS_ENABLED
 #if (defined(_MSC_VER) && defined(_CPPUNWIND)) || (defined(__GNUC__) && defined(__EXCEPTIONS)) || (!defined(_MSC_VER) && !defined(__GNUC__))
@@ -161,8 +182,8 @@ namespace moodycamel { namespace details {
 #define MOODYCAMEL_RETHROW throw
 #define MOODYCAMEL_THROW(expr) throw (expr)
 #else
-#define MOODYCAMEL_TRY if (true)
-#define MOODYCAMEL_CATCH(...) else if (false)
+#define MOODYCAMEL_TRY MOODYCAMEL_CONSTEXPR_IF (true)
+#define MOODYCAMEL_CATCH(...) else MOODYCAMEL_CONSTEXPR_IF (false)
 #define MOODYCAMEL_RETHROW
 #define MOODYCAMEL_THROW(expr)
 #endif
@@ -196,14 +217,14 @@ namespace moodycamel { namespace details {
 // VS2013 doesn't support `thread_local`, and MinGW-w64 w/ POSIX threading has a crippling bug: http://sourceforge.net/p/mingw-w64/bugs/445
 // g++ <=4.7 doesn't support thread_local either.
 // Finally, iOS/ARM doesn't have support for it either, and g++/ARM allows it to compile but it's unconfirmed to actually work
-#if (!defined(_MSC_VER) || _MSC_VER >= 1900) && (!defined(__MINGW32__) && !defined(__MINGW64__) || !defined(__WINPTHREADS_VERSION)) && (!defined(__GNUC__) || __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8)) && (!defined(__APPLE__) || !TARGET_OS_IPHONE) && !defined(__arm__) && !defined(_M_ARM) && !defined(__aarch64__)
+#if (!defined(_MSC_VER) || _MSC_VER >= 1900) && (!defined(__MINGW32__) && !defined(__MINGW64__) || !defined(__WINPTHREADS_VERSION)) && (!defined(__GNUC__) || __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8)) && (!defined(__APPLE__) || !TARGET_OS_IPHONE) && !defined(__arm__) && !defined(_M_ARM) && !defined(__aarch64__) && !defined(__MVS__)
 // Assume `thread_local` is fully supported in all other C++11 compilers/platforms
-//#define MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED    // always disabled for now since several users report having problems with it on
+#define MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED    // tentatively enabled for now; years ago several users report having problems with it on
 #endif
 #endif
 #endif
 
-// VS2012 doesn't support deleted functions. 
+// VS2012 doesn't support deleted functions.
 // In this case, we declare the function normally but don't define it. A link error will be generated if the function is called.
 #ifndef MOODYCAMEL_DELETE_FUNCTION
 #if defined(_MSC_VER) && _MSC_VER < 1800
@@ -213,14 +234,52 @@ namespace moodycamel { namespace details {
 #endif
 #endif
 
+namespace moodycamel { namespace details {
+#ifndef MOODYCAMEL_ALIGNAS
+// VS2013 doesn't support alignas or alignof, and align() requires a constant literal
+#if defined(_MSC_VER) && _MSC_VER <= 1800
+#define MOODYCAMEL_ALIGNAS(alignment) __declspec(align(alignment))
+#define MOODYCAMEL_ALIGNOF(obj) __alignof(obj)
+#define MOODYCAMEL_ALIGNED_TYPE_LIKE(T, obj) typename details::Vs2013Aligned<std::alignment_of<obj>::value, T>::type
+       template<int Align, typename T> struct Vs2013Aligned { };  // default, unsupported alignment
+       template<typename T> struct Vs2013Aligned<1, T> { typedef __declspec(align(1)) T type; };
+       template<typename T> struct Vs2013Aligned<2, T> { typedef __declspec(align(2)) T type; };
+       template<typename T> struct Vs2013Aligned<4, T> { typedef __declspec(align(4)) T type; };
+       template<typename T> struct Vs2013Aligned<8, T> { typedef __declspec(align(8)) T type; };
+       template<typename T> struct Vs2013Aligned<16, T> { typedef __declspec(align(16)) T type; };
+       template<typename T> struct Vs2013Aligned<32, T> { typedef __declspec(align(32)) T type; };
+       template<typename T> struct Vs2013Aligned<64, T> { typedef __declspec(align(64)) T type; };
+       template<typename T> struct Vs2013Aligned<128, T> { typedef __declspec(align(128)) T type; };
+       template<typename T> struct Vs2013Aligned<256, T> { typedef __declspec(align(256)) T type; };
+#else
+       template<typename T> struct identity { typedef T type; };
+#define MOODYCAMEL_ALIGNAS(alignment) alignas(alignment)
+#define MOODYCAMEL_ALIGNOF(obj) alignof(obj)
+#define MOODYCAMEL_ALIGNED_TYPE_LIKE(T, obj) alignas(alignof(obj)) typename details::identity<T>::type
+#endif
+#endif
+} }
+
+
+// TSAN can false report races in lock-free code.  To enable TSAN to be used from projects that use this one,
+// we can apply per-function compile-time suppression.
+// See https://clang.llvm.org/docs/ThreadSanitizer.html#has-feature-thread-sanitizer
+#define MOODYCAMEL_NO_TSAN
+#if defined(__has_feature)
+ #if __has_feature(thread_sanitizer)
+  #undef MOODYCAMEL_NO_TSAN
+  #define MOODYCAMEL_NO_TSAN __attribute__((no_sanitize("thread")))
+ #endif // TSAN
+#endif // TSAN
+
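A sketch of how such a per-function suppression is applied; MY_NO_TSAN mirrors the MOODYCAMEL_NO_TSAN definition above, and the attribute only takes effect when the translation unit is built with -fsanitize=thread:

    #include <atomic>

    #define MY_NO_TSAN
    #if defined(__has_feature)
    #if __has_feature(thread_sanitizer)
    #undef MY_NO_TSAN
    #define MY_NO_TSAN __attribute__((no_sanitize("thread")))
    #endif
    #endif

    // Reads a flag with relaxed ordering; the author asserts the race is benign,
    // so TSAN is told not to instrument this one function.
    MY_NO_TSAN bool relaxed_flag_read(const std::atomic<bool>& flag) {
      return flag.load(std::memory_order_relaxed);
    }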
 // Compiler-specific likely/unlikely hints
 namespace moodycamel { namespace details {
 #if defined(__GNUC__)
-       inline bool likely(bool x) { return __builtin_expect((x), true); }
-       inline bool unlikely(bool x) { return __builtin_expect((x), false); }
+       static inline bool (likely)(bool x) { return __builtin_expect((x), true); }
+       static inline bool (unlikely)(bool x) { return __builtin_expect((x), false); }
 #else
-       inline bool likely(bool x) { return x; }
-       inline bool unlikely(bool x) { return x; }
+       static inline bool (likely)(bool x) { return x; }
+       static inline bool (unlikely)(bool x) { return x; }
 #endif
 } }
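The extra parentheses around `(likely)` and `(unlikely)` are not cosmetic: a function-like macro only expands when its name is directly followed by `(`, so the parenthesized spelling lets these helpers coexist with a macro of the same name. Illustration (the colliding macro here is hypothetical):

    #define likely(x) __builtin_expect(!!(x), 1)  // e.g. pulled in by some header

    namespace details {
    inline bool (likely)(bool x) { return x; }  // still declares a function
    }

    inline bool demo(bool b) {
      return (details::likely)(b);  // calls the function; the macro is not expanded
    }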
 
@@ -238,11 +297,19 @@ namespace details {
                        : static_cast<T>(-1);
        };
 
-#if defined(__GNUC__) && !defined( __clang__ )
-       typedef ::max_align_t max_align_t;      // GCC forgot to add it to std:: for a while
+#if defined(__GLIBCXX__)
+       typedef ::max_align_t std_max_align_t;      // libstdc++ forgot to add it to std:: for a while
 #else
-       typedef std::max_align_t max_align_t;   // Others (e.g. MSVC) insist it can *only* be accessed via std::
+       typedef std::max_align_t std_max_align_t;   // Others (e.g. MSVC) insist it can *only* be accessed via std::
 #endif
+
+       // Some platforms have incorrectly set max_align_t to a type with <8 bytes alignment even while supporting
+       // 8-byte aligned scalar values (*cough* 32-bit iOS). Work around this with our own union. See issue #64.
+       typedef union {
+               std_max_align_t x;
+               long long y;
+               void* z;
+       } max_align_t;
 }
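The union guarantees an alignment at least as strict as each of its members, which is the point of the workaround; a sketch of the property being relied on (my_max_align_t is an illustrative mirror of details::max_align_t):

    #include <cstddef>

    typedef union {
      std::max_align_t x;
      long long y;
      void* z;
    } my_max_align_t;

    static_assert(alignof(my_max_align_t) >= alignof(long long),
                  "the union is aligned for its strictest member");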
 
 // Default traits for the ConcurrentQueue. To change some of the
@@ -255,7 +322,7 @@ struct ConcurrentQueueDefaultTraits
 {
        // General-purpose size type. std::size_t is strongly recommended.
        typedef std::size_t size_t;
-       
+
       // The type used for the enqueue and dequeue indices. Must be at least as
       // large as size_t. Should be significantly larger than the number of elements
       // you expect to hold at once, especially if you have a high turnover rate;
@@ -267,47 +334,60 @@ struct ConcurrentQueueDefaultTraits
       // whether the queue is lock-free with a 64-int type depends on the whether
        // std::atomic<std::uint64_t> is lock-free, which is platform-specific.
        typedef std::size_t index_t;
-       
+
        // Internally, all elements are enqueued and dequeued from multi-element
       // blocks; this is the smallest controllable unit. If you expect few elements
       // but many producers, a smaller block size should be favoured. For few producers
       // and/or many elements, a larger block size is preferred. A sane default
        // is provided. Must be a power of 2.
        static const size_t BLOCK_SIZE = 32;
-       
+
       // For explicit producers (i.e. when using a producer token), the block is
       // checked for being empty by iterating through a list of flags, one per element.
       // For large block sizes, this is too inefficient, and switching to an atomic
       // counter-based approach is faster. The switch is made for block sizes strictly
        // larger than this threshold.
        static const size_t EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD = 32;
-       
+
       // How many full blocks can be expected for a single explicit producer? This should
       // reflect that number's maximum for optimal performance. Must be a power of 2.
        static const size_t EXPLICIT_INITIAL_INDEX_SIZE = 32;
-       
+
       // How many full blocks can be expected for a single implicit producer? This should
       // reflect that number's maximum for optimal performance. Must be a power of 2.
        static const size_t IMPLICIT_INITIAL_INDEX_SIZE = 32;
-       
+
       // The initial size of the hash table mapping thread IDs to implicit producers.
       // Note that the hash is resized every time it becomes half full.
       // Must be a power of two, and either 0 or at least 1. If 0, implicit production
       // (using the enqueue methods without an explicit producer token) is disabled.
        static const size_t INITIAL_IMPLICIT_PRODUCER_HASH_SIZE = 32;
-       
+
       // Controls the number of items that an explicit consumer (i.e. one with a token)
       // must consume before it causes all consumers to rotate and move on to the next
       // internal queue.
       static const std::uint32_t EXPLICIT_CONSUMER_CONSUMPTION_QUOTA_BEFORE_ROTATE = 256;
-       
+
       // The maximum number of elements (inclusive) that can be enqueued to a sub-queue.
       // Enqueue operations that would cause this limit to be surpassed will fail. Note
       // that this limit is enforced at the block level (for performance reasons), i.e.
       // it's rounded up to the nearest block size.
       static const size_t MAX_SUBQUEUE_SIZE = details::const_numeric_max<size_t>::value;
-       
-       
+
+       // The number of times to spin before sleeping when waiting on a semaphore.
+       // Recommended values are on the order of 1000-10000 unless the number of
+       // consumer threads exceeds the number of idle cores (in which case try 0-100).
+       // Only affects instances of the BlockingConcurrentQueue.
+       static const int MAX_SEMA_SPINS = 10000;
+
+       // Whether to recycle dynamically-allocated blocks into an internal free list or
+       // not. If false, only pre-allocated blocks (controlled by the constructor
+       // arguments) will be recycled, and all others will be `free`d back to the heap.
+       // Note that blocks consumed by explicit producers are only freed on destruction
+       // of the queue (not following destruction of the token) regardless of this trait.
+       static const bool RECYCLE_ALLOCATED_BLOCKS = false;
+
+
 #ifndef MCDBGQ_USE_RELACY
        // Memory allocation can be customized if needed.
       // malloc should return nullptr on failure, and handle alignment like std::malloc.
@@ -353,13 +433,13 @@ namespace details
                ConcurrentQueueProducerTypelessBase* next;
                std::atomic<bool> inactive;
                ProducerToken* token;
-               
+
                ConcurrentQueueProducerTypelessBase()
                        : next(nullptr), inactive(false), token(nullptr)
                {
                }
        };
-       
+
        template<bool use32> struct _hash_32_or_64 {
                static inline std::uint32_t hash(std::uint32_t h)
                {
@@ -385,28 +465,23 @@ namespace details
                }
        };
       template<std::size_t size> struct hash_32_or_64 : public _hash_32_or_64<(size > 4)> {  };
-       
+
        static inline size_t hash_thread_id(thread_id_t id)
        {
               static_assert(sizeof(thread_id_t) <= 8, "Expected a platform where thread IDs are at most 64-bit values");
               return static_cast<size_t>(hash_32_or_64<sizeof(thread_id_converter<thread_id_t>::thread_id_hash_t)>::hash(
                        thread_id_converter<thread_id_t>::prehash(id)));
        }
-       
+
        template<typename T>
        static inline bool circular_less_than(T a, T b)
        {
-#ifdef _MSC_VER
-#pragma warning(push)
-#pragma warning(disable: 4554)
-#endif
               static_assert(std::is_integral<T>::value && !std::numeric_limits<T>::is_signed, "circular_less_than is intended to be used only with unsigned integer types");
-               return static_cast<T>(a - b) > static_cast<T>(static_cast<T>(1) << static_cast<T>(sizeof(T) * CHAR_BIT - 1));
-#ifdef _MSC_VER
-#pragma warning(pop)
-#endif
+               return static_cast<T>(a - b) > static_cast<T>(static_cast<T>(1) << (static_cast<T>(sizeof(T) * CHAR_BIT - 1)));
+               // Note: extra parens around rhs of operator<< is MSVC bug: https://developercommunity2.visualstudio.com/t/C4554-triggers-when-both-lhs-and-rhs-is/10034931
+               //       silencing the bug requires #pragma warning(disable: 4554) around the calling code and has no effect when done here.
        }
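circular_less_than treats the unsigned values as points on a circle: `a` precedes `b` whenever the forward distance from `b` to `a` exceeds half the value range, which keeps comparisons correct across index wraparound. A self-contained restatement with 8-bit indices (names are illustrative):

    #include <cassert>
    #include <cstdint>

    static bool circ_less(std::uint8_t a, std::uint8_t b) {
      // 0x80 == 1 << (8 * sizeof(a) - 1), i.e. half the range of uint8_t
      return static_cast<std::uint8_t>(a - b) > 0x80u;
    }

    int main() {
      assert(circ_less(250, 5));   // 250 precedes 5 once the index wraps past 255
      assert(!circ_less(5, 250));
      assert(!circ_less(7, 7));    // equal values never compare less
    }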
-       
+
        template<typename U>
        static inline char* align_for(char* ptr)
        {
@@ -430,7 +505,7 @@ namespace details
                ++x;
                return x;
        }
-       
+
        template<typename T>
       static inline void swap_relaxed(std::atomic<T>& left, std::atomic<T>& right)
        {
@@ -438,13 +513,13 @@ namespace details
               left.store(std::move(right.load(std::memory_order_relaxed)), std::memory_order_relaxed);
                right.store(std::move(temp), std::memory_order_relaxed);
        }
-       
+
        template<typename T>
        static inline T const& nomove(T const& x)
        {
                return x;
        }
-       
+
        template<bool Enable>
        struct nomove_if
        {
@@ -454,7 +529,7 @@ namespace details
                        return x;
                }
        };
-       
+
        template<>
        struct nomove_if<false>
        {
@@ -465,47 +540,56 @@ namespace details
                        return std::forward<U>(x);
                }
        };
-       
+
        template<typename It>
       static inline auto deref_noexcept(It& it) MOODYCAMEL_NOEXCEPT -> decltype(*it)
        {
                return *it;
        }
-       
+
 #if defined(__clang__) || !defined(__GNUC__) || __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8)
       template<typename T> struct is_trivially_destructible : std::is_trivially_destructible<T> { };
 #else
       template<typename T> struct is_trivially_destructible : std::has_trivial_destructor<T> { };
 #endif
-       
+
 #ifdef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED
 #ifdef MCDBGQ_USE_RELACY
        typedef RelacyThreadExitListener ThreadExitListener;
        typedef RelacyThreadExitNotifier ThreadExitNotifier;
 #else
+       class ThreadExitNotifier;
+
        struct ThreadExitListener
        {
                typedef void (*callback_t)(void*);
                callback_t callback;
                void* userData;
-               
+
               ThreadExitListener* next;               // reserved for use by the ThreadExitNotifier
+               ThreadExitNotifier* chain;              // reserved for use by the ThreadExitNotifier
        };
-       
-       
+
        class ThreadExitNotifier
        {
        public:
                static void subscribe(ThreadExitListener* listener)
                {
                        auto& tlsInst = instance();
+                       std::lock_guard<std::mutex> guard(mutex());
                        listener->next = tlsInst.tail;
+                       listener->chain = &tlsInst;
                        tlsInst.tail = listener;
                }
-               
+
                static void unsubscribe(ThreadExitListener* listener)
                {
-                       auto& tlsInst = instance();
+                       std::lock_guard<std::mutex> guard(mutex());
+                       if (!listener->chain) {
+                               return;  // race with ~ThreadExitNotifier
+                       }
+                       auto& tlsInst = *listener->chain;
+                       listener->chain = nullptr;
                        ThreadExitListener** prev = &tlsInst.tail;
                       for (auto ptr = tlsInst.tail; ptr != nullptr; ptr = ptr->next) {
                                if (ptr == listener) {
@@ -515,34 +599,43 @@ namespace details
                                prev = &ptr->next;
                        }
                }
-               
+
        private:
                ThreadExitNotifier() : tail(nullptr) { }
               ThreadExitNotifier(ThreadExitNotifier const&) MOODYCAMEL_DELETE_FUNCTION;
               ThreadExitNotifier& operator=(ThreadExitNotifier const&) MOODYCAMEL_DELETE_FUNCTION;
-               
+
                ~ThreadExitNotifier()
                {
                        // This thread is about to exit, let everyone know!
                       assert(this == &instance() && "If this assert fails, you likely have a buggy compiler! Change the preprocessor conditions such that MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED is no longer defined.");
+                       std::lock_guard<std::mutex> guard(mutex());
                        for (auto ptr = tail; ptr != nullptr; ptr = ptr->next) {
+                               ptr->chain = nullptr;
                                ptr->callback(ptr->userData);
                        }
                }
-               
+
                // Thread-local
                static inline ThreadExitNotifier& instance()
                {
                        static thread_local ThreadExitNotifier notifier;
                        return notifier;
                }
-               
+
+               static inline std::mutex& mutex()
+               {
+                       // Must be static because the ThreadExitNotifier could be destroyed while unsubscribe is called
+                       static std::mutex mutex;
+                       return mutex;
+               }
+
        private:
                ThreadExitListener* tail;
        };
 #endif
 #endif
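The fix above moves all listener-list manipulation under one lock. The key detail is that the mutex is a function-local static rather than a member: thread_local objects on the same thread can be destroyed in an order that races with unsubscribe, so the lock must outlive every per-thread notifier instance. A condensed sketch of the pattern (names are illustrative, not the library's):

    #include <mutex>

    class ExitRegistry {
     public:
      static std::mutex& mutex() {
        static std::mutex m;  // function-local static: outlives thread_local instances
        return m;
      }
      ~ExitRegistry() {
        std::lock_guard<std::mutex> guard(mutex());
        // walk the listener list, clear each back-pointer, invoke callbacks...
      }
    };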
-       
+
       template<typename T> struct static_is_lock_free_num { enum { value = 0 }; };
       template<> struct static_is_lock_free_num<signed char> { enum { value = ATOMIC_CHAR_LOCK_FREE }; };
       template<> struct static_is_lock_free_num<short> { enum { value = ATOMIC_SHORT_LOCK_FREE }; };
@@ -559,10 +652,10 @@ struct ProducerToken
 {
        template<typename T, typename Traits>
        explicit ProducerToken(ConcurrentQueue<T, Traits>& queue);
-       
+
        template<typename T, typename Traits>
        explicit ProducerToken(BlockingConcurrentQueue<T, Traits>& queue);
-       
+
        ProducerToken(ProducerToken&& other) MOODYCAMEL_NOEXCEPT
                : producer(other.producer)
        {
@@ -571,13 +664,13 @@ struct ProducerToken
                        producer->token = this;
                }
        }
-       
+
       inline ProducerToken& operator=(ProducerToken&& other) MOODYCAMEL_NOEXCEPT
        {
                swap(other);
                return *this;
        }
-       
+
        void swap(ProducerToken& other) MOODYCAMEL_NOEXCEPT
        {
                std::swap(producer, other.producer);
@@ -588,7 +681,7 @@ struct ProducerToken
                        other.producer->token = &other;
                }
        }
-       
+
        // A token is always valid unless:
        //     1) Memory allocation failed during construction
        //     2) It was moved via the move constructor
@@ -598,7 +691,7 @@ struct ProducerToken
        // that the token is valid for use with a specific queue,
        // but not which one; that's up to the user to track.
        inline bool valid() const { return producer != nullptr; }
-       
+
        ~ProducerToken()
        {
                if (producer != nullptr) {
@@ -606,15 +699,15 @@ struct ProducerToken
                       producer->inactive.store(true, std::memory_order_release);
                }
        }
-       
+
        // Disable copying and assignment
        ProducerToken(ProducerToken const&) MOODYCAMEL_DELETE_FUNCTION;
       ProducerToken& operator=(ProducerToken const&) MOODYCAMEL_DELETE_FUNCTION;
-       
+
 private:
        template<typename T, typename Traits> friend class ConcurrentQueue;
        friend class ConcurrentQueueTests;
-       
+
 protected:
        details::ConcurrentQueueProducerTypelessBase* producer;
 };
@@ -624,21 +717,21 @@ struct ConsumerToken
 {
        template<typename T, typename Traits>
        explicit ConsumerToken(ConcurrentQueue<T, Traits>& q);
-       
+
        template<typename T, typename Traits>
        explicit ConsumerToken(BlockingConcurrentQueue<T, Traits>& q);
-       
+
        ConsumerToken(ConsumerToken&& other) MOODYCAMEL_NOEXCEPT
               : initialOffset(other.initialOffset), lastKnownGlobalOffset(other.lastKnownGlobalOffset), itemsConsumedFromCurrent(other.itemsConsumedFromCurrent), currentProducer(other.currentProducer), desiredProducer(other.desiredProducer)
        {
        }
-       
+
       inline ConsumerToken& operator=(ConsumerToken&& other) MOODYCAMEL_NOEXCEPT
        {
                swap(other);
                return *this;
        }
-       
+
        void swap(ConsumerToken& other) MOODYCAMEL_NOEXCEPT
        {
                std::swap(initialOffset, other.initialOffset);
@@ -647,7 +740,7 @@ struct ConsumerToken
                std::swap(currentProducer, other.currentProducer);
                std::swap(desiredProducer, other.desiredProducer);
        }
-       
+
        // Disable copying and assignment
        ConsumerToken(ConsumerToken const&) MOODYCAMEL_DELETE_FUNCTION;
       ConsumerToken& operator=(ConsumerToken const&) MOODYCAMEL_DELETE_FUNCTION;
@@ -655,7 +748,7 @@ struct ConsumerToken
 private:
        template<typename T, typename Traits> friend class ConcurrentQueue;
        friend class ConcurrentQueueTests;
-       
+
 private: // but shared with ConcurrentQueue
        std::uint32_t initialOffset;
        std::uint32_t lastKnownGlobalOffset;
@@ -676,10 +769,10 @@ class ConcurrentQueue
 public:
        typedef ::moodycamel::ProducerToken producer_token_t;
        typedef ::moodycamel::ConsumerToken consumer_token_t;
-       
+
        typedef typename Traits::index_t index_t;
        typedef typename Traits::size_t size_t;
-       
+
       static const size_t BLOCK_SIZE = static_cast<size_t>(Traits::BLOCK_SIZE);
       static const size_t EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD = static_cast<size_t>(Traits::EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD);
       static const size_t EXPLICIT_INITIAL_INDEX_SIZE = static_cast<size_t>(Traits::EXPLICIT_INITIAL_INDEX_SIZE);
@@ -717,7 +810,7 @@ public:
       // queue is fully constructed before it starts being used by other threads (this
       // includes making the memory effects of construction visible, possibly with a
        // memory barrier).
-       explicit ConcurrentQueue(size_t capacity = 6 * BLOCK_SIZE)
+       explicit ConcurrentQueue(size_t capacity = 32 * BLOCK_SIZE)
                : producerListTail(nullptr),
                producerCount(0),
                initialBlockPoolIndex(0),
@@ -727,7 +820,7 @@ public:
                implicitProducerHashResizeInProgress.clear(std::memory_order_relaxed);
                populate_initial_implicit_producer_hash();
                populate_initial_block_list(capacity / BLOCK_SIZE + ((capacity & (BLOCK_SIZE - 1)) == 0 ? 0 : 1));
-               
+
 #ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG
                // Track all the producers using a fully-resolved typed list for
                // each kind; this makes it possible to debug them starting from
@@ -737,7 +830,7 @@ public:
                implicitProducers.store(nullptr, std::memory_order_relaxed);
 #endif
        }
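With the default traits above (BLOCK_SIZE = 32), the new default pre-allocates 32 blocks instead of 6. A minimal usage sketch against the public API shown in this header:

    #include "concurrentqueue.h"

    int main() {
      // Pre-allocates ceil(1024 / BLOCK_SIZE) = 32 blocks up front.
      moodycamel::ConcurrentQueue<int> q(1024);
      q.enqueue(42);
      int v = 0;
      return q.try_dequeue(v) && v == 42 ? 0 : 1;
    }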
-       
+
        // Computes the correct amount of pre-allocated blocks for you based
        // on the minimum number of elements you want available at any given
        // time, and the maximum concurrent number of each type of producer.
@@ -752,13 +845,13 @@ public:
                populate_initial_implicit_producer_hash();
                size_t blocks = (((minCapacity + BLOCK_SIZE - 1) / BLOCK_SIZE) - 1) * (maxExplicitProducers + 1) + 2 * (maxExplicitProducers + maxImplicitProducers);
                populate_initial_block_list(blocks);
-               
+
 #ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG
                explicitProducers.store(nullptr, std::memory_order_relaxed);
                implicitProducers.store(nullptr, std::memory_order_relaxed);
 #endif
        }
-       
+
        // Note: The queue should not be accessed concurrently while it's
        // being deleted. It's up to the user to synchronize this.
        // This method is not thread safe.
@@ -774,9 +867,9 @@ public:
                        destroy(ptr);
                        ptr = next;
                }
-               
+
                // Destroy implicit producer hash tables
-               if (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE != 0) {
+               MOODYCAMEL_CONSTEXPR_IF (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE != 0) {
                        auto hash = implicitProducerHash.load(std::memory_order_relaxed);
                        while (hash != nullptr) {
                                auto prev = hash->prev;
@@ -790,7 +883,7 @@ public:
                                hash = prev;
                        }
                }
-               
+
                // Destroy global free list
                auto block = freeList.head_unsafe();
                while (block != nullptr) {
@@ -800,7 +893,7 @@ public:
                        }
                        block = next;
                }
-               
+
                // Destroy initial free list
                destroy_array(initialBlockPool, initialBlockPoolSize);
        }
@@ -808,7 +901,7 @@ public:
        // Disable copying and copy assignment
        ConcurrentQueue(ConcurrentQueue const&) MOODYCAMEL_DELETE_FUNCTION;
       ConcurrentQueue& operator=(ConcurrentQueue const&) MOODYCAMEL_DELETE_FUNCTION;
-       
+
       // Moving is supported, but note that it is *not* a thread-safe operation.
       // Nobody can use the queue while it's being moved, and the memory effects
       // of that move must be propagated to other threads before they can use it.
@@ -829,31 +922,31 @@ public:
                implicitProducerHashResizeInProgress.clear(std::memory_order_relaxed);
                populate_initial_implicit_producer_hash();
                swap_implicit_producer_hashes(other);
-               
+
                other.producerListTail.store(nullptr, std::memory_order_relaxed);
                other.producerCount.store(0, std::memory_order_relaxed);
                other.nextExplicitConsumerId.store(0, std::memory_order_relaxed);
                other.globalExplicitConsumerOffset.store(0, std::memory_order_relaxed);
-               
+
 #ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG
                explicitProducers.store(other.explicitProducers.load(std::memory_order_relaxed), std::memory_order_relaxed);
                other.explicitProducers.store(nullptr, std::memory_order_relaxed);
                implicitProducers.store(other.implicitProducers.load(std::memory_order_relaxed), std::memory_order_relaxed);
                other.implicitProducers.store(nullptr, std::memory_order_relaxed);
 #endif
-               
+
                other.initialBlockPoolIndex.store(0, std::memory_order_relaxed);
                other.initialBlockPoolSize = 0;
                other.initialBlockPool = nullptr;
-               
+
                reown_producers();
        }
-       
+
       inline ConcurrentQueue& operator=(ConcurrentQueue&& other) MOODYCAMEL_NOEXCEPT
        {
                return swap_internal(other);
        }
-       
+
        // Swaps this queue's state with the other's. Not thread-safe.
        // Swapping two queues does not invalidate their tokens, however
        // the tokens that were created for one queue must be used with
@@ -863,14 +956,14 @@ public:
        {
                swap_internal(other);
        }
-       
+
 private:
        ConcurrentQueue& swap_internal(ConcurrentQueue& other)
        {
                if (this == &other) {
                        return *this;
                }
-               
+
                details::swap_relaxed(producerListTail, other.producerListTail);
                details::swap_relaxed(producerCount, other.producerCount);
                details::swap_relaxed(initialBlockPoolIndex, other.initialBlockPoolIndex);
@@ -879,20 +972,20 @@ private:
                freeList.swap(other.freeList);
                details::swap_relaxed(nextExplicitConsumerId, other.nextExplicitConsumerId);
                details::swap_relaxed(globalExplicitConsumerOffset, other.globalExplicitConsumerOffset);
-               
+
                swap_implicit_producer_hashes(other);
-               
+
                reown_producers();
                other.reown_producers();
-               
+
 #ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG
                details::swap_relaxed(explicitProducers, other.explicitProducers);
                details::swap_relaxed(implicitProducers, other.implicitProducers);
 #endif
-               
+
                return *this;
        }
-       
+
 public:
        // Enqueues a single item (by copying it).
       // Allocates memory if required. Only fails if memory allocation fails (or implicit
@@ -901,10 +994,10 @@ public:
        // Thread-safe.
        inline bool enqueue(T const& item)
        {
-               if (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) return false;
-               return inner_enqueue<CanAlloc>(item);
+               MOODYCAMEL_CONSTEXPR_IF (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) return false;
+               else return inner_enqueue<CanAlloc>(item);
        }
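As the comment above notes, the tokenless overloads fail when implicit production is disabled via the traits; the token-based overloads are unaffected. A sketch of both paths:

    #include "concurrentqueue.h"

    struct NoImplicitTraits : public moodycamel::ConcurrentQueueDefaultTraits {
      // 0 disables implicit producers, so tokenless enqueue returns false.
      static const size_t INITIAL_IMPLICIT_PRODUCER_HASH_SIZE = 0;
    };

    int main() {
      moodycamel::ConcurrentQueue<int, NoImplicitTraits> q;
      bool implicit_ok = q.enqueue(1);       // false: implicit production disabled
      moodycamel::ProducerToken tok(q);
      bool explicit_ok = q.enqueue(tok, 2);  // still succeeds via the token
      return (!implicit_ok && explicit_ok) ? 0 : 1;
    }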
-       
+
        // Enqueues a single item (by moving it, if possible).
       // Allocates memory if required. Only fails if memory allocation fails (or implicit
       // production is disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE is 0,
@@ -912,10 +1005,10 @@ public:
        // Thread-safe.
        inline bool enqueue(T&& item)
        {
-               if (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) return false;
-               return inner_enqueue<CanAlloc>(std::move(item));
+               MOODYCAMEL_CONSTEXPR_IF (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) return false;
+               else return inner_enqueue<CanAlloc>(std::move(item));
        }
-       
+
       // Enqueues a single item (by copying it) using an explicit producer token.
       // Allocates memory if required. Only fails if memory allocation fails (or
        // Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed).
@@ -924,7 +1017,7 @@ public:
        {
                return inner_enqueue<CanAlloc>(token, item);
        }
-       
+
       // Enqueues a single item (by moving it, if possible) using an explicit producer token.
       // Allocates memory if required. Only fails if memory allocation fails (or
        // Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed).
@@ -933,7 +1026,7 @@ public:
        {
                return inner_enqueue<CanAlloc>(token, std::move(item));
        }
-       
+
        // Enqueues several items.
       // Allocates memory if required. Only fails if memory allocation fails (or
       // implicit production is disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE
@@ -943,10 +1036,10 @@ public:
        template<typename It>
        bool enqueue_bulk(It itemFirst, size_t count)
        {
-               if (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) return false;
-               return inner_enqueue_bulk<CanAlloc>(itemFirst, count);
+               MOODYCAMEL_CONSTEXPR_IF (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) return false;
+               else return inner_enqueue_bulk<CanAlloc>(itemFirst, count);
        }
-       
+
        // Enqueues several items using an explicit producer token.
        // Allocates memory if required. Only fails if memory allocation fails
       // (or Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed).
@@ -958,7 +1051,7 @@ public:
        {
                return inner_enqueue_bulk<CanAlloc>(token, itemFirst, count);
        }
-       
+
        // Enqueues a single item (by copying it).
       // Does not allocate memory. Fails if not enough room to enqueue (or implicit
       // production is disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE
@@ -966,10 +1059,10 @@ public:
        // Thread-safe.
        inline bool try_enqueue(T const& item)
        {
-               if (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) return false;
-               return inner_enqueue<CannotAlloc>(item);
+               MOODYCAMEL_CONSTEXPR_IF (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) return false;
+               else return inner_enqueue<CannotAlloc>(item);
        }
-       
+
        // Enqueues a single item (by moving it, if possible).
        // Does not allocate memory (except for one-time implicit producer).
        // Fails if not enough room to enqueue (or implicit production is
@@ -977,10 +1070,10 @@ public:
        // Thread-safe.
        inline bool try_enqueue(T&& item)
        {
-               if (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) return false;
-               return inner_enqueue<CannotAlloc>(std::move(item));
+               MOODYCAMEL_CONSTEXPR_IF (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) return false;
+               else return inner_enqueue<CannotAlloc>(std::move(item));
        }
-       
+
       // Enqueues a single item (by copying it) using an explicit producer token.
        // Does not allocate memory. Fails if not enough room to enqueue.
        // Thread-safe.
@@ -988,7 +1081,7 @@ public:
        {
                return inner_enqueue<CannotAlloc>(token, item);
        }
-       
+
       // Enqueues a single item (by moving it, if possible) using an explicit producer token.
        // Does not allocate memory. Fails if not enough room to enqueue.
        // Thread-safe.
@@ -996,7 +1089,7 @@ public:
        {
                return inner_enqueue<CannotAlloc>(token, std::move(item));
        }
-       
+
        // Enqueues several items.
        // Does not allocate memory (except for one-time implicit producer).
        // Fails if not enough room to enqueue (or implicit production is
@@ -1007,10 +1100,10 @@ public:
        template<typename It>
        bool try_enqueue_bulk(It itemFirst, size_t count)
        {
-               if (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) return false;
-               return inner_enqueue_bulk<CannotAlloc>(itemFirst, count);
+               MOODYCAMEL_CONSTEXPR_IF (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) return false;
+               else return inner_enqueue_bulk<CannotAlloc>(itemFirst, count);
        }
-       
+
        // Enqueues several items using an explicit producer token.
        // Does not allocate memory. Fails if not enough room to enqueue.
        // Note: Use std::make_move_iterator if the elements should be moved
@@ -1021,9 +1114,9 @@ public:
        {
                return inner_enqueue_bulk<CannotAlloc>(token, itemFirst, count);
        }
-       
-       
-       
+
+
+
        // Attempts to dequeue from the queue.
        // Returns false if all producer streams appeared empty at the time they
       // were checked (so, the queue is likely but not guaranteed to be empty).
@@ -1046,11 +1139,11 @@ public:
                                ++nonEmptyCount;
                        }
                }
-               
+
                // If there was at least one non-empty queue but it appears empty at the time
                // we try to dequeue from it, we need to make sure every queue's been tried
                if (nonEmptyCount > 0) {
-                       if (details::likely(best->dequeue(item))) {
+                       if ((details::likely)(best->dequeue(item))) {
                                return true;
                        }
                        for (auto ptr = producerListTail.load(std::memory_order_acquire); ptr != nullptr; ptr = ptr->next_prod()) {
@@ -1061,7 +1154,7 @@ public:
                }
                return false;
        }
-       
+
        // Attempts to dequeue from the queue.
        // Returns false if all producer streams appeared empty at the time they
       // were checked (so, the queue is likely but not guaranteed to be empty).
@@ -1081,7 +1174,7 @@ public:
                }
                return false;
        }
-       
+
        // Attempts to dequeue from the queue using an explicit consumer token.
        // Returns false if all producer streams appeared empty at the time they
       // were checked (so, the queue is likely but not guaranteed to be empty).
@@ -1094,13 +1187,13 @@ public:
                // If you see that the global offset has changed, you must reset your consumption counter and move to your designated place
                // If there's no items where you're supposed to be, keep moving until you find a producer with some items
                // If the global offset has not changed but you've run out of items to consume, move over from your current position until you find an producer with something in it
-               
+
                if (token.desiredProducer == nullptr || token.lastKnownGlobalOffset != globalExplicitConsumerOffset.load(std::memory_order_relaxed)) {
                        if (!update_current_producer_after_rotation(token)) {
                                return false;
                        }
                }
-               
+
                // If there was at least one non-empty queue but it appears empty at the time
                // we try to dequeue from it, we need to make sure every queue's been tried
                if (static_cast<ProducerBase*>(token.currentProducer)->dequeue(item)) {
@@ -1109,7 +1202,7 @@ public:
                        }
                        return true;
                }
-               
+
                auto tail = producerListTail.load(std::memory_order_acquire);
                auto ptr = static_cast<ProducerBase*>(token.currentProducer)->next_prod();
                if (ptr == nullptr) {
@@ -1128,7 +1221,7 @@ public:
                }
                return false;
        }
-       
+
        // Attempts to dequeue several elements from the queue.
        // Returns the number of items actually dequeued.
        // Returns 0 if all producer streams appeared empty at the time they
@@ -1146,7 +1239,7 @@ public:
                }
                return count;
        }
-       
+
       // Attempts to dequeue several elements from the queue using an explicit consumer token.
        // Returns the number of items actually dequeued.
        // Returns 0 if all producer streams appeared empty at the time they
@@ -1160,7 +1253,7 @@ public:
                                return 0;
                        }
                }
-               
+
                size_t count = static_cast<ProducerBase*>(token.currentProducer)->dequeue_bulk(itemFirst, max);
                if (count == max) {
                        if ((token.itemsConsumedFromCurrent += static_cast<std::uint32_t>(max)) >= EXPLICIT_CONSUMER_CONSUMPTION_QUOTA_BEFORE_ROTATE) {
@@ -1170,7 +1263,7 @@ public:
                }
                token.itemsConsumedFromCurrent += static_cast<std::uint32_t>(count);
                max -= count;
-               
+
                auto tail = producerListTail.load(std::memory_order_acquire);
                auto ptr = static_cast<ProducerBase*>(token.currentProducer)->next_prod();
                if (ptr == nullptr) {
@@ -1194,9 +1287,9 @@ public:
                }
                return count;
        }
-       
-       
-       
+
+
+
        // Attempts to dequeue from a specific producer's inner queue.
        // If you happen to know which producer you want to dequeue from, this
       // is significantly faster than using the general-case try_dequeue methods.
@@ -1208,7 +1301,7 @@ public:
        {
                return static_cast<ExplicitProducer*>(producer.producer)->dequeue(item);
        }
-       
+
       // Attempts to dequeue several elements from a specific producer's inner queue.
        // Returns the number of items actually dequeued.
        // If you happen to know which producer you want to dequeue from, this
@@ -1221,8 +1314,8 @@ public:
        {
                return static_cast<ExplicitProducer*>(producer.producer)->dequeue_bulk(itemFirst, max);
        }
-       
-       
+
+
       // Returns an estimate of the total number of elements currently in the queue. This
       // estimate is only accurate if the queue has completely stabilized before it is called
       // (i.e. all enqueue and dequeue operations have completed and their memory effects are
@@ -1237,12 +1330,12 @@ public:
                }
                return size;
        }
-       
-       
+
+
        // Returns true if the underlying atomic variables used by
        // the queue are lock-free (they should be on most platforms).
        // Thread-safe.
-       static bool is_lock_free()
+       static constexpr bool is_lock_free()
        {
                return
                        details::static_is_lock_free<bool>::value == 2 &&
@@ -1257,42 +1350,45 @@ public:
 private:
        friend struct ProducerToken;
        friend struct ConsumerToken;
+       struct ExplicitProducer;
        friend struct ExplicitProducer;
+       struct ImplicitProducer;
+       friend struct ImplicitProducer;
        friend class ConcurrentQueueTests;
-               
+
        enum AllocationMode { CanAlloc, CannotAlloc };
-       
-       
+
+
        ///////////////////////////////
        // Queue methods
        ///////////////////////////////
-       
+
        template<AllocationMode canAlloc, typename U>
        inline bool inner_enqueue(producer_token_t const& token, U&& element)
        {
                return static_cast<ExplicitProducer*>(token.producer)->ConcurrentQueue::ExplicitProducer::template enqueue<canAlloc>(std::forward<U>(element));
        }
-       
+
        template<AllocationMode canAlloc, typename U>
        inline bool inner_enqueue(U&& element)
        {
                auto producer = get_or_add_implicit_producer();
                return producer == nullptr ? false : producer->ConcurrentQueue::ImplicitProducer::template enqueue<canAlloc>(std::forward<U>(element));
        }
-       
+
        template<AllocationMode canAlloc, typename It>
       inline bool inner_enqueue_bulk(producer_token_t const& token, It itemFirst, size_t count)
       {
                return static_cast<ExplicitProducer*>(token.producer)->ConcurrentQueue::ExplicitProducer::template enqueue_bulk<canAlloc>(itemFirst, count);
        }
-       
+
        template<AllocationMode canAlloc, typename It>
        inline bool inner_enqueue_bulk(It itemFirst, size_t count)
        {
                auto producer = get_or_add_implicit_producer();
                return producer == nullptr ? false : 
producer->ConcurrentQueue::ImplicitProducer::template 
enqueue_bulk<canAlloc>(itemFirst, count);
        }
-       
+
        inline bool update_current_producer_after_rotation(consumer_token_t& 
token)
        {
                // Ah, there's been a rotation, figure out where we should be!
@@ -1302,7 +1398,7 @@ private:
                }
                auto prodCount = producerCount.load(std::memory_order_relaxed);
                auto globalOffset = 
globalExplicitConsumerOffset.load(std::memory_order_relaxed);
-               if (details::unlikely(token.desiredProducer == nullptr)) {
+               if ((details::unlikely)(token.desiredProducer == nullptr)) {
                        // Aha, first time we're dequeueing anything.
                        // Figure out our local position
                        // Note: offset is from start, not end, but we're 
traversing from end -- subtract from count first
@@ -1315,7 +1411,7 @@ private:
                                }
                        }
                }
-               
+
                std::uint32_t delta = globalOffset - 
token.lastKnownGlobalOffset;
                if (delta >= prodCount) {
                        delta = delta % prodCount;
@@ -1326,27 +1422,27 @@ private:
                                token.desiredProducer = tail;
                        }
                }
-               
+
                token.lastKnownGlobalOffset = globalOffset;
                token.currentProducer = token.desiredProducer;
                token.itemsConsumedFromCurrent = 0;
                return true;
        }
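
    This rotation bookkeeping is what makes token-based consumption fair
    across producers; callers only ever see the token, e.g.:

        moodycamel::ConsumerToken ctok(q);
        int item;
        while (q.try_dequeue(ctok, item)) {
            // the token rotates to the next producer's inner queue once the
            // per-producer consumption quota is reached, instead of always
            // draining one producer first
        }
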
-       
-       
+
+
        ///////////////////////////
        // Free list
        ///////////////////////////
-       
+
        template <typename N>
        struct FreeListNode
        {
                FreeListNode() : freeListRefs(0), freeListNext(nullptr) { }
-               
+
                std::atomic<std::uint32_t> freeListRefs;
                std::atomic<N*> freeListNext;
        };
-       
+
        // A simple CAS-based lock-free free list. Not the fastest thing in the 
world under heavy contention, but
        // simple and correct (assuming nodes are never freed until after the 
free list is destroyed), and fairly
        // speedy under low contention.
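
    A stripped-down sketch of the CAS push at the heart of such a free list
    (a plain Treiber-stack push, omitting the reference counting the real
    FreeList layers on top to make concurrent try_get safe):

        #include <atomic>

        template <typename N>
        void free_list_push(std::atomic<N*>& head, N* node) {
            N* old = head.load(std::memory_order_relaxed);
            do {
                // link node in front of the current head, then try to publish
                node->freeListNext.store(old, std::memory_order_relaxed);
            } while (!head.compare_exchange_weak(old, node,
                         std::memory_order_release, std::memory_order_relaxed));
        }
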
@@ -1356,15 +1452,15 @@ private:
                FreeList() : freeListHead(nullptr) { }
                FreeList(FreeList&& other) : 
freeListHead(other.freeListHead.load(std::memory_order_relaxed)) { 
other.freeListHead.store(nullptr, std::memory_order_relaxed); }
                void swap(FreeList& other) { 
details::swap_relaxed(freeListHead, other.freeListHead); }
-               
+
                FreeList(FreeList const&) MOODYCAMEL_DELETE_FUNCTION;
                FreeList& operator=(FreeList const&) MOODYCAMEL_DELETE_FUNCTION;
-               
+
                inline void add(N* node)
                {
-#if MCDBGQ_NOLOCKFREE_FREELIST
+#ifdef MCDBGQ_NOLOCKFREE_FREELIST
                        debug::DebugLock lock(mutex);
-#endif         
+#endif
                        // We know that the should-be-on-freelist bit is 0 at 
this point, so it's safe to
                        // set it using a fetch_add
                        if (node->freeListRefs.fetch_add(SHOULD_BE_ON_FREELIST, 
std::memory_order_acq_rel) == 0) {
@@ -1373,12 +1469,12 @@ private:
                                add_knowing_refcount_is_zero(node);
                        }
                }
-               
+
                inline N* try_get()
                {
-#if MCDBGQ_NOLOCKFREE_FREELIST
+#ifdef MCDBGQ_NOLOCKFREE_FREELIST
                        debug::DebugLock lock(mutex);
-#endif         
+#endif
                        auto head = 
freeListHead.load(std::memory_order_acquire);
                        while (head != nullptr) {
                                auto prevHead = head;
@@ -1387,7 +1483,7 @@ private:
                                        head = 
freeListHead.load(std::memory_order_acquire);
                                        continue;
                                }
-                               
+
                                // Good, reference count has been incremented 
(it wasn't at zero), which means we can read the
                                // next and not worry about it changing between 
now and the time we do the CAS
                                auto next = 
head->freeListNext.load(std::memory_order_relaxed);
@@ -1395,27 +1491,27 @@ private:
                                        // Yay, got the node. This means it was 
on the list, which means shouldBeOnFreeList must be false no
                                        // matter the refcount (because nobody 
else knows it's been taken off yet, it can't have been put back on).
                                        
assert((head->freeListRefs.load(std::memory_order_relaxed) & 
SHOULD_BE_ON_FREELIST) == 0);
-                                       
+
                                        // Decrease refcount twice, once for 
our ref, and once for the list's ref
-                                       head->freeListRefs.fetch_add(-2, 
std::memory_order_release);
+                                       head->freeListRefs.fetch_sub(2, 
std::memory_order_release);
                                        return head;
                                }
-                               
+
                                // OK, the head must have changed on us, but we 
still need to decrease the refcount we increased.
                                // Note that we don't need to release any 
memory effects, but we do need to ensure that the reference
                                // count decrement happens-after the CAS on the 
head.
-                               refs = prevHead->freeListRefs.fetch_add(-1, 
std::memory_order_acq_rel);
+                               refs = prevHead->freeListRefs.fetch_sub(1, 
std::memory_order_acq_rel);
                                if (refs == SHOULD_BE_ON_FREELIST + 1) {
                                        add_knowing_refcount_is_zero(prevHead);
                                }
                        }
-                       
+
                        return nullptr;
                }
-               
+
                // Useful for traversing the list when there's no contention 
(e.g. to destroy remaining nodes)
                N* head_unsafe() const { return 
freeListHead.load(std::memory_order_relaxed); }
-               
+
        private:
                inline void add_knowing_refcount_is_zero(N* node)
                {
@@ -1440,47 +1536,47 @@ private:
                                return;
                        }
                }
-               
+
        private:
                // Implemented like a stack, but where node order doesn't 
matter (nodes are inserted out of order under contention)
                std::atomic<N*> freeListHead;
-       
+
        static const std::uint32_t REFS_MASK = 0x7FFFFFFF;
        static const std::uint32_t SHOULD_BE_ON_FREELIST = 0x80000000;
-               
-#if MCDBGQ_NOLOCKFREE_FREELIST
+
+#ifdef MCDBGQ_NOLOCKFREE_FREELIST
                debug::DebugMutex mutex;
 #endif
        };
-       
-       
+
+
        ///////////////////////////
        // Block
        ///////////////////////////
-       
+
        enum InnerQueueContext { implicit_context = 0, explicit_context = 1 };
-       
+
        struct Block
        {
                Block()
-                       : next(nullptr), elementsCompletelyDequeued(0), 
freeListRefs(0), freeListNext(nullptr), shouldBeOnFreeList(false), 
dynamicallyAllocated(true)
+                       : next(nullptr), elementsCompletelyDequeued(0), 
freeListRefs(0), freeListNext(nullptr), dynamicallyAllocated(true)
                {
-#if MCDBGQ_TRACKMEM
+#ifdef MCDBGQ_TRACKMEM
                        owner = nullptr;
 #endif
                }
-               
+
                template<InnerQueueContext context>
                inline bool is_empty() const
                {
-                       if (context == explicit_context && BLOCK_SIZE <= 
EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD) {
+                       MOODYCAMEL_CONSTEXPR_IF (context == explicit_context && 
BLOCK_SIZE <= EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD) {
                                // Check flags
                                for (size_t i = 0; i < BLOCK_SIZE; ++i) {
                                        if 
(!emptyFlags[i].load(std::memory_order_relaxed)) {
                                                return false;
                                        }
                                }
-                               
+
                                // Aha, empty; make sure we have all other 
memory effects that happened before the empty flags were set
                                
std::atomic_thread_fence(std::memory_order_acquire);
                                return true;
@@ -1495,12 +1591,12 @@ private:
                                return false;
                        }
                }
-               
+
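
    MOODYCAMEL_CONSTEXPR_IF presumably selects C++17 "if constexpr" when the
    compiler supports it, so the branch not taken (flag array vs. counter) is
    discarded at compile time; a sketch of such a macro (illustrative, not
    the header's actual definition):

        #if defined(__cpp_if_constexpr)
        #define QUEUE_CONSTEXPR_IF if constexpr
        #else
        #define QUEUE_CONSTEXPR_IF if
        #endif
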
                // Returns true if the block is now empty (does not apply in 
explicit context)
                template<InnerQueueContext context>
-               inline bool set_empty(index_t i)
+               inline bool set_empty(MOODYCAMEL_MAYBE_UNUSED index_t i)
                {
-                       if (context == explicit_context && BLOCK_SIZE <= 
EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD) {
+                       MOODYCAMEL_CONSTEXPR_IF (context == explicit_context && 
BLOCK_SIZE <= EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD) {
                                // Set flag
                                assert(!emptyFlags[BLOCK_SIZE - 1 - 
static_cast<size_t>(i & static_cast<index_t>(BLOCK_SIZE - 
1))].load(std::memory_order_relaxed));
                                emptyFlags[BLOCK_SIZE - 1 - 
static_cast<size_t>(i & static_cast<index_t>(BLOCK_SIZE - 1))].store(true, 
std::memory_order_release);
@@ -1513,13 +1609,13 @@ private:
                                return prevVal == BLOCK_SIZE - 1;
                        }
                }
-               
+
                // Sets multiple contiguous item statuses to 'empty' (assumes 
no wrapping and count > 0).
                // Returns true if the block is now empty (does not apply in 
explicit context).
                template<InnerQueueContext context>
-               inline bool set_many_empty(index_t i, size_t count)
+               inline bool set_many_empty(MOODYCAMEL_MAYBE_UNUSED index_t i, 
size_t count)
                {
-                       if (context == explicit_context && BLOCK_SIZE <= 
EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD) {
+                       MOODYCAMEL_CONSTEXPR_IF (context == explicit_context && 
BLOCK_SIZE <= EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD) {
                                // Set flags
                                
std::atomic_thread_fence(std::memory_order_release);
                                i = BLOCK_SIZE - 1 - static_cast<size_t>(i & 
static_cast<index_t>(BLOCK_SIZE - 1)) - count + 1;
@@ -1536,11 +1632,11 @@ private:
                                return prevVal + count == BLOCK_SIZE;
                        }
                }
-               
+
                template<InnerQueueContext context>
                inline void set_all_empty()
                {
-                       if (context == explicit_context && BLOCK_SIZE <= 
EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD) {
+                       MOODYCAMEL_CONSTEXPR_IF (context == explicit_context && 
BLOCK_SIZE <= EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD) {
                                // Set all flags
                                for (size_t i = 0; i != BLOCK_SIZE; ++i) {
                                        emptyFlags[i].store(true, 
std::memory_order_relaxed);
@@ -1551,11 +1647,11 @@ private:
                                elementsCompletelyDequeued.store(BLOCK_SIZE, 
std::memory_order_relaxed);
                        }
                }
-               
+
                template<InnerQueueContext context>
                inline void reset_empty()
                {
-                       if (context == explicit_context && BLOCK_SIZE <= 
EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD) {
+                       MOODYCAMEL_CONSTEXPR_IF (context == explicit_context && 
BLOCK_SIZE <= EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD) {
                                // Reset flags
                                for (size_t i = 0; i != BLOCK_SIZE; ++i) {
                                        emptyFlags[i].store(false, 
std::memory_order_relaxed);
@@ -1566,25 +1662,13 @@ private:
                                elementsCompletelyDequeued.store(0, 
std::memory_order_relaxed);
                        }
                }
-               
+
                inline T* operator[](index_t idx) MOODYCAMEL_NOEXCEPT { return 
static_cast<T*>(static_cast<void*>(elements)) + static_cast<size_t>(idx & 
static_cast<index_t>(BLOCK_SIZE - 1)); }
                inline T const* operator[](index_t idx) const 
MOODYCAMEL_NOEXCEPT { return static_cast<T const*>(static_cast<void 
const*>(elements)) + static_cast<size_t>(idx & static_cast<index_t>(BLOCK_SIZE 
- 1)); }
-               
+
        private:
-               // IMPORTANT: This must be the first member in Block, so that 
if T depends on the alignment of
-               // addresses returned by malloc, that alignment will be 
preserved. Apparently clang actually
-               // generates code that uses this assumption for AVX 
instructions in some cases. Ideally, we
-               // should also align Block to the alignment of T in case it's 
higher than malloc's 16-byte
-               // alignment, but this is hard to do in a cross-platform way. 
Assert for this case:
-               static_assert(std::alignment_of<T>::value <= 
std::alignment_of<details::max_align_t>::value, "The queue does not support 
super-aligned types at this time");
-               // Additionally, we need the alignment of Block itself to be a 
multiple of max_align_t since
-               // otherwise the appropriate padding will not be added at the 
end of Block in order to make
-               // arrays of Blocks all be properly aligned (not just the first 
one). We use a union to force
-               // this.
-               union {
-                       char elements[sizeof(T) * BLOCK_SIZE];
-                       details::max_align_t dummy;
-               };
+               static_assert(std::alignment_of<T>::value <= sizeof(T), "The 
queue does not support types with an alignment greater than their size at this 
time");
+               MOODYCAMEL_ALIGNED_TYPE_LIKE(char[sizeof(T) * BLOCK_SIZE], T) 
elements;
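
    MOODYCAMEL_ALIGNED_TYPE_LIKE declares raw storage carrying T's alignment;
    the portable C++11 equivalent is roughly (illustrative, not the macro's
    actual expansion):

        template <typename T, std::size_t N>
        struct AlignedBytesLike {
            alignas(T) char bytes[sizeof(T) * N];  // aligned as T, N slots wide
        };
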
        public:
                Block* next;
                std::atomic<size_t> elementsCompletelyDequeued;
@@ -1592,26 +1676,25 @@ private:
        public:
                std::atomic<std::uint32_t> freeListRefs;
                std::atomic<Block*> freeListNext;
-               std::atomic<bool> shouldBeOnFreeList;
                bool dynamicallyAllocated;              // Perhaps a better 
name for this would be 'isNotPartOfInitialBlockPool'
-               
-#if MCDBGQ_TRACKMEM
+
+#ifdef MCDBGQ_TRACKMEM
                void* owner;
 #endif
        };
-       static_assert(std::alignment_of<Block>::value >= 
std::alignment_of<details::max_align_t>::value, "Internal error: Blocks must be 
at least as aligned as the type they are wrapping");
+       static_assert(std::alignment_of<Block>::value >= 
std::alignment_of<T>::value, "Internal error: Blocks must be at least as 
aligned as the type they are wrapping");
 
 
-#if MCDBGQ_TRACKMEM
+#ifdef MCDBGQ_TRACKMEM
 public:
        struct MemStats;
 private:
 #endif
-       
+
        ///////////////////////////
        // Producer base
        ///////////////////////////
-       
+
        struct ProducerBase : public 
details::ConcurrentQueueProducerTypelessBase
        {
                ProducerBase(ConcurrentQueue* parent_, bool isExplicit_) :
@@ -1624,9 +1707,9 @@ private:
                        parent(parent_)
                {
                }
-               
-               virtual ~ProducerBase() { };
-               
+
+               virtual ~ProducerBase() { }
+
                template<typename U>
                inline bool dequeue(U& element)
                {
@@ -1637,7 +1720,7 @@ private:
                                return 
static_cast<ImplicitProducer*>(this)->dequeue(element);
                        }
                }
-               
+
                template<typename It>
                inline size_t dequeue_bulk(It& itemFirst, size_t max)
                {
@@ -1648,45 +1731,45 @@ private:
                                return 
static_cast<ImplicitProducer*>(this)->dequeue_bulk(itemFirst, max);
                        }
                }
-               
+
                inline ProducerBase* next_prod() const { return 
static_cast<ProducerBase*>(next); }
-               
+
                inline size_t size_approx() const
                {
                        auto tail = tailIndex.load(std::memory_order_relaxed);
                        auto head = headIndex.load(std::memory_order_relaxed);
                        return details::circular_less_than(head, tail) ? 
static_cast<size_t>(tail - head) : 0;
                }
-               
+
                inline index_t getTail() const { return 
tailIndex.load(std::memory_order_relaxed); }
        protected:
                std::atomic<index_t> tailIndex;         // Where to enqueue to 
next
                std::atomic<index_t> headIndex;         // Where to dequeue 
from next
-               
+
                std::atomic<index_t> dequeueOptimisticCount;
                std::atomic<index_t> dequeueOvercommit;
-               
+
                Block* tailBlock;
-               
+
        public:
                bool isExplicit;
                ConcurrentQueue* parent;
-               
+
        protected:
-#if MCDBGQ_TRACKMEM
+#ifdef MCDBGQ_TRACKMEM
                friend struct MemStats;
 #endif
        };
-       
-       
+
+
        ///////////////////////////
        // Explicit queue
        ///////////////////////////
-               
+
        struct ExplicitProducer : public ProducerBase
        {
-               explicit ExplicitProducer(ConcurrentQueue* parent) :
-                       ProducerBase(parent, true),
+               explicit ExplicitProducer(ConcurrentQueue* parent_) :
+                       ProducerBase(parent_, true),
                        blockIndex(nullptr),
                        pr_blockIndexSlotsUsed(0),
                        pr_blockIndexSize(EXPLICIT_INITIAL_INDEX_SIZE >> 1),
@@ -1694,14 +1777,14 @@ private:
                        pr_blockIndexEntries(nullptr),
                        pr_blockIndexRaw(nullptr)
                {
-                       size_t poolBasedIndexSize = 
details::ceil_to_pow_2(parent->initialBlockPoolSize) >> 1;
+                       size_t poolBasedIndexSize = 
details::ceil_to_pow_2(parent_->initialBlockPoolSize) >> 1;
                        if (poolBasedIndexSize > pr_blockIndexSize) {
                                pr_blockIndexSize = poolBasedIndexSize;
                        }
-                       
+
                        new_block_index(0);             // This creates an 
index with double the number of current entries, i.e. 
EXPLICIT_INITIAL_INDEX_SIZE
                }
-               
+
                ~ExplicitProducer()
                {
                        // Destruct any elements not yet dequeued.
@@ -1720,7 +1803,7 @@ private:
                                        
assert(details::circular_less_than<index_t>(pr_blockIndexEntries[i].base, 
this->headIndex.load(std::memory_order_relaxed)));
                                        halfDequeuedBlock = 
pr_blockIndexEntries[i].block;
                                }
-                               
+
                                // Start at the head block (note the first line 
in the loop gives us the head from the tail on the first iteration)
                                auto block = this->tailBlock;
                                do {
@@ -1728,12 +1811,12 @@ private:
                                        if 
(block->ConcurrentQueue::Block::template is_empty<explicit_context>()) {
                                                continue;
                                        }
-                                       
+
                                        size_t i = 0;   // Offset into block
                                        if (block == halfDequeuedBlock) {
                                                i = 
static_cast<size_t>(this->headIndex.load(std::memory_order_relaxed) & 
static_cast<index_t>(BLOCK_SIZE - 1));
                                        }
-                                       
+
                                        // Walk through all the items in the 
block; if this is the tail block, we need to stop when we reach the tail index
                                        auto lastValidIndex = 
(this->tailIndex.load(std::memory_order_relaxed) & 
static_cast<index_t>(BLOCK_SIZE - 1)) == 0 ? BLOCK_SIZE : 
static_cast<size_t>(this->tailIndex.load(std::memory_order_relaxed) & 
static_cast<index_t>(BLOCK_SIZE - 1));
                                        while (i != BLOCK_SIZE && (block != 
this->tailBlock || i != lastValidIndex)) {
@@ -1741,22 +1824,17 @@ private:
                                        }
                                } while (block != this->tailBlock);
                        }
-                       
+
                        // Destroy all blocks that we own
                        if (this->tailBlock != nullptr) {
                                auto block = this->tailBlock;
                                do {
                                        auto nextBlock = block->next;
-                                       if (block->dynamicallyAllocated) {
-                                               destroy(block);
-                                       }
-                                       else {
-                                               
this->parent->add_block_to_free_list(block);
-                                       }
+                                       
this->parent->add_block_to_free_list(block);
                                        block = nextBlock;
                                } while (block != this->tailBlock);
                        }
-                       
+
                        // Destroy the block indices
                        auto header = 
static_cast<BlockIndexHeader*>(pr_blockIndexRaw);
                        while (header != nullptr) {
@@ -1766,7 +1844,7 @@ private:
                                header = prev;
                        }
                }
-               
+
                template<AllocationMode allocMode, typename U>
                inline bool enqueue(U&& element)
                {
@@ -1777,10 +1855,10 @@ private:
                                auto startBlock = this->tailBlock;
                                auto originalBlockIndexSlotsUsed = 
pr_blockIndexSlotsUsed;
                                if (this->tailBlock != nullptr && 
this->tailBlock->next->ConcurrentQueue::Block::template 
is_empty<explicit_context>()) {
-                                       // We can re-use the block ahead of us, 
it's empty!                                     
+                                       // We can re-use the block ahead of us, 
it's empty!
                                        this->tailBlock = this->tailBlock->next;
                                        
this->tailBlock->ConcurrentQueue::Block::template 
reset_empty<explicit_context>();
-                                       
+
                                        // We'll put the block on the block 
index (guaranteed to be room since we're conceptually removing the
                                        // last block from it first -- except 
instead of removing then adding, we can just overwrite).
                                        // Note that there must be a valid 
block index here, since even if allocation failed in the ctor,
@@ -1805,18 +1883,21 @@ private:
                                                // Hmm, the circular block 
index is already full -- we'll need
                                                // to allocate a new index. 
Note pr_blockIndexRaw can only be nullptr if
                                                // the initial allocation 
failed in the constructor.
-                                               
-                                               if (allocMode == CannotAlloc || 
!new_block_index(pr_blockIndexSlotsUsed)) {
+
+                                               MOODYCAMEL_CONSTEXPR_IF 
(allocMode == CannotAlloc) {
+                                                       return false;
+                                               }
+                                               else if 
(!new_block_index(pr_blockIndexSlotsUsed)) {
                                                        return false;
                                                }
                                        }
-                                       
+
                                        // Insert a new block in the circular 
linked list
                                        auto newBlock = 
this->parent->ConcurrentQueue::template requisition_block<allocMode>();
                                        if (newBlock == nullptr) {
                                                return false;
                                        }
-#if MCDBGQ_TRACKMEM
+#ifdef MCDBGQ_TRACKMEM
                                        newBlock->owner = this;
 #endif
                                        
newBlock->ConcurrentQueue::Block::template reset_empty<explicit_context>();
@@ -1830,8 +1911,8 @@ private:
                                        this->tailBlock = newBlock;
                                        ++pr_blockIndexSlotsUsed;
                                }
-                               
-                               if (!MOODYCAMEL_NOEXCEPT_CTOR(T, U, new 
(nullptr) T(std::forward<U>(element)))) {
+
+                               MOODYCAMEL_CONSTEXPR_IF 
(!MOODYCAMEL_NOEXCEPT_CTOR(T, U, new (static_cast<T*>(nullptr)) 
T(std::forward<U>(element)))) {
                                        // The constructor may throw. We want 
the element not to appear in the queue in
                                        // that case (without corrupting the 
queue):
                                        MOODYCAMEL_TRY {
@@ -1849,27 +1930,27 @@ private:
                                        (void)startBlock;
                                        (void)originalBlockIndexSlotsUsed;
                                }
-                               
+
                                // Add block to block index
                                auto& entry = 
blockIndex.load(std::memory_order_relaxed)->entries[pr_blockIndexFront];
                                entry.base = currentTailIndex;
                                entry.block = this->tailBlock;
                                
blockIndex.load(std::memory_order_relaxed)->front.store(pr_blockIndexFront, 
std::memory_order_release);
                                pr_blockIndexFront = (pr_blockIndexFront + 1) & 
(pr_blockIndexSize - 1);
-                               
-                               if (!MOODYCAMEL_NOEXCEPT_CTOR(T, U, new 
(nullptr) T(std::forward<U>(element)))) {
+
+                               MOODYCAMEL_CONSTEXPR_IF 
(!MOODYCAMEL_NOEXCEPT_CTOR(T, U, new (static_cast<T*>(nullptr)) 
T(std::forward<U>(element)))) {
                                        this->tailIndex.store(newTailIndex, 
std::memory_order_release);
                                        return true;
                                }
                        }
-                       
+
                        // Enqueue
                        new ((*this->tailBlock)[currentTailIndex]) 
T(std::forward<U>(element));
-                       
+
                        this->tailIndex.store(newTailIndex, 
std::memory_order_release);
                        return true;
                }
-               
+
                template<typename U>
                bool dequeue(U& element)
                {
@@ -1877,10 +1958,10 @@ private:
                        auto overcommit = 
this->dequeueOvercommit.load(std::memory_order_relaxed);
                        if 
(details::circular_less_than<index_t>(this->dequeueOptimisticCount.load(std::memory_order_relaxed)
 - overcommit, tail)) {
                                // Might be something to dequeue, let's give it 
a try
-                               
+
                                // Note that this if is purely for performance 
purposes in the common case when the queue is
                                // empty and the values are eventually 
consistent -- we may enter here spuriously.
-                               
+
                                // Note that whatever the values of overcommit 
and tail are, they are not going to change (unless we
                                // change them) and must be the same value at 
this point (inside the if) as when the if condition was
                                // evaluated.
@@ -1893,23 +1974,24 @@ private:
                                // unfortunately that can't be shown to be 
correct using only the C++11 standard.
                                // See 
http://stackoverflow.com/questions/18223161/what-are-the-c11-memory-ordering-guarantees-in-this-corner-case
                                
std::atomic_thread_fence(std::memory_order_acquire);
-                               
+
                                // Increment optimistic counter, then check if 
it went over the boundary
                                auto myDequeueCount = 
this->dequeueOptimisticCount.fetch_add(1, std::memory_order_relaxed);
-                               
+
                                // Note that since dequeueOvercommit must be <= 
dequeueOptimisticCount (because dequeueOvercommit is only ever
                                // incremented after dequeueOptimisticCount -- 
this is enforced in the `else` block below), and since we now
                                // have a version of dequeueOptimisticCount 
that is at least as recent as overcommit (due to the release upon
                                // incrementing dequeueOvercommit and the 
acquire above that synchronizes with it), overcommit <= myDequeueCount.
-                               assert(overcommit <= myDequeueCount);
-                               
+                               // However, we can't assert this since both 
dequeueOptimisticCount and dequeueOvercommit may (independently)
+                               // overflow; in such a case, though, the logic 
still holds since the difference between the two is maintained.
+
                                // Note that we reload tail here in case it 
changed; it will be the same value as before or greater, since
                                // this load is sequenced after (happens after) 
the earlier load above. This is supported by read-read
                                // coherency (as defined in the standard), 
explained here: http://en.cppreference.com/w/cpp/atomic/memory_order
                                tail = 
this->tailIndex.load(std::memory_order_acquire);
-                               if 
(details::likely(details::circular_less_than<index_t>(myDequeueCount - 
overcommit, tail))) {
+                               if 
((details::likely)(details::circular_less_than<index_t>(myDequeueCount - 
overcommit, tail))) {
                                        // Guaranteed to be at least one 
element to dequeue!
-                                       
+
                                        // Get the index. Note that since 
there's guaranteed to be at least one element, this
                                        // will never exceed tail. We need to 
do an acquire-release fence here since it's possible
                                        // that whatever condition got us to 
this point was for an earlier enqueued element (that
@@ -1919,21 +2001,21 @@ private:
                                        // place with the more current 
condition (they must have acquired a tail that is at least
                                        // as recent).
                                        auto index = 
this->headIndex.fetch_add(1, std::memory_order_acq_rel);
-                                       
-                                       
+
+
                                        // Determine which block the element is 
in
-                                       
+
                                        auto localBlockIndex = 
blockIndex.load(std::memory_order_acquire);
                                        auto localBlockIndexHead = 
localBlockIndex->front.load(std::memory_order_acquire);
-                                       
+
                                        // We need to be careful here about 
subtracting and dividing because of index wrap-around.
                                        // When an index wraps, we need to 
preserve the sign of the offset when dividing it by the
                                        // block size (in order to get a 
correct signed block count offset in all cases):
                                        auto headBase = 
localBlockIndex->entries[localBlockIndexHead].base;
                                        auto blockBaseIndex = index & 
~static_cast<index_t>(BLOCK_SIZE - 1);
-                                       auto offset = 
static_cast<size_t>(static_cast<typename 
std::make_signed<index_t>::type>(blockBaseIndex - headBase) / BLOCK_SIZE);
+                                       auto offset = 
static_cast<size_t>(static_cast<typename 
std::make_signed<index_t>::type>(blockBaseIndex - headBase) / 
static_cast<typename std::make_signed<index_t>::type>(BLOCK_SIZE));
                                        auto block = 
localBlockIndex->entries[(localBlockIndexHead + offset) & 
(localBlockIndex->size - 1)].block;
-                                       
+
                                        // Dequeue
                                        auto& el = *((*block)[index]);
                                        if (!MOODYCAMEL_NOEXCEPT_ASSIGN(T, T&&, 
element = std::move(el))) {
@@ -1942,22 +2024,22 @@ private:
                                                struct Guard {
                                                        Block* block;
                                                        index_t index;
-                                                       
+
                                                        ~Guard()
                                                        {
                                                                
(*block)[index]->~T();
                                                                
block->ConcurrentQueue::Block::template set_empty<explicit_context>(index);
                                                        }
                                                } guard = { block, index };
-                                               
-                                               element = std::move(el);
+
+                                               element = std::move(el); // NOLINT
                                        }
                                        else {
-                                               element = std::move(el);
-                                               el.~T();
+                                               element = std::move(el); // NOLINT
+                                               el.~T(); // NOLINT
                                                
block->ConcurrentQueue::Block::template set_empty<explicit_context>(index);
                                        }
-                                       
+
                                        return true;
                                }
                                else {
@@ -1965,12 +2047,12 @@ private:
                                        this->dequeueOvercommit.fetch_add(1, 
std::memory_order_release);                // Release so that the fetch_add on 
dequeueOptimisticCount is guaranteed to happen before this write
                                }
                        }
-               
+
                        return false;
                }
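
    The counter scheme the comments above describe can be modeled in
    isolation; a deliberately simplified sketch (plain comparison instead of
    circular_less_than, and without the fences discussed above):

        #include <atomic>
        #include <cstdint>

        std::atomic<std::uint64_t> optimistic{0}, overcommit{0};
        std::atomic<std::uint64_t> tail{0}, head{0};

        bool try_dequeue_sketch() {
            std::uint64_t over = overcommit.load(std::memory_order_relaxed);
            std::uint64_t mine = optimistic.fetch_add(1, std::memory_order_relaxed);
            if (mine - over < tail.load(std::memory_order_acquire)) {
                head.fetch_add(1, std::memory_order_acq_rel);  // claim an index
                return true;   // guaranteed at least one element at that index
            }
            overcommit.fetch_add(1, std::memory_order_release);  // undo the claim
            return false;
        }
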
-               
+
                template<AllocationMode allocMode, typename It>
-               bool enqueue_bulk(It itemFirst, size_t count)
+               bool MOODYCAMEL_NO_TSAN enqueue_bulk(It itemFirst, size_t count)
                {
                        // First, we need to make sure we have enough room to 
enqueue all of the elements;
                        // this means pre-allocating blocks and putting them in 
the block index (but only if
@@ -1979,9 +2061,9 @@ private:
                        auto startBlock = this->tailBlock;
                        auto originalBlockIndexFront = pr_blockIndexFront;
                        auto originalBlockIndexSlotsUsed = 
pr_blockIndexSlotsUsed;
-                       
+
                        Block* firstAllocatedBlock = nullptr;
-                       
+
                        // Figure out how many blocks we'll need to allocate, 
and do so
                        size_t blockBaseDiff = ((startTailIndex + count - 1) & 
~static_cast<index_t>(BLOCK_SIZE - 1)) - ((startTailIndex - 1) & 
~static_cast<index_t>(BLOCK_SIZE - 1));
                        index_t currentTailIndex = (startTailIndex - 1) & 
~static_cast<index_t>(BLOCK_SIZE - 1);
@@ -1990,39 +2072,46 @@ private:
                                while (blockBaseDiff > 0 && this->tailBlock != 
nullptr && this->tailBlock->next != firstAllocatedBlock && 
this->tailBlock->next->ConcurrentQueue::Block::template 
is_empty<explicit_context>()) {
                                        blockBaseDiff -= 
static_cast<index_t>(BLOCK_SIZE);
                                        currentTailIndex += 
static_cast<index_t>(BLOCK_SIZE);
-                                       
+
                                        this->tailBlock = this->tailBlock->next;
                                        firstAllocatedBlock = 
firstAllocatedBlock == nullptr ? this->tailBlock : firstAllocatedBlock;
-                                       
+
                                        auto& entry = 
blockIndex.load(std::memory_order_relaxed)->entries[pr_blockIndexFront];
                                        entry.base = currentTailIndex;
                                        entry.block = this->tailBlock;
                                        pr_blockIndexFront = 
(pr_blockIndexFront + 1) & (pr_blockIndexSize - 1);
                                }
-                               
+
                                // Now allocate as many blocks as necessary 
from the block pool
                                while (blockBaseDiff > 0) {
                                        blockBaseDiff -= 
static_cast<index_t>(BLOCK_SIZE);
                                        currentTailIndex += 
static_cast<index_t>(BLOCK_SIZE);
-                                       
+
                                        auto head = 
this->headIndex.load(std::memory_order_relaxed);
                                        
assert(!details::circular_less_than<index_t>(currentTailIndex, head));
                                        bool full = 
!details::circular_less_than<index_t>(head, currentTailIndex + BLOCK_SIZE) || 
(MAX_SUBQUEUE_SIZE != details::const_numeric_max<size_t>::value && 
(MAX_SUBQUEUE_SIZE == 0 || MAX_SUBQUEUE_SIZE - BLOCK_SIZE < currentTailIndex - 
head));
                                        if (pr_blockIndexRaw == nullptr || 
pr_blockIndexSlotsUsed == pr_blockIndexSize || full) {
-                                               if (allocMode == CannotAlloc || 
full || !new_block_index(originalBlockIndexSlotsUsed)) {
+                                               MOODYCAMEL_CONSTEXPR_IF 
(allocMode == CannotAlloc) {
                                                        // Failed to allocate, 
undo changes (but keep injected blocks)
                                                        pr_blockIndexFront = 
originalBlockIndexFront;
                                                        pr_blockIndexSlotsUsed 
= originalBlockIndexSlotsUsed;
                                                        this->tailBlock = 
startBlock == nullptr ? firstAllocatedBlock : startBlock;
                                                        return false;
                                                }
-                                               
+                                               else if (full || 
!new_block_index(originalBlockIndexSlotsUsed)) {
+                                                       // Failed to allocate, 
undo changes (but keep injected blocks)
+                                                       pr_blockIndexFront = 
originalBlockIndexFront;
+                                                       pr_blockIndexSlotsUsed 
= originalBlockIndexSlotsUsed;
+                                                       this->tailBlock = 
startBlock == nullptr ? firstAllocatedBlock : startBlock;
+                                                       return false;
+                                               }
+
                                                // pr_blockIndexFront is 
updated inside new_block_index, so we need to
                                                // update our fallback value 
too (since we keep the new index even if we
                                                // later fail)
                                                originalBlockIndexFront = 
originalBlockIndexSlotsUsed;
                                        }
-                                       
+
                                        // Insert a new block in the circular 
linked list
                                        auto newBlock = 
this->parent->ConcurrentQueue::template requisition_block<allocMode>();
                                        if (newBlock == nullptr) {
@@ -2031,8 +2120,8 @@ private:
                                                this->tailBlock = startBlock == 
nullptr ? firstAllocatedBlock : startBlock;
                                                return false;
                                        }
-                                       
-#if MCDBGQ_TRACKMEM
+
+#ifdef MCDBGQ_TRACKMEM
                                        newBlock->owner = this;
 #endif
                                        
newBlock->ConcurrentQueue::Block::template set_all_empty<explicit_context>();
@@ -2045,15 +2134,15 @@ private:
                                        }
                                        this->tailBlock = newBlock;
                                        firstAllocatedBlock = 
firstAllocatedBlock == nullptr ? this->tailBlock : firstAllocatedBlock;
-                                       
+
                                        ++pr_blockIndexSlotsUsed;
-                                       
+
                                        auto& entry = 
blockIndex.load(std::memory_order_relaxed)->entries[pr_blockIndexFront];
                                        entry.base = currentTailIndex;
                                        entry.block = this->tailBlock;
                                        pr_blockIndexFront = 
(pr_blockIndexFront + 1) & (pr_blockIndexSize - 1);
                                }
-                               
+
                                // Excellent, all allocations succeeded. Reset 
each block's emptiness before we fill them up, and
                                // publish the new block index front
                                auto block = firstAllocatedBlock;
@@ -2064,12 +2153,12 @@ private:
                                        }
                                        block = block->next;
                                }
-                               
-                               if (MOODYCAMEL_NOEXCEPT_CTOR(T, 
decltype(*itemFirst), new (nullptr) T(details::deref_noexcept(itemFirst)))) {
+
+                               MOODYCAMEL_CONSTEXPR_IF 
(MOODYCAMEL_NOEXCEPT_CTOR(T, decltype(*itemFirst), new 
(static_cast<T*>(nullptr)) T(details::deref_noexcept(itemFirst)))) {
                                        
blockIndex.load(std::memory_order_relaxed)->front.store((pr_blockIndexFront - 
1) & (pr_blockIndexSize - 1), std::memory_order_release);
                                }
                        }
-                       
+
                        // Enqueue, one block at a time
                        index_t newTailIndex = startTailIndex + 
static_cast<index_t>(count);
                        currentTailIndex = startTailIndex;
@@ -2080,11 +2169,11 @@ private:
                                this->tailBlock = firstAllocatedBlock;
                        }
                        while (true) {
-                               auto stopIndex = (currentTailIndex & 
~static_cast<index_t>(BLOCK_SIZE - 1)) + static_cast<index_t>(BLOCK_SIZE);
+                               index_t stopIndex = (currentTailIndex & 
~static_cast<index_t>(BLOCK_SIZE - 1)) + static_cast<index_t>(BLOCK_SIZE);
                                if 
(details::circular_less_than<index_t>(newTailIndex, stopIndex)) {
                                        stopIndex = newTailIndex;
                                }
-                               if (MOODYCAMEL_NOEXCEPT_CTOR(T, 
decltype(*itemFirst), new (nullptr) T(details::deref_noexcept(itemFirst)))) {
+                               MOODYCAMEL_CONSTEXPR_IF 
(MOODYCAMEL_NOEXCEPT_CTOR(T, decltype(*itemFirst), new 
(static_cast<T*>(nullptr)) T(details::deref_noexcept(itemFirst)))) {
                                        while (currentTailIndex != stopIndex) {
                                                new 
((*this->tailBlock)[currentTailIndex++]) T(*itemFirst++);
                                        }
@@ -2099,7 +2188,7 @@ private:
                                                        // may only define a 
(noexcept) move constructor, and so calls to the
                                                        // cctor will not 
compile, even if they are in an if branch that will never
                                                        // be executed
-                                                       new 
((*this->tailBlock)[currentTailIndex]) 
T(details::nomove_if<(bool)!MOODYCAMEL_NOEXCEPT_CTOR(T, decltype(*itemFirst), 
new (nullptr) T(details::deref_noexcept(itemFirst)))>::eval(*itemFirst));
+                                                       new 
((*this->tailBlock)[currentTailIndex]) 
T(details::nomove_if<!MOODYCAMEL_NOEXCEPT_CTOR(T, decltype(*itemFirst), new 
(static_cast<T*>(nullptr)) 
T(details::deref_noexcept(itemFirst)))>::eval(*itemFirst));
                                                        ++currentTailIndex;
                                                        ++itemFirst;
                                                }
@@ -2110,11 +2199,11 @@ private:
                                                // any allocated blocks in our 
linked list for later, though).
                                                auto constructedStopIndex = 
currentTailIndex;
                                                auto lastBlockEnqueued = 
this->tailBlock;
-                                               
+
                                                pr_blockIndexFront = 
originalBlockIndexFront;
                                                pr_blockIndexSlotsUsed = 
originalBlockIndexSlotsUsed;
                                                this->tailBlock = startBlock == 
nullptr ? firstAllocatedBlock : startBlock;
-                                               
+
                                                if 
(!details::is_trivially_destructible<T>::value) {
                                                        auto block = startBlock;
                                                        if ((startTailIndex & 
static_cast<index_t>(BLOCK_SIZE - 1)) == 0) {
@@ -2138,22 +2227,23 @@ private:
                                                MOODYCAMEL_RETHROW;
                                        }
                                }
-                               
+
                                if (this->tailBlock == endBlock) {
                                        assert(currentTailIndex == 
newTailIndex);
                                        break;
                                }
                                this->tailBlock = this->tailBlock->next;
                        }
-                       
-                       if (!MOODYCAMEL_NOEXCEPT_CTOR(T, decltype(*itemFirst), 
new (nullptr) T(details::deref_noexcept(itemFirst))) && firstAllocatedBlock != 
nullptr) {
-                               
blockIndex.load(std::memory_order_relaxed)->front.store((pr_blockIndexFront - 
1) & (pr_blockIndexSize - 1), std::memory_order_release);
+
+                       MOODYCAMEL_CONSTEXPR_IF (!MOODYCAMEL_NOEXCEPT_CTOR(T, 
decltype(*itemFirst), new (static_cast<T*>(nullptr)) 
T(details::deref_noexcept(itemFirst)))) {
+                               if (firstAllocatedBlock != nullptr)
+                                       
blockIndex.load(std::memory_order_relaxed)->front.store((pr_blockIndexFront - 
1) & (pr_blockIndexSize - 1), std::memory_order_release);
                        }
-                       
+
                        this->tailIndex.store(newTailIndex, 
std::memory_order_release);
                        return true;
                }
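
    The pre-allocation step at the top of enqueue_bulk reduces to index
    arithmetic; spelled out with hypothetical names (the same expression as
    blockBaseDiff above, divided into whole blocks):

        // Number of BLOCK_SIZE-aligned boundaries crossed when appending
        // `count` items starting at index `start`:
        index_t blocks =
            (((start + count - 1) & ~static_cast<index_t>(BLOCK_SIZE - 1))
           - ((start - 1) & ~static_cast<index_t>(BLOCK_SIZE - 1)))
            / static_cast<index_t>(BLOCK_SIZE);
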
-               
+
                template<typename It>
                size_t dequeue_bulk(It& itemFirst, size_t max)
                {
@@ -2163,10 +2253,9 @@ private:
                        if (details::circular_less_than<size_t>(0, 
desiredCount)) {
                                desiredCount = desiredCount < max ? 
desiredCount : max;
                                
std::atomic_thread_fence(std::memory_order_acquire);
-                               
+
                                auto myDequeueCount = 
this->dequeueOptimisticCount.fetch_add(desiredCount, std::memory_order_relaxed);
-                               assert(overcommit <= myDequeueCount);
-                               
+
                                tail = 
this->tailIndex.load(std::memory_order_acquire);
                                auto actualCount = static_cast<size_t>(tail - 
(myDequeueCount - overcommit));
                                if (details::circular_less_than<size_t>(0, 
actualCount)) {
@@ -2174,25 +2263,25 @@ private:
                                        if (actualCount < desiredCount) {
                                                
this->dequeueOvercommit.fetch_add(desiredCount - actualCount, 
std::memory_order_release);
                                        }
-                                       
+
                                        // Get the first index. Note that since 
there's guaranteed to be at least actualCount elements, this
                                        // will never exceed tail.
                                        auto firstIndex = 
this->headIndex.fetch_add(actualCount, std::memory_order_acq_rel);
-                                       
+
                                        // Determine which block the first 
element is in
                                        auto localBlockIndex = 
blockIndex.load(std::memory_order_acquire);
                                        auto localBlockIndexHead = 
localBlockIndex->front.load(std::memory_order_acquire);
-                                       
+
                                        auto headBase = 
localBlockIndex->entries[localBlockIndexHead].base;
                                        auto firstBlockBaseIndex = firstIndex & 
~static_cast<index_t>(BLOCK_SIZE - 1);
-                                       auto offset = 
static_cast<size_t>(static_cast<typename 
std::make_signed<index_t>::type>(firstBlockBaseIndex - headBase) / BLOCK_SIZE);
+                                       auto offset = 
static_cast<size_t>(static_cast<typename 
std::make_signed<index_t>::type>(firstBlockBaseIndex - headBase) / 
static_cast<typename std::make_signed<index_t>::type>(BLOCK_SIZE));
                                        auto indexIndex = (localBlockIndexHead 
+ offset) & (localBlockIndex->size - 1);
-                                       
+
                                        // Iterate the blocks and dequeue
                                        auto index = firstIndex;
                                        do {
                                                auto firstIndexInBlock = index;
-                                               auto endIndex = (index & 
~static_cast<index_t>(BLOCK_SIZE - 1)) + static_cast<index_t>(BLOCK_SIZE);
+                                               index_t endIndex = (index & 
~static_cast<index_t>(BLOCK_SIZE - 1)) + static_cast<index_t>(BLOCK_SIZE);
                                                endIndex = 
details::circular_less_than<index_t>(firstIndex + 
static_cast<index_t>(actualCount), endIndex) ? firstIndex + 
static_cast<index_t>(actualCount) : endIndex;
                                                auto block = 
localBlockIndex->entries[indexIndex].block;
                                                if 
(MOODYCAMEL_NOEXCEPT_ASSIGN(T, T&&, details::deref_noexcept(itemFirst) = 
std::move((*(*block)[index])))) {
@@ -2224,19 +2313,19 @@ private:
                                                                        }
                                                                        
block->ConcurrentQueue::Block::template 
set_many_empty<explicit_context>(firstIndexInBlock, 
static_cast<size_t>(endIndex - firstIndexInBlock));
                                                                        
indexIndex = (indexIndex + 1) & (localBlockIndex->size - 1);
-                                                                       
+
                                                                        
firstIndexInBlock = index;
                                                                        
endIndex = (index & ~static_cast<index_t>(BLOCK_SIZE - 1)) + 
static_cast<index_t>(BLOCK_SIZE);
                                                                        
endIndex = details::circular_less_than<index_t>(firstIndex + 
static_cast<index_t>(actualCount), endIndex) ? firstIndex + 
static_cast<index_t>(actualCount) : endIndex;
                                                                } while (index 
!= firstIndex + actualCount);
-                                                               
+
                                                                
MOODYCAMEL_RETHROW;
                                                        }
                                                }
                                                
block->ConcurrentQueue::Block::template 
set_many_empty<explicit_context>(firstIndexInBlock, 
static_cast<size_t>(endIndex - firstIndexInBlock));
                                                indexIndex = (indexIndex + 1) & 
(localBlockIndex->size - 1);
                                        } while (index != firstIndex + 
actualCount);
-                                       
+
                                        return actualCount;
                                }
                                else {
@@ -2244,17 +2333,17 @@ private:
                                        
this->dequeueOvercommit.fetch_add(desiredCount, std::memory_order_release);
                                }
                        }
-                       
+
                        return 0;
                }
-               
+
        private:
                struct BlockIndexEntry
                {
                        index_t base;
                        Block* block;
                };
-               
+
                struct BlockIndexHeader
                {
                        size_t size;
@@ -2262,12 +2351,12 @@ private:
                        BlockIndexEntry* entries;
                        void* prev;
                };
-               
-               
+
+
                bool new_block_index(size_t numberOfFilledSlotsToExpose)
                {
                        auto prevBlockSizeMask = pr_blockIndexSize - 1;
-                       
+
                        // Create the new block
                        pr_blockIndexSize <<= 1;
                        auto newRawPtr = 
static_cast<char*>((Traits::malloc)(sizeof(BlockIndexHeader) + 
std::alignment_of<BlockIndexEntry>::value - 1 + sizeof(BlockIndexEntry) * 
pr_blockIndexSize));
@@ -2275,9 +2364,9 @@ private:
                                pr_blockIndexSize >>= 1;                // 
Reset to allow graceful retry
                                return false;
                        }
-                       
+
                        auto newBlockIndexEntries = reinterpret_cast<BlockIndexEntry*>(details::align_for<BlockIndexEntry>(newRawPtr + sizeof(BlockIndexHeader)));
-                       
+
                        // Copy in all the old indices, if any
                        size_t j = 0;
                        if (pr_blockIndexSlotsUsed != 0) {
@@ -2287,72 +2376,72 @@ private:
                                        i = (i + 1) & prevBlockSizeMask;
                                } while (i != pr_blockIndexFront);
                        }
-                       
+
                        // Update everything
                        auto header = new (newRawPtr) BlockIndexHeader;
                        header->size = pr_blockIndexSize;
                        header->front.store(numberOfFilledSlotsToExpose - 1, 
std::memory_order_relaxed);
                        header->entries = newBlockIndexEntries;
                        header->prev = pr_blockIndexRaw;                // we 
link the new block to the old one so we can free it later
-                       
+
                        pr_blockIndexFront = j;
                        pr_blockIndexEntries = newBlockIndexEntries;
                        pr_blockIndexRaw = newRawPtr;
                        blockIndex.store(header, std::memory_order_release);
-                       
+
                        return true;
                }
-               
+
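
A note on the function above: the producer-side block index grows by doubling,
copying the live entries, and chaining the old allocation through prev so it
can be freed only at destruction time (consumers may still be reading it). A
minimal sketch of that grow-and-link idea, with hypothetical names and a plain
integer payload standing in for BlockIndexEntry:

    #include <cstddef>
    #include <cstdlib>
    #include <cstring>

    struct IndexHeader {
        std::size_t size;      // always a power of two
        std::size_t* entries;  // the real code co-allocates these after the header
        void* prev;            // older, smaller index kept alive for readers
    };

    // Returns the new header, or nullptr on allocation failure so the caller
    // can retry later (mirroring "reset to allow graceful retry" above).
    IndexHeader* grow_index(IndexHeader* old, std::size_t used) {
        std::size_t newSize = (old != nullptr) ? old->size * 2 : 4;
        auto* header = static_cast<IndexHeader*>(std::malloc(sizeof(IndexHeader)));
        auto* entries = static_cast<std::size_t*>(std::malloc(newSize * sizeof(std::size_t)));
        if (header == nullptr || entries == nullptr) {
            std::free(header);
            std::free(entries);
            return nullptr;
        }
        if (old != nullptr) {
            std::memcpy(entries, old->entries, used * sizeof(std::size_t));  // copy live slots
        }
        header->size = newSize;
        header->entries = entries;
        header->prev = old;  // freed only when the whole chain is torn down
        return header;
    }
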
        private:
                std::atomic<BlockIndexHeader*> blockIndex;
-               
+
                // To be used by producer only -- consumer must use the ones referenced by blockIndex
                size_t pr_blockIndexSlotsUsed;
                size_t pr_blockIndexSize;
                size_t pr_blockIndexFront;              // Next slot (not 
current)
                BlockIndexEntry* pr_blockIndexEntries;
                void* pr_blockIndexRaw;
-               
+
 #ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG
        public:
                ExplicitProducer* nextExplicitProducer;
        private:
 #endif
-               
-#if MCDBGQ_TRACKMEM
+
+#ifdef MCDBGQ_TRACKMEM
                friend struct MemStats;
 #endif
        };
-       
-       
+
+
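
One change that recurs through the rest of this diff: the debug switches
(MCDBGQ_TRACKMEM, MCDBGQ_NOLOCKFREE_*) are now tested with #ifdef instead of
#if. An undefined macro under #if silently evaluates to 0 and trips -Wundef on
some compilers; #ifdef keys purely on whether the flag was defined, which is
how these switches are driven from the build line. Illustration (the macro
name is the header's own, the snippet is not):

    #include <cstdio>

    // Compile with -DMCDBGQ_TRACKMEM to enable; leaving it undefined is now
    // clean even under -Wundef.
    #ifdef MCDBGQ_TRACKMEM
    static const bool kTrackMem = true;
    #else
    static const bool kTrackMem = false;
    #endif

    int main() {
        std::printf("memory tracking compiled in: %s\n", kTrackMem ? "yes" : "no");
        return 0;
    }
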
        //////////////////////////////////
        // Implicit queue
        //////////////////////////////////
-       
+
        struct ImplicitProducer : public ProducerBase
-       {                       
-               ImplicitProducer(ConcurrentQueue* parent) :
-                       ProducerBase(parent, false),
+       {
+               ImplicitProducer(ConcurrentQueue* parent_) :
+                       ProducerBase(parent_, false),
                        nextBlockIndexCapacity(IMPLICIT_INITIAL_INDEX_SIZE),
                        blockIndex(nullptr)
                {
                        new_block_index();
                }
-               
+
                ~ImplicitProducer()
                {
                        // Note that since we're in the destructor, we can assume that all enqueue/dequeue operations
                        // completed already; this means that all undequeued elements are placed contiguously across
                        // contiguous blocks, and that only the first and last remaining blocks can be partially
                        // empty (all other remaining blocks must be completely full).
-                       
+
 #ifdef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED
                        // Unregister ourselves for thread termination 
notification
                        if (!this->inactive.load(std::memory_order_relaxed)) {
                                
details::ThreadExitNotifier::unsubscribe(&threadExitListener);
                        }
 #endif
-                       
+
                        // Destroy all remaining elements!
                        auto tail = 
this->tailIndex.load(std::memory_order_relaxed);
                        auto index = 
this->headIndex.load(std::memory_order_relaxed);
@@ -2365,10 +2454,10 @@ private:
                                                // Free the old block
                                                
this->parent->add_block_to_free_list(block);
                                        }
-                                       
+
                                        block = 
get_block_index_entry_for_index(index)->value.load(std::memory_order_relaxed);
                                }
-                               
+
                                ((*block)[index])->~T();
                                ++index;
                        }
@@ -2378,7 +2467,7 @@ private:
                        if (this->tailBlock != nullptr && (forceFreeLastBlock 
|| (tail & static_cast<index_t>(BLOCK_SIZE - 1)) != 0)) {
                                
this->parent->add_block_to_free_list(this->tailBlock);
                        }
-                       
+
                        // Destroy block index
                        auto localBlockIndex = 
blockIndex.load(std::memory_order_relaxed);
                        if (localBlockIndex != nullptr) {
@@ -2393,7 +2482,7 @@ private:
                                } while (localBlockIndex != nullptr);
                        }
                }
-               
+
                template<AllocationMode allocMode, typename U>
                inline bool enqueue(U&& element)
                {
@@ -2406,7 +2495,7 @@ private:
                                if (!details::circular_less_than<index_t>(head, 
currentTailIndex + BLOCK_SIZE) || (MAX_SUBQUEUE_SIZE != 
details::const_numeric_max<size_t>::value && (MAX_SUBQUEUE_SIZE == 0 || 
MAX_SUBQUEUE_SIZE - BLOCK_SIZE < currentTailIndex - head))) {
                                        return false;
                                }
-#if MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX
+#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX
                                debug::DebugLock lock(mutex);
 #endif
                                // Find out where we'll be inserting this block 
in the block index
@@ -2414,7 +2503,7 @@ private:
                                if 
(!insert_block_index_entry<allocMode>(idxEntry, currentTailIndex)) {
                                        return false;
                                }
-                               
+
                                // Get hold of a new block
                                auto newBlock = 
this->parent->ConcurrentQueue::template requisition_block<allocMode>();
                                if (newBlock == nullptr) {
@@ -2422,12 +2511,12 @@ private:
                                        idxEntry->value.store(nullptr, 
std::memory_order_relaxed);
                                        return false;
                                }
-#if MCDBGQ_TRACKMEM
+#ifdef MCDBGQ_TRACKMEM
                                newBlock->owner = this;
 #endif
                                newBlock->ConcurrentQueue::Block::template 
reset_empty<implicit_context>();
-                               
-                               if (!MOODYCAMEL_NOEXCEPT_CTOR(T, U, new 
(nullptr) T(std::forward<U>(element)))) {
+
+                               MOODYCAMEL_CONSTEXPR_IF 
(!MOODYCAMEL_NOEXCEPT_CTOR(T, U, new (static_cast<T*>(nullptr)) 
T(std::forward<U>(element)))) {
                                        // May throw, try to insert now before 
we publish the fact that we have this new block
                                        MOODYCAMEL_TRY {
                                                new 
((*newBlock)[currentTailIndex]) T(std::forward<U>(element));
@@ -2439,25 +2528,25 @@ private:
                                                MOODYCAMEL_RETHROW;
                                        }
                                }
-                               
+
                                // Insert the new block into the index
                                idxEntry->value.store(newBlock, 
std::memory_order_relaxed);
-                               
+
                                this->tailBlock = newBlock;
-                               
-                               if (!MOODYCAMEL_NOEXCEPT_CTOR(T, U, new 
(nullptr) T(std::forward<U>(element)))) {
+
+                               MOODYCAMEL_CONSTEXPR_IF 
(!MOODYCAMEL_NOEXCEPT_CTOR(T, U, new (static_cast<T*>(nullptr)) 
T(std::forward<U>(element)))) {
                                        this->tailIndex.store(newTailIndex, 
std::memory_order_release);
                                        return true;
                                }
                        }
-                       
+
                        // Enqueue
                        new ((*this->tailBlock)[currentTailIndex]) 
T(std::forward<U>(element));
-                       
+
                        this->tailIndex.store(newTailIndex, 
std::memory_order_release);
                        return true;
                }
-               
+
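
Two related changes appear in enqueue above: the noexcept probe now does its
placement-new onto static_cast<T*>(nullptr), a typed null that avoids pointer
conversion warnings inside the unevaluated operand, and the branch itself is
MOODYCAMEL_CONSTEXPR_IF, which expands to C++17 "if constexpr" where
available, so the throwing and non-throwing construction paths are selected at
compile time. A reduced sketch of that dispatch, not the queue's actual
rollback logic:

    #include <new>
    #include <type_traits>
    #include <utility>

    template <typename T, typename U>
    void construct_in_slot(void* slot, U&& value) {
        if constexpr (std::is_nothrow_constructible<T, U&&>::value) {
            // Cannot throw: it is safe to publish state before constructing.
            ::new (slot) T(std::forward<U>(value));
        } else {
            // May throw: construct first and be ready to undo anything already
            // published (the real code rewinds its block index here).
            try {
                ::new (slot) T(std::forward<U>(value));
            } catch (...) {
                // rollback of published state would happen here
                throw;
            }
        }
    }
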
                template<typename U>
                bool dequeue(U& element)
                {
@@ -2466,22 +2555,21 @@ private:
                        index_t overcommit = 
this->dequeueOvercommit.load(std::memory_order_relaxed);
                        if 
(details::circular_less_than<index_t>(this->dequeueOptimisticCount.load(std::memory_order_relaxed)
 - overcommit, tail)) {
                                
std::atomic_thread_fence(std::memory_order_acquire);
-                               
+
                                index_t myDequeueCount = 
this->dequeueOptimisticCount.fetch_add(1, std::memory_order_relaxed);
-                               assert(overcommit <= myDequeueCount);
                                tail = 
this->tailIndex.load(std::memory_order_acquire);
-                               if 
(details::likely(details::circular_less_than<index_t>(myDequeueCount - 
overcommit, tail))) {
+                               if 
((details::likely)(details::circular_less_than<index_t>(myDequeueCount - 
overcommit, tail))) {
                                        index_t index = 
this->headIndex.fetch_add(1, std::memory_order_acq_rel);
-                                       
+
                                        // Determine which block the element is 
in
                                        auto entry = 
get_block_index_entry_for_index(index);
-                                       
+
                                        // Dequeue
                                        auto block = 
entry->value.load(std::memory_order_relaxed);
                                        auto& el = *((*block)[index]);
-                                       
+
                                        if (!MOODYCAMEL_NOEXCEPT_ASSIGN(T, T&&, 
element = std::move(el))) {
-#if MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX
+#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX
                                                // Note: Acquiring the mutex 
with every dequeue instead of only when a block
                                                // is released is very 
sub-optimal, but it is, after all, purely debug code.
                                                debug::DebugLock 
lock(producer->mutex);
@@ -2491,7 +2579,7 @@ private:
                                                        index_t index;
                                                        BlockIndexEntry* entry;
                                                        ConcurrentQueue* parent;
-                                                       
+
                                                        ~Guard()
                                                        {
                                                                
(*block)[index]->~T();
@@ -2501,16 +2589,16 @@ private:
                                                                }
                                                        }
                                                } guard = { block, index, 
entry, this->parent };
-                                               
-                                               element = std::move(el);
+
+                                               element = std::move(el); // 
NOLINT
                                        }
                                        else {
-                                               element = std::move(el);
-                                               el.~T();
-                                       
+                                               element = std::move(el); // 
NOLINT
+                                               el.~T(); // NOLINT
+
                                                if 
(block->ConcurrentQueue::Block::template set_empty<implicit_context>(index)) {
                                                        {
-#if MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX
+#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX
                                                                
debug::DebugLock lock(mutex);
 #endif
                                                                // Add the 
block back into the global free pool (and remove from block index)
@@ -2519,45 +2607,49 @@ private:
                                                        
this->parent->add_block_to_free_list(block);            // releases the above 
store
                                                }
                                        }
-                                       
+
                                        return true;
                                }
                                else {
                                        this->dequeueOvercommit.fetch_add(1, 
std::memory_order_release);
                                }
                        }
-               
+
                        return false;
                }
-               
+
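
The dequeue above (and its bulk variant below) is optimistic: it bumps
dequeueOptimisticCount first, re-checks against the tail, and on losing the
race records the failure in dequeueOvercommit instead of decrementing. The
removed assert(overcommit <= myDequeueCount) is consistent with that model:
the two counters are loaded at different times under relaxed ordering, so the
inequality need not hold at the moment of observation. A simplified model of
the accounting, ignoring index wraparound (the real code compares via
circular_less_than):

    #include <atomic>
    #include <cstdint>

    struct DequeueCounters {
        std::atomic<std::uint64_t> tail{0};        // elements ever enqueued
        std::atomic<std::uint64_t> optimistic{0};  // dequeue attempts claimed
        std::atomic<std::uint64_t> overcommit{0};  // claims that found nothing

        bool try_claim_one() {
            std::uint64_t over = overcommit.load(std::memory_order_relaxed);
            std::uint64_t mine = optimistic.fetch_add(1, std::memory_order_relaxed);
            if (mine - over < tail.load(std::memory_order_acquire)) {
                return true;  // a real queue now advances headIndex and reads the slot
            }
            overcommit.fetch_add(1, std::memory_order_release);  // cancel the claim
            return false;
        }
    };
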
+#ifdef _MSC_VER
+#pragma warning(push)
+#pragma warning(disable: 4706)  // assignment within conditional expression
+#endif
                template<AllocationMode allocMode, typename It>
                bool enqueue_bulk(It itemFirst, size_t count)
                {
                        // First, we need to make sure we have enough room to 
enqueue all of the elements;
                        // this means pre-allocating blocks and putting them in 
the block index (but only if
                        // all the allocations succeeded).
-                       
+
                        // Note that the tailBlock we start off with may not be 
owned by us any more;
                        // this happens if it was filled up exactly to the top 
(setting tailIndex to
                        // the first index of the next block which is not yet 
allocated), then dequeued
                        // completely (putting it on the free list) before we 
enqueue again.
-                       
+
                        index_t startTailIndex = 
this->tailIndex.load(std::memory_order_relaxed);
                        auto startBlock = this->tailBlock;
                        Block* firstAllocatedBlock = nullptr;
                        auto endBlock = this->tailBlock;
-                       
+
                        // Figure out how many blocks we'll need to allocate, 
and do so
                        size_t blockBaseDiff = ((startTailIndex + count - 1) & 
~static_cast<index_t>(BLOCK_SIZE - 1)) - ((startTailIndex - 1) & 
~static_cast<index_t>(BLOCK_SIZE - 1));
                        index_t currentTailIndex = (startTailIndex - 1) & 
~static_cast<index_t>(BLOCK_SIZE - 1);
                        if (blockBaseDiff > 0) {
-#if MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX
+#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX
                                debug::DebugLock lock(mutex);
 #endif
                                do {
                                        blockBaseDiff -= 
static_cast<index_t>(BLOCK_SIZE);
                                        currentTailIndex += 
static_cast<index_t>(BLOCK_SIZE);
-                                       
+
                                        // Find out where we'll be inserting 
this block in the block index
                                        BlockIndexEntry* idxEntry = nullptr;  
// initialization here unnecessary but compiler can't always tell
                                        Block* newBlock;
@@ -2565,6 +2657,7 @@ private:
                                        auto head = 
this->headIndex.load(std::memory_order_relaxed);
                                        
assert(!details::circular_less_than<index_t>(currentTailIndex, head));
                                        bool full = 
!details::circular_less_than<index_t>(head, currentTailIndex + BLOCK_SIZE) || 
(MAX_SUBQUEUE_SIZE != details::const_numeric_max<size_t>::value && 
(MAX_SUBQUEUE_SIZE == 0 || MAX_SUBQUEUE_SIZE - BLOCK_SIZE < currentTailIndex - 
head));
+
                                        if (full || !(indexInserted = 
insert_block_index_entry<allocMode>(idxEntry, currentTailIndex)) || (newBlock = 
this->parent->ConcurrentQueue::template requisition_block<allocMode>()) == 
nullptr) {
                                                // Index allocation or block 
allocation failed; revert any other allocations
                                                // and index insertions done so 
far for this operation
@@ -2581,19 +2674,19 @@ private:
                                                }
                                                
this->parent->add_blocks_to_free_list(firstAllocatedBlock);
                                                this->tailBlock = startBlock;
-                                               
+
                                                return false;
                                        }
-                                       
-#if MCDBGQ_TRACKMEM
+
+#ifdef MCDBGQ_TRACKMEM
                                        newBlock->owner = this;
 #endif
                                        
newBlock->ConcurrentQueue::Block::template reset_empty<implicit_context>();
                                        newBlock->next = nullptr;
-                                       
+
                                        // Insert the new block into the index
                                        idxEntry->value.store(newBlock, 
std::memory_order_relaxed);
-                                       
+
                                        // Store the chain of blocks so that we 
can undo if later allocations fail,
                                        // and so that we can find the blocks 
when we do the actual enqueueing
                                        if ((startTailIndex & 
static_cast<index_t>(BLOCK_SIZE - 1)) != 0 || firstAllocatedBlock != nullptr) {
@@ -2605,7 +2698,7 @@ private:
                                        firstAllocatedBlock = 
firstAllocatedBlock == nullptr ? newBlock : firstAllocatedBlock;
                                } while (blockBaseDiff > 0);
                        }
-                       
+
                        // Enqueue, one block at a time
                        index_t newTailIndex = startTailIndex + 
static_cast<index_t>(count);
                        currentTailIndex = startTailIndex;
@@ -2615,11 +2708,11 @@ private:
                                this->tailBlock = firstAllocatedBlock;
                        }
                        while (true) {
-                               auto stopIndex = (currentTailIndex & 
~static_cast<index_t>(BLOCK_SIZE - 1)) + static_cast<index_t>(BLOCK_SIZE);
+                               index_t stopIndex = (currentTailIndex & 
~static_cast<index_t>(BLOCK_SIZE - 1)) + static_cast<index_t>(BLOCK_SIZE);
                                if 
(details::circular_less_than<index_t>(newTailIndex, stopIndex)) {
                                        stopIndex = newTailIndex;
                                }
-                               if (MOODYCAMEL_NOEXCEPT_CTOR(T, 
decltype(*itemFirst), new (nullptr) T(details::deref_noexcept(itemFirst)))) {
+                               MOODYCAMEL_CONSTEXPR_IF 
(MOODYCAMEL_NOEXCEPT_CTOR(T, decltype(*itemFirst), new 
(static_cast<T*>(nullptr)) T(details::deref_noexcept(itemFirst)))) {
                                        while (currentTailIndex != stopIndex) {
                                                new 
((*this->tailBlock)[currentTailIndex++]) T(*itemFirst++);
                                        }
@@ -2627,7 +2720,7 @@ private:
                                else {
                                        MOODYCAMEL_TRY {
                                                while (currentTailIndex != 
stopIndex) {
-                                                       new 
((*this->tailBlock)[currentTailIndex]) 
T(details::nomove_if<(bool)!MOODYCAMEL_NOEXCEPT_CTOR(T, decltype(*itemFirst), 
new (nullptr) T(details::deref_noexcept(itemFirst)))>::eval(*itemFirst));
+                                                       new 
((*this->tailBlock)[currentTailIndex]) 
T(details::nomove_if<!MOODYCAMEL_NOEXCEPT_CTOR(T, decltype(*itemFirst), new 
(static_cast<T*>(nullptr)) 
T(details::deref_noexcept(itemFirst)))>::eval(*itemFirst));
                                                        ++currentTailIndex;
                                                        ++itemFirst;
                                                }
@@ -2635,7 +2728,7 @@ private:
                                        MOODYCAMEL_CATCH (...) {
                                                auto constructedStopIndex = 
currentTailIndex;
                                                auto lastBlockEnqueued = 
this->tailBlock;
-                                               
+
                                                if 
(!details::is_trivially_destructible<T>::value) {
                                                        auto block = startBlock;
                                                        if ((startTailIndex & 
static_cast<index_t>(BLOCK_SIZE - 1)) == 0) {
@@ -2656,7 +2749,7 @@ private:
                                                                block = 
block->next;
                                                        }
                                                }
-                                               
+
                                                currentTailIndex = 
(startTailIndex - 1) & ~static_cast<index_t>(BLOCK_SIZE - 1);
                                                for (auto block = 
firstAllocatedBlock; block != nullptr; block = block->next) {
                                                        currentTailIndex += 
static_cast<index_t>(BLOCK_SIZE);
@@ -2669,7 +2762,7 @@ private:
                                                MOODYCAMEL_RETHROW;
                                        }
                                }
-                               
+
                                if (this->tailBlock == endBlock) {
                                        assert(currentTailIndex == 
newTailIndex);
                                        break;
@@ -2679,7 +2772,10 @@ private:
                        this->tailIndex.store(newTailIndex, 
std::memory_order_release);
                        return true;
                }
-               
+#ifdef _MSC_VER
+#pragma warning(pop)
+#endif
+
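
The new push/pop pragmas scope MSVC's C4706 (assignment within conditional
expression) to enqueue_bulk alone, whose failure check deliberately assigns
indexInserted and newBlock inside a single condition. The guarded pattern in
isolation, with a hypothetical compute():

    #ifdef _MSC_VER
    #pragma warning(push)
    #pragma warning(disable: 4706)  // assignment within conditional expression
    #endif

    int compute();  // assumed to exist for the illustration

    bool run() {
        int v;
        // Assign and test in one expression -- intentional, so the suppression
        // can be scoped tightly instead of disabled file-wide.
        if ((v = compute()) == 0) {
            return false;
        }
        return v > 0;
    }

    #ifdef _MSC_VER
    #pragma warning(pop)
    #endif
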
                template<typename It>
                size_t dequeue_bulk(It& itemFirst, size_t max)
                {
@@ -2689,10 +2785,9 @@ private:
                        if (details::circular_less_than<size_t>(0, 
desiredCount)) {
                                desiredCount = desiredCount < max ? 
desiredCount : max;
                                
std::atomic_thread_fence(std::memory_order_acquire);
-                               
+
                                auto myDequeueCount = 
this->dequeueOptimisticCount.fetch_add(desiredCount, std::memory_order_relaxed);
-                               assert(overcommit <= myDequeueCount);
-                               
+
                                tail = 
this->tailIndex.load(std::memory_order_acquire);
                                auto actualCount = static_cast<size_t>(tail - 
(myDequeueCount - overcommit));
                                if (details::circular_less_than<size_t>(0, 
actualCount)) {
@@ -2700,20 +2795,20 @@ private:
                                        if (actualCount < desiredCount) {
                                                
this->dequeueOvercommit.fetch_add(desiredCount - actualCount, 
std::memory_order_release);
                                        }
-                                       
+
                                        // Get the first index. Note that since there are guaranteed to be at least actualCount elements, this
                                        // will never exceed tail.
                                        auto firstIndex = 
this->headIndex.fetch_add(actualCount, std::memory_order_acq_rel);
-                                       
+
                                        // Iterate the blocks and dequeue
                                        auto index = firstIndex;
                                        BlockIndexHeader* localBlockIndex;
                                        auto indexIndex = 
get_block_index_index_for_index(index, localBlockIndex);
                                        do {
                                                auto blockStartIndex = index;
-                                               auto endIndex = (index & 
~static_cast<index_t>(BLOCK_SIZE - 1)) + static_cast<index_t>(BLOCK_SIZE);
+                                               index_t endIndex = (index & 
~static_cast<index_t>(BLOCK_SIZE - 1)) + static_cast<index_t>(BLOCK_SIZE);
                                                endIndex = 
details::circular_less_than<index_t>(firstIndex + 
static_cast<index_t>(actualCount), endIndex) ? firstIndex + 
static_cast<index_t>(actualCount) : endIndex;
-                                               
+
                                                auto entry = 
localBlockIndex->index[indexIndex];
                                                auto block = 
entry->value.load(std::memory_order_relaxed);
                                                if 
(MOODYCAMEL_NOEXCEPT_ASSIGN(T, T&&, details::deref_noexcept(itemFirst) = 
std::move((*(*block)[index])))) {
@@ -2741,27 +2836,27 @@ private:
                                                                        while 
(index != endIndex) {
                                                                                
(*block)[index++]->~T();
                                                                        }
-                                                                       
+
                                                                        if 
(block->ConcurrentQueue::Block::template 
set_many_empty<implicit_context>(blockStartIndex, static_cast<size_t>(endIndex 
- blockStartIndex))) {
-#if MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX
+#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX
                                                                                
debug::DebugLock lock(mutex);
 #endif
                                                                                
entry->value.store(nullptr, std::memory_order_relaxed);
                                                                                
this->parent->add_block_to_free_list(block);
                                                                        }
                                                                        
indexIndex = (indexIndex + 1) & (localBlockIndex->capacity - 1);
-                                                                       
+
                                                                        
blockStartIndex = index;
                                                                        
endIndex = (index & ~static_cast<index_t>(BLOCK_SIZE - 1)) + 
static_cast<index_t>(BLOCK_SIZE);
                                                                        
endIndex = details::circular_less_than<index_t>(firstIndex + 
static_cast<index_t>(actualCount), endIndex) ? firstIndex + 
static_cast<index_t>(actualCount) : endIndex;
                                                                } while (index 
!= firstIndex + actualCount);
-                                                               
+
                                                                
MOODYCAMEL_RETHROW;
                                                        }
                                                }
                                                if 
(block->ConcurrentQueue::Block::template 
set_many_empty<implicit_context>(blockStartIndex, static_cast<size_t>(endIndex 
- blockStartIndex))) {
                                                        {
-#if MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX
+#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX
                                                                
debug::DebugLock lock(mutex);
 #endif
                                                                // Note that 
the set_many_empty above did a release, meaning that anybody who acquires the 
block
@@ -2772,27 +2867,27 @@ private:
                                                }
                                                indexIndex = (indexIndex + 1) & 
(localBlockIndex->capacity - 1);
                                        } while (index != firstIndex + 
actualCount);
-                                       
+
                                        return actualCount;
                                }
                                else {
                                        
this->dequeueOvercommit.fetch_add(desiredCount, std::memory_order_release);
                                }
                        }
-                       
+
                        return 0;
                }
-               
+
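
Every index comparison in the two bulk paths above goes through
details::circular_less_than, which stays correct when the unsigned index_t
wraps around. The serial-number trick it is built on, in a standalone form
that is equivalent in spirit to (not copied from) the header's helper:

    #include <climits>
    #include <cstdint>
    #include <type_traits>

    // a precedes b on the circle iff a - b wraps to more than half the index
    // space; e.g. with 8-bit indices, 250 precedes 3 after a wrap.
    template <typename T>
    constexpr bool circular_less_than(T a, T b) {
        static_assert(std::is_unsigned<T>::value, "use an unsigned index type");
        return static_cast<T>(a - b) >
               static_cast<T>(static_cast<T>(1) << (sizeof(T) * CHAR_BIT - 1));
    }

    static_assert(circular_less_than<std::uint8_t>(250, 3), "wrapped compare");
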
        private:
                // The block size must be > 1, so any number with the low bit 
set is an invalid block base index
                static const index_t INVALID_BLOCK_BASE = 1;
-               
+
                struct BlockIndexEntry
                {
                        std::atomic<index_t> key;
                        std::atomic<Block*> value;
                };
-               
+
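
The sentinel above works because block bases are always multiples of
BLOCK_SIZE, a power of two greater than 1, so any key with the low bit set,
such as INVALID_BLOCK_BASE = 1, can never collide with a real base. A quick
standalone check of that invariant:

    #include <cassert>
    #include <cstdint>

    int main() {
        const std::uint32_t BLOCK_SIZE = 32;         // power of two, > 1
        const std::uint32_t INVALID_BLOCK_BASE = 1;  // low bit set => never a base
        for (std::uint32_t index = 0; index < 4096; ++index) {
            std::uint32_t base = index & ~(BLOCK_SIZE - 1);  // base of index's block
            assert((base & 1u) == 0u && base != INVALID_BLOCK_BASE);
        }
        return 0;
    }
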
                struct BlockIndexHeader
                {
                        size_t capacity;
@@ -2801,7 +2896,7 @@ private:
                        BlockIndexEntry** index;
                        BlockIndexHeader* prev;
                };
-               
+
                template<AllocationMode allocMode>
                inline bool insert_block_index_entry(BlockIndexEntry*& 
idxEntry, index_t blockStartIndex)
                {
@@ -2809,45 +2904,50 @@ private:
                        if (localBlockIndex == nullptr) {
                                return false;  // this can happen if 
new_block_index failed in the constructor
                        }
-                       auto newTail = 
(localBlockIndex->tail.load(std::memory_order_relaxed) + 1) & 
(localBlockIndex->capacity - 1);
+                       size_t newTail = 
(localBlockIndex->tail.load(std::memory_order_relaxed) + 1) & 
(localBlockIndex->capacity - 1);
                        idxEntry = localBlockIndex->index[newTail];
                        if (idxEntry->key.load(std::memory_order_relaxed) == 
INVALID_BLOCK_BASE ||
                                idxEntry->value.load(std::memory_order_relaxed) 
== nullptr) {
-                               
+
                                idxEntry->key.store(blockStartIndex, 
std::memory_order_relaxed);
                                localBlockIndex->tail.store(newTail, 
std::memory_order_release);
                                return true;
                        }
-                       
+
                        // No room in the old block index, try to allocate 
another one!
-                       if (allocMode == CannotAlloc || !new_block_index()) {
+                       MOODYCAMEL_CONSTEXPR_IF (allocMode == CannotAlloc) {
                                return false;
                        }
-                       localBlockIndex = 
blockIndex.load(std::memory_order_relaxed);
-                       newTail = 
(localBlockIndex->tail.load(std::memory_order_relaxed) + 1) & 
(localBlockIndex->capacity - 1);
-                       idxEntry = localBlockIndex->index[newTail];
-                       assert(idxEntry->key.load(std::memory_order_relaxed) == 
INVALID_BLOCK_BASE);
-                       idxEntry->key.store(blockStartIndex, 
std::memory_order_relaxed);
-                       localBlockIndex->tail.store(newTail, 
std::memory_order_release);
-                       return true;
+                       else if (!new_block_index()) {
+                               return false;
+                       }
+                       else {
+                               localBlockIndex = 
blockIndex.load(std::memory_order_relaxed);
+                               newTail = 
(localBlockIndex->tail.load(std::memory_order_relaxed) + 1) & 
(localBlockIndex->capacity - 1);
+                               idxEntry = localBlockIndex->index[newTail];
+                               
assert(idxEntry->key.load(std::memory_order_relaxed) == INVALID_BLOCK_BASE);
+                               idxEntry->key.store(blockStartIndex, 
std::memory_order_relaxed);
+                               localBlockIndex->tail.store(newTail, 
std::memory_order_release);
+                               return true;
+                       }
                }
-               
+
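
insert_block_index_entry above was restructured from a single
"if (allocMode == CannotAlloc || !new_block_index())" into an explicit
if/else-if/else chain: once MOODYCAMEL_CONSTEXPR_IF means "if constexpr", the
CannotAlloc instantiation drops the growth path at compile time instead of
carrying a runtime short-circuit. The shape of that transformation, reduced to
hypothetical names:

    enum AllocationMode { CanAlloc, CannotAlloc };

    bool grow_storage();  // assumed runtime fallback for the illustration

    template <AllocationMode mode>
    bool try_make_room() {
        if constexpr (mode == CannotAlloc) {
            return false;      // growth code is not instantiated in this mode
        } else if (!grow_storage()) {
            return false;      // growth permitted but failed
        } else {
            return true;       // grew; the caller re-reads the new index
        }
    }
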
                inline void rewind_block_index_tail()
                {
                        auto localBlockIndex = blockIndex.load(std::memory_order_relaxed);
                        localBlockIndex->tail.store((localBlockIndex->tail.load(std::memory_order_relaxed) - 1) & (localBlockIndex->capacity - 1), std::memory_order_relaxed);
                }
-               
+
                inline BlockIndexEntry* get_block_index_entry_for_index(index_t 
index) const
                {
                        BlockIndexHeader* localBlockIndex;
                        auto idx = get_block_index_index_for_index(index, 
localBlockIndex);
                        return localBlockIndex->index[idx];
                }
-               
+
                inline size_t get_block_index_index_for_index(index_t index, 
BlockIndexHeader*& localBlockIndex) const
                {
-#if MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX
+#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX
                        debug::DebugLock lock(mutex);
 #endif
                        index &= ~static_cast<index_t>(BLOCK_SIZE - 1);
@@ -2857,12 +2957,12 @@ private:
                        assert(tailBase != INVALID_BLOCK_BASE);
                        // Note: Must use division instead of shift because the 
index may wrap around, causing a negative
                        // offset, whose negativity we want to preserve
-                       auto offset = static_cast<size_t>(static_cast<typename 
std::make_signed<index_t>::type>(index - tailBase) / BLOCK_SIZE);
+                       auto offset = static_cast<size_t>(static_cast<typename 
std::make_signed<index_t>::type>(index - tailBase) / static_cast<typename 
std::make_signed<index_t>::type>(BLOCK_SIZE));
                        size_t idx = (tail + offset) & 
(localBlockIndex->capacity - 1);
                        
assert(localBlockIndex->index[idx]->key.load(std::memory_order_relaxed) == 
index && localBlockIndex->index[idx]->value.load(std::memory_order_relaxed) != 
nullptr);
                        return idx;
                }
-               
+
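
The extra cast in the division above matters: index - tailBase wraps to a huge
unsigned value once the ring has wrapped, and only a signed interpretation of
both operands recovers the negative block offset; with the divisor left
unsigned, the usual arithmetic conversions would turn the dividend unsigned
again. In isolation:

    #include <cstdint>
    #include <cstdio>

    int main() {
        using index_t = std::uint32_t;
        const index_t BLOCK_SIZE = 32;
        index_t tailBase = 64;
        index_t index = 0;  // logically two blocks before tailBase, post-wrap
        // Cast both operands to the signed type before dividing.
        auto offset = static_cast<std::int32_t>(index - tailBase) /
                      static_cast<std::int32_t>(BLOCK_SIZE);
        std::printf("offset = %d\n", offset);  // prints -2
        return 0;
    }
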
                bool new_block_index()
                {
                        auto prev = blockIndex.load(std::memory_order_relaxed);
@@ -2875,7 +2975,7 @@ private:
                        if (raw == nullptr) {
                                return false;
                        }
-                       
+
                        auto header = new (raw) BlockIndexHeader;
                        auto entries = 
reinterpret_cast<BlockIndexEntry*>(details::align_for<BlockIndexEntry>(raw + 
sizeof(BlockIndexHeader)));
                        auto index = 
reinterpret_cast<BlockIndexEntry**>(details::align_for<BlockIndexEntry*>(reinterpret_cast<char*>(entries)
 + sizeof(BlockIndexEntry) * entryCount));
@@ -2899,14 +2999,14 @@ private:
                        header->index = index;
                        header->capacity = nextBlockIndexCapacity;
                        header->tail.store((prevCapacity - 1) & 
(nextBlockIndexCapacity - 1), std::memory_order_relaxed);
-                       
+
                        blockIndex.store(header, std::memory_order_release);
-                       
+
                        nextBlockIndexCapacity <<= 1;
-                       
+
                        return true;
                }
-               
+
        private:
                size_t nextBlockIndexCapacity;
                std::atomic<BlockIndexHeader*> blockIndex;
@@ -2916,26 +3016,26 @@ private:
                details::ThreadExitListener threadExitListener;
        private:
 #endif
-               
+
 #ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG
        public:
                ImplicitProducer* nextImplicitProducer;
        private:
 #endif
 
-#if MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX
+#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX
                mutable debug::DebugMutex mutex;
 #endif
-#if MCDBGQ_TRACKMEM
+#ifdef MCDBGQ_TRACKMEM
                friend struct MemStats;
 #endif
        };
-       
-       
+
+
        //////////////////////////////////
        // Block pool manipulation
        //////////////////////////////////
-       
+
        void populate_initial_block_list(size_t blockCount)
        {
                initialBlockPoolSize = blockCount;
@@ -2943,7 +3043,7 @@ private:
                        initialBlockPool = nullptr;
                        return;
                }
-               
+
                initialBlockPool = create_array<Block>(blockCount);
                if (initialBlockPool == nullptr) {
                        initialBlockPoolSize = 0;
@@ -2952,26 +3052,31 @@ private:
                        initialBlockPool[i].dynamicallyAllocated = false;
                }
        }
-       
+
        inline Block* try_get_block_from_initial_pool()
        {
                if (initialBlockPoolIndex.load(std::memory_order_relaxed) >= 
initialBlockPoolSize) {
                        return nullptr;
                }
-               
+
                auto index = initialBlockPoolIndex.fetch_add(1, 
std::memory_order_relaxed);
-               
+
                return index < initialBlockPoolSize ? (initialBlockPool + 
index) : nullptr;
        }
-       
+
        inline void add_block_to_free_list(Block* block)
        {
-#if MCDBGQ_TRACKMEM
+#ifdef MCDBGQ_TRACKMEM
                block->owner = nullptr;
 #endif
-               freeList.add(block);
+               if (!Traits::RECYCLE_ALLOCATED_BLOCKS && 
block->dynamicallyAllocated) {
+                       destroy(block);
+               }
+               else {
+                       freeList.add(block);
+               }
        }
-       
+
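
add_block_to_free_list now consults Traits::RECYCLE_ALLOCATED_BLOCKS (an
upstream trait defaulting to false): blocks that were heap-allocated are
destroyed as soon as they drain rather than accumulating on the free list,
unless the user opts back into pooling. Opting in follows the usual traits
pattern; a sketch, assuming the vendored header's include path:

    #include "concurrentqueue.h"

    // Keep heap-allocated blocks on the free list for reuse instead of
    // destroying them on release (destroying is the new default).
    struct RecyclingTraits : moodycamel::ConcurrentQueueDefaultTraits {
        static const bool RECYCLE_ALLOCATED_BLOCKS = true;
    };

    int main() {
        moodycamel::ConcurrentQueue<int, RecyclingTraits> q;
        q.enqueue(42);
        int v;
        return q.try_dequeue(v) ? 0 : 1;
    }
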
        inline void add_blocks_to_free_list(Block* block)
        {
                while (block != nullptr) {
@@ -2980,12 +3085,12 @@ private:
                        block = next;
                }
        }
-       
+
        inline Block* try_get_block_from_free_list()
        {
                return freeList.try_get();
        }
-       
+
        // Gets a free block from one of the memory pools, or allocates a new 
one (if applicable)
        template<AllocationMode canAlloc>
        Block* requisition_block()
@@ -2994,21 +3099,22 @@ private:
                if (block != nullptr) {
                        return block;
                }
-               
+
                block = try_get_block_from_free_list();
                if (block != nullptr) {
                        return block;
                }
-               
-               if (canAlloc == CanAlloc) {
+
+               MOODYCAMEL_CONSTEXPR_IF (canAlloc == CanAlloc) {
                        return create<Block>();
                }
-               
-               return nullptr;
+               else {
+                       return nullptr;
+               }
        }
-       
 
-#if MCDBGQ_TRACKMEM
+
+#ifdef MCDBGQ_TRACKMEM
        public:
                struct MemStats {
                        size_t allocatedBlocks;
@@ -3023,28 +3129,28 @@ private:
                        size_t queueClassBytes;
                        size_t implicitBlockIndexBytes;
                        size_t explicitBlockIndexBytes;
-                       
+
                        friend class ConcurrentQueue;
-                       
+
                private:
                        static MemStats getFor(ConcurrentQueue* q)
                        {
                                MemStats stats = { 0 };
-                               
+
                                stats.elementsEnqueued = q->size_approx();
-                       
+
                                auto block = q->freeList.head_unsafe();
                                while (block != nullptr) {
                                        ++stats.allocatedBlocks;
                                        ++stats.freeBlocks;
                                        block = 
block->freeListNext.load(std::memory_order_relaxed);
                                }
-                               
+
                                for (auto ptr = 
q->producerListTail.load(std::memory_order_acquire); ptr != nullptr; ptr = 
ptr->next_prod()) {
                                        bool implicit = 
dynamic_cast<ImplicitProducer*>(ptr) != nullptr;
                                        stats.implicitProducers += implicit ? 1 
: 0;
                                        stats.explicitProducers += implicit ? 0 
: 1;
-                                       
+
                                        if (implicit) {
                                                auto prod = 
static_cast<ImplicitProducer*>(ptr);
                                                stats.queueClassBytes += 
sizeof(ImplicitProducer);
@@ -3092,18 +3198,18 @@ private:
                                                }
                                        }
                                }
-                               
+
                                auto freeOnInitialPool = 
q->initialBlockPoolIndex.load(std::memory_order_relaxed) >= 
q->initialBlockPoolSize ? 0 : q->initialBlockPoolSize - 
q->initialBlockPoolIndex.load(std::memory_order_relaxed);
                                stats.allocatedBlocks += freeOnInitialPool;
                                stats.freeBlocks += freeOnInitialPool;
-                               
+
                                stats.blockClassBytes = sizeof(Block) * 
stats.allocatedBlocks;
                                stats.queueClassBytes += 
sizeof(ConcurrentQueue);
-                               
+
                                return stats;
                        }
                };
-               
+
                // For debugging only. Not thread-safe.
                MemStats getMemStats()
                {
@@ -3112,21 +3218,15 @@ private:
        private:
                friend struct MemStats;
 #endif
-       
-       
+
+
        //////////////////////////////////
        // Producer list manipulation
-       //////////////////////////////////      
-       
+       //////////////////////////////////
+
        ProducerBase* recycle_or_create_producer(bool isExplicit)
        {
-               bool recycled;
-               return recycle_or_create_producer(isExplicit, recycled);
-       }
-       
-       ProducerBase* recycle_or_create_producer(bool isExplicit, bool& 
recycled)
-       {
-#if MCDBGQ_NOLOCKFREE_IMPLICITPRODHASH
+#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODHASH
                debug::DebugLock lock(implicitProdMutex);
 #endif
                // Try to re-use one first
@@ -3135,31 +3235,29 @@ private:
                                bool expected = true;
                                if 
(ptr->inactive.compare_exchange_strong(expected, /* desired */ false, 
std::memory_order_acquire, std::memory_order_relaxed)) {
                                        // We caught one! It's been marked as 
activated, the caller can have it
-                                       recycled = true;
                                        return ptr;
                                }
                        }
                }
-               
-               recycled = false;
+
                return add_producer(isExplicit ? 
static_cast<ProducerBase*>(create<ExplicitProducer>(this)) : 
create<ImplicitProducer>(this));
        }
-       
+
        ProducerBase* add_producer(ProducerBase* producer)
        {
                // Handle failed memory allocation
                if (producer == nullptr) {
                        return nullptr;
                }
-               
+
                producerCount.fetch_add(1, std::memory_order_relaxed);
-               
+
                // Add it to the lock-free list
                auto prevTail = 
producerListTail.load(std::memory_order_relaxed);
                do {
                        producer->next = prevTail;
                } while (!producerListTail.compare_exchange_weak(prevTail, 
producer, std::memory_order_release, std::memory_order_relaxed));
-               
+
 #ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG
                if (producer->isExplicit) {
                        auto prevTailExplicit = 
explicitProducers.load(std::memory_order_relaxed);
@@ -3174,10 +3272,10 @@ private:
                        } while 
(!implicitProducers.compare_exchange_weak(prevTailImplicit, 
static_cast<ImplicitProducer*>(producer), std::memory_order_release, 
std::memory_order_relaxed));
                }
 #endif
-               
+
                return producer;
        }
-       
+
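
add_producer above pushes onto the producer list with the classic
compare-exchange loop: read the tail, point the new node at it, retry until
the CAS lands. The same push in a standalone form, with an illustrative node
type:

    #include <atomic>

    struct Node {
        Node* next = nullptr;
    };

    std::atomic<Node*> listTail{nullptr};

    void push(Node* node) {
        Node* prevTail = listTail.load(std::memory_order_relaxed);
        do {
            node->next = prevTail;  // re-linked on every failed attempt
        } while (!listTail.compare_exchange_weak(prevTail, node,
                                                 std::memory_order_release,
                                                 std::memory_order_relaxed));
    }
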
        void reown_producers()
        {
                // After another instance is moved-into/swapped-with this one, 
all the
@@ -3187,31 +3285,31 @@ private:
                        ptr->parent = this;
                }
        }
-       
-       
+
+
        //////////////////////////////////
        // Implicit producer hash
        //////////////////////////////////
-       
+
        struct ImplicitProducerKVP
        {
                std::atomic<details::thread_id_t> key;
                ImplicitProducer* value;                // No need for 
atomicity since it's only read by the thread that sets it in the first place
-               
+
                ImplicitProducerKVP() : value(nullptr) { }
-               
+
                ImplicitProducerKVP(ImplicitProducerKVP&& other) 
MOODYCAMEL_NOEXCEPT
                {
                        key.store(other.key.load(std::memory_order_relaxed), 
std::memory_order_relaxed);
                        value = other.value;
                }
-               
+
                inline ImplicitProducerKVP& operator=(ImplicitProducerKVP&& 
other) MOODYCAMEL_NOEXCEPT
                {
                        swap(other);
                        return *this;
                }
-               
+
                inline void swap(ImplicitProducerKVP& other) MOODYCAMEL_NOEXCEPT
                {
                        if (this != &other) {
@@ -3220,93 +3318,100 @@ private:
                        }
                }
        };
-       
+
        template<typename XT, typename XTraits>
        friend void moodycamel::swap(typename ConcurrentQueue<XT, 
XTraits>::ImplicitProducerKVP&, typename ConcurrentQueue<XT, 
XTraits>::ImplicitProducerKVP&) MOODYCAMEL_NOEXCEPT;
-       
+
        struct ImplicitProducerHash
        {
                size_t capacity;
                ImplicitProducerKVP* entries;
                ImplicitProducerHash* prev;
        };
-       
+
        inline void populate_initial_implicit_producer_hash()
        {
-               if (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) return;
-               
-               implicitProducerHashCount.store(0, std::memory_order_relaxed);
-               auto hash = &initialImplicitProducerHash;
-               hash->capacity = INITIAL_IMPLICIT_PRODUCER_HASH_SIZE;
-               hash->entries = &initialImplicitProducerHashEntries[0];
-               for (size_t i = 0; i != INITIAL_IMPLICIT_PRODUCER_HASH_SIZE; 
++i) {
-                       
initialImplicitProducerHashEntries[i].key.store(details::invalid_thread_id, 
std::memory_order_relaxed);
+               MOODYCAMEL_CONSTEXPR_IF (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 
0) {
+                       return;
+               }
+               else {
+                       implicitProducerHashCount.store(0, 
std::memory_order_relaxed);
+                       auto hash = &initialImplicitProducerHash;
+                       hash->capacity = INITIAL_IMPLICIT_PRODUCER_HASH_SIZE;
+                       hash->entries = &initialImplicitProducerHashEntries[0];
+                       for (size_t i = 0; i != 
INITIAL_IMPLICIT_PRODUCER_HASH_SIZE; ++i) {
+                               
initialImplicitProducerHashEntries[i].key.store(details::invalid_thread_id, 
std::memory_order_relaxed);
+                       }
+                       hash->prev = nullptr;
+                       implicitProducerHash.store(hash, 
std::memory_order_relaxed);
                }
-               hash->prev = nullptr;
-               implicitProducerHash.store(hash, std::memory_order_relaxed);
        }
-       
+
        void swap_implicit_producer_hashes(ConcurrentQueue& other)
        {
-               if (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) return;
-               
-               // Swap (assumes our implicit producer hash is initialized)
-               
initialImplicitProducerHashEntries.swap(other.initialImplicitProducerHashEntries);
-               initialImplicitProducerHash.entries = 
&initialImplicitProducerHashEntries[0];
-               other.initialImplicitProducerHash.entries = 
&other.initialImplicitProducerHashEntries[0];
-               
-               details::swap_relaxed(implicitProducerHashCount, 
other.implicitProducerHashCount);
-               
-               details::swap_relaxed(implicitProducerHash, 
other.implicitProducerHash);
-               if (implicitProducerHash.load(std::memory_order_relaxed) == 
&other.initialImplicitProducerHash) {
-                       
implicitProducerHash.store(&initialImplicitProducerHash, 
std::memory_order_relaxed);
+               MOODYCAMEL_CONSTEXPR_IF (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 
0) {
+                       return;
                }
                else {
-                       ImplicitProducerHash* hash;
-                       for (hash = 
implicitProducerHash.load(std::memory_order_relaxed); hash->prev != 
&other.initialImplicitProducerHash; hash = hash->prev) {
-                               continue;
+                       // Swap (assumes our implicit producer hash is 
initialized)
+                       
initialImplicitProducerHashEntries.swap(other.initialImplicitProducerHashEntries);
+                       initialImplicitProducerHash.entries = 
&initialImplicitProducerHashEntries[0];
+                       other.initialImplicitProducerHash.entries = 
&other.initialImplicitProducerHashEntries[0];
+
+                       details::swap_relaxed(implicitProducerHashCount, 
other.implicitProducerHashCount);
+
+                       details::swap_relaxed(implicitProducerHash, 
other.implicitProducerHash);
+                       if 
(implicitProducerHash.load(std::memory_order_relaxed) == 
&other.initialImplicitProducerHash) {
+                               
implicitProducerHash.store(&initialImplicitProducerHash, 
std::memory_order_relaxed);
                        }
-                       hash->prev = &initialImplicitProducerHash;
-               }
-               if (other.implicitProducerHash.load(std::memory_order_relaxed) 
== &initialImplicitProducerHash) {
-                       
other.implicitProducerHash.store(&other.initialImplicitProducerHash, 
std::memory_order_relaxed);
-               }
-               else {
-                       ImplicitProducerHash* hash;
-                       for (hash = 
other.implicitProducerHash.load(std::memory_order_relaxed); hash->prev != 
&initialImplicitProducerHash; hash = hash->prev) {
-                               continue;
+                       else {
+                               ImplicitProducerHash* hash;
+                               for (hash = 
implicitProducerHash.load(std::memory_order_relaxed); hash->prev != 
&other.initialImplicitProducerHash; hash = hash->prev) {
+                                       continue;
+                               }
+                               hash->prev = &initialImplicitProducerHash;
+                       }
+                       if 
(other.implicitProducerHash.load(std::memory_order_relaxed) == 
&initialImplicitProducerHash) {
+                               
other.implicitProducerHash.store(&other.initialImplicitProducerHash, 
std::memory_order_relaxed);
+                       }
+                       else {
+                               ImplicitProducerHash* hash;
+                               for (hash = 
other.implicitProducerHash.load(std::memory_order_relaxed); hash->prev != 
&initialImplicitProducerHash; hash = hash->prev) {
+                                       continue;
+                               }
+                               hash->prev = &other.initialImplicitProducerHash;
                        }
-                       hash->prev = &other.initialImplicitProducerHash;
                }
        }
-       
+
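
swap_implicit_producer_hashes above exchanges the two tables with
details::swap_relaxed, a helper that swaps a pair of atomics using relaxed
ordering only; that is sufficient here because swapping queues is only legal
while no other thread is touching either of them. Paraphrased (not copied from
the header):

    #include <atomic>

    template <typename T>
    void swap_relaxed(std::atomic<T>& a, std::atomic<T>& b) {
        T tmp = a.load(std::memory_order_relaxed);
        a.store(b.load(std::memory_order_relaxed), std::memory_order_relaxed);
        b.store(tmp, std::memory_order_relaxed);
    }
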
        // Only fails (returns nullptr) if memory allocation fails
        ImplicitProducer* get_or_add_implicit_producer()
        {
                // Note that since the data is essentially thread-local (key is 
thread ID),
                // there's a reduced need for fences (memory ordering is 
already consistent
                // for any individual thread), except for the current table 
itself.
-               
+
                // Start by looking for the thread ID in the current and all 
previous hash tables.
                // If it's not found, it must not be in there yet, since this 
same thread would
                // have added it previously to one of the tables that we 
traversed.
-               
+
                // Code and algorithm adapted from 
http://preshing.com/20130605/the-worlds-simplest-lock-free-hash-table
-               
-#if MCDBGQ_NOLOCKFREE_IMPLICITPRODHASH
+
+#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODHASH
                debug::DebugLock lock(implicitProdMutex);
 #endif
-               
+
                auto id = details::thread_id();
                auto hashedId = details::hash_thread_id(id);
-               
+
                auto mainHash = 
implicitProducerHash.load(std::memory_order_acquire);
+               assert(mainHash != nullptr);  // silence clang-tidy and MSVC 
warnings (hash cannot be null)
                for (auto hash = mainHash; hash != nullptr; hash = hash->prev) {
                        // Look for the id in this hash
                        auto index = hashedId;
                        while (true) {          // Not an infinite loop because at least one slot is free in the hash table
-                               index &= hash->capacity - 1;
-                               
+                               index &= hash->capacity - 1u;
+
                                auto probedKey = hash->entries[index].key.load(std::memory_order_relaxed);
                                if (probedKey == id) {
                                        // Found it! If we had to search several hashes deep, though, we should lazily add it
@@ -3318,15 +3423,14 @@ private:
                                        if (hash != mainHash) {
                                                index = hashedId;
                                                while (true) {
-                                                       index &= mainHash->capacity - 1;
-                                                       probedKey = mainHash->entries[index].key.load(std::memory_order_relaxed);
+                                                       index &= mainHash->capacity - 1u;
                                                        auto empty = details::invalid_thread_id;
 #ifdef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED
                                                        auto reusable = details::invalid_thread_id2;
-                                                       if ((probedKey == empty    && mainHash->entries[index].key.compare_exchange_strong(empty,    id, std::memory_order_relaxed, std::memory_order_relaxed)) ||
-                                                               (probedKey == reusable && mainHash->entries[index].key.compare_exchange_strong(reusable, id, std::memory_order_acquire, std::memory_order_acquire))) {
+                                                       if (mainHash->entries[index].key.compare_exchange_strong(empty,    id, std::memory_order_seq_cst, std::memory_order_relaxed) ||
+                                                               mainHash->entries[index].key.compare_exchange_strong(reusable, id, std::memory_order_seq_cst, std::memory_order_relaxed)) {
 #else
-                                                       if ((probedKey == empty    && mainHash->entries[index].key.compare_exchange_strong(empty,    id, std::memory_order_relaxed, std::memory_order_relaxed))) {
+                                                       if (mainHash->entries[index].key.compare_exchange_strong(empty,    id, std::memory_order_seq_cst, std::memory_order_relaxed)) {
 #endif
                                                                mainHash->entries[index].value = value;
                                                                break;
@@ -3334,7 +3438,7 @@ private:
                                                        ++index;
                                                }
                                        }
-                                       
+
                                        return value;
                                }
                                if (probedKey == details::invalid_thread_id) {
@@ -3343,10 +3447,11 @@ private:
                                ++index;
                        }
                }
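
The claim-a-slot logic in the hunk above was also reworked: instead of loading
the key and only CAS-ing when it looked empty or reusable, the new code issues
the compare_exchange_strong directly (on failure, the expected value is
refreshed with the current key, so it effectively doubles as the probe) and
strengthens the success ordering to seq_cst. The same pattern in isolation,
with a hypothetical 0 sentinel standing in for details::invalid_thread_id:

    #include <atomic>
    #include <cstdint>

    constexpr std::uintptr_t kEmpty = 0;  // hypothetical "empty" sentinel

    // Claim a slot in one step; no separate load is needed beforehand.
    bool claim_slot(std::atomic<std::uintptr_t>& key, std::uintptr_t id) {
        std::uintptr_t expected = kEmpty;
        return key.compare_exchange_strong(expected, id,
                                           std::memory_order_seq_cst,   // success
                                           std::memory_order_relaxed);  // failure
    }
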
-               
+
                // Insert!
                auto newCount = 1 + implicitProducerHashCount.fetch_add(1, std::memory_order_relaxed);
                while (true) {
+                       // NOLINTNEXTLINE(clang-analyzer-core.NullDereference)
                        if (newCount >= (mainHash->capacity >> 1) && !implicitProducerHashResizeInProgress.test_and_set(std::memory_order_acquire)) {
                                // We've acquired the resize lock, try to allocate a bigger hash table.
                                // Note the acquire fence synchronizes with the release fence at the end of this block, and hence when
@@ -3354,20 +3459,20 @@ private:
                                // locked block).
                                mainHash = implicitProducerHash.load(std::memory_order_acquire);
                                if (newCount >= (mainHash->capacity >> 1)) {
-                                       auto newCapacity = mainHash->capacity << 1;
+                                       size_t newCapacity = mainHash->capacity << 1;
                                        while (newCount >= (newCapacity >> 1)) {
                                                newCapacity <<= 1;
                                        }
                                        auto raw = static_cast<char*>((Traits::malloc)(sizeof(ImplicitProducerHash) + std::alignment_of<ImplicitProducerKVP>::value - 1 + sizeof(ImplicitProducerKVP) * newCapacity));
                                        if (raw == nullptr) {
                                                // Allocation failed
-                                               implicitProducerHashCount.fetch_add(-1, std::memory_order_relaxed);
+                                               implicitProducerHashCount.fetch_sub(1, std::memory_order_relaxed);
                                                implicitProducerHashResizeInProgress.clear(std::memory_order_relaxed);
                                                return nullptr;
                                        }
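
A small but recurring change in this function: the rollback on failure now uses
fetch_sub(1) rather than fetch_add(-1). On a std::atomic<size_t> both produce
the same modular result, since -1 converts to the maximum size_t, but fetch_sub
states the intent directly and avoids sign-conversion warnings. For example:

    #include <atomic>
    #include <cassert>
    #include <cstddef>

    int main() {
        std::atomic<std::size_t> count{3};
        count.fetch_sub(1, std::memory_order_relaxed);  // same result as fetch_add(-1) via wraparound
        assert(count.load() == 2);
        return 0;
    }
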
-                                       
+
                                        auto newHash = new (raw) ImplicitProducerHash;
-                                       newHash->capacity = newCapacity;
+                                       newHash->capacity = static_cast<size_t>(newCapacity);
                                        newHash->entries = reinterpret_cast<ImplicitProducerKVP*>(details::align_for<ImplicitProducerKVP>(raw + sizeof(ImplicitProducerHash)));
                                        for (size_t i = 0; i != newCapacity; ++i) {
                                                new (newHash->entries + i) ImplicitProducerKVP;
@@ -3382,40 +3487,36 @@ private:
                                        implicitProducerHashResizeInProgress.clear(std::memory_order_release);
                                }
                        }
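
The resize path doubles the capacity until the new logical count fits in less
than half of it, then allocates the header and the entry array in one raw
block. The sizing step in isolation (a sketch, not the member code itself):

    #include <cstddef>

    // Pick the next power-of-two capacity that keeps the table under half full.
    std::size_t next_capacity(std::size_t capacity, std::size_t newCount) {
        std::size_t newCapacity = capacity << 1;
        while (newCount >= (newCapacity >> 1)) {
            newCapacity <<= 1;
        }
        return newCapacity;
    }
    // e.g. next_capacity(32, 40) == 128: 40 >= 64/2, but 40 < 128/2.
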
-                       
+
                        // If it's < three-quarters full, add to the old one anyway so that we don't have to wait for the next table
                        // to finish being allocated by another thread (and if we just finished allocating above, the condition will
                        // always be true)
                        if (newCount < (mainHash->capacity >> 1) + (mainHash->capacity >> 2)) {
-                               bool recycled;
-                               auto producer = static_cast<ImplicitProducer*>(recycle_or_create_producer(false, recycled));
+                               auto producer = static_cast<ImplicitProducer*>(recycle_or_create_producer(false));
                                if (producer == nullptr) {
-                                       implicitProducerHashCount.fetch_add(-1, std::memory_order_relaxed);
+                                       implicitProducerHashCount.fetch_sub(1, std::memory_order_relaxed);
                                        return nullptr;
                                }
-                               if (recycled) {
-                                       implicitProducerHashCount.fetch_add(-1, std::memory_order_relaxed);
-                               }
-                               
+
 #ifdef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED
                                producer->threadExitListener.callback = &ConcurrentQueue::implicit_producer_thread_exited_callback;
                                producer->threadExitListener.userData = producer;
                                details::ThreadExitNotifier::subscribe(&producer->threadExitListener);
 #endif
-                               
+
                                auto index = hashedId;
                                while (true) {
-                                       index &= mainHash->capacity - 1;
-                                       auto probedKey = mainHash->entries[index].key.load(std::memory_order_relaxed);
-                                       
+                                       index &= mainHash->capacity - 1u;
                                        auto empty = details::invalid_thread_id;
 #ifdef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED
                                        auto reusable = details::invalid_thread_id2;
-                                       if ((probedKey == empty    && mainHash->entries[index].key.compare_exchange_strong(empty,    id, std::memory_order_relaxed, std::memory_order_relaxed)) ||
-                                               (probedKey == reusable && mainHash->entries[index].key.compare_exchange_strong(reusable, id, std::memory_order_acquire, std::memory_order_acquire))) {
-#else
-                                       if ((probedKey == empty    && mainHash->entries[index].key.compare_exchange_strong(empty,    id, std::memory_order_relaxed, std::memory_order_relaxed))) {
+                                       if (mainHash->entries[index].key.compare_exchange_strong(reusable, id, std::memory_order_seq_cst, std::memory_order_relaxed)) {
+                                               implicitProducerHashCount.fetch_sub(1, std::memory_order_relaxed);  // already counted as a used slot
+                                               mainHash->entries[index].value = producer;
+                                               break;
+                                       }
 #endif
+                                       if (mainHash->entries[index].key.compare_exchange_strong(empty,    id, std::memory_order_seq_cst, std::memory_order_relaxed)) {
                                                mainHash->entries[index].value = producer;
                                                break;
                                        }
@@ -3423,22 +3524,19 @@ private:
                                }
                                return producer;
                        }
-                       
+
                        // Hmm, the old hash is quite full and somebody else is busy allocating a new one.
                        // We need to wait for the allocating thread to finish (if it succeeds, we add, if not,
                        // we try to allocate ourselves).
                        mainHash = implicitProducerHash.load(std::memory_order_acquire);
                }
        }
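
All of this machinery backs the token-free producer API: the first enqueue from
a thread creates (or recycles) that thread's implicit producer via this hash.
Illustrative use of the public interface, not part of this diff:

    #include "concurrentqueue.h"
    #include <thread>

    int main() {
        moodycamel::ConcurrentQueue<int> q;
        std::thread t([&] { q.enqueue(42); });  // routes through get_or_add_implicit_producer()
        t.join();
        int item = 0;
        while (!q.try_dequeue(item)) {}
        return item == 42 ? 0 : 1;
    }
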
-       
+
 #ifdef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED
        void implicit_producer_thread_exited(ImplicitProducer* producer)
        {
-               // Remove from thread exit listeners
-               details::ThreadExitNotifier::unsubscribe(&producer->threadExitListener);
-               
                // Remove from hash
-#if MCDBGQ_NOLOCKFREE_IMPLICITPRODHASH
+#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODHASH
                debug::DebugLock lock(implicitProdMutex);
 #endif
                auto hash = implicitProducerHash.load(std::memory_order_acquire);
@@ -3446,26 +3544,25 @@ private:
                auto id = details::thread_id();
                auto hashedId = details::hash_thread_id(id);
                details::thread_id_t probedKey;
-               
+
                // We need to traverse all the hashes just in case other threads aren't on the current one yet and are
                // trying to add an entry thinking there's a free slot (because they reused a producer)
                for (; hash != nullptr; hash = hash->prev) {
                        auto index = hashedId;
                        do {
-                               index &= hash->capacity - 1;
-                               probedKey = hash->entries[index].key.load(std::memory_order_relaxed);
-                               if (probedKey == id) {
-                                       hash->entries[index].key.store(details::invalid_thread_id2, std::memory_order_release);
+                               index &= hash->capacity - 1u;
+                               probedKey = id;
+                               if (hash->entries[index].key.compare_exchange_strong(probedKey, details::invalid_thread_id2, std::memory_order_seq_cst, std::memory_order_relaxed)) {
                                        break;
                                }
                                ++index;
                        } while (probedKey != details::invalid_thread_id);              // Can happen if the hash has changed but we weren't put back in it yet, or if we weren't added to this hash in the first place
                }
-               
+
                // Mark the queue as being recyclable
                producer->inactive.store(true, std::memory_order_release);
        }
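
Here the old load-compare-store sequence becomes a single
compare_exchange_strong: the slot is released only if it still holds this
thread's id, and on failure probedKey is refreshed with whatever key is there,
which keeps the do-while termination test intact. The pattern in isolation,
with kReusable as a hypothetical stand-in for details::invalid_thread_id2:

    #include <atomic>
    #include <cstdint>

    constexpr std::uintptr_t kReusable = 1;  // hypothetical "reusable" sentinel

    bool release_slot(std::atomic<std::uintptr_t>& key, std::uintptr_t myId,
                      std::uintptr_t& probedKey) {
        probedKey = myId;  // expected value; overwritten with the actual key on failure
        return key.compare_exchange_strong(probedKey, kReusable,
                                           std::memory_order_seq_cst,
                                           std::memory_order_relaxed);
    }
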
-       
+
        static void implicit_producer_thread_exited_callback(void* userData)
        {
                auto producer = static_cast<ImplicitProducer*>(userData);
@@ -3473,88 +3570,109 @@ private:
                queue->implicit_producer_thread_exited(producer);
        }
 #endif
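
The callback above is the usual C-style trampoline: a static function receives
a void* userData and casts it back to the real object before dispatching. The
same pattern with a hypothetical Widget type:

    #include <cstdio>

    struct Widget {
        static void thread_exited_callback(void* userData) {
            static_cast<Widget*>(userData)->on_thread_exit();  // recover the object
        }
        void on_thread_exit() { std::puts("thread exited"); }
    };

    int main() {
        Widget w;
        void (*cb)(void*) = &Widget::thread_exited_callback;
        cb(&w);  // the exit notifier would invoke this at thread teardown
        return 0;
    }
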
-       
+
        //////////////////////////////////
        // Utility functions
        //////////////////////////////////
-       
+
+       template<typename TAlign>
+       static inline void* aligned_malloc(size_t size)
+       {
+               MOODYCAMEL_CONSTEXPR_IF (std::alignment_of<TAlign>::value <= std::alignment_of<details::max_align_t>::value)
+                       return (Traits::malloc)(size);
+               else {
+                       size_t alignment = std::alignment_of<TAlign>::value;
+                       void* raw = (Traits::malloc)(size + alignment - 1 + sizeof(void*));
+                       if (!raw)
+                               return nullptr;
+                       char* ptr = details::align_for<TAlign>(reinterpret_cast<char*>(raw) + sizeof(void*));
+                       *(reinterpret_cast<void**>(ptr) - 1) = raw;
+                       return ptr;
+               }
+       }
+
+       template<typename TAlign>
+       static inline void aligned_free(void* ptr)
+       {
+               MOODYCAMEL_CONSTEXPR_IF (std::alignment_of<TAlign>::value <= std::alignment_of<details::max_align_t>::value)
+                       return (Traits::free)(ptr);
+               else
+                       (Traits::free)(ptr ? *(reinterpret_cast<void**>(ptr) - 1) : nullptr);
+       }
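
These new helpers implement the classic over-aligned-allocation trick:
over-allocate by alignment - 1 plus one pointer, round the address up, and
stash the original pointer just below the aligned block so the matching free
can recover it. A standalone demo with plain std::malloc (alignment must be a
power of two):

    #include <cstdint>
    #include <cstdlib>

    void* aligned_malloc_demo(std::size_t size, std::size_t alignment) {
        void* raw = std::malloc(size + alignment - 1 + sizeof(void*));
        if (!raw) return nullptr;
        auto addr = reinterpret_cast<std::uintptr_t>(raw) + sizeof(void*);
        addr = (addr + alignment - 1) & ~(alignment - 1);  // round up to alignment
        void* ptr = reinterpret_cast<void*>(addr);
        *(reinterpret_cast<void**>(ptr) - 1) = raw;        // stash the original pointer
        return ptr;
    }

    void aligned_free_demo(void* ptr) {
        if (ptr) std::free(*(reinterpret_cast<void**>(ptr) - 1));  // recover and free
    }
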
+
        template<typename U>
        static inline U* create_array(size_t count)
        {
                assert(count > 0);
-               auto p = static_cast<U*>((Traits::malloc)(sizeof(U) * count));
-               if (p == nullptr) {
+               U* p = static_cast<U*>(aligned_malloc<U>(sizeof(U) * count));
+               if (p == nullptr)
                        return nullptr;
-               }
-               
-               for (size_t i = 0; i != count; ++i) {
+
+               for (size_t i = 0; i != count; ++i)
                        new (p + i) U();
-               }
                return p;
        }
-       
+
        template<typename U>
        static inline void destroy_array(U* p, size_t count)
        {
                if (p != nullptr) {
                        assert(count > 0);
-                       for (size_t i = count; i != 0; ) {
+                       for (size_t i = count; i != 0; )
                                (p + --i)->~U();
-                       }
-                       (Traits::free)(p);
                }
+               aligned_free<U>(p);
        }
-       
+
        template<typename U>
        static inline U* create()
        {
-               auto p = (Traits::malloc)(sizeof(U));
+               void* p = aligned_malloc<U>(sizeof(U));
                return p != nullptr ? new (p) U : nullptr;
        }
-       
+
        template<typename U, typename A1>
        static inline U* create(A1&& a1)
        {
-               auto p = (Traits::malloc)(sizeof(U));
+               void* p = aligned_malloc<U>(sizeof(U));
                return p != nullptr ? new (p) U(std::forward<A1>(a1)) : nullptr;
        }
-       
+
        template<typename U>
        static inline void destroy(U* p)
        {
-               if (p != nullptr) {
+               if (p != nullptr)
                        p->~U();
-               }
-               (Traits::free)(p);
+               aligned_free<U>(p);
        }
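
Routing create/create_array/destroy through these helpers matters once element
types are over-aligned: plain malloc only guarantees alignof(std::max_align_t),
commonly 16, which is not enough for, say, a cache-line-aligned struct. A tiny
illustration:

    #include <cstdio>

    struct alignas(64) CacheLinePadded { long value; };

    int main() {
        // Placement-new on plain malloc'd storage would be undefined behavior
        // for this type; the aligned_malloc<U> path above over-aligns instead.
        std::printf("alignof(CacheLinePadded) = %zu\n", alignof(CacheLinePadded));
        return 0;
    }
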
 
 private:
        std::atomic<ProducerBase*> producerListTail;
        std::atomic<std::uint32_t> producerCount;
-       
+
        std::atomic<size_t> initialBlockPoolIndex;
        Block* initialBlockPool;
        size_t initialBlockPoolSize;
-       
-#if !MCDBGQ_USEDEBUGFREELIST
+
+#ifndef MCDBGQ_USEDEBUGFREELIST
        FreeList<Block> freeList;
 #else
        debug::DebugFreeList<Block> freeList;
 #endif
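
Note the switch from value tests to definedness tests for the debug switches
(#if X to #ifdef X, #if !X to #ifndef X). These flags are evidently meant to be
toggled by defining the macro at all, and the definedness form also avoids
-Wundef warnings; the two forms only diverge if a flag were ever defined as 0:

    #define FLAG 0
    #if !FLAG      /* true: FLAG expands to 0 */
    #endif
    #ifndef FLAG   /* false: FLAG is defined, even though it is 0 */
    #endif
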
-       
+
        std::atomic<ImplicitProducerHash*> implicitProducerHash;
        std::atomic<size_t> implicitProducerHashCount;          // Number of slots logically used
        ImplicitProducerHash initialImplicitProducerHash;
        std::array<ImplicitProducerKVP, INITIAL_IMPLICIT_PRODUCER_HASH_SIZE> initialImplicitProducerHashEntries;
        std::atomic_flag implicitProducerHashResizeInProgress;
-       
+
        std::atomic<std::uint32_t> nextExplicitConsumerId;
        std::atomic<std::uint32_t> globalExplicitConsumerOffset;
-       
-#if MCDBGQ_NOLOCKFREE_IMPLICITPRODHASH
+
+#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODHASH
        debug::DebugMutex implicitProdMutex;
 #endif
-       
+
 #ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG
        std::atomic<ExplicitProducer*> explicitProducers;
        std::atomic<ImplicitProducer*> implicitProducers;
@@ -3585,7 +3703,7 @@ ConsumerToken::ConsumerToken(ConcurrentQueue<T, Traits>& queue)
        : itemsConsumedFromCurrent(0), currentProducer(nullptr), desiredProducer(nullptr)
 {
        initialOffset = queue.nextExplicitConsumerId.fetch_add(1, std::memory_order_release);
-       lastKnownGlobalOffset = -1;
+       lastKnownGlobalOffset = static_cast<std::uint32_t>(-1);
 }
 
 template<typename T, typename Traits>
@@ -3593,7 +3711,7 @@ ConsumerToken::ConsumerToken(BlockingConcurrentQueue<T, Traits>& queue)
        : itemsConsumedFromCurrent(0), currentProducer(nullptr), desiredProducer(nullptr)
 {
        initialOffset = reinterpret_cast<ConcurrentQueue<T, Traits>*>(&queue)->nextExplicitConsumerId.fetch_add(1, std::memory_order_release);
-       lastKnownGlobalOffset = -1;
+       lastKnownGlobalOffset = static_cast<std::uint32_t>(-1);
 }
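
The explicit cast keeps this sentinel assignment warning-free: -1 converted to
std::uint32_t wraps to the maximum value, which is exactly the "no known
offset yet" marker the token relies on.

    #include <cstdint>
    #include <limits>

    static_assert(static_cast<std::uint32_t>(-1) == std::numeric_limits<std::uint32_t>::max(),
                  "-1 converts to the largest uint32_t value");
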
 
 template<typename T, typename Traits>
@@ -3620,6 +3738,10 @@ inline void swap(typename ConcurrentQueue<T, Traits>::ImplicitProducerKVP& a, ty
 
 }
 
-#if defined(__GNUC__)
+#if defined(_MSC_VER) && (!defined(_HAS_CXX17) || !_HAS_CXX17)
+#pragma warning(pop)
+#endif
+
+#if defined(__GNUC__) && !defined(__INTEL_COMPILER)
 #pragma GCC diagnostic pop
 #endif

