This is an automated email from the ASF dual-hosted git repository. wesm pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/arrow.git
commit fb0a5889bbb2d83ef8f15f400ed47be3a7392c04 Author: Pindikura Ravindra <[email protected]> AuthorDate: Fri Sep 28 23:23:42 2018 +0530 [Gandiva] allow multiple module instances in cache - this is a workaround to reduce lock contention for regex exprs --- cpp/src/gandiva/cache.h | 2 +- cpp/src/gandiva/filter_cache_key.h | 21 ++++++++++++++++++--- cpp/src/gandiva/projector_cache_key.h | 20 +++++++++++++++++++- 3 files changed, 38 insertions(+), 5 deletions(-) diff --git a/cpp/src/gandiva/cache.h b/cpp/src/gandiva/cache.h index 20fc641..13f9a89 100644 --- a/cpp/src/gandiva/cache.h +++ b/cpp/src/gandiva/cache.h @@ -44,7 +44,7 @@ class Cache { private: LruCache<KeyType, ValueType> cache_; - static const int CACHE_SIZE = 100; + static const int CACHE_SIZE = 250; std::mutex mtx_; }; } // namespace gandiva diff --git a/cpp/src/gandiva/filter_cache_key.h b/cpp/src/gandiva/filter_cache_key.h index 825f0ee..9b0e402 100644 --- a/cpp/src/gandiva/filter_cache_key.h +++ b/cpp/src/gandiva/filter_cache_key.h @@ -20,9 +20,9 @@ #include <memory> #include <string> +#include <thread> #include "boost/functional/hash.hpp" - #include "gandiva/arrow.h" #include "gandiva/filter.h" @@ -30,14 +30,16 @@ namespace gandiva { class FilterCacheKey { public: FilterCacheKey(SchemaPtr schema, std::shared_ptr<Configuration> configuration, - Expression& expression) - : schema_(schema), configuration_(configuration) { + Expression &expression) + : schema_(schema), configuration_(configuration), uniqifier_(0) { static const int kSeedValue = 4; size_t result = kSeedValue; expression_as_string_ = expression.ToString(); + UpdateUniqifier(expression_as_string_); boost::hash_combine(result, expression_as_string_); boost::hash_combine(result, configuration); boost::hash_combine(result, schema_->ToString()); + boost::hash_combine(result, uniqifier_); hash_code_ = result; } @@ -56,6 +58,10 @@ class FilterCacheKey { if (expression_as_string_ != other.expression_as_string_) { return false; } + + if (uniqifier_ != other.uniqifier_) { + return false; + } return true; } @@ -77,10 +83,19 @@ class FilterCacheKey { } private: + void UpdateUniqifier(const std::string expr) { + // caching of expressions with re2 patterns causes lock contention. So, use + // multiple instances to reduce contention. + if (expr.find(" like(") != std::string::npos) { + uniqifier_ = std::hash<std::thread::id>()(std::this_thread::get_id()) % 16; + } + } + const SchemaPtr schema_; const std::shared_ptr<Configuration> configuration_; std::string expression_as_string_; size_t hash_code_; + uint32_t uniqifier_; }; } // namespace gandiva #endif // GANDIVA_FILTER_CACHE_KEY_H diff --git a/cpp/src/gandiva/projector_cache_key.h b/cpp/src/gandiva/projector_cache_key.h index 1d73c33..260d6b8 100644 --- a/cpp/src/gandiva/projector_cache_key.h +++ b/cpp/src/gandiva/projector_cache_key.h @@ -22,6 +22,7 @@ #include <string> #include <vector> #include <algorithm> +#include <thread> #include "gandiva/arrow.h" #include "gandiva/projector.h" @@ -31,16 +32,18 @@ class ProjectorCacheKey { public: ProjectorCacheKey(SchemaPtr schema, std::shared_ptr<Configuration> configuration, ExpressionVector expression_vector) - : schema_(schema), configuration_(configuration) { + : schema_(schema), configuration_(configuration), uniqifier_(0) { static const int kSeedValue = 4; size_t result = kSeedValue; for (auto& expr : expression_vector) { std::string expr_as_string = expr->ToString(); expressions_as_strings_.push_back(expr_as_string); boost::hash_combine(result, expr_as_string); + UpdateUniqifier(expr_as_string); } boost::hash_combine(result, configuration); boost::hash_combine(result, schema_->ToString()); + boost::hash_combine(result, uniqifier_); hash_code_ = result; } @@ -59,6 +62,10 @@ class ProjectorCacheKey { if (expressions_as_strings_ != other.expressions_as_strings_) { return false; } + + if (uniqifier_ != other.uniqifier_) { + return false; + } return true; } @@ -91,10 +98,21 @@ class ProjectorCacheKey { } private: + void UpdateUniqifier(const std::string expr) { + if (uniqifier_ == 0) { + // caching of expressions with re2 patterns causes lock contention. So, use + // multiple instances to reduce contention. + if (expr.find(" like(") != std::string::npos) { + uniqifier_ = std::hash<std::thread::id>()(std::this_thread::get_id()) % 16; + } + } + } + const SchemaPtr schema_; const std::shared_ptr<Configuration> configuration_; std::vector<std::string> expressions_as_strings_; size_t hash_code_; + uint32_t uniqifier_; }; } // namespace gandiva #endif // GANDIVA_PROJECTOR_CACHE_KEY_H
