This is an automated email from the ASF dual-hosted git repository.

wesm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git

commit fb0a5889bbb2d83ef8f15f400ed47be3a7392c04
Author: Pindikura Ravindra <[email protected]>
AuthorDate: Fri Sep 28 23:23:42 2018 +0530

    [Gandiva] allow multiple module instances in cache
    
    - this is a workaround to reduce lock contention for regex exprs
---
 cpp/src/gandiva/cache.h               |  2 +-
 cpp/src/gandiva/filter_cache_key.h    | 21 ++++++++++++++++++---
 cpp/src/gandiva/projector_cache_key.h | 20 +++++++++++++++++++-
 3 files changed, 38 insertions(+), 5 deletions(-)

diff --git a/cpp/src/gandiva/cache.h b/cpp/src/gandiva/cache.h
index 20fc641..13f9a89 100644
--- a/cpp/src/gandiva/cache.h
+++ b/cpp/src/gandiva/cache.h
@@ -44,7 +44,7 @@ class Cache {
 
  private:
   LruCache<KeyType, ValueType> cache_;
-  static const int CACHE_SIZE = 100;
+  static const int CACHE_SIZE = 250;
   std::mutex mtx_;
 };
 }  // namespace gandiva
diff --git a/cpp/src/gandiva/filter_cache_key.h b/cpp/src/gandiva/filter_cache_key.h
index 825f0ee..9b0e402 100644
--- a/cpp/src/gandiva/filter_cache_key.h
+++ b/cpp/src/gandiva/filter_cache_key.h
@@ -20,9 +20,9 @@
 
 #include <memory>
 #include <string>
+#include <thread>
 
 #include "boost/functional/hash.hpp"
-
 #include "gandiva/arrow.h"
 #include "gandiva/filter.h"
 
@@ -30,14 +30,16 @@ namespace gandiva {
 class FilterCacheKey {
  public:
   FilterCacheKey(SchemaPtr schema, std::shared_ptr<Configuration> configuration,
-                 Expression& expression)
-      : schema_(schema), configuration_(configuration) {
+                 Expression &expression)
+      : schema_(schema), configuration_(configuration), uniqifier_(0) {
     static const int kSeedValue = 4;
     size_t result = kSeedValue;
     expression_as_string_ = expression.ToString();
+    UpdateUniqifier(expression_as_string_);
     boost::hash_combine(result, expression_as_string_);
     boost::hash_combine(result, configuration);
     boost::hash_combine(result, schema_->ToString());
+    boost::hash_combine(result, uniqifier_);
     hash_code_ = result;
   }
 
@@ -56,6 +58,10 @@ class FilterCacheKey {
     if (expression_as_string_ != other.expression_as_string_) {
       return false;
     }
+
+    if (uniqifier_ != other.uniqifier_) {
+      return false;
+    }
     return true;
   }
 
@@ -77,10 +83,19 @@ class FilterCacheKey {
   }
 
  private:
+  void UpdateUniqifier(const std::string expr) {
+    // caching of expressions with re2 patterns causes lock contention. So, use
+    // multiple instances to reduce contention.
+    if (expr.find(" like(") != std::string::npos) {
+      uniqifier_ = std::hash<std::thread::id>()(std::this_thread::get_id()) % 16;
+    }
+  }
+
   const SchemaPtr schema_;
   const std::shared_ptr<Configuration> configuration_;
   std::string expression_as_string_;
   size_t hash_code_;
+  uint32_t uniqifier_;
 };
 }  // namespace gandiva
 #endif  // GANDIVA_FILTER_CACHE_KEY_H
diff --git a/cpp/src/gandiva/projector_cache_key.h b/cpp/src/gandiva/projector_cache_key.h
index 1d73c33..260d6b8 100644
--- a/cpp/src/gandiva/projector_cache_key.h
+++ b/cpp/src/gandiva/projector_cache_key.h
@@ -22,6 +22,7 @@
 #include <string>
 #include <vector>
 #include <algorithm>
+#include <thread>
 
 #include "gandiva/arrow.h"
 #include "gandiva/projector.h"
@@ -31,16 +32,18 @@ class ProjectorCacheKey {
  public:
   ProjectorCacheKey(SchemaPtr schema, std::shared_ptr<Configuration> configuration,
                     ExpressionVector expression_vector)
-      : schema_(schema), configuration_(configuration) {
+      : schema_(schema), configuration_(configuration), uniqifier_(0) {
     static const int kSeedValue = 4;
     size_t result = kSeedValue;
     for (auto& expr : expression_vector) {
       std::string expr_as_string = expr->ToString();
       expressions_as_strings_.push_back(expr_as_string);
       boost::hash_combine(result, expr_as_string);
+      UpdateUniqifier(expr_as_string);
     }
     boost::hash_combine(result, configuration);
     boost::hash_combine(result, schema_->ToString());
+    boost::hash_combine(result, uniqifier_);
     hash_code_ = result;
   }
 
@@ -59,6 +62,10 @@ class ProjectorCacheKey {
     if (expressions_as_strings_ != other.expressions_as_strings_) {
       return false;
     }
+
+    if (uniqifier_ != other.uniqifier_) {
+      return false;
+    }
     return true;
   }
 
@@ -91,10 +98,21 @@ class ProjectorCacheKey {
   }
 
  private:
+  void UpdateUniqifier(const std::string expr) {
+    if (uniqifier_ == 0) {
+      // caching of expressions with re2 patterns causes lock contention. So, use
+      // multiple instances to reduce contention.
+      if (expr.find(" like(") != std::string::npos) {
+        uniqifier_ = std::hash<std::thread::id>()(std::this_thread::get_id()) % 16;
+      }
+    }
+  }
+
   const SchemaPtr schema_;
   const std::shared_ptr<Configuration> configuration_;
   std::vector<std::string> expressions_as_strings_;
   size_t hash_code_;
+  uint32_t uniqifier_;
 };
 }  // namespace gandiva
 #endif  // GANDIVA_PROJECTOR_CACHE_KEY_H

Reply via email to