This is an automated email from the ASF dual-hosted git repository.

ravindra pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new 42cb805  ARROW-10392: [C++][Gandiva] Avoid string copy while evaluating IN expression
42cb805 is described below

commit 42cb805e67e01b73394248e390053eff8e09af40
Author: Prudhvi Porandla <[email protected]>
AuthorDate: Wed Oct 28 13:52:45 2020 +0530

    ARROW-10392: [C++][Gandiva] Avoid string copy while evaluating IN expression
    
    Closes #8530 from pprudhvi/stringalloc
    
    Authored-by: Prudhvi Porandla <[email protected]>
    Signed-off-by: Pindikura Ravindra <[email protected]>
---
 cpp/src/gandiva/gdv_function_stubs.cc |  2 +-
 cpp/src/gandiva/in_holder.h           | 27 +++++++++++++++++++++++++++
 2 files changed, 28 insertions(+), 1 deletion(-)

diff --git a/cpp/src/gandiva/gdv_function_stubs.cc b/cpp/src/gandiva/gdv_function_stubs.cc
index ad93ce8..53253b0 100644
--- a/cpp/src/gandiva/gdv_function_stubs.cc
+++ b/cpp/src/gandiva/gdv_function_stubs.cc
@@ -94,7 +94,7 @@ bool gdv_fn_in_expr_lookup_utf8(int64_t ptr, const char* data, int data_len,
   }
   gandiva::InHolder<std::string>* holder =
       reinterpret_cast<gandiva::InHolder<std::string>*>(ptr);
-  return holder->HasValue(std::string(data, data_len));
+  return holder->HasValue(arrow::util::string_view(data, data_len));
 }
 
 int32_t gdv_fn_populate_varlen_vector(int64_t context_ptr, int8_t* data_ptr,
diff --git a/cpp/src/gandiva/in_holder.h b/cpp/src/gandiva/in_holder.h
index 3e18c25..034745b 100644
--- a/cpp/src/gandiva/in_holder.h
+++ b/cpp/src/gandiva/in_holder.h
@@ -20,6 +20,7 @@
 #include <string>
 #include <unordered_set>
 
+#include "arrow/util/hashing.h"
 #include "gandiva/arrow.h"
 #include "gandiva/gandiva_aliases.h"
 
@@ -42,4 +43,30 @@ class InHolder {
   std::unordered_set<Type> values_;
 };
 
+template <>
+class InHolder<std::string> {
+ public:
+  explicit InHolder(std::unordered_set<std::string> values) : values_(std::move(values)) {
+    values_lookup_.max_load_factor(0.25f);
+    for (const std::string& value : values_) {
+      values_lookup_.emplace(value);
+    }
+  }
+
+  bool HasValue(arrow::util::string_view value) const {
+    return values_lookup_.count(value) == 1;
+  }
+
+ private:
+  struct string_view_hash {
+   public:
+    std::size_t operator()(arrow::util::string_view v) const {
+      return arrow::internal::ComputeStringHash<0>(v.data(), v.length());
+    }
+  };
+
+  std::unordered_set<arrow::util::string_view, string_view_hash> values_lookup_;
+  const std::unordered_set<std::string> values_;
+};
+
 }  // namespace gandiva

Reply via email to