This is an automated email from the ASF dual-hosted git repository.
ravindra pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new 42cb805 ARROW-10392: [C++][Gandiva] Avoid string copy while evaluating IN expression
42cb805 is described below
commit 42cb805e67e01b73394248e390053eff8e09af40
Author: Prudhvi Porandla <[email protected]>
AuthorDate: Wed Oct 28 13:52:45 2020 +0530
ARROW-10392: [C++][Gandiva] Avoid string copy while evaluating IN expression
Closes #8530 from pprudhvi/stringalloc
Authored-by: Prudhvi Porandla <[email protected]>
Signed-off-by: Pindikura Ravindra <[email protected]>
---
cpp/src/gandiva/gdv_function_stubs.cc | 2 +-
cpp/src/gandiva/in_holder.h | 27 +++++++++++++++++++++++++++
2 files changed, 28 insertions(+), 1 deletion(-)
diff --git a/cpp/src/gandiva/gdv_function_stubs.cc b/cpp/src/gandiva/gdv_function_stubs.cc
index ad93ce8..53253b0 100644
--- a/cpp/src/gandiva/gdv_function_stubs.cc
+++ b/cpp/src/gandiva/gdv_function_stubs.cc
@@ -94,7 +94,7 @@ bool gdv_fn_in_expr_lookup_utf8(int64_t ptr, const char* data, int data_len,
}
gandiva::InHolder<std::string>* holder =
reinterpret_cast<gandiva::InHolder<std::string>*>(ptr);
- return holder->HasValue(std::string(data, data_len));
+ return holder->HasValue(arrow::util::string_view(data, data_len));
}
int32_t gdv_fn_populate_varlen_vector(int64_t context_ptr, int8_t* data_ptr,
diff --git a/cpp/src/gandiva/in_holder.h b/cpp/src/gandiva/in_holder.h
index 3e18c25..034745b 100644
--- a/cpp/src/gandiva/in_holder.h
+++ b/cpp/src/gandiva/in_holder.h
@@ -20,6 +20,7 @@
#include <string>
#include <unordered_set>
+#include "arrow/util/hashing.h"
#include "gandiva/arrow.h"
#include "gandiva/gandiva_aliases.h"
@@ -42,4 +43,30 @@ class InHolder {
std::unordered_set<Type> values_;
};
+template <>
+class InHolder<std::string> {
+ public:
+ explicit InHolder(std::unordered_set<std::string> values) : values_(std::move(values)) {
+ values_lookup_.max_load_factor(0.25f);
+ for (const std::string& value : values_) {
+ values_lookup_.emplace(value);
+ }
+ }
+
+ bool HasValue(arrow::util::string_view value) const {
+ return values_lookup_.count(value) == 1;
+ }
+
+ private:
+ struct string_view_hash {
+ public:
+ std::size_t operator()(arrow::util::string_view v) const {
+ return arrow::internal::ComputeStringHash<0>(v.data(), v.length());
+ }
+ };
+
+ std::unordered_set<arrow::util::string_view, string_view_hash> values_lookup_;
+ const std::unordered_set<std::string> values_;
+};
+
} // namespace gandiva