zhztheplayer commented on code in PR #11711:
URL:
https://github.com/apache/incubator-gluten/pull/11711#discussion_r2904278185
##########
cpp/velox/operators/functions/SparkExprToSubfieldFilterParser.cc:
##########
@@ -16,11 +16,64 @@
*/
#include "operators/functions/SparkExprToSubfieldFilterParser.h"
+#include "velox/common/base/BloomFilter.h"
+#include "velox/expression/Expr.h"
+#include "velox/vector/ComplexVector.h"
+
namespace gluten {
using namespace facebook::velox;
namespace {
+
+VectorPtr toConstant(const core::TypedExprPtr& expr,
core::ExpressionEvaluator* evaluator) {
+ auto exprSet = evaluator->compile(expr);
+ if (!exprSet->exprs()[0]->isConstantExpr()) {
+ return nullptr;
+ }
+ RowVector input(evaluator->pool(), ROW({}, {}), nullptr, 1,
std::vector<VectorPtr>{});
+ SelectivityVector rows(1);
+ VectorPtr result;
+ try {
+ evaluator->evaluate(exprSet.get(), rows, input, result);
+ } catch (const VeloxUserError&) {
+ return nullptr;
+ }
+ return result;
+}
+
+/// Filter backed by Velox's BloomFilter<> serialized data from
bloom_filter_agg.
+class SparkBloomFilter final : public common::Filter {
Review Comment:
Let's use `SparkMightContain` or similar to align with Spark's function name.
##########
cpp/velox/operators/functions/SparkExprToSubfieldFilterParser.cc:
##########
@@ -16,11 +16,64 @@
*/
#include "operators/functions/SparkExprToSubfieldFilterParser.h"
+#include "velox/common/base/BloomFilter.h"
+#include "velox/expression/Expr.h"
+#include "velox/vector/ComplexVector.h"
+
namespace gluten {
using namespace facebook::velox;
namespace {
+
+VectorPtr toConstant(const core::TypedExprPtr& expr,
core::ExpressionEvaluator* evaluator) {
+ auto exprSet = evaluator->compile(expr);
+ if (!exprSet->exprs()[0]->isConstantExpr()) {
+ return nullptr;
+ }
+ RowVector input(evaluator->pool(), ROW({}, {}), nullptr, 1,
std::vector<VectorPtr>{});
+ SelectivityVector rows(1);
+ VectorPtr result;
+ try {
+ evaluator->evaluate(exprSet.get(), rows, input, result);
+ } catch (const VeloxUserError&) {
+ return nullptr;
+ }
Review Comment:
Why is the error swallowed?
##########
cpp/velox/operators/functions/SparkExprToSubfieldFilterParser.cc:
##########
@@ -16,11 +16,64 @@
*/
#include "operators/functions/SparkExprToSubfieldFilterParser.h"
+#include "velox/common/base/BloomFilter.h"
+#include "velox/expression/Expr.h"
+#include "velox/vector/ComplexVector.h"
+
namespace gluten {
using namespace facebook::velox;
namespace {
+
+VectorPtr toConstant(const core::TypedExprPtr& expr,
core::ExpressionEvaluator* evaluator) {
+ auto exprSet = evaluator->compile(expr);
+ if (!exprSet->exprs()[0]->isConstantExpr()) {
+ return nullptr;
+ }
+ RowVector input(evaluator->pool(), ROW({}, {}), nullptr, 1,
std::vector<VectorPtr>{});
+ SelectivityVector rows(1);
+ VectorPtr result;
+ try {
+ evaluator->evaluate(exprSet.get(), rows, input, result);
+ } catch (const VeloxUserError&) {
+ return nullptr;
+ }
+ return result;
+}
+
+/// Filter backed by Velox's BloomFilter<> serialized data from
bloom_filter_agg.
+class SparkBloomFilter final : public common::Filter {
+ public:
+ SparkBloomFilter(std::vector<char> serializedData, bool nullAllowed)
+ : Filter(true, nullAllowed,
common::FilterKind::kBigintValuesUsingBloomFilter),
+ serializedData_(std::move(serializedData)) {}
+
+ bool testInt64(int64_t value) const final {
+ return BloomFilter<>::mayContain(serializedData_.data(),
folly::hasher<int64_t>()(value));
Review Comment:
We can create a bloom filter object as a class member, since
`bloomFilter.mayContain` is faster than `BloomFilter<>::mayContain`.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]