ffacs commented on code in PR #2453:
URL: https://github.com/apache/orc/pull/2453#discussion_r2468037589
##########
c++/src/sargs/SargsApplier.cc:
##########
@@ -185,4 +202,97 @@ namespace orc {
}
return fileStatsEvalResult_;
}
+
+ TruthValue SargsApplier::evaluateDictionaryForColumn(const StringDictionary&
dictionary,
+ const PredicateLeaf&
leaf) const {
+ // Only handle IN expressions for dictionary filtering
+ if (leaf.getOperator() != PredicateLeaf::Operator::IN) {
+ return TruthValue::YES_NO_NULL;
+ }
+
+ const std::vector<Literal>& literals = leaf.getLiteralList();
+ if (literals.empty()) {
+ return TruthValue::YES_NO_NULL;
+ }
+
+ // Pre-compute string views for literals to avoid repeated function calls
+ std::vector<std::string_view> literalViews;
+ literalViews.reserve(literals.size());
+ for (const auto& literal : literals) {
+ literalViews.emplace_back(literal.getStringView());
+ }
+
+ // Check if any dictionary entry matches any literal in the IN list
+ const int64_t* offsets = dictionary.dictionaryOffset.data();
+ const char* blob = dictionary.dictionaryBlob.data();
+ size_t dictSize = dictionary.dictionaryOffset.size() - 1;
+
+ for (size_t i = 0; i < dictSize; ++i) {
+ int64_t start = offsets[i];
+ int64_t length = offsets[i + 1] - start;
+ std::string_view dictEntry(blob + start, static_cast<size_t>(length));
+
+ // Check if this dictionary entry matches any literal in the IN list
+ for (const auto& literalView : literalViews) {
+ if (dictEntry == literalView) {
+ // Found a match - stripe might contain matching rows
+ return TruthValue::YES;
Review Comment:
We can only return TruthValue::YES when **all** rows satisfy the
predicate.
##########
c++/src/sargs/SargsApplier.cc:
##########
@@ -185,4 +202,97 @@ namespace orc {
}
return fileStatsEvalResult_;
}
+
+ TruthValue SargsApplier::evaluateDictionaryForColumn(const StringDictionary&
dictionary,
+ const PredicateLeaf&
leaf) const {
+ // Only handle IN expressions for dictionary filtering
+ if (leaf.getOperator() != PredicateLeaf::Operator::IN) {
+ return TruthValue::YES_NO_NULL;
+ }
+
+ const std::vector<Literal>& literals = leaf.getLiteralList();
+ if (literals.empty()) {
+ return TruthValue::YES_NO_NULL;
+ }
+
+ // Pre-compute string views for literals to avoid repeated function calls
+ std::vector<std::string_view> literalViews;
+ literalViews.reserve(literals.size());
+ for (const auto& literal : literals) {
+ literalViews.emplace_back(literal.getStringView());
+ }
+
+ // Check if any dictionary entry matches any literal in the IN list
+ const int64_t* offsets = dictionary.dictionaryOffset.data();
+ const char* blob = dictionary.dictionaryBlob.data();
+ size_t dictSize = dictionary.dictionaryOffset.size() - 1;
+
+ for (size_t i = 0; i < dictSize; ++i) {
+ int64_t start = offsets[i];
+ int64_t length = offsets[i + 1] - start;
+ std::string_view dictEntry(blob + start, static_cast<size_t>(length));
+
+ // Check if this dictionary entry matches any literal in the IN list
+ for (const auto& literalView : literalViews) {
+ if (dictEntry == literalView) {
+ // Found a match - stripe might contain matching rows
+ return TruthValue::YES;
+ }
+ }
+ }
+
+ // No dictionary entry matches any literal in the IN list - skip stripe
+ return TruthValue::NO;
Review Comment:
Ditto: we can only return TruthValue::NO when **no** rows satisfy the
predicate.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]