Copilot commented on code in PR #60362:
URL: https://github.com/apache/doris/pull/60362#discussion_r2780415465
##########
fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/ExpressionAnalyzer.java:
##########
@@ -700,6 +730,91 @@ protected Expression
processCompoundNewChildren(CompoundPredicate cp, List<Expre
}
}
+ private boolean isEnableVariantSchemaAutoCast(ExpressionRewriteContext
context) {
+ if (context == null || context.cascadesContext == null) {
+ return false;
+ }
+ SessionVariable sessionVariable =
context.cascadesContext.getConnectContext().getSessionVariable();
+ if (sessionVariable == null ||
!sessionVariable.isEnableVariantSchemaAutoCast()) {
+ return false;
+ }
+ return sessionVariable.isEnableVariantSchemaAutoCast();
+ }
+
+ private Expression wrapVariantElementAtWithCast(Expression expr) {
+ ElementAt elementAt = (ElementAt) expr;
+ if (suppressVariantElementAtCastDepth > 0) {
+ return elementAt;
+ }
Review Comment:
wrapVariantElementAtWithCast assumes the input is always an ElementAt and
unconditionally casts `expr` to ElementAt. This can throw ClassCastException
when auto-cast is enabled and maybeCastAliasExpression passes an Alias whose
child is not ElementAt (e.g., StructElement from struct dereference). Consider
guarding with `instanceof ElementAt` (and ideally also ensuring the resolved
root type is VariantType) and returning `expr` unchanged when it doesn't apply.
##########
be/src/vec/common/variant_util.cpp:
##########
@@ -102,6 +103,162 @@
namespace doris::vectorized::variant_util {
#include "common/compile_check_begin.h"
+inline void append_escaped_regex_char(std::string* regex_output, char ch) {
+ switch (ch) {
+ case '.':
+ case '^':
+ case '$':
+ case '+':
+ case '*':
+ case '?':
+ case '(':
+ case ')':
+ case '|':
+ case '{':
+ case '}':
+ case '[':
+ case ']':
+ case '\\':
+ regex_output->push_back('\\');
+ regex_output->push_back(ch);
+ break;
+ default:
+ regex_output->push_back(ch);
+ break;
+ }
+}
+
+// Small LRU to cap compiled glob patterns
+constexpr size_t kGlobRegexCacheCapacity = 256;
+
+struct GlobRegexCacheEntry {
+ std::shared_ptr<RE2> re2;
+ std::list<std::string>::iterator lru_it;
+};
+
+std::mutex g_glob_regex_cache_mutex;
+std::list<std::string> g_glob_regex_cache_lru;
+std::unordered_map<std::string, GlobRegexCacheEntry> g_glob_regex_cache;
Review Comment:
The glob regex cache globals have external linkage
(`g_glob_regex_cache_mutex`, `g_glob_regex_cache_lru`, `g_glob_regex_cache`).
Since they're only used in this translation unit, consider marking them
`static` or moving them into an anonymous namespace to avoid unintended symbol
exports and reduce the chance of name collisions.
```suggestion
static std::mutex g_glob_regex_cache_mutex;
static std::list<std::string> g_glob_regex_cache_lru;
static std::unordered_map<std::string, GlobRegexCacheEntry>
g_glob_regex_cache;
```
##########
fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/CheckAfterRewrite.java:
##########
@@ -211,6 +212,23 @@ private void checkMetricTypeIsUsedCorrectly(Plan plan) {
}
}
+ private boolean containsVariantTypeOutsideCast(Expression expr) {
+ return containsVariantTypeOutsideCast(expr, false);
+ }
+
+ private boolean containsVariantTypeOutsideCast(Expression expr, boolean
underCast) {
+ boolean nextUnderCast = underCast || expr instanceof Cast;
Review Comment:
containsVariantTypeOutsideCast treats any Cast node as a safe boundary and
ignores variant-typed expressions under it. This can incorrectly allow join
equal conditions that still evaluate to VARIANT (e.g., a cast whose target type
is VariantType / identity casts), bypassing the intended restriction. Consider
only suppressing the check when the Cast's result type is non-variant (or
checking `expr instanceof Cast && !expr.getDataType().isVariantType()`).
```suggestion
boolean nextUnderCast = underCast || (expr instanceof Cast &&
!expr.getDataType().isVariantType());
```
##########
fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/ExpressionAnalyzer.java:
##########
@@ -700,6 +730,91 @@ protected Expression
processCompoundNewChildren(CompoundPredicate cp, List<Expre
}
}
+ private boolean isEnableVariantSchemaAutoCast(ExpressionRewriteContext
context) {
+ if (context == null || context.cascadesContext == null) {
+ return false;
+ }
+ SessionVariable sessionVariable =
context.cascadesContext.getConnectContext().getSessionVariable();
+ if (sessionVariable == null ||
!sessionVariable.isEnableVariantSchemaAutoCast()) {
+ return false;
+ }
+ return sessionVariable.isEnableVariantSchemaAutoCast();
Review Comment:
isEnableVariantSchemaAutoCast() redundantly checks and then returns
`sessionVariable.isEnableVariantSchemaAutoCast()` twice. This can be simplified
to a single return expression to reduce noise and avoid future inconsistencies
if the logic changes.
```suggestion
return sessionVariable != null &&
sessionVariable.isEnableVariantSchemaAutoCast();
```
##########
fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/ExpressionAnalyzer.java:
##########
@@ -700,6 +730,91 @@ protected Expression
processCompoundNewChildren(CompoundPredicate cp, List<Expre
}
}
+ private boolean isEnableVariantSchemaAutoCast(ExpressionRewriteContext
context) {
+ if (context == null || context.cascadesContext == null) {
+ return false;
+ }
+ SessionVariable sessionVariable =
context.cascadesContext.getConnectContext().getSessionVariable();
+ if (sessionVariable == null ||
!sessionVariable.isEnableVariantSchemaAutoCast()) {
+ return false;
+ }
+ return sessionVariable.isEnableVariantSchemaAutoCast();
+ }
+
+ private Expression wrapVariantElementAtWithCast(Expression expr) {
+ ElementAt elementAt = (ElementAt) expr;
+ if (suppressVariantElementAtCastDepth > 0) {
+ return elementAt;
+ }
+ Optional<VariantElementAtPath> path =
resolveVariantElementAtPath(elementAt);
+ if (!path.isPresent()) {
+ return expr;
+ }
+ VariantType variantType = (VariantType) path.get().root.getDataType();
+ Optional<VariantField> matchingField =
variantType.findMatchingField(path.get().path);
+ if (!matchingField.isPresent()) {
+ return expr;
+ }
+ DataType targetType = matchingField.get().getDataType();
+ return new Cast(elementAt, targetType);
+ }
+
+ private Optional<VariantElementAtPath>
resolveVariantElementAtPath(ElementAt elementAt) {
+ List<String> segments = new ArrayList<>();
+ Expression current = elementAt;
+ Expression root = null;
+ while (current instanceof ElementAt) {
+ ElementAt currentElementAt = (ElementAt) current;
+ Optional<String> key = getVariantPathKey(currentElementAt.right());
+ if (!key.isPresent()) {
+ return Optional.empty();
+ }
+ segments.add(0, key.get());
+ Expression left = currentElementAt.left();
+ if (left instanceof Cast && !((Cast) left).isExplicitType()) {
+ left = ((Cast) left).child();
+ }
+ current = left;
+ root = left;
+ }
+ if (root == null || !(root.getDataType() instanceof VariantType)) {
+ return Optional.empty();
+ }
+ if (segments.isEmpty()) {
+ return Optional.empty();
+ }
+ return Optional.of(new VariantElementAtPath(root, String.join(".",
segments)));
+ }
+
+ private Optional<String> getVariantPathKey(Expression expr) {
+ if (expr instanceof StringLikeLiteral) {
+ return Optional.of(((StringLikeLiteral) expr).getStringValue());
+ }
+ return Optional.empty();
+ }
+
+ private static final class VariantElementAtPath {
+ private final Expression root;
+ private final String path;
+
+ private VariantElementAtPath(Expression root, String path) {
+ this.root = root;
+ this.path = path;
+ }
+ }
+
+ private Expression maybeCastAliasExpression(Alias alias,
ExpressionRewriteContext context) {
+ if (suppressVariantElementAtCastDepth > 0 ||
!isEnableVariantSchemaAutoCast(context)) {
+ return alias;
+ }
+ Expression child = alias.child();
+ Expression casted = wrapVariantElementAtWithCast(child);
Review Comment:
maybeCastAliasExpression calls wrapVariantElementAtWithCast(alias.child())
without checking the child expression type. When Alias wraps non-ElementAt
expressions (e.g., StructElement from nested struct access), this will crash
due to the cast inside wrapVariantElementAtWithCast. Add a type check (e.g.,
only attempt when child is ElementAt) or make wrapVariantElementAtWithCast
safely no-op for non-ElementAt inputs.
```suggestion
Expression casted = child;
if (child instanceof ElementAt) {
casted = wrapVariantElementAtWithCast(child);
}
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]