This is an automated email from the ASF dual-hosted git repository.
csy pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/auron.git
The following commit(s) were added to refs/heads/master by this push:
new beb06381 [AURON #1763] Fix unexpected invocation of UDF (#1787)
beb06381 is described below
commit beb06381a355d6fa5c41129493428d4b55f5fdcb
Author: cxzl25 <[email protected]>
AuthorDate: Fri Dec 26 14:43:54 2025 +0800
[AURON #1763] Fix unexpected invocation of UDF (#1787)
# Which issue does this PR close?
Closes #1763
# Rationale for this change
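DataFusion's BinaryExpr only short-circuits And/Or when the result of the
left-hand side meets a certain ratio (see PRE_SELECTION_THRESHOLD below);
otherwise the right-hand side, which may contain a UDF, is still evaluated
for every row. Auron already has dedicated short-circuit And/Or expression
nodes, so this PR adds an option to always use them.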
https://github.com/apache/datafusion/blob/33ac70dd6d634da040cc34abd414425b176a2b99/datafusion/physical-expr/src/expressions/binary.rs#L641-L644
```rust
/// Based on the results calculated from the left side of the short-circuit operation,
/// if the proportion of `true` is less than 0.2 and the current operation is an `and`,
/// the `RecordBatch` will be filtered in advance.
const PRE_SELECTION_THRESHOLD: f32 = 0.2;
```
# What changes are included in this PR?
# Are there any user-facing changes?
Added the configuration `spark.auron.forceShortCircuitAndOr`
The default is false. When set to true, short-circuit evaluation is always
used for And/Or expressions.
# How was this patch tested?
local test
---
.../auron/spark/configuration/SparkAuronConfiguration.java | 7 +++++++
.../main/scala/org/apache/spark/sql/auron/NativeConverters.scala | 9 +++++++--
2 files changed, 14 insertions(+), 2 deletions(-)
diff --git
a/spark-extension/src/main/java/org/apache/auron/spark/configuration/SparkAuronConfiguration.java
b/spark-extension/src/main/java/org/apache/auron/spark/configuration/SparkAuronConfiguration.java
index 8d29a0bc..b503b59f 100644
---
a/spark-extension/src/main/java/org/apache/auron/spark/configuration/SparkAuronConfiguration.java
+++
b/spark-extension/src/main/java/org/apache/auron/spark/configuration/SparkAuronConfiguration.java
@@ -238,6 +238,13 @@ public class SparkAuronConfiguration extends
AuronConfiguration {
.booleanType()
.defaultValue(false);
+ public static final ConfigOption<Boolean> FORCE_SHORT_CIRCUIT_AND_OR =
ConfigOptions.key(
+ "auron.forceShortCircuitAndOr")
+ .description("force using short-circuit evaluation
(PhysicalSCAndExprNode/PhysicalSCOrExprNode) "
+ + "for And/Or expressions, regardless of whether rhs
contains HiveUDF. ")
+ .booleanType()
+ .defaultValue(false);
+
private final SparkConf sparkConf;
public SparkAuronConfiguration(SparkConf conf) {
diff --git
a/spark-extension/src/main/scala/org/apache/spark/sql/auron/NativeConverters.scala
b/spark-extension/src/main/scala/org/apache/spark/sql/auron/NativeConverters.scala
index b137aea8..79803555 100644
---
a/spark-extension/src/main/scala/org/apache/spark/sql/auron/NativeConverters.scala
+++
b/spark-extension/src/main/scala/org/apache/spark/sql/auron/NativeConverters.scala
@@ -770,7 +770,10 @@ object NativeConverters extends Logging {
}
// if rhs is complex in and/or operators, use short-circuiting
implementation
- case And(lhs, rhs) if rhs.find(HiveUDFUtil.isHiveUDF).isDefined =>
+ // or if forceShortCircuitAndOr is enabled, always use short-circuiting
+ case And(lhs, rhs)
+ if
sparkAuronConfig.getBoolean(SparkAuronConfiguration.FORCE_SHORT_CIRCUIT_AND_OR)
+ || rhs.find(HiveUDFUtil.isHiveUDF).isDefined =>
buildExprNode {
_.setScAndExpr(
pb.PhysicalSCAndExprNode
@@ -778,7 +781,9 @@ object NativeConverters extends Logging {
.setLeft(convertExprWithFallback(lhs, isPruningExpr, fallback))
.setRight(convertExprWithFallback(rhs, isPruningExpr, fallback)))
}
- case Or(lhs, rhs) if rhs.find(HiveUDFUtil.isHiveUDF).isDefined =>
+ case Or(lhs, rhs)
+ if
sparkAuronConfig.getBoolean(SparkAuronConfiguration.FORCE_SHORT_CIRCUIT_AND_OR)
+ || rhs.find(HiveUDFUtil.isHiveUDF).isDefined =>
buildExprNode {
_.setScOrExpr(
pb.PhysicalSCOrExprNode