This is an automated email from the ASF dual-hosted git repository.

joemcdonnell pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 060432595886bc9660cdea1a7c0c615f24f44017
Author: Joe McDonnell <joemcdonn...@cloudera.com>
AuthorDate: Tue Mar 25 16:13:20 2025 -0700

    IMPALA-13901: Make histogram/sample/group_concat ineligible for tuple caching
    
    "histogram" and "sample" rely on the same code as "appx_median"
    and incorporate randomness. "group_concat" is entirely dependent
    on the order of inputs. This expands tuple caching's list of
    ineligible functions to include these variable aggregate functions.
    
    Testing:
     - Added test cases to TupleCacheTest for these functions
    
    Change-Id: Ibb017dd4297a3bb219ffed2bd9abc94e6c5b2605
    Reviewed-on: http://gerrit.cloudera.org:8080/22668
    Reviewed-by: Michael Smith <michael.sm...@cloudera.com>
    Tested-by: Impala Public Jenkins <impala-public-jenk...@cloudera.com>
    Reviewed-by: Yida Wu <wydbaggio...@gmail.com>
---
 .../java/org/apache/impala/analysis/FunctionCallExpr.java   |  4 +++-
 .../test/java/org/apache/impala/planner/TupleCacheTest.java | 13 ++++++++++---
 2 files changed, 13 insertions(+), 4 deletions(-)

diff --git a/fe/src/main/java/org/apache/impala/analysis/FunctionCallExpr.java b/fe/src/main/java/org/apache/impala/analysis/FunctionCallExpr.java
index ee9e44121..e6e303ba5 100644
--- a/fe/src/main/java/org/apache/impala/analysis/FunctionCallExpr.java
+++ b/fe/src/main/java/org/apache/impala/analysis/FunctionCallExpr.java
@@ -413,7 +413,9 @@ public class FunctionCallExpr extends Expr {
             "coordinator", "current_database", "current_session", 
"current_user",
             "effective_user", "logged_in_user", "pid", "user", "version",
             // Sampling aggregate functions
-            "appx_median",
+            "appx_median", "histogram", "sample",
+            // Order sensitive aggregate functions
+            "group_concat",
             // AI Functions
             "ai_generate_text", "ai_generate_text_default");
     return functionNameInBuiltinSet(fnName_, knownNondeterministicFns);
diff --git a/fe/src/test/java/org/apache/impala/planner/TupleCacheTest.java b/fe/src/test/java/org/apache/impala/planner/TupleCacheTest.java
index 507b3dbb4..34221fa6e 100644
--- a/fe/src/test/java/org/apache/impala/planner/TupleCacheTest.java
+++ b/fe/src/test/java/org/apache/impala/planner/TupleCacheTest.java
@@ -183,9 +183,16 @@ public class TupleCacheTest extends PlannerTestBase {
         "from (select * from functional.alltypesagg where id % 100 = day order 
by id " +
         "limit 99999) a group by day";
     verifyNIdenticalCacheKeys(groupConcatGroupAgg, groupConcatGroupAgg, 1);
-    // Only scan is cached, appx_median disables caching on aggregate.
-    String appxMedianAgg = "select appx_median(tinyint_col) from 
functional.alltypesagg";
-    verifyNIdenticalCacheKeys(appxMedianAgg, appxMedianAgg, 1);
+
+    // Only scan is cached, variable aggregate disable caching above that.
+    for (String aggFn : Arrays.asList("appx_median", "histogram", "sample")) {
+      String variableAggQuery =
+          String.format("select %s(tinyint_col) from functional.alltypesagg", 
aggFn);
+      verifyNIdenticalCacheKeys(variableAggQuery, variableAggQuery, 1);
+    }
+    String groupConcatOnlyScan =
+      "select group_concat(string_col) from functional.alltypes";
+    verifyNIdenticalCacheKeys(groupConcatOnlyScan, groupConcatOnlyScan, 1);
   }
 
   @Test

Reply via email to