This is an automated email from the ASF dual-hosted git repository.

maxyang pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/cloudberry.git


The following commit(s) were added to refs/heads/main by this push:
     new decaba17e63 ORCA: reject functions with prosupport in DXL translation
decaba17e63 is described below

commit decaba17e63bd30d7e35e1319bdd6d9c00f19e2f
Author: Jianghua Yang <yjhj...@gmail.com>
AuthorDate: Wed Aug 20 02:05:57 2025 +0000

    ORCA: reject functions with prosupport in DXL translation
    
    ORCA does not currently implement PostgreSQL's prosupport mechanism.
    Support functions (prosupport) provide planner-time optimization hints
    in the upstream planner, but ORCA has no equivalent logic. To avoid
    incorrect query plans, functions with prosupport that live outside the
    pg_catalog namespace are now rejected during DXL translation, so such
    queries fall back to the Postgres query optimizer.
---
 .../regress/expected/misc_functions_optimizer.out  | 50 ++++++++++----------
 src/backend/gpopt/gpdbwrappers.cpp                 | 25 ++++++++++
 .../gpopt/translate/CTranslatorRelcacheToDXL.cpp   | 17 +++++++
 src/include/gpopt/gpdbwrappers.h                   |  6 +++
 .../regress/expected/misc_functions_optimizer.out  | 54 +++++++++++-----------
 5 files changed, 98 insertions(+), 54 deletions(-)

diff --git a/contrib/pax_storage/src/test/regress/expected/misc_functions_optimizer.out b/contrib/pax_storage/src/test/regress/expected/misc_functions_optimizer.out
index 8cdcba95fb1..56138548e88 100644
--- a/contrib/pax_storage/src/test/regress/expected/misc_functions_optimizer.out
+++ b/contrib/pax_storage/src/test/regress/expected/misc_functions_optimizer.out
@@ -235,7 +235,7 @@ WHERE my_int_eq(a.unique2, 42);
          ->  Hash
                ->  Seq Scan on tenk1 a
                      Filter: my_int_eq(unique2, 42)
- Optimizer: Pivotal Optimizer (GPORCA)
+ Optimizer: GPORCA
 (8 rows)
 
 -- With support function that knows it's int4eq, we get a different plan
@@ -243,17 +243,16 @@ ALTER FUNCTION my_int_eq(int, int) SUPPORT test_support_func;
 EXPLAIN (COSTS OFF)
 SELECT * FROM tenk1 a JOIN tenk1 b ON a.unique1 = b.unique1
 WHERE my_int_eq(a.unique2, 42);
-                      QUERY PLAN                      
-------------------------------------------------------
+                      QUERY PLAN                       
+-------------------------------------------------------
  Gather Motion 3:1  (slice1; segments: 3)
-   ->  Hash Join
-         Hash Cond: (b.unique1 = a.unique1)
-         ->  Seq Scan on tenk1 b
-         ->  Hash
-               ->  Seq Scan on tenk1 a
-                     Filter: my_int_eq(unique2, 42)
- Optimizer: Pivotal Optimizer (GPORCA)
-(8 rows)
+   ->  Nested Loop
+         ->  Seq Scan on tenk1 a
+               Filter: my_int_eq(unique2, 42)
+         ->  Index Scan using tenk1_unique1 on tenk1 b
+               Index Cond: (unique1 = a.unique1)
+ Optimizer: Postgres query optimizer
+(7 rows)
 
 -- Also test non-default rowcount estimate
 CREATE FUNCTION my_gen_series(int, int) RETURNS SETOF integer
@@ -262,27 +261,26 @@ CREATE FUNCTION my_gen_series(int, int) RETURNS SETOF integer
   SUPPORT test_support_func;
 EXPLAIN (COSTS OFF)
 SELECT * FROM tenk1 a JOIN my_gen_series(1,1000) g ON a.unique1 = g;
-                            QUERY PLAN                            
-------------------------------------------------------------------
+                  QUERY PLAN                  
+----------------------------------------------
  Gather Motion 3:1  (slice1; segments: 3)
    ->  Hash Join
-         Hash Cond: (a.unique1 = my_gen_series.my_gen_series)
-         ->  Seq Scan on tenk1 a
+         Hash Cond: (g.g = a.unique1)
+         ->  Function Scan on my_gen_series g
          ->  Hash
-               ->  Function Scan on my_gen_series
- Optimizer: Pivotal Optimizer (GPORCA)
+               ->  Seq Scan on tenk1 a
+ Optimizer: Postgres query optimizer
 (7 rows)
 
 EXPLAIN (COSTS OFF)
 SELECT * FROM tenk1 a JOIN my_gen_series(1,5) g ON a.unique1 = g;
-                            QUERY PLAN                            
-------------------------------------------------------------------
+                      QUERY PLAN                       
+-------------------------------------------------------
  Gather Motion 3:1  (slice1; segments: 3)
-   ->  Hash Join
-         Hash Cond: (a.unique1 = my_gen_series.my_gen_series)
-         ->  Seq Scan on tenk1 a
-         ->  Hash
-               ->  Function Scan on my_gen_series
- Optimizer: Pivotal Optimizer (GPORCA)
-(7 rows)
+   ->  Nested Loop
+         ->  Function Scan on my_gen_series g
+         ->  Index Scan using tenk1_unique1 on tenk1 a
+               Index Cond: (unique1 = g.g)
+ Optimizer: Postgres query optimizer
+(6 rows)
 
diff --git a/src/backend/gpopt/gpdbwrappers.cpp b/src/backend/gpopt/gpdbwrappers.cpp
index 1deb53883d5..4e636a0c653 100644
--- a/src/backend/gpopt/gpdbwrappers.cpp
+++ b/src/backend/gpopt/gpdbwrappers.cpp
@@ -49,6 +49,7 @@ extern "C" {
 #include "partitioning/partdesc.h"
 #include "storage/lmgr.h"
 #include "utils/fmgroids.h"
+#include "utils/lsyscache.h"
 #include "utils/memutils.h"
 #include "utils/partcache.h"
 }
@@ -346,6 +347,30 @@ gpdb::FuncStability(Oid funcid)
        return '\0';
 }
 
+RegProcedure
+gpdb::FuncSupport(Oid funcid)
+{
+       GP_WRAP_START;
+       {
+               /* catalog tables: pg_proc */
+               return get_func_support(funcid);
+       }
+       GP_WRAP_END;
+       return InvalidOid;
+}
+
+Oid
+gpdb::FuncNamespace(Oid funcid)
+{
+       GP_WRAP_START;
+       {
+               /* catalog tables: pg_proc */
+               return get_func_namespace(funcid);
+       }
+       GP_WRAP_END;
+       return InvalidOid;
+}
+
 char
 gpdb::FuncExecLocation(Oid funcid)
 {
diff --git a/src/backend/gpopt/translate/CTranslatorRelcacheToDXL.cpp b/src/backend/gpopt/translate/CTranslatorRelcacheToDXL.cpp
index d0d57338b78..469d69fb60f 100644
--- a/src/backend/gpopt/translate/CTranslatorRelcacheToDXL.cpp
+++ b/src/backend/gpopt/translate/CTranslatorRelcacheToDXL.cpp
@@ -20,6 +20,7 @@ extern "C" {
 #include "catalog/heap.h"
 #include "catalog/namespace.h"
 #include "catalog/pg_am.h"
+#include "catalog/pg_namespace.h"
 #include "catalog/pg_proc.h"
 #include "catalog/pg_statistic.h"
 #include "catalog/pg_statistic_ext.h"
@@ -1437,6 +1438,22 @@ CTranslatorRelcacheToDXL::LookupFuncProps(
 
        *stability = GetFuncStability(gpdb::FuncStability(func_oid));
 
+       RegProcedure prosupport = gpdb::FuncSupport(func_oid);
+       if (OidIsValid(prosupport))
+       {
+               /*
+                 CBDB_FIXME:
+                 Check if function is NOT in pg_catalog namespace
+                 Functions outside pg_catalog are likely extension functions that are not supported yet.
+               */
+               Oid func_namespace = gpdb::FuncNamespace(func_oid);
+               if (func_namespace != PG_CATALOG_NAMESPACE)
+               {
+                       GPOS_RAISE(gpdxl::ExmaDXL, gpdxl::ExmiQuery2DXLUnsupportedFeature,
+                                          GPOS_WSZ_LIT("extension functions with prosupport unsupported"));
+               }
+       }
+
        if (gpdb::FuncExecLocation(func_oid) != PROEXECLOCATION_ANY)
        {
                 GPOS_RAISE(gpdxl::ExmaDXL, gpdxl::ExmiQuery2DXLUnsupportedFeature,
diff --git a/src/include/gpopt/gpdbwrappers.h b/src/include/gpopt/gpdbwrappers.h
index fe387e471c0..261cd28b5f0 100644
--- a/src/include/gpopt/gpdbwrappers.h
+++ b/src/include/gpopt/gpdbwrappers.h
@@ -338,6 +338,12 @@ bool IsFuncNDVPreserving(Oid funcid);
 // stability property of given function
 char FuncStability(Oid funcid);
 
+// support function of given function
+RegProcedure FuncSupport(Oid funcid);
+
+// namespace of given function
+Oid FuncNamespace(Oid funcid);
+
 // exec location property of given function
 char FuncExecLocation(Oid funcid);
 
diff --git a/src/test/regress/expected/misc_functions_optimizer.out b/src/test/regress/expected/misc_functions_optimizer.out
index f5187fad45b..e5723fea5f3 100644
--- a/src/test/regress/expected/misc_functions_optimizer.out
+++ b/src/test/regress/expected/misc_functions_optimizer.out
@@ -256,8 +256,8 @@ CREATE FUNCTION my_int_eq(int, int) RETURNS bool
 EXPLAIN (COSTS OFF)
 SELECT * FROM tenk1 a JOIN tenk1 b ON a.unique1 = b.unique1
 WHERE my_int_eq(a.unique2, 42);
-                      QUERY PLAN                      
-------------------------------------------------------
+                     QUERY PLAN                     
+----------------------------------------------------
  Gather Motion 3:1  (slice1; segments: 3)
    ->  Hash Join
          Hash Cond: (b.unique1 = a.unique1)
@@ -265,7 +265,7 @@ WHERE my_int_eq(a.unique2, 42);
          ->  Hash
                ->  Seq Scan on tenk1 a
                      Filter: my_int_eq(unique2, 42)
- Optimizer: Pivotal Optimizer (GPORCA)
+ Optimizer: GPORCA
 (8 rows)
 
 -- With support function that knows it's int4eq, we get a different plan
@@ -273,17 +273,16 @@ ALTER FUNCTION my_int_eq(int, int) SUPPORT test_support_func;
 EXPLAIN (COSTS OFF)
 SELECT * FROM tenk1 a JOIN tenk1 b ON a.unique1 = b.unique1
 WHERE my_int_eq(a.unique2, 42);
-                      QUERY PLAN                      
-------------------------------------------------------
+                      QUERY PLAN                       
+-------------------------------------------------------
  Gather Motion 3:1  (slice1; segments: 3)
-   ->  Hash Join
-         Hash Cond: (b.unique1 = a.unique1)
-         ->  Seq Scan on tenk1 b
-         ->  Hash
-               ->  Seq Scan on tenk1 a
-                     Filter: my_int_eq(unique2, 42)
- Optimizer: Pivotal Optimizer (GPORCA)
-(8 rows)
+   ->  Nested Loop
+         ->  Seq Scan on tenk1 a
+               Filter: my_int_eq(unique2, 42)
+         ->  Index Scan using tenk1_unique1 on tenk1 b
+               Index Cond: (unique1 = a.unique1)
+ Optimizer: Postgres query optimizer
+(7 rows)
 
 -- Also test non-default rowcount estimate
 CREATE FUNCTION my_gen_series(int, int) RETURNS SETOF integer
@@ -292,27 +291,26 @@ CREATE FUNCTION my_gen_series(int, int) RETURNS SETOF integer
   SUPPORT test_support_func;
 EXPLAIN (COSTS OFF)
 SELECT * FROM tenk1 a JOIN my_gen_series(1,1000) g ON a.unique1 = g;
-                            QUERY PLAN                            
-------------------------------------------------------------------
+                  QUERY PLAN                  
+----------------------------------------------
  Gather Motion 3:1  (slice1; segments: 3)
    ->  Hash Join
-         Hash Cond: (a.unique1 = my_gen_series.my_gen_series)
-         ->  Seq Scan on tenk1 a
+         Hash Cond: (g.g = a.unique1)
+         ->  Function Scan on my_gen_series g
          ->  Hash
-               ->  Function Scan on my_gen_series
- Optimizer: Pivotal Optimizer (GPORCA)
+               ->  Seq Scan on tenk1 a
+ Optimizer: Postgres query optimizer
 (7 rows)
 
 EXPLAIN (COSTS OFF)
 SELECT * FROM tenk1 a JOIN my_gen_series(1,10) g ON a.unique1 = g;
-                            QUERY PLAN                            
-------------------------------------------------------------------
+                      QUERY PLAN                       
+-------------------------------------------------------
  Gather Motion 3:1  (slice1; segments: 3)
-   ->  Hash Join
-         Hash Cond: (a.unique1 = my_gen_series.my_gen_series)
-         ->  Seq Scan on tenk1 a
-         ->  Hash
-               ->  Function Scan on my_gen_series
- Optimizer: Pivotal Optimizer (GPORCA)
-(7 rows)
+   ->  Nested Loop
+         ->  Function Scan on my_gen_series g
+         ->  Index Scan using tenk1_unique1 on tenk1 a
+               Index Cond: (unique1 = g.g)
+ Optimizer: Postgres query optimizer
+(6 rows)
 


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@cloudberry.apache.org
For additional commands, e-mail: commits-h...@cloudberry.apache.org

Reply via email to