(cloudberry) 01/03: ORCA: fall back to planner for ORDER BY with ordering operators (amcanorderbyop)

yjhjstz Tue, 31 Mar 2026 17:44:11 -0700

This is an automated email from the ASF dual-hosted git repository.

yjhjstz pushed a commit to branch fix_orca_gist
in repository https://gitbox.apache.org/repos/asf/cloudberry.git


commit e44249061855c4a669754dbd6ea1d1c61018e064
Author: Jianghua Yang <[email protected]>
AuthorDate: Tue Mar 31 06:41:46 2026 +0800

    ORCA: fall back to planner for ORDER BY with ordering operators 
(amcanorderbyop)
    
    ORCA does not support amcanorderbyop (KNN ordered index scans).
    Queries like `ORDER BY col <-> 'value' LIMIT N` on GiST indexes
    cannot produce ordered index scans in ORCA, resulting in inefficient
    Seq Scan + Sort plans instead of KNN-GiST Index Scan.
    
    Previously, these queries would accidentally get correct plans because
    column-level COLLATE "C" caused a blanket fallback to the PostgreSQL
    planner, which does support amcanorderbyop. After commit 3f4ce85ae6c
    added COLLATE "C" support to ORCA, these queries lost their fallback
    path.
    
    Add has_orderby_ordering_op() in walkers.c to detect when a query's
    ORDER BY clause contains an operator registered as AMOP_ORDER in
    pg_amop (e.g., <-> for trigram/point distance). When detected, ORCA
    falls back to the PostgreSQL planner which can generate KNN ordered
    index scans.
    
    The check is precise: only ORDER BY with ordering operators triggers
    fallback. Other queries on the same tables (WHERE with LIKE/%%,
    equality filters, etc.) continue to use ORCA normally.
---
 contrib/pg_trgm/expected/pg_trgm_optimizer.out     | 166 ++++++++++-----------
 src/backend/gpopt/gpdbwrappers.cpp                 |  11 ++
 .../gpopt/translate/CTranslatorQueryToDXL.cpp      |   9 ++
 src/backend/optimizer/util/walkers.c               |  54 +++++++
 src/include/gpopt/gpdbwrappers.h                   |   3 +
 src/include/optimizer/walkers.h                    |   1 +
 6 files changed, 157 insertions(+), 87 deletions(-)

diff --git a/contrib/pg_trgm/expected/pg_trgm_optimizer.out 
b/contrib/pg_trgm/expected/pg_trgm_optimizer.out
index 4597b8ca047..a2b012fa8fb 100644
--- a/contrib/pg_trgm/expected/pg_trgm_optimizer.out
+++ b/contrib/pg_trgm/expected/pg_trgm_optimizer.out
@@ -4665,17 +4665,16 @@ select count(*) from test_trgm where t ~ 
'[qwerty]{2}-?[qwerty]{2}';
 -- check handling of indexquals that generate no searchable conditions
 explain (costs off)
 select count(*) from test_trgm where t like '%99%' and t like '%qwerty%';
-                                       QUERY PLAN                              
          
------------------------------------------------------------------------------------------
- Finalize Aggregate
+                                    QUERY PLAN                                 
    
+-----------------------------------------------------------------------------------
+ Aggregate
    ->  Gather Motion 3:1  (slice1; segments: 3)
-         ->  Partial Aggregate
-               ->  Bitmap Heap Scan on test_trgm
-                     Recheck Cond: ((t ~~ '%99%'::text) AND (t ~~ 
'%qwerty%'::text))
-                     ->  Bitmap Index Scan on trgm_idx
-                           Index Cond: ((t ~~ '%99%'::text) AND (t ~~ 
'%qwerty%'::text))
- Optimizer: Postgres query optimizer
-(8 rows)
+         ->  Bitmap Heap Scan on test_trgm
+               Recheck Cond: ((t ~~ '%99%'::text) AND (t ~~ '%qwerty%'::text))
+               ->  Bitmap Index Scan on trgm_idx
+                     Index Cond: ((t ~~ '%99%'::text) AND (t ~~ 
'%qwerty%'::text))
+ Optimizer: GPORCA
+(7 rows)
 
 select count(*) from test_trgm where t like '%99%' and t like '%qwerty%';
  count 
@@ -4685,17 +4684,16 @@ select count(*) from test_trgm where t like '%99%' and 
t like '%qwerty%';
 
 explain (costs off)
 select count(*) from test_trgm where t like '%99%' and t like '%qw%';
-                                     QUERY PLAN                                
      
--------------------------------------------------------------------------------------
- Finalize Aggregate
+                                  QUERY PLAN                                   
+-------------------------------------------------------------------------------
+ Aggregate
    ->  Gather Motion 3:1  (slice1; segments: 3)
-         ->  Partial Aggregate
-               ->  Bitmap Heap Scan on test_trgm
-                     Recheck Cond: ((t ~~ '%99%'::text) AND (t ~~ 
'%qw%'::text))
-                     ->  Bitmap Index Scan on trgm_idx
-                           Index Cond: ((t ~~ '%99%'::text) AND (t ~~ 
'%qw%'::text))
- Optimizer: Postgres query optimizer
-(8 rows)
+         ->  Bitmap Heap Scan on test_trgm
+               Recheck Cond: ((t ~~ '%99%'::text) AND (t ~~ '%qw%'::text))
+               ->  Bitmap Index Scan on trgm_idx
+                     Index Cond: ((t ~~ '%99%'::text) AND (t ~~ '%qw%'::text))
+ Optimizer: GPORCA
+(7 rows)
 
 select count(*) from test_trgm where t like '%99%' and t like '%qw%';
  count 
@@ -4709,17 +4707,16 @@ create index t_trgm_idx on t_test_trgm using gin (t 
gin_trgm_ops);
 insert into t_test_trgm values ('qwerty99'), ('qwerty01');
 explain (costs off)
 select count(*) from t_test_trgm where t like '%99%' and t like '%qwerty%';
-                                       QUERY PLAN                              
          
------------------------------------------------------------------------------------------
- Finalize Aggregate
+                                    QUERY PLAN                                 
    
+-----------------------------------------------------------------------------------
+ Aggregate
    ->  Gather Motion 3:1  (slice1; segments: 3)
-         ->  Partial Aggregate
-               ->  Bitmap Heap Scan on t_test_trgm
-                     Recheck Cond: ((t ~~ '%99%'::text) AND (t ~~ 
'%qwerty%'::text))
-                     ->  Bitmap Index Scan on t_trgm_idx
-                           Index Cond: ((t ~~ '%99%'::text) AND (t ~~ 
'%qwerty%'::text))
- Optimizer: Postgres query optimizer
-(8 rows)
+         ->  Bitmap Heap Scan on t_test_trgm
+               Recheck Cond: ((t ~~ '%99%'::text) AND (t ~~ '%qwerty%'::text))
+               ->  Bitmap Index Scan on t_trgm_idx
+                     Index Cond: ((t ~~ '%99%'::text) AND (t ~~ 
'%qwerty%'::text))
+ Optimizer: GPORCA
+(7 rows)
 
 select count(*) from t_test_trgm where t like '%99%' and t like '%qwerty%';
  count 
@@ -4729,17 +4726,16 @@ select count(*) from t_test_trgm where t like '%99%' 
and t like '%qwerty%';
 
 explain (costs off)
 select count(*) from t_test_trgm where t like '%99%' and t like '%qw%';
-                                     QUERY PLAN                                
      
--------------------------------------------------------------------------------------
- Finalize Aggregate
+                                  QUERY PLAN                                   
+-------------------------------------------------------------------------------
+ Aggregate
    ->  Gather Motion 3:1  (slice1; segments: 3)
-         ->  Partial Aggregate
-               ->  Bitmap Heap Scan on t_test_trgm
-                     Recheck Cond: ((t ~~ '%99%'::text) AND (t ~~ 
'%qw%'::text))
-                     ->  Bitmap Index Scan on t_trgm_idx
-                           Index Cond: ((t ~~ '%99%'::text) AND (t ~~ 
'%qw%'::text))
- Optimizer: Postgres query optimizer
-(8 rows)
+         ->  Bitmap Heap Scan on t_test_trgm
+               Recheck Cond: ((t ~~ '%99%'::text) AND (t ~~ '%qw%'::text))
+               ->  Bitmap Index Scan on t_trgm_idx
+                     Index Cond: ((t ~~ '%99%'::text) AND (t ~~ '%qw%'::text))
+ Optimizer: GPORCA
+(7 rows)
 
 select count(*) from t_test_trgm where t like '%99%' and t like '%qw%';
  count 
@@ -5094,27 +5090,25 @@ create index test2_idx_gist on test2 using gist (t 
gist_trgm_ops);
 set enable_seqscan=off;
 explain (costs off)
   select * from test2 where t like '%BCD%';
-                   QUERY PLAN                    
--------------------------------------------------
+                   QUERY PLAN                   
+------------------------------------------------
  Gather Motion 3:1  (slice1; segments: 3)
-   ->  Bitmap Heap Scan on test2
-         Recheck Cond: (t ~~ '%BCD%'::text)
-         ->  Bitmap Index Scan on test2_idx_gist
-               Index Cond: (t ~~ '%BCD%'::text)
- Optimizer: Postgres query optimizer
-(6 rows)
+   ->  Index Scan using test2_idx_gist on test2
+         Index Cond: (t ~~ '%BCD%'::text)
+         Filter: (t ~~ '%BCD%'::text)
+ Optimizer: GPORCA
+(5 rows)
 
 explain (costs off)
   select * from test2 where t ilike '%BCD%';
-                   QUERY PLAN                    
--------------------------------------------------
+                   QUERY PLAN                   
+------------------------------------------------
  Gather Motion 3:1  (slice1; segments: 3)
-   ->  Bitmap Heap Scan on test2
-         Recheck Cond: (t ~~* '%BCD%'::text)
-         ->  Bitmap Index Scan on test2_idx_gist
-               Index Cond: (t ~~* '%BCD%'::text)
- Optimizer: Postgres query optimizer
-(6 rows)
+   ->  Index Scan using test2_idx_gist on test2
+         Index Cond: (t ~~* '%BCD%'::text)
+         Filter: (t ~~* '%BCD%'::text)
+ Optimizer: GPORCA
+(5 rows)
 
 select * from test2 where t like '%BCD%';
  t 
@@ -5159,27 +5153,25 @@ select * from test2 where t like '  z foo%';
 
 explain (costs off)
   select * from test2 where t ~ '[abc]{3}';
-                    QUERY PLAN                    
---------------------------------------------------
+                   QUERY PLAN                   
+------------------------------------------------
  Gather Motion 3:1  (slice1; segments: 3)
-   ->  Bitmap Heap Scan on test2
-         Recheck Cond: (t ~ '[abc]{3}'::text)
-         ->  Bitmap Index Scan on test2_idx_gist
-               Index Cond: (t ~ '[abc]{3}'::text)
- Optimizer: Postgres query optimizer
-(6 rows)
+   ->  Index Scan using test2_idx_gist on test2
+         Index Cond: (t ~ '[abc]{3}'::text)
+         Filter: (t ~ '[abc]{3}'::text)
+ Optimizer: GPORCA
+(5 rows)
 
 explain (costs off)
   select * from test2 where t ~* 'DEF';
-                   QUERY PLAN                    
--------------------------------------------------
+                   QUERY PLAN                   
+------------------------------------------------
  Gather Motion 3:1  (slice1; segments: 3)
-   ->  Bitmap Heap Scan on test2
-         Recheck Cond: (t ~* 'DEF'::text)
-         ->  Bitmap Index Scan on test2_idx_gist
-               Index Cond: (t ~* 'DEF'::text)
- Optimizer: Postgres query optimizer
-(6 rows)
+   ->  Index Scan using test2_idx_gist on test2
+         Index Cond: (t ~* 'DEF'::text)
+         Filter: (t ~* 'DEF'::text)
+ Optimizer: GPORCA
+(5 rows)
 
 select * from test2 where t ~ '[abc]{3}';
    t    
@@ -5311,14 +5303,14 @@ select * from test2 where t ~ '/\d+/-\d';
 -- test = operator
 explain (costs off)
   select * from test2 where t = 'abcdef';
-                QUERY PLAN                
-------------------------------------------
+                   QUERY PLAN                   
+------------------------------------------------
  Gather Motion 1:1  (slice1; segments: 1)
-   ->  Bitmap Heap Scan on test2
-         Recheck Cond: (t = 'abcdef'::text)
-         ->  Bitmap Index Scan on test2_idx_gist
-               Index Cond: (t = 'abcdef'::text)
-(2 rows)
+   ->  Index Scan using test2_idx_gist on test2
+         Index Cond: (t = 'abcdef'::text)
+         Filter: (t = 'abcdef'::text)
+ Optimizer: GPORCA
+(5 rows)
 
 select * from test2 where t = 'abcdef';
    t    
@@ -5328,14 +5320,14 @@ select * from test2 where t = 'abcdef';
 
 explain (costs off)
   select * from test2 where t = '%line%';
-                QUERY PLAN                
-------------------------------------------
+                   QUERY PLAN                   
+------------------------------------------------
  Gather Motion 1:1  (slice1; segments: 1)
-   ->  Bitmap Heap Scan on test2
-         Recheck Cond: (t = '%line%'::text)
-         ->  Bitmap Index Scan on test2_idx_gist
-               Index Cond: (t = '%line%'::text)
-(6 rows)
+   ->  Index Scan using test2_idx_gist on test2
+         Index Cond: (t = '%line%'::text)
+         Filter: (t = '%line%'::text)
+ Optimizer: GPORCA
+(5 rows)
 
 select * from test2 where t = '%line%';
  t 
@@ -5423,7 +5415,7 @@ SELECT DISTINCT city, similarity(city, 'Warsaw'), 
show_limit()
                ->  Index Scan using restaurants_city_idx on restaurants
                      Index Cond: (city % 'Warsaw'::text)
                      Filter: (city % 'Warsaw'::text)
- Optimizer: Pivotal Optimizer (GPORCA)
+ Optimizer: GPORCA
 (9 rows)
 
 SELECT set_limit(0.3);
diff --git a/src/backend/gpopt/gpdbwrappers.cpp 
b/src/backend/gpopt/gpdbwrappers.cpp
index aca95b2cc0a..4d8d8c59100 100644
--- a/src/backend/gpopt/gpdbwrappers.cpp
+++ b/src/backend/gpopt/gpdbwrappers.cpp
@@ -2012,6 +2012,17 @@ gpdb::CheckCollation(Node *node)
        return -1;
 }
 
+bool
+gpdb::HasOrderByOrderingOp(Query *query)
+{
+       GP_WRAP_START;
+       {
+               return has_orderby_ordering_op(query);
+       }
+       GP_WRAP_END;
+       return false;
+}
+
 Node *
 gpdb::CoerceToCommonType(ParseState *pstate, Node *node, Oid target_type,
                                                 const char *context)
diff --git a/src/backend/gpopt/translate/CTranslatorQueryToDXL.cpp 
b/src/backend/gpopt/translate/CTranslatorQueryToDXL.cpp
index 20cc6557c28..99d87917b38 100644
--- a/src/backend/gpopt/translate/CTranslatorQueryToDXL.cpp
+++ b/src/backend/gpopt/translate/CTranslatorQueryToDXL.cpp
@@ -324,6 +324,15 @@ CTranslatorQueryToDXL::CheckUnsupportedNodeTypes(Query 
*query)
                GPOS_RAISE(gpdxl::ExmaDXL, 
gpdxl::ExmiQuery2DXLUnsupportedFeature,
                                   GPOS_WSZ_LIT("Non-default collation"));
        }
+
+       // ORCA does not support amcanorderbyop (KNN ordered index scans).
+       // Fall back to the PostgreSQL planner for queries whose ORDER BY
+       // contains an ordering operator (e.g., <-> for distance).
+       if (gpdb::HasOrderByOrderingOp(query))
+       {
+               GPOS_RAISE(gpdxl::ExmaDXL, 
gpdxl::ExmiQuery2DXLUnsupportedFeature,
+                                  GPOS_WSZ_LIT("ORDER BY with ordering 
operator (amcanorderbyop)"));
+       }
 }
 
 //---------------------------------------------------------------------------
diff --git a/src/backend/optimizer/util/walkers.c 
b/src/backend/optimizer/util/walkers.c
index 3b3d0311d06..752dc4aacc3 100644
--- a/src/backend/optimizer/util/walkers.c
+++ b/src/backend/optimizer/util/walkers.c
@@ -8,11 +8,17 @@
 
 #include "postgres.h"
 
+#include "access/htup_details.h"
+#include "catalog/pg_amop.h"
 #include "catalog/pg_collation.h"
 #include "catalog/pg_type.h"
 #include "miscadmin.h"
 #include "nodes/nodeFuncs.h"
+#include "optimizer/optimizer.h"
 #include "optimizer/walkers.h"
+#include "utils/catcache.h"
+#include "utils/lsyscache.h"
+#include "utils/syscache.h"
 
 /**
  * Plan node walker related methods.
@@ -1011,3 +1017,51 @@ check_collation_walker(Node *node, 
check_collation_context *context)
        }
 }
 
+/*
+ * has_orderby_ordering_op
+ *
+ * Check if any ORDER BY expression in the query uses an ordering operator
+ * (amoppurpose = AMOP_ORDER in pg_amop).  These operators (e.g., <-> for
+ * trigram distance or point distance) require amcanorderbyop index support
+ * (KNN-GiST) which ORCA does not implement.  Return true if such an
+ * operator is found, signaling that the query should fall back to the
+ * PostgreSQL planner.
+ */
+bool
+has_orderby_ordering_op(Query *query)
+{
+       ListCell   *lc;
+
+       if (query->sortClause == NIL)
+               return false;
+
+       foreach(lc, query->sortClause)
+       {
+               SortGroupClause *sgc = (SortGroupClause *) lfirst(lc);
+               TargetEntry *tle = get_sortgroupclause_tle(sgc, 
query->targetList);
+               Node       *expr = (Node *) tle->expr;
+
+               if (!IsA(expr, OpExpr))
+                       continue;
+
+               Oid                     opno = ((OpExpr *) expr)->opno;
+               CatCList   *catlist = SearchSysCacheList1(AMOPOPID,
+                                                                               
                  ObjectIdGetDatum(opno));
+
+               for (int i = 0; i < catlist->n_members; i++)
+               {
+                       HeapTuple       tp = &catlist->members[i]->tuple;
+                       Form_pg_amop amop = (Form_pg_amop) GETSTRUCT(tp);
+
+                       if (amop->amoppurpose == AMOP_ORDER)
+                       {
+                               ReleaseSysCacheList(catlist);
+                               return true;
+                       }
+               }
+               ReleaseSysCacheList(catlist);
+       }
+
+       return false;
+}
+
diff --git a/src/include/gpopt/gpdbwrappers.h b/src/include/gpopt/gpdbwrappers.h
index 261cd28b5f0..9ef53169599 100644
--- a/src/include/gpopt/gpdbwrappers.h
+++ b/src/include/gpopt/gpdbwrappers.h
@@ -673,6 +673,9 @@ int FindNodes(Node *node, List *nodeTags);
 // look for nodes with non-default collation; returns 1 if any exist, -1 
otherwise
 int CheckCollation(Node *node);
 
+// check if ORDER BY uses an ordering operator (amcanorderbyop) unsupported by 
ORCA
+bool HasOrderByOrderingOp(Query *query);
+
 Node *CoerceToCommonType(ParseState *pstate, Node *node, Oid target_type,
                                                 const char *context);
 
diff --git a/src/include/optimizer/walkers.h b/src/include/optimizer/walkers.h
index 6d0d38717f5..d29bc5551e8 100644
--- a/src/include/optimizer/walkers.h
+++ b/src/include/optimizer/walkers.h
@@ -43,5 +43,6 @@ extern List *extract_nodes_plan(Plan *pl, int nodeTag, bool 
descendIntoSubquerie
 extern List *extract_nodes_expression(Node *node, int nodeTag, bool 
descendIntoSubqueries);
 extern int find_nodes(Node *node, List *nodeTags);
 extern int check_collation(Node *node);
+extern bool has_orderby_ordering_op(Query *query);
 
 #endif /* WALKERS_H_ */


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

(cloudberry) 01/03: ORCA: fall back to planner for ORDER BY with ordering operators (amcanorderbyop)

Reply via email to