This is an automated email from the ASF dual-hosted git repository.

yjhjstz pushed a commit to branch fix_orca_gist
in repository https://gitbox.apache.org/repos/asf/cloudberry.git

commit 9b501c0bcb3bba4eaa2f03ca5352a71693ee6dac
Author: Jianghua Yang <[email protected]>
AuthorDate: Tue Mar 31 07:00:24 2026 +0800

    ORCA: refine amcanorderbyop fallback to skip lossy distance functions
    
    Only fall back to the PostgreSQL planner when ALL ordering-operator
    expressions in ORDER BY have at least one direct Var (column reference)
    argument.  Expressions like "circle(p,1) <-> point(0,0)" wrap the
    column in a function call, which can cause "lossy distance functions
    are not supported in index-only scans" errors in the planner.  Leave
    such queries for ORCA to handle via Seq Scan + Sort.
---
 src/backend/optimizer/util/walkers.c               | 93 +++++++++++++++++-----
 .../regress/expected/create_index_optimizer.out    | 45 +++++------
 src/test/regress/expected/gist_optimizer.out       | 63 +++++++--------
 3 files changed, 121 insertions(+), 80 deletions(-)

diff --git a/src/backend/optimizer/util/walkers.c 
b/src/backend/optimizer/util/walkers.c
index 752dc4aacc3..0f3dc80097d 100644
--- a/src/backend/optimizer/util/walkers.c
+++ b/src/backend/optimizer/util/walkers.c
@@ -1017,20 +1017,71 @@ check_collation_walker(Node *node, 
check_collation_context *context)
        }
 }
 
+/*
+ * is_ordering_op
+ *
+ * Return true if the operator is registered as an ordering operator
+ * (amoppurpose = AMOP_ORDER) in any opfamily in pg_amop.
+ */
+static bool
+is_ordering_op(Oid opno)
+{
+       CatCList   *catlist = SearchSysCacheList1(AMOPOPID,
+                                                                               
          ObjectIdGetDatum(opno));
+
+       for (int i = 0; i < catlist->n_members; i++)
+       {
+               HeapTuple       tp = &catlist->members[i]->tuple;
+               Form_pg_amop amop = (Form_pg_amop) GETSTRUCT(tp);
+
+               if (amop->amoppurpose == AMOP_ORDER)
+               {
+                       ReleaseSysCacheList(catlist);
+                       return true;
+               }
+       }
+       ReleaseSysCacheList(catlist);
+       return false;
+}
+
+/*
+ * has_plain_var_arg
+ *
+ * Return true if the OpExpr has at least one direct Var argument
+ * (not wrapped in a function or other expression).
+ */
+static bool
+has_plain_var_arg(OpExpr *op)
+{
+       ListCell   *arg_lc;
+
+       foreach(arg_lc, op->args)
+       {
+               if (IsA(lfirst(arg_lc), Var))
+                       return true;
+       }
+       return false;
+}
+
 /*
  * has_orderby_ordering_op
  *
- * Check if any ORDER BY expression in the query uses an ordering operator
- * (amoppurpose = AMOP_ORDER in pg_amop).  These operators (e.g., <-> for
- * trigram distance or point distance) require amcanorderbyop index support
- * (KNN-GiST) which ORCA does not implement.  Return true if such an
- * operator is found, signaling that the query should fall back to the
- * PostgreSQL planner.
+ * Check if the query's ORDER BY uses ordering operators (amoppurpose =
+ * AMOP_ORDER in pg_amop) that the PostgreSQL planner can safely optimize
+ * with KNN-GiST index scans but ORCA cannot.
+ *
+ * Return true only when ALL ordering-operator expressions in ORDER BY
+ * have at least one direct Var (column reference) argument.  Expressions
+ * like "circle(p,1) <-> point(0,0)" wrap the column in a function,
+ * which can cause "lossy distance functions are not supported in
+ * index-only scans" errors in the planner.  In such cases we leave the
+ * query for ORCA to handle via Seq Scan + Sort.
  */
 bool
 has_orderby_ordering_op(Query *query)
 {
        ListCell   *lc;
+       bool            found_ordering_op = false;
 
        if (query->sortClause == NIL)
                return false;
@@ -1044,24 +1095,24 @@ has_orderby_ordering_op(Query *query)
                if (!IsA(expr, OpExpr))
                        continue;
 
-               Oid                     opno = ((OpExpr *) expr)->opno;
-               CatCList   *catlist = SearchSysCacheList1(AMOPOPID,
-                                                                               
                  ObjectIdGetDatum(opno));
+               OpExpr     *opexpr = (OpExpr *) expr;
 
-               for (int i = 0; i < catlist->n_members; i++)
-               {
-                       HeapTuple       tp = &catlist->members[i]->tuple;
-                       Form_pg_amop amop = (Form_pg_amop) GETSTRUCT(tp);
+               if (!is_ordering_op(opexpr->opno))
+                       continue;
 
-                       if (amop->amoppurpose == AMOP_ORDER)
-                       {
-                               ReleaseSysCacheList(catlist);
-                               return true;
-                       }
-               }
-               ReleaseSysCacheList(catlist);
+               /*
+                * Found an ordering operator.  Check that at least one 
argument is
+                * a plain Var.  If any ordering operator has only computed 
arguments
+                * (e.g., function calls wrapping columns), bail out 
immediately —
+                * falling back to the planner could produce lossy distance 
errors
+                * in index-only scans.
+                */
+               found_ordering_op = true;
+
+               if (!has_plain_var_arg(opexpr))
+                       return false;
        }
 
-       return false;
+       return found_ordering_op;
 }
 
diff --git a/src/test/regress/expected/create_index_optimizer.out 
b/src/test/regress/expected/create_index_optimizer.out
index 65f5f92b8bd..aca6fbb1332 100644
--- a/src/test/regress/expected/create_index_optimizer.out
+++ b/src/test/regress/expected/create_index_optimizer.out
@@ -652,18 +652,16 @@ SELECT * FROM point_tblv WHERE f1 IS NOT NULL ORDER BY f1 
<-> '0,1';
 --SELECT * FROM point_tbl WHERE f1 IS NOT NULL ORDER BY f1 <-> '0,1';
 EXPLAIN (COSTS OFF)
 SELECT * FROM point_tblv WHERE f1 <@ '(-10,-10),(10,10)':: box ORDER BY f1 <-> 
'0,1';
-                                                                               
 QUERY PLAN                                                                     
           
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------
- Result
-   ->  Sort
-         Sort Key: ((f1 <-> '(0,1)'::point))
-         ->  Result
-               ->  Gather Motion 3:1  (slice1; segments: 3)
-                     ->  Index Scan using gpointind on point_tbl
-                           Index Cond: (f1 <@ '(10,10),(-10,-10)'::box)
-                           Filter: ((f1 <> '(1e-300,-1e-300)'::point) AND ((f1 
<-> '(0,0)'::point) <> 'Infinity'::double precision) AND (f1 <@ 
'(10,10),(-10,-10)'::box))
- Optimizer: Pivotal Optimizer (GPORCA)
-(9 rows)
+                                                    QUERY PLAN                 
                                    
+-------------------------------------------------------------------------------------------------------------------
+ Gather Motion 3:1  (slice1; segments: 3)
+   Merge Key: ((point_tbl.f1 <-> '(0,1)'::point))
+   ->  Index Only Scan using gpointind on point_tbl
+         Index Cond: (f1 <@ '(10,10),(-10,-10)'::box)
+         Order By: (f1 <-> '(0,1)'::point)
+         Filter: ((f1 <> '(1e-300,-1e-300)'::point) AND ((f1 <-> 
'(0,0)'::point) <> 'Infinity'::double precision))
+ Optimizer: Postgres query optimizer
+(7 rows)
 
 SELECT * FROM point_tblv WHERE f1 <@ '(-10,-10),(10,10)':: box ORDER BY f1 <-> 
'0,1';
    f1    
@@ -767,18 +765,19 @@ SET enable_indexscan = OFF;
 SET enable_bitmapscan = ON;
 EXPLAIN (COSTS OFF)
 SELECT * FROM point_tblv WHERE f1 <@ '(-10,-10),(10,10)':: box ORDER BY f1 <-> 
'0,1';
-                                                                               
 QUERY PLAN                                                                     
           
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------
- Result
+                                                       QUERY PLAN              
                                          
+-------------------------------------------------------------------------------------------------------------------------
+ Gather Motion 3:1  (slice1; segments: 3)
+   Merge Key: ((point_tbl.f1 <-> '(0,1)'::point))
    ->  Sort
-         Sort Key: ((f1 <-> '(0,1)'::point))
-         ->  Result
-               ->  Gather Motion 3:1  (slice1; segments: 3)
-                     ->  Index Scan using gpointind on point_tbl
-                           Index Cond: (f1 <@ '(10,10),(-10,-10)'::box)
-                           Filter: ((f1 <> '(1e-300,-1e-300)'::point) AND ((f1 
<-> '(0,0)'::point) <> 'Infinity'::double precision) AND (f1 <@ 
'(10,10),(-10,-10)'::box))
- Optimizer: Pivotal Optimizer (GPORCA)
-(9 rows)
+         Sort Key: ((point_tbl.f1 <-> '(0,1)'::point))
+         ->  Bitmap Heap Scan on point_tbl
+               Recheck Cond: (f1 <@ '(10,10),(-10,-10)'::box)
+               Filter: ((f1 <> '(1e-300,-1e-300)'::point) AND ((f1 <-> 
'(0,0)'::point) <> 'Infinity'::double precision))
+               ->  Bitmap Index Scan on gpointind
+                     Index Cond: (f1 <@ '(10,10),(-10,-10)'::box)
+ Optimizer: Postgres query optimizer
+(10 rows)
 
 SELECT * FROM point_tblv WHERE f1 <@ '(-10,-10),(10,10)':: box ORDER BY f1 <-> 
'0,1';
    f1    
diff --git a/src/test/regress/expected/gist_optimizer.out 
b/src/test/regress/expected/gist_optimizer.out
index e9020c5db70..abb8b5524cf 100644
--- a/src/test/regress/expected/gist_optimizer.out
+++ b/src/test/regress/expected/gist_optimizer.out
@@ -98,18 +98,15 @@ select p from gist_tbl where p <@ box(point(0,0), 
point(0.5, 0.5));
 explain (costs off)
 select p from gist_tbl where p <@ box(point(0,0), point(0.5, 0.5))
 order by p <-> point(0.201, 0.201);
-                             QUERY PLAN                              
----------------------------------------------------------------------
- Result
-   ->  Gather Motion 3:1  (slice1; segments: 3)
-         Merge Key: ((p <-> '(0.201,0.201)'::point))
-         ->  Sort
-               Sort Key: ((p <-> '(0.201,0.201)'::point))
-               ->  Index Scan using gist_tbl_point_index on gist_tbl
-                     Index Cond: (p <@ '(0.5,0.5),(0,0)'::box)
-                     Filter: (p <@ '(0.5,0.5),(0,0)'::box)
- Optimizer: Pivotal Optimizer (GPORCA) version 3.83.0
-(9 rows)
+                          QUERY PLAN                          
+--------------------------------------------------------------
+ Gather Motion 3:1  (slice1; segments: 3)
+   Merge Key: ((p <-> '(0.201,0.201)'::point))
+   ->  Index Only Scan using gist_tbl_point_index on gist_tbl
+         Index Cond: (p <@ '(0.5,0.5),(0,0)'::box)
+         Order By: (p <-> '(0.201,0.201)'::point)
+ Optimizer: Postgres query optimizer
+(6 rows)
 
 select p from gist_tbl where p <@ box(point(0,0), point(0.5, 0.5))
 order by p <-> point(0.201, 0.201);
@@ -132,18 +129,15 @@ order by p <-> point(0.201, 0.201);
 explain (costs off)
 select p from gist_tbl where p <@ box(point(0,0), point(0.5, 0.5))
 order by point(0.101, 0.101) <-> p;
-                             QUERY PLAN                              
----------------------------------------------------------------------
- Result
-   ->  Gather Motion 3:1  (slice1; segments: 3)
-         Merge Key: (('(0.101,0.101)'::point <-> p))
-         ->  Sort
-               Sort Key: (('(0.101,0.101)'::point <-> p))
-               ->  Index Scan using gist_tbl_point_index on gist_tbl
-                     Index Cond: (p <@ '(0.5,0.5),(0,0)'::box)
-                     Filter: (p <@ '(0.5,0.5),(0,0)'::box)
- Optimizer: Pivotal Optimizer (GPORCA) version 3.83.0
-(9 rows)
+                          QUERY PLAN                          
+--------------------------------------------------------------
+ Gather Motion 3:1  (slice1; segments: 3)
+   Merge Key: (('(0.101,0.101)'::point <-> p))
+   ->  Index Only Scan using gist_tbl_point_index on gist_tbl
+         Index Cond: (p <@ '(0.5,0.5),(0,0)'::box)
+         Order By: (p <-> '(0.101,0.101)'::point)
+ Optimizer: Postgres query optimizer
+(6 rows)
 
 select p from gist_tbl where p <@ box(point(0,0), point(0.5, 0.5))
 order by point(0.101, 0.101) <-> p;
@@ -248,18 +242,15 @@ select b from gist_tbl where b <@ box(point(5,5), 
point(6,6));
 explain (costs off)
 select b from gist_tbl where b <@ box(point(5,5), point(6,6))
 order by b <-> point(5.2, 5.91);
-                            QUERY PLAN                             
--------------------------------------------------------------------
- Result
-   ->  Gather Motion 3:1  (slice1; segments: 3)
-         Merge Key: ((b <-> '(5.2,5.91)'::point))
-         ->  Sort
-               Sort Key: ((b <-> '(5.2,5.91)'::point))
-               ->  Index Scan using gist_tbl_box_index on gist_tbl
-                     Index Cond: (b <@ '(6,6),(5,5)'::box)
-                     Filter: (b <@ '(6,6),(5,5)'::box)
- Optimizer: Pivotal Optimizer (GPORCA)
-(9 rows)
+                         QUERY PLAN                         
+------------------------------------------------------------
+ Gather Motion 3:1  (slice1; segments: 3)
+   Merge Key: ((b <-> '(5.2,5.91)'::point))
+   ->  Index Only Scan using gist_tbl_box_index on gist_tbl
+         Index Cond: (b <@ '(6,6),(5,5)'::box)
+         Order By: (b <-> '(5.2,5.91)'::point)
+ Optimizer: Postgres query optimizer
+(6 rows)
 
 select b from gist_tbl where b <@ box(point(5,5), point(6,6))
 order by b <-> point(5.2, 5.91);


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to