This is an automated email from the ASF dual-hosted git repository. yjhjstz pushed a commit to branch fix_orca_gist in repository https://gitbox.apache.org/repos/asf/cloudberry.git
commit 9b501c0bcb3bba4eaa2f03ca5352a71693ee6dac Author: Jianghua Yang <[email protected]> AuthorDate: Tue Mar 31 07:00:24 2026 +0800 ORCA: refine amcanorderbyop fallback to skip lossy distance functions Only fall back to the PostgreSQL planner when ALL ordering-operator expressions in ORDER BY have at least one direct Var (column reference) argument. Expressions like "circle(p,1) <-> point(0,0)" wrap the column in a function call, which can cause "lossy distance functions are not supported in index-only scans" errors in the planner. Leave such queries for ORCA to handle via Seq Scan + Sort. --- src/backend/optimizer/util/walkers.c | 93 +++++++++++++++++----- .../regress/expected/create_index_optimizer.out | 45 +++++------ src/test/regress/expected/gist_optimizer.out | 63 +++++++-------- 3 files changed, 121 insertions(+), 80 deletions(-) diff --git a/src/backend/optimizer/util/walkers.c b/src/backend/optimizer/util/walkers.c index 752dc4aacc3..0f3dc80097d 100644 --- a/src/backend/optimizer/util/walkers.c +++ b/src/backend/optimizer/util/walkers.c @@ -1017,20 +1017,71 @@ check_collation_walker(Node *node, check_collation_context *context) } } +/* + * is_ordering_op + * + * Return true if the operator is registered as an ordering operator + * (amoppurpose = AMOP_ORDER) in any opfamily in pg_amop. + */ +static bool +is_ordering_op(Oid opno) +{ + CatCList *catlist = SearchSysCacheList1(AMOPOPID, + ObjectIdGetDatum(opno)); + + for (int i = 0; i < catlist->n_members; i++) + { + HeapTuple tp = &catlist->members[i]->tuple; + Form_pg_amop amop = (Form_pg_amop) GETSTRUCT(tp); + + if (amop->amoppurpose == AMOP_ORDER) + { + ReleaseSysCacheList(catlist); + return true; + } + } + ReleaseSysCacheList(catlist); + return false; +} + +/* + * has_plain_var_arg + * + * Return true if the OpExpr has at least one direct Var argument + * (not wrapped in a function or other expression). + */ +static bool +has_plain_var_arg(OpExpr *op) +{ + ListCell *arg_lc; + + foreach(arg_lc, op->args) + { + if (IsA(lfirst(arg_lc), Var)) + return true; + } + return false; +} + /* * has_orderby_ordering_op * - * Check if any ORDER BY expression in the query uses an ordering operator - * (amoppurpose = AMOP_ORDER in pg_amop). These operators (e.g., <-> for - * trigram distance or point distance) require amcanorderbyop index support - * (KNN-GiST) which ORCA does not implement. Return true if such an - * operator is found, signaling that the query should fall back to the - * PostgreSQL planner. + * Check if the query's ORDER BY uses ordering operators (amoppurpose = + * AMOP_ORDER in pg_amop) that the PostgreSQL planner can safely optimize + * with KNN-GiST index scans but ORCA cannot. + * + * Return true only when ALL ordering-operator expressions in ORDER BY + * have at least one direct Var (column reference) argument. Expressions + * like "circle(p,1) <-> point(0,0)" wrap the column in a function, + * which can cause "lossy distance functions are not supported in + * index-only scans" errors in the planner. In such cases we leave the + * query for ORCA to handle via Seq Scan + Sort. */ bool has_orderby_ordering_op(Query *query) { ListCell *lc; + bool found_ordering_op = false; if (query->sortClause == NIL) return false; @@ -1044,24 +1095,24 @@ has_orderby_ordering_op(Query *query) if (!IsA(expr, OpExpr)) continue; - Oid opno = ((OpExpr *) expr)->opno; - CatCList *catlist = SearchSysCacheList1(AMOPOPID, - ObjectIdGetDatum(opno)); + OpExpr *opexpr = (OpExpr *) expr; - for (int i = 0; i < catlist->n_members; i++) - { - HeapTuple tp = &catlist->members[i]->tuple; - Form_pg_amop amop = (Form_pg_amop) GETSTRUCT(tp); + if (!is_ordering_op(opexpr->opno)) + continue; - if (amop->amoppurpose == AMOP_ORDER) - { - ReleaseSysCacheList(catlist); - return true; - } - } - ReleaseSysCacheList(catlist); + /* + * Found an ordering operator. Check that at least one argument is + * a plain Var. If any ordering operator has only computed arguments + * (e.g., function calls wrapping columns), bail out immediately — + * falling back to the planner could produce lossy distance errors + * in index-only scans. + */ + found_ordering_op = true; + + if (!has_plain_var_arg(opexpr)) + return false; } - return false; + return found_ordering_op; } diff --git a/src/test/regress/expected/create_index_optimizer.out b/src/test/regress/expected/create_index_optimizer.out index 65f5f92b8bd..aca6fbb1332 100644 --- a/src/test/regress/expected/create_index_optimizer.out +++ b/src/test/regress/expected/create_index_optimizer.out @@ -652,18 +652,16 @@ SELECT * FROM point_tblv WHERE f1 IS NOT NULL ORDER BY f1 <-> '0,1'; --SELECT * FROM point_tbl WHERE f1 IS NOT NULL ORDER BY f1 <-> '0,1'; EXPLAIN (COSTS OFF) SELECT * FROM point_tblv WHERE f1 <@ '(-10,-10),(10,10)':: box ORDER BY f1 <-> '0,1'; - QUERY PLAN --------------------------------------------------------------------------------------------------------------------------------------------------------------------------- - Result - -> Sort - Sort Key: ((f1 <-> '(0,1)'::point)) - -> Result - -> Gather Motion 3:1 (slice1; segments: 3) - -> Index Scan using gpointind on point_tbl - Index Cond: (f1 <@ '(10,10),(-10,-10)'::box) - Filter: ((f1 <> '(1e-300,-1e-300)'::point) AND ((f1 <-> '(0,0)'::point) <> 'Infinity'::double precision) AND (f1 <@ '(10,10),(-10,-10)'::box)) - Optimizer: Pivotal Optimizer (GPORCA) -(9 rows) + QUERY PLAN +------------------------------------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + Merge Key: ((point_tbl.f1 <-> '(0,1)'::point)) + -> Index Only Scan using gpointind on point_tbl + Index Cond: (f1 <@ '(10,10),(-10,-10)'::box) + Order By: (f1 <-> '(0,1)'::point) + Filter: ((f1 <> '(1e-300,-1e-300)'::point) AND ((f1 <-> '(0,0)'::point) <> 'Infinity'::double precision)) + Optimizer: Postgres query optimizer +(7 rows) SELECT * FROM point_tblv WHERE f1 <@ '(-10,-10),(10,10)':: box ORDER BY f1 <-> '0,1'; f1 @@ -767,18 +765,19 @@ SET enable_indexscan = OFF; SET enable_bitmapscan = ON; EXPLAIN (COSTS OFF) SELECT * FROM point_tblv WHERE f1 <@ '(-10,-10),(10,10)':: box ORDER BY f1 <-> '0,1'; - QUERY PLAN --------------------------------------------------------------------------------------------------------------------------------------------------------------------------- - Result + QUERY PLAN +------------------------------------------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + Merge Key: ((point_tbl.f1 <-> '(0,1)'::point)) -> Sort - Sort Key: ((f1 <-> '(0,1)'::point)) - -> Result - -> Gather Motion 3:1 (slice1; segments: 3) - -> Index Scan using gpointind on point_tbl - Index Cond: (f1 <@ '(10,10),(-10,-10)'::box) - Filter: ((f1 <> '(1e-300,-1e-300)'::point) AND ((f1 <-> '(0,0)'::point) <> 'Infinity'::double precision) AND (f1 <@ '(10,10),(-10,-10)'::box)) - Optimizer: Pivotal Optimizer (GPORCA) -(9 rows) + Sort Key: ((point_tbl.f1 <-> '(0,1)'::point)) + -> Bitmap Heap Scan on point_tbl + Recheck Cond: (f1 <@ '(10,10),(-10,-10)'::box) + Filter: ((f1 <> '(1e-300,-1e-300)'::point) AND ((f1 <-> '(0,0)'::point) <> 'Infinity'::double precision)) + -> Bitmap Index Scan on gpointind + Index Cond: (f1 <@ '(10,10),(-10,-10)'::box) + Optimizer: Postgres query optimizer +(10 rows) SELECT * FROM point_tblv WHERE f1 <@ '(-10,-10),(10,10)':: box ORDER BY f1 <-> '0,1'; f1 diff --git a/src/test/regress/expected/gist_optimizer.out b/src/test/regress/expected/gist_optimizer.out index e9020c5db70..abb8b5524cf 100644 --- a/src/test/regress/expected/gist_optimizer.out +++ b/src/test/regress/expected/gist_optimizer.out @@ -98,18 +98,15 @@ select p from gist_tbl where p <@ box(point(0,0), point(0.5, 0.5)); explain (costs off) select p from gist_tbl where p <@ box(point(0,0), point(0.5, 0.5)) order by p <-> point(0.201, 0.201); - QUERY PLAN ---------------------------------------------------------------------- - Result - -> Gather Motion 3:1 (slice1; segments: 3) - Merge Key: ((p <-> '(0.201,0.201)'::point)) - -> Sort - Sort Key: ((p <-> '(0.201,0.201)'::point)) - -> Index Scan using gist_tbl_point_index on gist_tbl - Index Cond: (p <@ '(0.5,0.5),(0,0)'::box) - Filter: (p <@ '(0.5,0.5),(0,0)'::box) - Optimizer: Pivotal Optimizer (GPORCA) version 3.83.0 -(9 rows) + QUERY PLAN +-------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + Merge Key: ((p <-> '(0.201,0.201)'::point)) + -> Index Only Scan using gist_tbl_point_index on gist_tbl + Index Cond: (p <@ '(0.5,0.5),(0,0)'::box) + Order By: (p <-> '(0.201,0.201)'::point) + Optimizer: Postgres query optimizer +(6 rows) select p from gist_tbl where p <@ box(point(0,0), point(0.5, 0.5)) order by p <-> point(0.201, 0.201); @@ -132,18 +129,15 @@ order by p <-> point(0.201, 0.201); explain (costs off) select p from gist_tbl where p <@ box(point(0,0), point(0.5, 0.5)) order by point(0.101, 0.101) <-> p; - QUERY PLAN ---------------------------------------------------------------------- - Result - -> Gather Motion 3:1 (slice1; segments: 3) - Merge Key: (('(0.101,0.101)'::point <-> p)) - -> Sort - Sort Key: (('(0.101,0.101)'::point <-> p)) - -> Index Scan using gist_tbl_point_index on gist_tbl - Index Cond: (p <@ '(0.5,0.5),(0,0)'::box) - Filter: (p <@ '(0.5,0.5),(0,0)'::box) - Optimizer: Pivotal Optimizer (GPORCA) version 3.83.0 -(9 rows) + QUERY PLAN +-------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + Merge Key: (('(0.101,0.101)'::point <-> p)) + -> Index Only Scan using gist_tbl_point_index on gist_tbl + Index Cond: (p <@ '(0.5,0.5),(0,0)'::box) + Order By: (p <-> '(0.101,0.101)'::point) + Optimizer: Postgres query optimizer +(6 rows) select p from gist_tbl where p <@ box(point(0,0), point(0.5, 0.5)) order by point(0.101, 0.101) <-> p; @@ -248,18 +242,15 @@ select b from gist_tbl where b <@ box(point(5,5), point(6,6)); explain (costs off) select b from gist_tbl where b <@ box(point(5,5), point(6,6)) order by b <-> point(5.2, 5.91); - QUERY PLAN -------------------------------------------------------------------- - Result - -> Gather Motion 3:1 (slice1; segments: 3) - Merge Key: ((b <-> '(5.2,5.91)'::point)) - -> Sort - Sort Key: ((b <-> '(5.2,5.91)'::point)) - -> Index Scan using gist_tbl_box_index on gist_tbl - Index Cond: (b <@ '(6,6),(5,5)'::box) - Filter: (b <@ '(6,6),(5,5)'::box) - Optimizer: Pivotal Optimizer (GPORCA) -(9 rows) + QUERY PLAN +------------------------------------------------------------ + Gather Motion 3:1 (slice1; segments: 3) + Merge Key: ((b <-> '(5.2,5.91)'::point)) + -> Index Only Scan using gist_tbl_box_index on gist_tbl + Index Cond: (b <@ '(6,6),(5,5)'::box) + Order By: (b <-> '(5.2,5.91)'::point) + Optimizer: Postgres query optimizer +(6 rows) select b from gist_tbl where b <@ box(point(5,5), point(6,6)) order by b <-> point(5.2, 5.91); --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
