This is an automated email from the ASF dual-hosted git repository. yjhjstz pushed a commit to branch fix_orca_gist in repository https://gitbox.apache.org/repos/asf/cloudberry.git
commit e44249061855c4a669754dbd6ea1d1c61018e064 Author: Jianghua Yang <[email protected]> AuthorDate: Tue Mar 31 06:41:46 2026 +0800 ORCA: fall back to planner for ORDER BY with ordering operators (amcanorderbyop) ORCA does not support amcanorderbyop (KNN ordered index scans). Queries like `ORDER BY col <-> 'value' LIMIT N` on GiST indexes cannot produce ordered index scans in ORCA, resulting in inefficient Seq Scan + Sort plans instead of KNN-GiST Index Scan. Previously, these queries would accidentally get correct plans because column-level COLLATE "C" caused a blanket fallback to the PostgreSQL planner, which does support amcanorderbyop. After commit 3f4ce85ae6c added COLLATE "C" support to ORCA, these queries lost their fallback path. Add has_orderby_ordering_op() in walkers.c to detect when a query's ORDER BY clause contains an operator registered as AMOP_ORDER in pg_amop (e.g., <-> for trigram/point distance). When detected, ORCA falls back to the PostgreSQL planner which can generate KNN ordered index scans. The check is precise: only ORDER BY with ordering operators triggers fallback. Other queries on the same tables (WHERE with LIKE/%%, equality filters, etc.) continue to use ORCA normally. --- contrib/pg_trgm/expected/pg_trgm_optimizer.out | 166 ++++++++++----------- src/backend/gpopt/gpdbwrappers.cpp | 11 ++ .../gpopt/translate/CTranslatorQueryToDXL.cpp | 9 ++ src/backend/optimizer/util/walkers.c | 54 +++++++ src/include/gpopt/gpdbwrappers.h | 3 + src/include/optimizer/walkers.h | 1 + 6 files changed, 157 insertions(+), 87 deletions(-) diff --git a/contrib/pg_trgm/expected/pg_trgm_optimizer.out b/contrib/pg_trgm/expected/pg_trgm_optimizer.out index 4597b8ca047..a2b012fa8fb 100644 --- a/contrib/pg_trgm/expected/pg_trgm_optimizer.out +++ b/contrib/pg_trgm/expected/pg_trgm_optimizer.out @@ -4665,17 +4665,16 @@ select count(*) from test_trgm where t ~ '[qwerty]{2}-?[qwerty]{2}'; -- check handling of indexquals that generate no searchable conditions explain (costs off) select count(*) from test_trgm where t like '%99%' and t like '%qwerty%'; - QUERY PLAN ------------------------------------------------------------------------------------------ - Finalize Aggregate + QUERY PLAN +----------------------------------------------------------------------------------- + Aggregate -> Gather Motion 3:1 (slice1; segments: 3) - -> Partial Aggregate - -> Bitmap Heap Scan on test_trgm - Recheck Cond: ((t ~~ '%99%'::text) AND (t ~~ '%qwerty%'::text)) - -> Bitmap Index Scan on trgm_idx - Index Cond: ((t ~~ '%99%'::text) AND (t ~~ '%qwerty%'::text)) - Optimizer: Postgres query optimizer -(8 rows) + -> Bitmap Heap Scan on test_trgm + Recheck Cond: ((t ~~ '%99%'::text) AND (t ~~ '%qwerty%'::text)) + -> Bitmap Index Scan on trgm_idx + Index Cond: ((t ~~ '%99%'::text) AND (t ~~ '%qwerty%'::text)) + Optimizer: GPORCA +(7 rows) select count(*) from test_trgm where t like '%99%' and t like '%qwerty%'; count @@ -4685,17 +4684,16 @@ select count(*) from test_trgm where t like '%99%' and t like '%qwerty%'; explain (costs off) select count(*) from test_trgm where t like '%99%' and t like '%qw%'; - QUERY PLAN -------------------------------------------------------------------------------------- - Finalize Aggregate + QUERY PLAN +------------------------------------------------------------------------------- + Aggregate -> Gather Motion 3:1 (slice1; segments: 3) - -> Partial Aggregate - -> Bitmap Heap Scan on test_trgm - Recheck Cond: ((t ~~ '%99%'::text) AND (t ~~ '%qw%'::text)) - -> Bitmap Index Scan on trgm_idx - Index Cond: ((t ~~ '%99%'::text) AND (t ~~ '%qw%'::text)) - Optimizer: Postgres query optimizer -(8 rows) + -> Bitmap Heap Scan on test_trgm + Recheck Cond: ((t ~~ '%99%'::text) AND (t ~~ '%qw%'::text)) + -> Bitmap Index Scan on trgm_idx + Index Cond: ((t ~~ '%99%'::text) AND (t ~~ '%qw%'::text)) + Optimizer: GPORCA +(7 rows) select count(*) from test_trgm where t like '%99%' and t like '%qw%'; count @@ -4709,17 +4707,16 @@ create index t_trgm_idx on t_test_trgm using gin (t gin_trgm_ops); insert into t_test_trgm values ('qwerty99'), ('qwerty01'); explain (costs off) select count(*) from t_test_trgm where t like '%99%' and t like '%qwerty%'; - QUERY PLAN ------------------------------------------------------------------------------------------ - Finalize Aggregate + QUERY PLAN +----------------------------------------------------------------------------------- + Aggregate -> Gather Motion 3:1 (slice1; segments: 3) - -> Partial Aggregate - -> Bitmap Heap Scan on t_test_trgm - Recheck Cond: ((t ~~ '%99%'::text) AND (t ~~ '%qwerty%'::text)) - -> Bitmap Index Scan on t_trgm_idx - Index Cond: ((t ~~ '%99%'::text) AND (t ~~ '%qwerty%'::text)) - Optimizer: Postgres query optimizer -(8 rows) + -> Bitmap Heap Scan on t_test_trgm + Recheck Cond: ((t ~~ '%99%'::text) AND (t ~~ '%qwerty%'::text)) + -> Bitmap Index Scan on t_trgm_idx + Index Cond: ((t ~~ '%99%'::text) AND (t ~~ '%qwerty%'::text)) + Optimizer: GPORCA +(7 rows) select count(*) from t_test_trgm where t like '%99%' and t like '%qwerty%'; count @@ -4729,17 +4726,16 @@ select count(*) from t_test_trgm where t like '%99%' and t like '%qwerty%'; explain (costs off) select count(*) from t_test_trgm where t like '%99%' and t like '%qw%'; - QUERY PLAN -------------------------------------------------------------------------------------- - Finalize Aggregate + QUERY PLAN +------------------------------------------------------------------------------- + Aggregate -> Gather Motion 3:1 (slice1; segments: 3) - -> Partial Aggregate - -> Bitmap Heap Scan on t_test_trgm - Recheck Cond: ((t ~~ '%99%'::text) AND (t ~~ '%qw%'::text)) - -> Bitmap Index Scan on t_trgm_idx - Index Cond: ((t ~~ '%99%'::text) AND (t ~~ '%qw%'::text)) - Optimizer: Postgres query optimizer -(8 rows) + -> Bitmap Heap Scan on t_test_trgm + Recheck Cond: ((t ~~ '%99%'::text) AND (t ~~ '%qw%'::text)) + -> Bitmap Index Scan on t_trgm_idx + Index Cond: ((t ~~ '%99%'::text) AND (t ~~ '%qw%'::text)) + Optimizer: GPORCA +(7 rows) select count(*) from t_test_trgm where t like '%99%' and t like '%qw%'; count @@ -5094,27 +5090,25 @@ create index test2_idx_gist on test2 using gist (t gist_trgm_ops); set enable_seqscan=off; explain (costs off) select * from test2 where t like '%BCD%'; - QUERY PLAN -------------------------------------------------- + QUERY PLAN +------------------------------------------------ Gather Motion 3:1 (slice1; segments: 3) - -> Bitmap Heap Scan on test2 - Recheck Cond: (t ~~ '%BCD%'::text) - -> Bitmap Index Scan on test2_idx_gist - Index Cond: (t ~~ '%BCD%'::text) - Optimizer: Postgres query optimizer -(6 rows) + -> Index Scan using test2_idx_gist on test2 + Index Cond: (t ~~ '%BCD%'::text) + Filter: (t ~~ '%BCD%'::text) + Optimizer: GPORCA +(5 rows) explain (costs off) select * from test2 where t ilike '%BCD%'; - QUERY PLAN -------------------------------------------------- + QUERY PLAN +------------------------------------------------ Gather Motion 3:1 (slice1; segments: 3) - -> Bitmap Heap Scan on test2 - Recheck Cond: (t ~~* '%BCD%'::text) - -> Bitmap Index Scan on test2_idx_gist - Index Cond: (t ~~* '%BCD%'::text) - Optimizer: Postgres query optimizer -(6 rows) + -> Index Scan using test2_idx_gist on test2 + Index Cond: (t ~~* '%BCD%'::text) + Filter: (t ~~* '%BCD%'::text) + Optimizer: GPORCA +(5 rows) select * from test2 where t like '%BCD%'; t @@ -5159,27 +5153,25 @@ select * from test2 where t like ' z foo%'; explain (costs off) select * from test2 where t ~ '[abc]{3}'; - QUERY PLAN --------------------------------------------------- + QUERY PLAN +------------------------------------------------ Gather Motion 3:1 (slice1; segments: 3) - -> Bitmap Heap Scan on test2 - Recheck Cond: (t ~ '[abc]{3}'::text) - -> Bitmap Index Scan on test2_idx_gist - Index Cond: (t ~ '[abc]{3}'::text) - Optimizer: Postgres query optimizer -(6 rows) + -> Index Scan using test2_idx_gist on test2 + Index Cond: (t ~ '[abc]{3}'::text) + Filter: (t ~ '[abc]{3}'::text) + Optimizer: GPORCA +(5 rows) explain (costs off) select * from test2 where t ~* 'DEF'; - QUERY PLAN -------------------------------------------------- + QUERY PLAN +------------------------------------------------ Gather Motion 3:1 (slice1; segments: 3) - -> Bitmap Heap Scan on test2 - Recheck Cond: (t ~* 'DEF'::text) - -> Bitmap Index Scan on test2_idx_gist - Index Cond: (t ~* 'DEF'::text) - Optimizer: Postgres query optimizer -(6 rows) + -> Index Scan using test2_idx_gist on test2 + Index Cond: (t ~* 'DEF'::text) + Filter: (t ~* 'DEF'::text) + Optimizer: GPORCA +(5 rows) select * from test2 where t ~ '[abc]{3}'; t @@ -5311,14 +5303,14 @@ select * from test2 where t ~ '/\d+/-\d'; -- test = operator explain (costs off) select * from test2 where t = 'abcdef'; - QUERY PLAN ------------------------------------------- + QUERY PLAN +------------------------------------------------ Gather Motion 1:1 (slice1; segments: 1) - -> Bitmap Heap Scan on test2 - Recheck Cond: (t = 'abcdef'::text) - -> Bitmap Index Scan on test2_idx_gist - Index Cond: (t = 'abcdef'::text) -(2 rows) + -> Index Scan using test2_idx_gist on test2 + Index Cond: (t = 'abcdef'::text) + Filter: (t = 'abcdef'::text) + Optimizer: GPORCA +(5 rows) select * from test2 where t = 'abcdef'; t @@ -5328,14 +5320,14 @@ select * from test2 where t = 'abcdef'; explain (costs off) select * from test2 where t = '%line%'; - QUERY PLAN ------------------------------------------- + QUERY PLAN +------------------------------------------------ Gather Motion 1:1 (slice1; segments: 1) - -> Bitmap Heap Scan on test2 - Recheck Cond: (t = '%line%'::text) - -> Bitmap Index Scan on test2_idx_gist - Index Cond: (t = '%line%'::text) -(6 rows) + -> Index Scan using test2_idx_gist on test2 + Index Cond: (t = '%line%'::text) + Filter: (t = '%line%'::text) + Optimizer: GPORCA +(5 rows) select * from test2 where t = '%line%'; t @@ -5423,7 +5415,7 @@ SELECT DISTINCT city, similarity(city, 'Warsaw'), show_limit() -> Index Scan using restaurants_city_idx on restaurants Index Cond: (city % 'Warsaw'::text) Filter: (city % 'Warsaw'::text) - Optimizer: Pivotal Optimizer (GPORCA) + Optimizer: GPORCA (9 rows) SELECT set_limit(0.3); diff --git a/src/backend/gpopt/gpdbwrappers.cpp b/src/backend/gpopt/gpdbwrappers.cpp index aca95b2cc0a..4d8d8c59100 100644 --- a/src/backend/gpopt/gpdbwrappers.cpp +++ b/src/backend/gpopt/gpdbwrappers.cpp @@ -2012,6 +2012,17 @@ gpdb::CheckCollation(Node *node) return -1; } +bool +gpdb::HasOrderByOrderingOp(Query *query) +{ + GP_WRAP_START; + { + return has_orderby_ordering_op(query); + } + GP_WRAP_END; + return false; +} + Node * gpdb::CoerceToCommonType(ParseState *pstate, Node *node, Oid target_type, const char *context) diff --git a/src/backend/gpopt/translate/CTranslatorQueryToDXL.cpp b/src/backend/gpopt/translate/CTranslatorQueryToDXL.cpp index 20cc6557c28..99d87917b38 100644 --- a/src/backend/gpopt/translate/CTranslatorQueryToDXL.cpp +++ b/src/backend/gpopt/translate/CTranslatorQueryToDXL.cpp @@ -324,6 +324,15 @@ CTranslatorQueryToDXL::CheckUnsupportedNodeTypes(Query *query) GPOS_RAISE(gpdxl::ExmaDXL, gpdxl::ExmiQuery2DXLUnsupportedFeature, GPOS_WSZ_LIT("Non-default collation")); } + + // ORCA does not support amcanorderbyop (KNN ordered index scans). + // Fall back to the PostgreSQL planner for queries whose ORDER BY + // contains an ordering operator (e.g., <-> for distance). + if (gpdb::HasOrderByOrderingOp(query)) + { + GPOS_RAISE(gpdxl::ExmaDXL, gpdxl::ExmiQuery2DXLUnsupportedFeature, + GPOS_WSZ_LIT("ORDER BY with ordering operator (amcanorderbyop)")); + } } //--------------------------------------------------------------------------- diff --git a/src/backend/optimizer/util/walkers.c b/src/backend/optimizer/util/walkers.c index 3b3d0311d06..752dc4aacc3 100644 --- a/src/backend/optimizer/util/walkers.c +++ b/src/backend/optimizer/util/walkers.c @@ -8,11 +8,17 @@ #include "postgres.h" +#include "access/htup_details.h" +#include "catalog/pg_amop.h" #include "catalog/pg_collation.h" #include "catalog/pg_type.h" #include "miscadmin.h" #include "nodes/nodeFuncs.h" +#include "optimizer/optimizer.h" #include "optimizer/walkers.h" +#include "utils/catcache.h" +#include "utils/lsyscache.h" +#include "utils/syscache.h" /** * Plan node walker related methods. @@ -1011,3 +1017,51 @@ check_collation_walker(Node *node, check_collation_context *context) } } +/* + * has_orderby_ordering_op + * + * Check if any ORDER BY expression in the query uses an ordering operator + * (amoppurpose = AMOP_ORDER in pg_amop). These operators (e.g., <-> for + * trigram distance or point distance) require amcanorderbyop index support + * (KNN-GiST) which ORCA does not implement. Return true if such an + * operator is found, signaling that the query should fall back to the + * PostgreSQL planner. + */ +bool +has_orderby_ordering_op(Query *query) +{ + ListCell *lc; + + if (query->sortClause == NIL) + return false; + + foreach(lc, query->sortClause) + { + SortGroupClause *sgc = (SortGroupClause *) lfirst(lc); + TargetEntry *tle = get_sortgroupclause_tle(sgc, query->targetList); + Node *expr = (Node *) tle->expr; + + if (!IsA(expr, OpExpr)) + continue; + + Oid opno = ((OpExpr *) expr)->opno; + CatCList *catlist = SearchSysCacheList1(AMOPOPID, + ObjectIdGetDatum(opno)); + + for (int i = 0; i < catlist->n_members; i++) + { + HeapTuple tp = &catlist->members[i]->tuple; + Form_pg_amop amop = (Form_pg_amop) GETSTRUCT(tp); + + if (amop->amoppurpose == AMOP_ORDER) + { + ReleaseSysCacheList(catlist); + return true; + } + } + ReleaseSysCacheList(catlist); + } + + return false; +} + diff --git a/src/include/gpopt/gpdbwrappers.h b/src/include/gpopt/gpdbwrappers.h index 261cd28b5f0..9ef53169599 100644 --- a/src/include/gpopt/gpdbwrappers.h +++ b/src/include/gpopt/gpdbwrappers.h @@ -673,6 +673,9 @@ int FindNodes(Node *node, List *nodeTags); // look for nodes with non-default collation; returns 1 if any exist, -1 otherwise int CheckCollation(Node *node); +// check if ORDER BY uses an ordering operator (amcanorderbyop) unsupported by ORCA +bool HasOrderByOrderingOp(Query *query); + Node *CoerceToCommonType(ParseState *pstate, Node *node, Oid target_type, const char *context); diff --git a/src/include/optimizer/walkers.h b/src/include/optimizer/walkers.h index 6d0d38717f5..d29bc5551e8 100644 --- a/src/include/optimizer/walkers.h +++ b/src/include/optimizer/walkers.h @@ -43,5 +43,6 @@ extern List *extract_nodes_plan(Plan *pl, int nodeTag, bool descendIntoSubquerie extern List *extract_nodes_expression(Node *node, int nodeTag, bool descendIntoSubqueries); extern int find_nodes(Node *node, List *nodeTags); extern int check_collation(Node *node); +extern bool has_orderby_ordering_op(Query *query); #endif /* WALKERS_H_ */ --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
