On Thu, Oct 24, 2024 at 3:01 PM Tender Wang <tndrw...@gmail.com> wrote: > > I feel that it's hard only to use one struct(for example, X), which just > calls equal(X, expr) > can check both the expression match and the collation match. >
in RelOptInfo->partexprs, maybe we should mention that the partition key collation is stored in RelOptInfo->part_scheme, not here. > Maybe we should add another collation match checks in > match_clause_to_partition_key(), like > partition pruning logic does. > in match_clause_to_partition_key we already have else if (IsA(clause, OpExpr) && list_length(((OpExpr *) clause)->args) == 2) { /* * Partition key match also requires collation match. There may be * multiple partkeys with the same expression but different * collations, so failure is NOMATCH. */ if (!PartCollMatchesExprColl(partcoll, opclause->inputcollid)) return PARTCLAUSE_NOMATCH; } else if (IsA(clause, ScalarArrayOpExpr)) { if (!equal(leftop, partkey) || !PartCollMatchesExprColl(partcoll, saop->inputcollid)) return PARTCLAUSE_NOMATCH; } So I think match_clause_to_partition_key handling collation is fine. I think the problem is match_expr_to_partition_keys don't have a collation related check. CREATE TABLE pagg_join1 (c text collate case_insensitive) PARTITION BY LIST(c collate "C"); CREATE TABLE pagg_join2 (c text collate "C") PARTITION BY LIST(c collate case_insensitive); CREATE TABLE pagg_join3 (c text collate "POSIX") PARTITION BY LIST(c collate "C"); CREATE TABLE pagg_join4 (c text collate case_insensitive) PARTITION BY LIST(c collate ignore_accents); Our partition-wise join is based on Equi-join [1]. In some cases,column and partitionkey collation are different, but if these two collations are deterministic, then texteq should work as expected. So I think, pagg_join3 can do partition-wise join, I think pagg_join2 can do partition-wise join also. we can let all (pagg_join1, pagg_join2, pagg_join3, pagg_join4) cannot do partition-wise join (join with themself), or we can let pagg_join2, pagg_join3 do partition-wise join (join with themself). POC attached, will let pagg_join2, pagg_join3 do partition-wise join. [1] https://en.wikipedia.org/wiki/Join_%28SQL%29#Equi-join
diff --git a/src/backend/optimizer/util/relnode.c b/src/backend/optimizer/util/relnode.c index d7266e4cdb..6214d01794 100644 --- a/src/backend/optimizer/util/relnode.c +++ b/src/backend/optimizer/util/relnode.c @@ -74,7 +74,7 @@ static bool have_partkey_equi_join(PlannerInfo *root, RelOptInfo *joinrel, RelOptInfo *rel1, RelOptInfo *rel2, JoinType jointype, List *restrictlist); static int match_expr_to_partition_keys(Expr *expr, RelOptInfo *rel, - bool strict_op); + bool strict_op, bool *coll_inderministic); static void set_joinrel_partition_key_exprs(RelOptInfo *joinrel, RelOptInfo *outer_rel, RelOptInfo *inner_rel, JoinType jointype); @@ -2104,6 +2104,7 @@ have_partkey_equi_join(PlannerInfo *root, RelOptInfo *joinrel, Expr *expr1; Expr *expr2; bool strict_op; + bool coll_inderministic = false; int ipk1; int ipk2; @@ -2167,10 +2168,11 @@ have_partkey_equi_join(PlannerInfo *root, RelOptInfo *joinrel, * Only clauses referencing the partition keys are useful for * partitionwise join. */ - ipk1 = match_expr_to_partition_keys(expr1, rel1, strict_op); + ipk1 = match_expr_to_partition_keys(expr1, rel1, strict_op, &coll_inderministic); if (ipk1 < 0) continue; - ipk2 = match_expr_to_partition_keys(expr2, rel2, strict_op); + + ipk2 = match_expr_to_partition_keys(expr2, rel2, strict_op, &coll_inderministic); if (ipk2 < 0) continue; @@ -2181,6 +2183,10 @@ have_partkey_equi_join(PlannerInfo *root, RelOptInfo *joinrel, if (ipk1 != ipk2) continue; + /* if either collation is inderministic, cannot do partitionwise join */ + if (coll_inderministic) + return false; + /* Ignore clause if we already proved these keys equal. */ if (pk_known_equal[ipk1]) continue; @@ -2296,9 +2302,12 @@ have_partkey_equi_join(PlannerInfo *root, RelOptInfo *joinrel, * strict_op must be true if the expression will be compared with the * partition key using a strict operator. This allows us to consider * nullable as well as nonnullable partition keys. + * coll_inderministic return true if exprCollation(expr) is inderministic. if + * expr is inderministic, that means same value with different apperance can + * live in different partition. In that case, we cannot do partition-wise join. */ static int -match_expr_to_partition_keys(Expr *expr, RelOptInfo *rel, bool strict_op) +match_expr_to_partition_keys(Expr *expr, RelOptInfo *rel, bool strict_op, bool *coll_inderministic) { int cnt; @@ -2319,7 +2328,15 @@ match_expr_to_partition_keys(Expr *expr, RelOptInfo *rel, bool strict_op) foreach(lc, rel->partexprs[cnt]) { if (equal(lfirst(lc), expr)) + { + Oid colloid = exprCollation((Node *) expr); + + if (OidIsValid(colloid) && + !get_collation_isdeterministic(colloid)) + *coll_inderministic = true; + return cnt; + } } if (!strict_op) @@ -2335,7 +2352,14 @@ match_expr_to_partition_keys(Expr *expr, RelOptInfo *rel, bool strict_op) foreach(lc, rel->nullable_partexprs[cnt]) { if (equal(lfirst(lc), expr)) + { + Oid colloid = exprCollation((Node *) expr); + + if (OidIsValid(colloid) && + !get_collation_isdeterministic(colloid)) + *coll_inderministic = true; return cnt; + } } }