I missed a case: when the column collation and the partition key collation are the same and nondeterministic, that should be fine for partitionwise join. So v2 is attached.
have_partkey_equi_join and match_expr_to_partition_keys didn't do any collation-related checks. The proposed v2 change disallows partitionwise join when the column collation is nondeterministic *and* differs from the partition key's collation. The attached partition_wise_join_collation.sql is the test script; you can use it to compare against the behavior on master.
partition_wise_join_collation.sql
Description: application/sql
diff --git a/src/backend/optimizer/util/relnode.c b/src/backend/optimizer/util/relnode.c index d7266e4cdb..428751b05f 100644 --- a/src/backend/optimizer/util/relnode.c +++ b/src/backend/optimizer/util/relnode.c @@ -74,7 +74,7 @@ static bool have_partkey_equi_join(PlannerInfo *root, RelOptInfo *joinrel, RelOptInfo *rel1, RelOptInfo *rel2, JoinType jointype, List *restrictlist); static int match_expr_to_partition_keys(Expr *expr, RelOptInfo *rel, - bool strict_op); + bool strict_op, bool *coll_incompatiable); static void set_joinrel_partition_key_exprs(RelOptInfo *joinrel, RelOptInfo *outer_rel, RelOptInfo *inner_rel, JoinType jointype); @@ -2104,6 +2104,7 @@ have_partkey_equi_join(PlannerInfo *root, RelOptInfo *joinrel, Expr *expr1; Expr *expr2; bool strict_op; + bool coll_incompatiable = false; int ipk1; int ipk2; @@ -2167,10 +2168,11 @@ have_partkey_equi_join(PlannerInfo *root, RelOptInfo *joinrel, * Only clauses referencing the partition keys are useful for * partitionwise join. */ - ipk1 = match_expr_to_partition_keys(expr1, rel1, strict_op); + ipk1 = match_expr_to_partition_keys(expr1, rel1, strict_op, &coll_incompatiable); if (ipk1 < 0) continue; - ipk2 = match_expr_to_partition_keys(expr2, rel2, strict_op); + + ipk2 = match_expr_to_partition_keys(expr2, rel2, strict_op, &coll_incompatiable); if (ipk2 < 0) continue; @@ -2181,6 +2183,15 @@ have_partkey_equi_join(PlannerInfo *root, RelOptInfo *joinrel, if (ipk1 != ipk2) continue; + /* + * we generally assume parttion key and expr's collation are fine for + * partition-wise join. forgidden case is column collation is + * indeterministic and partition key's collation not same as column's. + * see match_expr_to_partition_keys also. + */ + if (coll_incompatiable) + return false; + /* Ignore clause if we already proved these keys equal. 
*/ if (pk_known_equal[ipk1]) continue; @@ -2296,9 +2307,15 @@ have_partkey_equi_join(PlannerInfo *root, RelOptInfo *joinrel, * strict_op must be true if the expression will be compared with the * partition key using a strict operator. This allows us to consider * nullable as well as nonnullable partition keys. + * if exprCollation(expr) is inderministic also not equal to partcollation, + * that means same value with different apperances can live in different + * partition, coll_incompatiable return set to true. In that case, we cannot do + * partition-wise join. we are OK with expression's collation same as partition + * key's even though they are indeterministic. + * */ static int -match_expr_to_partition_keys(Expr *expr, RelOptInfo *rel, bool strict_op) +match_expr_to_partition_keys(Expr *expr, RelOptInfo *rel, bool strict_op, bool *coll_incompatiable) { int cnt; @@ -2315,11 +2332,22 @@ match_expr_to_partition_keys(Expr *expr, RelOptInfo *rel, bool strict_op) { ListCell *lc; + Oid partcoll = rel->part_scheme->partcollation[cnt]; + /* We can always match to the non-nullable partition keys. */ foreach(lc, rel->partexprs[cnt]) { if (equal(lfirst(lc), expr)) + { + Oid colloid = exprCollation((Node *) expr); + + if ((partcoll != colloid) && + OidIsValid(colloid) && + !get_collation_isdeterministic(colloid)) + *coll_incompatiable = true; + return cnt; + } } if (!strict_op) @@ -2335,7 +2363,15 @@ match_expr_to_partition_keys(Expr *expr, RelOptInfo *rel, bool strict_op) foreach(lc, rel->nullable_partexprs[cnt]) { if (equal(lfirst(lc), expr)) + { + Oid colloid = exprCollation((Node *) expr); + + if ((partcoll != colloid) && + OidIsValid(colloid) && + !get_collation_isdeterministic(colloid)) + *coll_incompatiable = true; return cnt; + } } }