On Thu, Oct 24, 2024 at 3:01 PM Tender Wang <tndrw...@gmail.com> wrote:
>
> I feel that it's hard only to use one struct(for example, X), which just 
> calls equal(X, expr)
> can check both the expression match and the collation match.
>

in RelOptInfo->partexprs, maybe we should mention that the partition
key collation is stored
in RelOptInfo->part_scheme, not here.

> Maybe we should add another collation match checks in 
> match_clause_to_partition_key(), like
> partition pruning logic does.
>
in match_clause_to_partition_key
we already have

else if (IsA(clause, OpExpr) &&
list_length(((OpExpr *) clause)->args) == 2)
{
        /*
         * Partition key match also requires collation match.  There may be
         * multiple partkeys with the same expression but different
         * collations, so failure is NOMATCH.
         */
        if (!PartCollMatchesExprColl(partcoll, opclause->inputcollid))
            return PARTCLAUSE_NOMATCH;
}
else if (IsA(clause, ScalarArrayOpExpr))
{
        if (!equal(leftop, partkey) ||
            !PartCollMatchesExprColl(partcoll, saop->inputcollid))
            return PARTCLAUSE_NOMATCH;
}
So I think match_clause_to_partition_key handling collation is fine.

I think the problem is match_expr_to_partition_keys
don't have a collation related check.

CREATE TABLE pagg_join1 (c text collate case_insensitive) PARTITION BY
LIST(c collate "C");
CREATE TABLE pagg_join2 (c text collate "C") PARTITION BY LIST(c
collate case_insensitive);
CREATE TABLE pagg_join3 (c text collate "POSIX") PARTITION BY LIST(c
collate "C");
CREATE TABLE pagg_join4 (c text collate case_insensitive) PARTITION BY
LIST(c collate ignore_accents);

Our partition-wise join is based on Equi-join [1].
In some cases,column and partitionkey collation are different,
but if these two collations are deterministic, then texteq should work
as expected.
So I think, pagg_join3 can do partition-wise join,
I think pagg_join2 can do partition-wise join also.

we can let all (pagg_join1, pagg_join2, pagg_join3, pagg_join4) cannot
do partition-wise join (join with themself),
or we can let pagg_join2, pagg_join3 do partition-wise join (join with
themself).


POC attached, will let pagg_join2, pagg_join3 do partition-wise join.


[1] https://en.wikipedia.org/wiki/Join_%28SQL%29#Equi-join
diff --git a/src/backend/optimizer/util/relnode.c b/src/backend/optimizer/util/relnode.c
index d7266e4cdb..6214d01794 100644
--- a/src/backend/optimizer/util/relnode.c
+++ b/src/backend/optimizer/util/relnode.c
@@ -74,7 +74,7 @@ static bool have_partkey_equi_join(PlannerInfo *root, RelOptInfo *joinrel,
 								   RelOptInfo *rel1, RelOptInfo *rel2,
 								   JoinType jointype, List *restrictlist);
 static int	match_expr_to_partition_keys(Expr *expr, RelOptInfo *rel,
-										 bool strict_op);
+										 bool strict_op, bool *coll_inderministic);
 static void set_joinrel_partition_key_exprs(RelOptInfo *joinrel,
 											RelOptInfo *outer_rel, RelOptInfo *inner_rel,
 											JoinType jointype);
@@ -2104,6 +2104,7 @@ have_partkey_equi_join(PlannerInfo *root, RelOptInfo *joinrel,
 		Expr	   *expr1;
 		Expr	   *expr2;
 		bool		strict_op;
+		bool		coll_inderministic = false;
 		int			ipk1;
 		int			ipk2;
 
@@ -2167,10 +2168,11 @@ have_partkey_equi_join(PlannerInfo *root, RelOptInfo *joinrel,
 		 * Only clauses referencing the partition keys are useful for
 		 * partitionwise join.
 		 */
-		ipk1 = match_expr_to_partition_keys(expr1, rel1, strict_op);
+		ipk1 = match_expr_to_partition_keys(expr1, rel1, strict_op, &coll_inderministic);
 		if (ipk1 < 0)
 			continue;
-		ipk2 = match_expr_to_partition_keys(expr2, rel2, strict_op);
+
+		ipk2 = match_expr_to_partition_keys(expr2, rel2, strict_op, &coll_inderministic);
 		if (ipk2 < 0)
 			continue;
 
@@ -2181,6 +2183,10 @@ have_partkey_equi_join(PlannerInfo *root, RelOptInfo *joinrel,
 		if (ipk1 != ipk2)
 			continue;
 
+		/* if either collation is inderministic, cannot do partitionwise join */
+		if (coll_inderministic)
+			return false;
+
 		/* Ignore clause if we already proved these keys equal. */
 		if (pk_known_equal[ipk1])
 			continue;
@@ -2296,9 +2302,12 @@ have_partkey_equi_join(PlannerInfo *root, RelOptInfo *joinrel,
  * strict_op must be true if the expression will be compared with the
  * partition key using a strict operator.  This allows us to consider
  * nullable as well as nonnullable partition keys.
+ * coll_inderministic return true if exprCollation(expr) is inderministic. if
+ * expr is inderministic, that means same value with different apperance can
+ * live in different partition. In that case, we cannot do partition-wise join.
  */
 static int
-match_expr_to_partition_keys(Expr *expr, RelOptInfo *rel, bool strict_op)
+match_expr_to_partition_keys(Expr *expr, RelOptInfo *rel, bool strict_op, bool *coll_inderministic)
 {
 	int			cnt;
 
@@ -2319,7 +2328,15 @@ match_expr_to_partition_keys(Expr *expr, RelOptInfo *rel, bool strict_op)
 		foreach(lc, rel->partexprs[cnt])
 		{
 			if (equal(lfirst(lc), expr))
+			{
+				Oid		colloid =  exprCollation((Node *) expr);
+
+				if (OidIsValid(colloid) &&
+					!get_collation_isdeterministic(colloid))
+					*coll_inderministic = true;
+
 				return cnt;
+			}
 		}
 
 		if (!strict_op)
@@ -2335,7 +2352,14 @@ match_expr_to_partition_keys(Expr *expr, RelOptInfo *rel, bool strict_op)
 		foreach(lc, rel->nullable_partexprs[cnt])
 		{
 			if (equal(lfirst(lc), expr))
+			{
+				Oid 	colloid =  exprCollation((Node *) expr);
+
+				if (OidIsValid(colloid) &&
+					!get_collation_isdeterministic(colloid))
+					*coll_inderministic = true;
 				return cnt;
+			}
 		}
 	}
 

Reply via email to