Hi.

Thanks for reviewing again.

On 2018/03/05 23:04, Emre Hasegeli wrote:
>>> Shouldn't we check if we consumed all elements (state->next_elem >=
>>> state->num_elems) inside the while loop?
>>
>> You're right.  Fixed.
> 
> I don't think the fix is correct.  arrayconst_next_fn() can still
> execute state->next_elem++ without checking if we consumed all
> elements.  I couldn't manage to crash it though.

I couldn't get it to crash either, but it's wrong anyway.  What happens is
the calling code would perform constraint exclusion with a garbage clause
(that is one containing a garbage value picked from elem_values when
next_elem has exceeded num_elems) and that may alter the result of
partition pruning.  Verified that with the following example.

create table p (a int) partition by list (a);
create table p1 partition of p for values in (1);
create table p2 partition of p for values in (2);

-- before fixing
explain (costs off) select * from p where a in (2, null);
                    QUERY PLAN
---------------------------------------------------
 Append
   ->  Seq Scan on p1
         Filter: (a = ANY ('{2,NULL}'::integer[]))
   ->  Seq Scan on p2
         Filter: (a = ANY ('{2,NULL}'::integer[]))
(5 rows)

So, it looks as if the proposed patch has no effect at all.

-- after fixing
explain (costs off) select * from p where a in (2, null);
                        QUERY PLAN
----------------------------------------------------------
 Append
   ->  Seq Scan on p2
         Filter: (a = ANY ('{2,NULL}'::integer[]))

Was a bit surprised that the calling code didn't crash while handling a
garbage clause.

> I am also not sure if it is proper to skip some items inside the
> "next_fn", but I don't know the code to suggest a better alternative.

Patch teaches it to ignore nulls when it's known that the operator being
used is strict.  It is harmless and has the benefit that constraint
exclusion gives an answer that is consistent with what actually running
such a qual against a table's rows would do.

Consider this example.

create table foo (a int check (a = 1));

insert into foo values (1), (null);

select * from foo where a in (2, null);
 a
---
(0 rows)

set constraint_exclusion to on;

-- you'd think constraint exclusion would avoid the scan
explain select * from foo where a in (2, null);
                     QUERY PLAN
-----------------------------------------------------
 Seq Scan on foo  (cost=0.00..41.88 rows=13 width=4)
   Filter: (a = ANY ('{2,NULL}'::integer[]))
(2 rows)

But it didn't, because the null in that list caused constraint exclusion
to mistakenly decide that the clause as a whole doesn't refute the
constraint (check (a = 1)).

-- with the patch
explain select * from foo where a in (2, null);
                QUERY PLAN
------------------------------------------
 Result  (cost=0.00..0.00 rows=0 width=4)
   One-Time Filter: false
(2 rows)

After ignoring null, only (a = 2) is left to consider and that does refute
(a = 1), so pruning works as desired.

BTW, if you compose the qual using an OR directly, it works as desired
even with HEAD:

explain select * from foo where a = 2 or a = null;
                QUERY PLAN
------------------------------------------
 Result  (cost=0.00..0.00 rows=0 width=4)
   One-Time Filter: false
(2 rows)

That's because a = null is const-simplified in an earlier planning phase
to false, so (a = 2 or false) reduces to just a = 2, which does refute
foo's constraint (a = 1).  I think the same thing should happen when
constraint exclusion internally turns an IN (..) clause into a set of OR
clauses.  Skipping nulls in these next_fn functions is, in a way, same as
const-simplifying them away.

>> +   /* skip nulls if ok to do so */
>> +   if (state->opisstrict && state->next)
> 
> Do we need && state->next in here?  It is checked for (state->next ==
> NULL) 2 lines above.

Yeah, it wasn't needed.

>> +   {
>> +       Node   *node = (Node *) lfirst(state->next);
>> +
>> +       while (node && IsA(node, Const) && ((Const *) node)->constisnull)
> 
> Should we spell the first check like node != NULL as the rest of the code 
> does.

OK.

>> +       {
>> +           state->next = lnext(state->next);
>> +           if (state->next)
>> +               node = (Node *) lfirst(state->next);
>> +       }
>> +   }
> 
> I think this function is also not correct as it can call
> lnext(state->next) without checking.

Yeah.  Rearranged the code to fix that.

Attached updated patch.  Thanks again.

Regards,
Amit
From 1c16473da04dfa2d41fdcf5de6983b65aabd5ff4 Mon Sep 17 00:00:00 2001
From: amit <amitlangot...@gmail.com>
Date: Thu, 1 Feb 2018 11:32:23 +0900
Subject: [PATCH v4] Disregard nulls in SAOP rightarg array/list during CE

---
 src/backend/optimizer/util/predtest.c | 45 +++++++++++++++++++++++++++++++++++
 src/test/regress/expected/inherit.out | 34 ++++++++++++++++++++++----
 src/test/regress/sql/inherit.sql      |  1 +
 3 files changed, 75 insertions(+), 5 deletions(-)

diff --git a/src/backend/optimizer/util/predtest.c 
b/src/backend/optimizer/util/predtest.c
index 8106010329..3e8171622e 100644
--- a/src/backend/optimizer/util/predtest.c
+++ b/src/backend/optimizer/util/predtest.c
@@ -905,6 +905,7 @@ boolexpr_startup_fn(Node *clause, PredIterInfo info)
 typedef struct
 {
        OpExpr          opexpr;
+       bool            opisstrict;             /* Is the operator strict? */
        Const           constexpr;
        int                     next_elem;
        int                     num_elems;
@@ -957,6 +958,12 @@ arrayconst_startup_fn(Node *clause, PredIterInfo info)
        state->constexpr.constbyval = elmbyval;
        lsecond(state->opexpr.args) = &state->constexpr;
 
+       /*
+        * Record if the operator is strict to perform appropriate action on
+        * encountering null values in the element array.
+        */
+       state->opisstrict = op_strict(saop->opno);
+
        /* Initialize iteration state */
        state->next_elem = 0;
 }
@@ -966,8 +973,21 @@ arrayconst_next_fn(PredIterInfo info)
 {
        ArrayConstIterState *state = (ArrayConstIterState *) info->state;
 
+       /*
+        * Skip nulls if the operator is strict.  Skipping nulls like this has
+        * same effect as const-simplifying a clause containing null argument to
+        * false.
+        */
+       if (state->opisstrict)
+       {
+               while (state->next_elem < state->num_elems &&
+                          state->elem_nulls[state->next_elem])
+                       state->next_elem++;
+       }
+
        if (state->next_elem >= state->num_elems)
                return NULL;
+
        state->constexpr.constvalue = state->elem_values[state->next_elem];
        state->constexpr.constisnull = state->elem_nulls[state->next_elem];
        state->next_elem++;
@@ -992,6 +1012,7 @@ arrayconst_cleanup_fn(PredIterInfo info)
 typedef struct
 {
        OpExpr          opexpr;
+       bool            opisstrict;             /* Is the operator strict? */
        ListCell   *next;
 } ArrayExprIterState;
 
@@ -1016,6 +1037,12 @@ arrayexpr_startup_fn(Node *clause, PredIterInfo info)
        state->opexpr.inputcollid = saop->inputcollid;
        state->opexpr.args = list_copy(saop->args);
 
+       /*
+        * Record if the operator is strict to perform appropriate action on
+        * encountering null values in the element array.
+        */
+       state->opisstrict = op_strict(saop->opno);
+
        /* Initialize iteration variable to first member of ArrayExpr */
        arrayexpr = (ArrayExpr *) lsecond(saop->args);
        state->next = list_head(arrayexpr->elements);
@@ -1026,8 +1053,26 @@ arrayexpr_next_fn(PredIterInfo info)
 {
        ArrayExprIterState *state = (ArrayExprIterState *) info->state;
 
+       /*
+        * Skip nulls if the operator is strict.  Skipping nulls like this has
+        * same effect as const-simplifying a clause containing null argument to
+        * false.
+        */
+       if (state->opisstrict)
+       {
+               Node   *node = (state->next != NULL) ? lfirst(state->next) : 
NULL;
+
+               while (node != NULL && IsA(node, Const) &&
+                          ((Const *) node)->constisnull)
+               {
+                       state->next = (state->next != NULL) ? 
lnext(state->next) : NULL;
+                       node = (state->next != NULL) ? lfirst(state->next) : 
NULL;
+               }
+       }
+
        if (state->next == NULL)
                return NULL;
+
        lsecond(state->opexpr.args) = lfirst(state->next);
        state->next = lnext(state->next);
        return (Node *) &(state->opexpr);
diff --git a/src/test/regress/expected/inherit.out 
b/src/test/regress/expected/inherit.out
index a79f891da7..065ea86d72 100644
--- a/src/test/regress/expected/inherit.out
+++ b/src/test/regress/expected/inherit.out
@@ -1715,11 +1715,7 @@ explain (costs off) select * from list_parted where a = 
'ab' or a in (null, 'cd'
  Append
    ->  Seq Scan on part_ab_cd
          Filter: (((a)::text = 'ab'::text) OR ((a)::text = ANY 
('{NULL,cd}'::text[])))
-   ->  Seq Scan on part_ef_gh
-         Filter: (((a)::text = 'ab'::text) OR ((a)::text = ANY 
('{NULL,cd}'::text[])))
-   ->  Seq Scan on part_null_xy
-         Filter: (((a)::text = 'ab'::text) OR ((a)::text = ANY 
('{NULL,cd}'::text[])))
-(7 rows)
+(3 rows)
 
 explain (costs off) select * from list_parted where a = 'ab';
                 QUERY PLAN                
@@ -1797,6 +1793,34 @@ explain (costs off) select * from range_list_parted 
where a between 3 and 23 and
          Filter: ((a >= 3) AND (a <= 23) AND (b = 'ab'::bpchar))
 (7 rows)
 
+explain (costs off) select * from generate_series(11, 12) v(a) where exists 
(select count(*) from range_list_parted where a = 1 or a in (null, v.a));
+                                    QUERY PLAN                                 
   
+----------------------------------------------------------------------------------
+ Function Scan on generate_series v
+   Filter: (SubPlan 1)
+   SubPlan 1
+     ->  Aggregate
+           ->  Append
+                 ->  Seq Scan on part_1_10_ab
+                       Filter: ((a = 1) OR (a = ANY (ARRAY[NULL::integer, 
v.a])))
+                 ->  Seq Scan on part_1_10_cd
+                       Filter: ((a = 1) OR (a = ANY (ARRAY[NULL::integer, 
v.a])))
+                 ->  Seq Scan on part_10_20_ab
+                       Filter: ((a = 1) OR (a = ANY (ARRAY[NULL::integer, 
v.a])))
+                 ->  Seq Scan on part_10_20_cd
+                       Filter: ((a = 1) OR (a = ANY (ARRAY[NULL::integer, 
v.a])))
+                 ->  Seq Scan on part_21_30_ab
+                       Filter: ((a = 1) OR (a = ANY (ARRAY[NULL::integer, 
v.a])))
+                 ->  Seq Scan on part_21_30_cd
+                       Filter: ((a = 1) OR (a = ANY (ARRAY[NULL::integer, 
v.a])))
+                 ->  Seq Scan on part_40_inf_ab
+                       Filter: ((a = 1) OR (a = ANY (ARRAY[NULL::integer, 
v.a])))
+                 ->  Seq Scan on part_40_inf_cd
+                       Filter: ((a = 1) OR (a = ANY (ARRAY[NULL::integer, 
v.a])))
+                 ->  Seq Scan on part_40_inf_null
+                       Filter: ((a = 1) OR (a = ANY (ARRAY[NULL::integer, 
v.a])))
+(23 rows)
+
 /* Should select no rows because range partition key cannot be null */
 explain (costs off) select * from range_list_parted where a is null;
         QUERY PLAN        
diff --git a/src/test/regress/sql/inherit.sql b/src/test/regress/sql/inherit.sql
index 2e42ae115d..c31cd6dab4 100644
--- a/src/test/regress/sql/inherit.sql
+++ b/src/test/regress/sql/inherit.sql
@@ -651,6 +651,7 @@ explain (costs off) select * from range_list_parted;
 explain (costs off) select * from range_list_parted where a = 5;
 explain (costs off) select * from range_list_parted where b = 'ab';
 explain (costs off) select * from range_list_parted where a between 3 and 23 
and b in ('ab');
+explain (costs off) select * from generate_series(11, 12) v(a) where exists 
(select count(*) from range_list_parted where a = 1 or a in (null, v.a));
 
 /* Should select no rows because range partition key cannot be null */
 explain (costs off) select * from range_list_parted where a is null;
-- 
2.11.0

Reply via email to