commit 14edda8344f24ebd4ecf9672cdc9b61bfec18ace
Author: jcoleman <jtc331@gmail.com>
Date:   Wed Nov 14 16:49:52 2018 +0000

    Prove IS NOT NULL inference for large arrays
    
    For the purposes of predicate proof testing we limit ScalarArrayOpExpr
    decomposition to arrays with <= MAX_SAOP_ARRAY_SIZE items (currently
    100 items). However, when the operator of a scalar array op is strict,
    IS NOT NULL can be inferred trivially without decomposing the array
    into AND/OR chains.
    
    We teach predtest to check for strict operators in ScalarArrayOpExpr
    nodes instead of relying on checking strictness of operators in AND/OR
    chains.
    
    This allows the planner to use partial indexes of the form "WHERE foo IS
    NOT NULL" when the query's WHERE clause involves a scalar array op like
    "foo IN (1,2,...101)".

diff --git a/src/backend/optimizer/util/predtest.c b/src/backend/optimizer/util/predtest.c
index 3d5ef6922c..671b8d8c45 100644
--- a/src/backend/optimizer/util/predtest.c
+++ b/src/backend/optimizer/util/predtest.c
@@ -815,7 +815,7 @@ predicate_refuted_by_recurse(Node *clause, Node *predicate,
  * This function also implements enforcement of MAX_SAOP_ARRAY_SIZE: if a
  * ScalarArrayOpExpr's array has too many elements, we just classify it as an
  * atom.  (This will result in its being passed as-is to the simple_clause
- * functions, which will fail to prove anything about it.)	Note that we
+ * functions, many of which will fail to prove anything about it.)  Note that we
  * cannot just stop after considering MAX_SAOP_ARRAY_SIZE elements; in general
  * that would result in wrong proofs, rather than failing to prove anything.
  */
@@ -1132,6 +1132,21 @@ predicate_implied_by_simple_clause(Expr *predicate, Node *clause,
 			/* strictness of clause for foo implies foo IS NOT NULL */
 			if (clause_is_strict_for(clause, (Node *) ntest->arg))
 				return true;
+
+			/*
+			 * Since we limit decomposing ScalarArrayOpExpr nodes into AND/OR quals
+			 * to arrays with at most MAX_SAOP_ARRAY_SIZE items, we need to handle
+			 * scalar array ops separately (this case will occur when the array has
+			 * more than MAX_SAOP_ARRAY_SIZE items).
+			 */
+			if (clause && IsA(clause, ScalarArrayOpExpr))
+			{
+				ScalarArrayOpExpr *saop = (ScalarArrayOpExpr *) clause;
+				Node *subexpr = (Node *) ntest->arg;
+				if (op_strict(saop->opno) &&
+					clause_is_strict_for((Node *) linitial(saop->args), subexpr))
+					return true;
+			}
 		}
 		return false;			/* we can't succeed below... */
 	}
@@ -1196,6 +1211,21 @@ predicate_refuted_by_simple_clause(Expr *predicate, Node *clause,
 		if (clause_is_strict_for(clause, (Node *) isnullarg))
 			return true;
 
+		/*
+		 * Since we limit decomposing ScalarArrayOpExpr nodes into AND/OR quals
+		 * to arrays with at most MAX_SAOP_ARRAY_SIZE items, we need to handle
+		 * scalar array ops separately (this case will occur when the array has
+		 * more than MAX_SAOP_ARRAY_SIZE items).
+		 */
+		if (clause && !weak && IsA(clause, ScalarArrayOpExpr))
+		{
+			ScalarArrayOpExpr *saop = (ScalarArrayOpExpr *) clause;
+			Node *subexpr = (Node *) ((NullTest *) predicate)->arg;
+			if (op_strict(saop->opno) &&
+				clause_is_strict_for((Node *) linitial(saop->args), subexpr))
+				return true;
+		}
+
 		/* foo IS NOT NULL refutes foo IS NULL */
 		if (clause && IsA(clause, NullTest) &&
 			((NullTest *) clause)->nulltesttype == IS_NOT_NULL &&
diff --git a/src/test/modules/test_predtest/expected/test_predtest.out b/src/test/modules/test_predtest/expected/test_predtest.out
index 5574e03204..49323818ab 100644
--- a/src/test/modules/test_predtest/expected/test_predtest.out
+++ b/src/test/modules/test_predtest/expected/test_predtest.out
@@ -837,3 +837,226 @@ w_i_holds         | f
 s_r_holds         | f
 w_r_holds         | f
 
+-- For the next few tests, We want to test an array longer than
+-- MAX_SAOP_ARRAY_SIZE so that we're not relying on predtest turning
+-- the array op into set of OR quals. We also need to include at least
+-- one null value to demonstrate strict operators are checked properly.
+--
+-- ScalarArrayOp implies IS NOT NULL
+select * from test_predtest($$
+select x is not null, x = any(array[
+  1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,
+  29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,
+  54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,
+  79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100,101,null
+])
+from integers
+$$);
+-[ RECORD 1 ]-----+--
+strong_implied_by | t
+weak_implied_by   | f
+strong_refuted_by | f
+weak_refuted_by   | f
+s_i_holds         | t
+w_i_holds         | f
+s_r_holds         | f
+w_r_holds         | f
+
+select * from test_predtest($$
+select y is not null, x = any(array[
+  1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,
+  29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,
+  54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,
+  79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100,101,null
+])
+from integers
+$$);
+-[ RECORD 1 ]-----+--
+strong_implied_by | f
+weak_implied_by   | f
+strong_refuted_by | f
+weak_refuted_by   | f
+s_i_holds         | f
+w_i_holds         | f
+s_r_holds         | f
+w_r_holds         | f
+
+select * from test_predtest($$
+select (x + x) is not null, x = any(array[
+  1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,
+  29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,
+  54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,
+  79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100,101,null
+])
+from integers
+$$);
+-[ RECORD 1 ]-----+--
+strong_implied_by | f
+weak_implied_by   | f
+strong_refuted_by | f
+weak_refuted_by   | f
+s_i_holds         | f
+w_i_holds         | f
+s_r_holds         | f
+w_r_holds         | f
+
+select * from test_predtest($$
+with a(vals) as (
+  select array_agg(i) || null from generate_series(1, 101) t(i)
+)
+select x is not null, x = any((select vals from a)::int[])
+from integers
+$$);
+-[ RECORD 1 ]-----+--
+strong_implied_by | t
+weak_implied_by   | f
+strong_refuted_by | f
+weak_refuted_by   | f
+s_i_holds         | t
+w_i_holds         | f
+s_r_holds         | f
+w_r_holds         | f
+
+select * from test_predtest($$
+with a(vals) as (
+  select array_agg(i) || null from generate_series(1, 101) t(i)
+)
+select y is not null, x = any((select vals from a)::int[])
+from integers
+$$);
+-[ RECORD 1 ]-----+--
+strong_implied_by | f
+weak_implied_by   | f
+strong_refuted_by | f
+weak_refuted_by   | f
+s_i_holds         | f
+w_i_holds         | f
+s_r_holds         | f
+w_r_holds         | f
+
+select * from test_predtest($$
+with a(vals) as (
+  select array_agg(i) || null from generate_series(1, 101) t(i)
+)
+select (x + x) is not null, x = any((select vals from a)::int[])
+from integers
+$$);
+-[ RECORD 1 ]-----+--
+strong_implied_by | f
+weak_implied_by   | f
+strong_refuted_by | f
+weak_refuted_by   | f
+s_i_holds         | f
+w_i_holds         | f
+s_r_holds         | f
+w_r_holds         | f
+
+-- ScalarArrayOp refutes IS NULL
+select * from test_predtest($$
+select x is null, x = any(array[
+  1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,
+  29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,
+  54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,
+  79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100,101,null
+])
+from integers
+$$);
+-[ RECORD 1 ]-----+--
+strong_implied_by | f
+weak_implied_by   | f
+strong_refuted_by | t
+weak_refuted_by   | f
+s_i_holds         | f
+w_i_holds         | f
+s_r_holds         | t
+w_r_holds         | f
+
+select * from test_predtest($$
+select y is null, x = any(array[
+  1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,
+  29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,
+  54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,
+  79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100,101,null
+])
+from integers
+$$);
+-[ RECORD 1 ]-----+--
+strong_implied_by | f
+weak_implied_by   | f
+strong_refuted_by | f
+weak_refuted_by   | f
+s_i_holds         | f
+w_i_holds         | f
+s_r_holds         | f
+w_r_holds         | f
+
+select * from test_predtest($$
+select (x + x) is null, x = any(array[
+  1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,
+  29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,
+  54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,
+  79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100,101,null
+])
+from integers
+$$);
+-[ RECORD 1 ]-----+--
+strong_implied_by | f
+weak_implied_by   | f
+strong_refuted_by | f
+weak_refuted_by   | f
+s_i_holds         | f
+w_i_holds         | f
+s_r_holds         | f
+w_r_holds         | f
+
+select * from test_predtest($$
+with a(vals) as (
+  select array_agg(i) || null from generate_series(1, 101) t(i)
+)
+select x is null, x = any((select vals from a)::int[])
+from integers
+$$);
+-[ RECORD 1 ]-----+--
+strong_implied_by | f
+weak_implied_by   | f
+strong_refuted_by | t
+weak_refuted_by   | f
+s_i_holds         | f
+w_i_holds         | f
+s_r_holds         | t
+w_r_holds         | f
+
+select * from test_predtest($$
+with a(vals) as (
+  select array_agg(i) || null from generate_series(1, 101) t(i)
+)
+select y is null, x = any((select vals from a)::int[])
+from integers
+$$);
+-[ RECORD 1 ]-----+--
+strong_implied_by | f
+weak_implied_by   | f
+strong_refuted_by | f
+weak_refuted_by   | f
+s_i_holds         | f
+w_i_holds         | f
+s_r_holds         | f
+w_r_holds         | f
+
+select * from test_predtest($$
+with a(vals) as (
+  select array_agg(i) || null from generate_series(1, 101) t(i)
+)
+select (x + x) is null, x = any((select vals from a)::int[])
+from integers
+$$);
+-[ RECORD 1 ]-----+--
+strong_implied_by | f
+weak_implied_by   | f
+strong_refuted_by | f
+weak_refuted_by   | f
+s_i_holds         | f
+w_i_holds         | f
+s_r_holds         | f
+w_r_holds         | f
+
diff --git a/src/test/modules/test_predtest/sql/test_predtest.sql b/src/test/modules/test_predtest/sql/test_predtest.sql
index 2734735843..830f41ee3c 100644
--- a/src/test/modules/test_predtest/sql/test_predtest.sql
+++ b/src/test/modules/test_predtest/sql/test_predtest.sql
@@ -325,3 +325,119 @@ select * from test_predtest($$
 select x <= y, x = any(array[1,3,y])
 from integers
 $$);
+
+-- For the next few tests, We want to test an array longer than
+-- MAX_SAOP_ARRAY_SIZE so that we're not relying on predtest turning
+-- the array op into set of OR quals. We also need to include at least
+-- one null value to demonstrate strict operators are checked properly.
+--
+-- ScalarArrayOp implies IS NOT NULL
+select * from test_predtest($$
+select x is not null, x = any(array[
+  1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,
+  29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,
+  54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,
+  79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100,101,null
+])
+from integers
+$$);
+
+select * from test_predtest($$
+select y is not null, x = any(array[
+  1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,
+  29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,
+  54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,
+  79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100,101,null
+])
+from integers
+$$);
+
+select * from test_predtest($$
+select (x + x) is not null, x = any(array[
+  1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,
+  29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,
+  54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,
+  79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100,101,null
+])
+from integers
+$$);
+
+select * from test_predtest($$
+with a(vals) as (
+  select array_agg(i) || null from generate_series(1, 101) t(i)
+)
+select x is not null, x = any((select vals from a)::int[])
+from integers
+$$);
+
+select * from test_predtest($$
+with a(vals) as (
+  select array_agg(i) || null from generate_series(1, 101) t(i)
+)
+select y is not null, x = any((select vals from a)::int[])
+from integers
+$$);
+
+select * from test_predtest($$
+with a(vals) as (
+  select array_agg(i) || null from generate_series(1, 101) t(i)
+)
+select (x + x) is not null, x = any((select vals from a)::int[])
+from integers
+$$);
+
+-- ScalarArrayOp refutes IS NULL
+select * from test_predtest($$
+select x is null, x = any(array[
+  1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,
+  29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,
+  54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,
+  79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100,101,null
+])
+from integers
+$$);
+
+select * from test_predtest($$
+select y is null, x = any(array[
+  1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,
+  29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,
+  54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,
+  79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100,101,null
+])
+from integers
+$$);
+
+select * from test_predtest($$
+select (x + x) is null, x = any(array[
+  1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,
+  29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,
+  54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,
+  79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100,101,null
+])
+from integers
+$$);
+
+select * from test_predtest($$
+with a(vals) as (
+  select array_agg(i) || null from generate_series(1, 101) t(i)
+)
+select x is null, x = any((select vals from a)::int[])
+from integers
+$$);
+
+select * from test_predtest($$
+with a(vals) as (
+  select array_agg(i) || null from generate_series(1, 101) t(i)
+)
+select y is null, x = any((select vals from a)::int[])
+from integers
+$$);
+
+select * from test_predtest($$
+with a(vals) as (
+  select array_agg(i) || null from generate_series(1, 101) t(i)
+)
+select (x + x) is null, x = any((select vals from a)::int[])
+from integers
+$$);
+
