From 0e0ab487745bd5f4896f7c3c111eb9ee8f2197d8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E4=B8=80=E6=8C=83?= <yizhi.fzh@alibaba-inc.com>
Date: Wed, 10 Feb 2021 10:58:06 +0800
Subject: [PATCH v3 1/2] Introduce notnullattrs field in RelOptInfo to indicate
 which attr

are not null in the current query. Not-null status is determined by checking
pg_attribute and the query's restrictinfo. This info is only maintained at
the base RelOptInfo level.
---
 src/backend/optimizer/path/allpaths.c  | 31 ++++++++++++++++++++++++++
 src/backend/optimizer/plan/initsplan.c | 23 ++++++++++++++++++-
 src/backend/optimizer/util/plancat.c   | 10 +++++++++
 src/backend/optimizer/util/var.c       | 14 ++++++++++++
 src/include/nodes/pathnodes.h          |  7 ++++++
 src/include/optimizer/optimizer.h      |  2 ++
 6 files changed, 86 insertions(+), 1 deletion(-)

diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c
index cd3fdd259c..709d2f82ca 100644
--- a/src/backend/optimizer/path/allpaths.c
+++ b/src/backend/optimizer/path/allpaths.c
@@ -998,6 +998,7 @@ set_append_rel_size(PlannerInfo *root, RelOptInfo *rel,
 		RelOptInfo *childrel;
 		ListCell   *parentvars;
 		ListCell   *childvars;
+		int i = -1;
 
 		/* append_rel_list contains all append rels; ignore others */
 		if (appinfo->parent_relid != parentRTindex)
@@ -1054,6 +1055,36 @@ set_append_rel_size(PlannerInfo *root, RelOptInfo *rel,
 								   (Node *) rel->reltarget->exprs,
 								   1, &appinfo);
 
+		/* Copy notnullattrs. */
+		while ((i = bms_next_member(rel->notnullattrs, i)) > 0)
+		{
+			AttrNumber attno = i + FirstLowInvalidHeapAttributeNumber;
+			AttrNumber child_attno;
+			if (attno == 0)
+			{
+				/* Whole row is not null, so must be same for child */
+				childrel->notnullattrs = bms_add_member(childrel->notnullattrs,
+														attno - FirstLowInvalidHeapAttributeNumber);
+				break;
+			}
+			if (attno < 0 )
+				/* no need to translate system column */
+				child_attno = attno;
+			else
+			{
+				Node * node = list_nth(appinfo->translated_vars, attno - 1);
+				if (!IsA(node, Var))
+					/* This may happen in the UNION case, e.g. (SELECT a FROM t1 UNION SELECT a + 3
+					 * FROM t2) t, where we know t.a is not null
+					 */
+					continue;
+				child_attno = castNode(Var, node)->varattno;
+			}
+
+			childrel->notnullattrs = bms_add_member(childrel->notnullattrs,
+													child_attno - FirstLowInvalidHeapAttributeNumber);
+		}
+
 		/*
 		 * We have to make child entries in the EquivalenceClass data
 		 * structures as well.  This is needed either if the parent
diff --git a/src/backend/optimizer/plan/initsplan.c b/src/backend/optimizer/plan/initsplan.c
index 02f813cebd..a929c25d41 100644
--- a/src/backend/optimizer/plan/initsplan.c
+++ b/src/backend/optimizer/plan/initsplan.c
@@ -706,6 +706,25 @@ deconstruct_jointree(PlannerInfo *root)
 	return result;
 }
 
+
+/*
+ * extract_notnull_attrs - add Vars from find_nonnullable_vars(qual) to their rels' notnullattrs
+ */
+static void
+extract_notnull_attrs(PlannerInfo *root, Node *qual)
+{
+	/* Bitmapset members are stored as varattno - FirstLowInvalidHeapAttributeNumber */
+	ListCell	*lc;
+	List		*non_nullable_vars = find_nonnullable_vars(qual);
+	foreach(lc, non_nullable_vars)
+	{
+		Var *var = lfirst_node(Var, lc);
+		RelOptInfo *rel = root->simple_rel_array[var->varno];	/* NOTE(review): used below without a NULL check -- confirm */
+		rel->notnullattrs = bms_add_member(rel->notnullattrs,
+										   var->varattno - FirstLowInvalidHeapAttributeNumber);
+	}
+}
+
 /*
  * deconstruct_recurse
  *	  One recursion level of deconstruct_jointree processing.
@@ -828,13 +847,13 @@ deconstruct_recurse(PlannerInfo *root, Node *jtnode, bool below_outer_join,
 		foreach(l, (List *) f->quals)
 		{
 			Node	   *qual = (Node *) lfirst(l);
-
 			distribute_qual_to_rels(root, qual,
 									below_outer_join, JOIN_INNER,
 									root->qual_security_level,
 									*qualscope, NULL, NULL,
 									postponed_qual_list);
 		}
+		extract_notnull_attrs(root, f->quals);
 	}
 	else if (IsA(jtnode, JoinExpr))
 	{
@@ -948,6 +967,8 @@ deconstruct_recurse(PlannerInfo *root, Node *jtnode, bool below_outer_join,
 		root->nullable_baserels = bms_add_members(root->nullable_baserels,
 												  nullable_rels);
 
+		extract_notnull_attrs(root, j->quals);
+
 		/*
 		 * Try to process any quals postponed by children.  If they need
 		 * further postponement, add them to my output postponed_qual_list.
diff --git a/src/backend/optimizer/util/plancat.c b/src/backend/optimizer/util/plancat.c
index 177e6e336a..0c5a79f296 100644
--- a/src/backend/optimizer/util/plancat.c
+++ b/src/backend/optimizer/util/plancat.c
@@ -117,6 +117,7 @@ get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent,
 	Relation	relation;
 	bool		hasindex;
 	List	   *indexinfos = NIL;
+	int			i;
 
 	/*
 	 * We need not lock the relation since it was already locked, either by
@@ -474,6 +475,15 @@ get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent,
 	if (inhparent && relation->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
 		set_relation_partition_info(root, rel, relation);
 
+	Assert(rel->notnullattrs == NULL);
+	for(i = 0; i < relation->rd_att->natts; i++)
+	{
+		FormData_pg_attribute attr = relation->rd_att->attrs[i];
+		if (attr.attnotnull)
+			rel->notnullattrs = bms_add_member(rel->notnullattrs,
+											   attr.attnum - FirstLowInvalidHeapAttributeNumber);
+	}
+
 	table_close(relation, NoLock);
 
 	/*
diff --git a/src/backend/optimizer/util/var.c b/src/backend/optimizer/util/var.c
index e307d6fbb0..f2551713cd 100644
--- a/src/backend/optimizer/util/var.c
+++ b/src/backend/optimizer/util/var.c
@@ -867,3 +867,17 @@ alias_relid_set(Query *query, Relids relids)
 	}
 	return result;
 }
+
+
+/*
+ * is_var_nullable - false only if var is in its rel's notnullattrs and varno is not in nullable_baserels
+ */
+bool
+is_var_nullable(PlannerInfo *root, Var *var)
+{
+	RelOptInfo *rel = root->simple_rel_array[var->varno];
+	/* NOTE(review): rel is dereferenced unchecked and varlevelsup is not consulted -- confirm callers */
+	return !bms_is_member(var->varattno - FirstLowInvalidHeapAttributeNumber,
+						  rel->notnullattrs) ||
+		bms_is_member(var->varno, root->nullable_baserels);
+}
diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h
index 0ec93e648c..e392eb1a7e 100644
--- a/src/include/nodes/pathnodes.h
+++ b/src/include/nodes/pathnodes.h
@@ -710,6 +710,13 @@ typedef struct RelOptInfo
 	PlannerInfo *subroot;		/* if subquery */
 	List	   *subplan_params; /* if subquery */
 	int			rel_parallel_workers;	/* wanted number of parallel workers */
+	/*
+	 * Not-null attrs. The values are calculated by looking at pg_attribute and
+	 * quals. However, neither source is reliable in some outer join cases, so to
+	 * check whether a Var is nullable, start with is_var_nullable(), which also
+	 * consults nullable_baserels and is intended to give no false "not null".
+	 */
+	Bitmapset		*notnullattrs;
 
 	/* Information about foreign tables and foreign joins */
 	Oid			serverid;		/* identifies server for the table or join */
diff --git a/src/include/optimizer/optimizer.h b/src/include/optimizer/optimizer.h
index d587952b7d..a94b74947b 100644
--- a/src/include/optimizer/optimizer.h
+++ b/src/include/optimizer/optimizer.h
@@ -23,6 +23,7 @@
 #define OPTIMIZER_H
 
 #include "nodes/parsenodes.h"
+#include "nodes/bitmapset.h"
 
 /* Test if an expression node represents a SRF call.  Beware multiple eval! */
 #define IS_SRF_CALL(node) \
@@ -198,5 +199,6 @@ extern bool contain_vars_of_level(Node *node, int levelsup);
 extern int	locate_var_of_level(Node *node, int levelsup);
 extern List *pull_var_clause(Node *node, int flags);
 extern Node *flatten_join_alias_vars(Query *query, Node *node);
+extern bool is_var_nullable(PlannerInfo *root, Var *var);
 
 #endif							/* OPTIMIZER_H */
-- 
2.21.0

