Re: Performance issue in foreign-key-aware join estimation

Tomas Vondra Mon, 24 Dec 2018 16:48:00 -0800

On 12/24/18 1:07 PM, David Rowley wrote:
> On Mon, 24 Dec 2018 at 09:38, David Rowley <david.row...@2ndquadrant.com> 
> wrote:
>> Using the above idea, but rather than going to the trouble of storing
>> PlannerInfo->eq_classes as an array type list, if we build the array
>> on the fly inside match_foreign_keys_to_quals(), then build a
>> Bitmapset type index to mark which of the eclasses contains members
>> for each relation, then I can get the run-time for the function down
>> to just 0.89%.  Looking at other functions appearing high on the
>> profile I also see; have_relevant_eclass_joinclause() (14%),
>> generate_join_implied_equalities_for_ecs() (23%).
> 
> I've now expanded the proof of concept patch to use this indexing
> technique for have_relevant_eclass_joinclause() and
> generate_join_implied_equalities_for_ecs().  With Tom's test from
> up-thread, I get:
> 
> Master:
> latency average = 14125.374 ms
> 
> Patched:
> latency average = 2417.164 ms
>


Yes, I can confirm these measurements. On my machine, timing on master
is about 10530ms, with v1 of the patch it drops to 2600ms and v2 pushes
it down to 1610ms.

I however observe failures on 4 regression test suites - inherit,
equivclass, partition_join and partition_prune (diff attached). That's a
bit surprising, because AFAICS the patch merely optimizes the execution
and should not change the planning otherwise (all the failures seem to
be a query plan changing in some way). I'm not sure if the plans are
correct or better than the old ones.

The other thing is that we probably should not use a single test case to
measure the optimization - I doubt it can improve less extreme queries,
but maybe we should verify it does not regress them?

With the patch attached, bms_overlap gets quite high in the profiles. I
think one reason for that is that all bitmap operations (not just
bms_overlap) always start the loop at 0. For the upper boundary is
usually determined as Min() of the lengths, but we don't do that for
lower boundary because we don't track that. The bitmaps for eclasses are
likely sparse, so this is quite painful. Attaches is a simple patch that
adds tracking of "minword" and uses it in various bms_ methods to skip
initial part of the loop. On my machine this reduces the timings by
roughly 5% (from 1610 to 1530 ms).


regards

-- 
Tomas Vondra                  http://www.2ndQuadrant.com
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services

*** /var/lib/postgresql/postgres/src/test/regress/expected/inherit.out  
2018-12-12 19:09:43.877432423 +0100
--- /var/lib/postgresql/postgres/src/test/regress/results/inherit.out   
2018-12-25 01:22:57.876139321 +0100
***************
*** 1297,1315 ****
  drop index patest2i;
  explain (costs off)
  select * from patest0 join (select f1 from int4_tbl limit 1) ss on id = f1;
!                     QUERY PLAN                    
! --------------------------------------------------
!  Nested Loop
!    ->  Limit
!          ->  Seq Scan on int4_tbl
     ->  Append
!          ->  Index Scan using patest0i on patest0
!                Index Cond: (id = int4_tbl.f1)
!          ->  Index Scan using patest1i on patest1
!                Index Cond: (id = int4_tbl.f1)
           ->  Seq Scan on patest2
!                Filter: (int4_tbl.f1 = id)
! (10 rows)
  
  select * from patest0 join (select f1 from int4_tbl limit 1) ss on id = f1;
   id | x | f1 
--- 1297,1314 ----
  drop index patest2i;
  explain (costs off)
  select * from patest0 join (select f1 from int4_tbl limit 1) ss on id = f1;
!                QUERY PLAN                
! -----------------------------------------
!  Hash Join
!    Hash Cond: (patest0.id = int4_tbl.f1)
     ->  Append
!          ->  Seq Scan on patest0
!          ->  Seq Scan on patest1
           ->  Seq Scan on patest2
!    ->  Hash
!          ->  Limit
!                ->  Seq Scan on int4_tbl
! (9 rows)
  
  select * from patest0 join (select f1 from int4_tbl limit 1) ss on id = f1;
   id | x | f1 

======================================================================

*** /var/lib/postgresql/postgres/src/test/regress/expected/equivclass.out       
2018-12-12 19:09:38.838432379 +0100
--- /var/lib/postgresql/postgres/src/test/regress/results/equivclass.out        
2018-12-25 01:23:02.109139043 +0100
***************
*** 260,286 ****
       union all
       select ff + 4 as x from ec1) as ss2
    where ss1.x = ec1.f1 and ss1.x = ss2.x and ec1.ff = 42::int8;
!                              QUERY PLAN                              
! ---------------------------------------------------------------------
   Nested Loop
!    ->  Nested Loop
!          ->  Index Scan using ec1_pkey on ec1
!                Index Cond: (ff = '42'::bigint)
!          ->  Append
!                ->  Index Scan using ec1_expr2 on ec1 ec1_1
!                      Index Cond: (((ff + 2) + 1) = ec1.f1)
!                ->  Index Scan using ec1_expr3 on ec1 ec1_2
!                      Index Cond: (((ff + 3) + 1) = ec1.f1)
!                ->  Index Scan using ec1_expr4 on ec1 ec1_3
!                      Index Cond: ((ff + 4) = ec1.f1)
     ->  Append
!          ->  Index Scan using ec1_expr2 on ec1 ec1_4
!                Index Cond: (((ff + 2) + 1) = (((ec1_1.ff + 2) + 1)))
!          ->  Index Scan using ec1_expr3 on ec1 ec1_5
!                Index Cond: (((ff + 3) + 1) = (((ec1_1.ff + 2) + 1)))
!          ->  Index Scan using ec1_expr4 on ec1 ec1_6
!                Index Cond: ((ff + 4) = (((ec1_1.ff + 2) + 1)))
! (18 rows)
  
  -- let's try that as a mergejoin
  set enable_mergejoin = on;
--- 260,285 ----
       union all
       select ff + 4 as x from ec1) as ss2
    where ss1.x = ec1.f1 and ss1.x = ss2.x and ec1.ff = 42::int8;
!                             QUERY PLAN                            
! ------------------------------------------------------------------
   Nested Loop
!    Join Filter: ((((ec1_1.ff + 2) + 1)) = (((ec1_4.ff + 2) + 1)))
     ->  Append
!          ->  Seq Scan on ec1 ec1_4
!          ->  Seq Scan on ec1 ec1_5
!          ->  Seq Scan on ec1 ec1_6
!    ->  Materialize
!          ->  Nested Loop
!                ->  Index Scan using ec1_pkey on ec1
!                      Index Cond: (ff = '42'::bigint)
!                ->  Append
!                      ->  Index Scan using ec1_expr2 on ec1 ec1_1
!                            Index Cond: (((ff + 2) + 1) = ec1.f1)
!                      ->  Index Scan using ec1_expr3 on ec1 ec1_2
!                            Index Cond: (((ff + 3) + 1) = ec1.f1)
!                      ->  Index Scan using ec1_expr4 on ec1 ec1_3
!                            Index Cond: ((ff + 4) = ec1.f1)
! (17 rows)
  
  -- let's try that as a mergejoin
  set enable_mergejoin = on;
***************
*** 345,354 ****
           ->  Index Scan using ec1_expr2 on ec1 ec1_1
                 Index Cond: (((ff + 2) + 1) = ec1.f1)
           ->  Seq Scan on ec1 ec1_2
-                Filter: (((ff + 3) + 1) = ec1.f1)
           ->  Index Scan using ec1_expr4 on ec1 ec1_3
                 Index Cond: ((ff + 4) = ec1.f1)
! (10 rows)
  
  -- let's try that as a mergejoin
  set enable_mergejoin = on;
--- 344,352 ----
           ->  Index Scan using ec1_expr2 on ec1 ec1_1
                 Index Cond: (((ff + 2) + 1) = ec1.f1)
           ->  Seq Scan on ec1 ec1_2
           ->  Index Scan using ec1_expr4 on ec1 ec1_3
                 Index Cond: ((ff + 4) = ec1.f1)
! (9 rows)
  
  -- let's try that as a mergejoin
  set enable_mergejoin = on;

======================================================================

*** /var/lib/postgresql/postgres/src/test/regress/expected/partition_join.out   
2018-12-12 19:09:43.880432423 +0100
--- /var/lib/postgresql/postgres/src/test/regress/results/partition_join.out    
2018-12-25 01:23:06.002138788 +0100
***************
*** 535,560 ****
   Sort
     Sort Key: t1.a
     ->  Append
!          ->  Nested Loop
!                Join Filter: (t1.a = ((t3.a + t3.b) / 2))
!                ->  Hash Join
!                      Hash Cond: (t2.b = t1.a)
!                      ->  Seq Scan on prt2_p1 t2
!                      ->  Hash
!                            ->  Seq Scan on prt1_p1 t1
!                                  Filter: (b = 0)
!                ->  Index Scan using iprt1_e_p1_ab2 on prt1_e_p1 t3
!                      Index Cond: (((a + b) / 2) = t2.b)
!          ->  Nested Loop
!                Join Filter: (t1_1.a = ((t3_1.a + t3_1.b) / 2))
!                ->  Hash Join
!                      Hash Cond: (t2_1.b = t1_1.a)
!                      ->  Seq Scan on prt2_p2 t2_1
!                      ->  Hash
!                            ->  Seq Scan on prt1_p2 t1_1
!                                  Filter: (b = 0)
!                ->  Index Scan using iprt1_e_p2_ab2 on prt1_e_p2 t3_1
!                      Index Cond: (((a + b) / 2) = t2_1.b)
           ->  Nested Loop
                 Join Filter: (t1_2.a = ((t3_2.a + t3_2.b) / 2))
                 ->  Hash Join
--- 535,560 ----
   Sort
     Sort Key: t1.a
     ->  Append
!          ->  Hash Join
!                Hash Cond: (((t3.a + t3.b) / 2) = t1.a)
!                ->  Seq Scan on prt1_e_p1 t3
!                ->  Hash
!                      ->  Hash Join
!                            Hash Cond: (t2.b = t1.a)
!                            ->  Seq Scan on prt2_p1 t2
!                            ->  Hash
!                                  ->  Seq Scan on prt1_p1 t1
!                                        Filter: (b = 0)
!          ->  Hash Join
!                Hash Cond: (((t3_1.a + t3_1.b) / 2) = t1_1.a)
!                ->  Seq Scan on prt1_e_p2 t3_1
!                ->  Hash
!                      ->  Hash Join
!                            Hash Cond: (t2_1.b = t1_1.a)
!                            ->  Seq Scan on prt2_p2 t2_1
!                            ->  Hash
!                                  ->  Seq Scan on prt1_p2 t1_1
!                                        Filter: (b = 0)
           ->  Nested Loop
                 Join Filter: (t1_2.a = ((t3_2.a + t3_2.b) / 2))
                 ->  Hash Join
***************
*** 745,752 ****
  -- Semi-join
  EXPLAIN (COSTS OFF)
  SELECT t1.* FROM prt1 t1 WHERE t1.a IN (SELECT t1.b FROM prt2 t1, prt1_e t2 
WHERE t1.a = 0 AND t1.b = (t2.a + t2.b)/2) AND t1.b = 0 ORDER BY t1.a;
!                                    QUERY PLAN                                 
   
! 
---------------------------------------------------------------------------------
   Sort
     Sort Key: t1.a
     ->  Append
--- 745,752 ----
  -- Semi-join
  EXPLAIN (COSTS OFF)
  SELECT t1.* FROM prt1 t1 WHERE t1.a IN (SELECT t1.b FROM prt2 t1, prt1_e t2 
WHERE t1.a = 0 AND t1.b = (t2.a + t2.b)/2) AND t1.b = 0 ORDER BY t1.a;
!                                QUERY PLAN                                
! -------------------------------------------------------------------------
   Sort
     Sort Key: t1.a
     ->  Append
***************
*** 780,794 ****
                 Join Filter: (t1_2.a = t1_5.b)
                 ->  HashAggregate
                       Group Key: t1_5.b
!                      ->  Nested Loop
!                            ->  Seq Scan on prt2_p3 t1_5
!                                  Filter: (a = 0)
!                            ->  Index Scan using iprt1_e_p3_ab2 on prt1_e_p3 
t2_2
!                                  Index Cond: (((a + b) / 2) = t1_5.b)
                 ->  Index Scan using iprt1_p3_a on prt1_p3 t1_2
                       Index Cond: (a = ((t2_2.a + t2_2.b) / 2))
                       Filter: (b = 0)
! (41 rows)
  
  SELECT t1.* FROM prt1 t1 WHERE t1.a IN (SELECT t1.b FROM prt2 t1, prt1_e t2 
WHERE t1.a = 0 AND t1.b = (t2.a + t2.b)/2) AND t1.b = 0 ORDER BY t1.a;
    a  | b |  c   
--- 780,795 ----
                 Join Filter: (t1_2.a = t1_5.b)
                 ->  HashAggregate
                       Group Key: t1_5.b
!                      ->  Hash Join
!                            Hash Cond: (((t2_2.a + t2_2.b) / 2) = t1_5.b)
!                            ->  Seq Scan on prt1_e_p3 t2_2
!                            ->  Hash
!                                  ->  Seq Scan on prt2_p3 t1_5
!                                        Filter: (a = 0)
                 ->  Index Scan using iprt1_p3_a on prt1_p3 t1_2
                       Index Cond: (a = ((t2_2.a + t2_2.b) / 2))
                       Filter: (b = 0)
! (42 rows)
  
  SELECT t1.* FROM prt1 t1 WHERE t1.a IN (SELECT t1.b FROM prt2 t1, prt1_e t2 
WHERE t1.a = 0 AND t1.b = (t2.a + t2.b)/2) AND t1.b = 0 ORDER BY t1.a;
    a  | b |  c   

======================================================================

*** /var/lib/postgresql/postgres/src/test/regress/expected/partition_prune.out  
2018-12-12 19:09:43.881432423 +0100
--- /var/lib/postgresql/postgres/src/test/regress/results/partition_prune.out   
2018-12-25 01:23:06.283138769 +0100
***************
*** 2650,2673 ****
  
  explain (analyze, costs off, summary off, timing off)
  select * from tbl1 join tprt on tbl1.col1 = tprt.col1;
!                                 QUERY PLAN                                
! --------------------------------------------------------------------------
!  Nested Loop (actual rows=2 loops=1)
!    ->  Seq Scan on tbl1 (actual rows=2 loops=1)
!    ->  Append (actual rows=1 loops=2)
!          ->  Index Scan using tprt1_idx on tprt_1 (never executed)
!                Index Cond: (col1 = tbl1.col1)
!          ->  Index Scan using tprt2_idx on tprt_2 (actual rows=1 loops=2)
!                Index Cond: (col1 = tbl1.col1)
!          ->  Index Scan using tprt3_idx on tprt_3 (never executed)
!                Index Cond: (col1 = tbl1.col1)
!          ->  Index Scan using tprt4_idx on tprt_4 (never executed)
!                Index Cond: (col1 = tbl1.col1)
!          ->  Index Scan using tprt5_idx on tprt_5 (never executed)
!                Index Cond: (col1 = tbl1.col1)
!          ->  Index Scan using tprt6_idx on tprt_6 (never executed)
!                Index Cond: (col1 = tbl1.col1)
! (15 rows)
  
  select tbl1.col1, tprt.col1 from tbl1
  inner join tprt on tbl1.col1 > tprt.col1
--- 2650,2672 ----
  
  explain (analyze, costs off, summary off, timing off)
  select * from tbl1 join tprt on tbl1.col1 = tprt.col1;
!                               QUERY PLAN                               
! -----------------------------------------------------------------------
!  Gather (actual rows=2 loops=1)
!    Workers Planned: 2
!    Workers Launched: 2
!    ->  Nested Loop (actual rows=1 loops=3)
!          Join Filter: (tbl1.col1 = tprt_1.col1)
!          Rows Removed by Join Filter: 4
!          ->  Parallel Append (actual rows=2 loops=3)
!                ->  Parallel Seq Scan on tprt_1 (actual rows=2 loops=1)
!                ->  Parallel Seq Scan on tprt_2 (actual rows=3 loops=1)
!                ->  Parallel Seq Scan on tprt_3 (actual rows=1 loops=1)
!                ->  Parallel Seq Scan on tprt_4 (actual rows=0 loops=1)
!                ->  Parallel Seq Scan on tprt_5 (actual rows=0 loops=1)
!                ->  Parallel Seq Scan on tprt_6 (actual rows=1 loops=1)
!          ->  Seq Scan on tbl1 (actual rows=2 loops=7)
! (14 rows)
  
  select tbl1.col1, tprt.col1 from tbl1
  inner join tprt on tbl1.col1 > tprt.col1
***************
*** 2716,2739 ****
  
  explain (analyze, costs off, summary off, timing off)
  select * from tbl1 inner join tprt on tbl1.col1 = tprt.col1;
!                                 QUERY PLAN                                
! --------------------------------------------------------------------------
!  Nested Loop (actual rows=3 loops=1)
!    ->  Seq Scan on tbl1 (actual rows=5 loops=1)
!    ->  Append (actual rows=1 loops=5)
!          ->  Index Scan using tprt1_idx on tprt_1 (never executed)
!                Index Cond: (col1 = tbl1.col1)
!          ->  Index Scan using tprt2_idx on tprt_2 (actual rows=1 loops=2)
!                Index Cond: (col1 = tbl1.col1)
!          ->  Index Scan using tprt3_idx on tprt_3 (actual rows=0 loops=3)
!                Index Cond: (col1 = tbl1.col1)
!          ->  Index Scan using tprt4_idx on tprt_4 (never executed)
!                Index Cond: (col1 = tbl1.col1)
!          ->  Index Scan using tprt5_idx on tprt_5 (never executed)
!                Index Cond: (col1 = tbl1.col1)
!          ->  Index Scan using tprt6_idx on tprt_6 (never executed)
!                Index Cond: (col1 = tbl1.col1)
! (15 rows)
  
  select tbl1.col1, tprt.col1 from tbl1
  inner join tprt on tbl1.col1 > tprt.col1
--- 2715,2737 ----
  
  explain (analyze, costs off, summary off, timing off)
  select * from tbl1 inner join tprt on tbl1.col1 = tprt.col1;
!                               QUERY PLAN                               
! -----------------------------------------------------------------------
!  Gather (actual rows=3 loops=1)
!    Workers Planned: 2
!    Workers Launched: 2
!    ->  Nested Loop (actual rows=1 loops=3)
!          Join Filter: (tbl1.col1 = tprt_1.col1)
!          Rows Removed by Join Filter: 11
!          ->  Parallel Append (actual rows=2 loops=3)
!                ->  Parallel Seq Scan on tprt_1 (actual rows=2 loops=1)
!                ->  Parallel Seq Scan on tprt_2 (actual rows=3 loops=1)
!                ->  Parallel Seq Scan on tprt_3 (actual rows=1 loops=1)
!                ->  Parallel Seq Scan on tprt_4 (actual rows=0 loops=1)
!                ->  Parallel Seq Scan on tprt_5 (actual rows=0 loops=1)
!                ->  Parallel Seq Scan on tprt_6 (actual rows=1 loops=1)
!          ->  Seq Scan on tbl1 (actual rows=5 loops=7)
! (14 rows)
  
  select tbl1.col1, tprt.col1 from tbl1
  inner join tprt on tbl1.col1 > tprt.col1
***************
*** 2812,2835 ****
  insert into tbl1 values (10000);
  explain (analyze, costs off, summary off, timing off)
  select * from tbl1 join tprt on tbl1.col1 = tprt.col1;
!                             QUERY PLAN                             
! -------------------------------------------------------------------
!  Nested Loop (actual rows=0 loops=1)
!    ->  Seq Scan on tbl1 (actual rows=1 loops=1)
!    ->  Append (actual rows=0 loops=1)
!          ->  Index Scan using tprt1_idx on tprt_1 (never executed)
!                Index Cond: (col1 = tbl1.col1)
!          ->  Index Scan using tprt2_idx on tprt_2 (never executed)
!                Index Cond: (col1 = tbl1.col1)
!          ->  Index Scan using tprt3_idx on tprt_3 (never executed)
!                Index Cond: (col1 = tbl1.col1)
!          ->  Index Scan using tprt4_idx on tprt_4 (never executed)
!                Index Cond: (col1 = tbl1.col1)
!          ->  Index Scan using tprt5_idx on tprt_5 (never executed)
!                Index Cond: (col1 = tbl1.col1)
!          ->  Index Scan using tprt6_idx on tprt_6 (never executed)
!                Index Cond: (col1 = tbl1.col1)
! (15 rows)
  
  select tbl1.col1, tprt.col1 from tbl1
  inner join tprt on tbl1.col1 = tprt.col1
--- 2810,2832 ----
  insert into tbl1 values (10000);
  explain (analyze, costs off, summary off, timing off)
  select * from tbl1 join tprt on tbl1.col1 = tprt.col1;
!                               QUERY PLAN                               
! -----------------------------------------------------------------------
!  Gather (actual rows=0 loops=1)
!    Workers Planned: 2
!    Workers Launched: 2
!    ->  Nested Loop (actual rows=0 loops=3)
!          Join Filter: (tbl1.col1 = tprt_1.col1)
!          Rows Removed by Join Filter: 2
!          ->  Parallel Append (actual rows=2 loops=3)
!                ->  Parallel Seq Scan on tprt_1 (actual rows=2 loops=1)
!                ->  Parallel Seq Scan on tprt_2 (actual rows=3 loops=1)
!                ->  Parallel Seq Scan on tprt_3 (actual rows=1 loops=1)
!                ->  Parallel Seq Scan on tprt_4 (actual rows=0 loops=1)
!                ->  Parallel Seq Scan on tprt_5 (actual rows=0 loops=1)
!                ->  Parallel Seq Scan on tprt_6 (actual rows=1 loops=1)
!          ->  Seq Scan on tbl1 (actual rows=1 loops=7)
! (14 rows)
  
  select tbl1.col1, tprt.col1 from tbl1
  inner join tprt on tbl1.col1 = tprt.col1

======================================================================

diff --git a/src/backend/nodes/bitmapset.c b/src/backend/nodes/bitmapset.c
index 8ce253c88d..9eb588e3a5 100644
--- a/src/backend/nodes/bitmapset.c
+++ b/src/backend/nodes/bitmapset.c
@@ -157,6 +157,7 @@ bms_equal(const Bitmapset *a, const Bitmapset *b)
 	int			shortlen;
 	int			longlen;
 	int			i;
+	int			start;
 
 	/* Handle cases where either input is NULL */
 	if (a == NULL)
@@ -180,7 +181,8 @@ bms_equal(const Bitmapset *a, const Bitmapset *b)
 	}
 	/* And process */
 	shortlen = shorter->nwords;
-	for (i = 0; i < shortlen; i++)
+	start = Min(a->minword, b->minword);
+	for (i = start; i < shortlen; i++)
 	{
 		if (shorter->words[i] != longer->words[i])
 			return false;
@@ -207,6 +209,7 @@ bms_compare(const Bitmapset *a, const Bitmapset *b)
 {
 	int			shortlen;
 	int			i;
+	int			start;
 
 	/* Handle cases where either input is NULL */
 	if (a == NULL)
@@ -227,7 +230,8 @@ bms_compare(const Bitmapset *a, const Bitmapset *b)
 	}
 	/* Process words in common */
 	i = shortlen;
-	while (--i >= 0)
+	start = Min(a->minword, b->minword);
+	while (--i >= start)
 	{
 		bitmapword	aw = a->words[i];
 		bitmapword	bw = b->words[i];
@@ -255,6 +259,7 @@ bms_make_singleton(int x)
 	result = (Bitmapset *) palloc0(BITMAPSET_SIZE(wordnum + 1));
 	result->nwords = wordnum + 1;
 	result->words[wordnum] = ((bitmapword) 1 << bitnum);
+	result->minword = wordnum;
 	return result;
 }
 
@@ -304,6 +309,7 @@ bms_union(const Bitmapset *a, const Bitmapset *b)
 		result = bms_copy(a);
 		other = b;
 	}
+	result->minword = Min(a->minword, b->minword);
 	/* And union the shorter input into the result */
 	otherlen = other->nwords;
 	for (i = 0; i < otherlen; i++)
@@ -337,8 +343,9 @@ bms_intersect(const Bitmapset *a, const Bitmapset *b)
 		other = a;
 	}
 	/* And intersect the longer input with the result */
+	result->minword = Max(a->minword, b->minword);
 	resultlen = result->nwords;
-	for (i = 0; i < resultlen; i++)
+	for (i = result->minword; i < resultlen; i++)
 		result->words[i] &= other->words[i];
 	return result;
 }
@@ -362,7 +369,8 @@ bms_difference(const Bitmapset *a, const Bitmapset *b)
 	result = bms_copy(a);
 	/* And remove b's bits from result */
 	shortlen = Min(a->nwords, b->nwords);
-	for (i = 0; i < shortlen; i++)
+	result->minword = Min(a->minword, b->minword);
+	for (i = result->minword; i < shortlen; i++)
 		result->words[i] &= ~b->words[i];
 	return result;
 }
@@ -497,6 +505,8 @@ bms_is_member(int x, const Bitmapset *a)
 	bitnum = BITNUM(x);
 	if (wordnum >= a->nwords)
 		return false;
+	if (wordnum < a->minword)
+		return false;
 	if ((a->words[wordnum] & ((bitmapword) 1 << bitnum)) != 0)
 		return true;
 	return false;
@@ -510,13 +520,15 @@ bms_overlap(const Bitmapset *a, const Bitmapset *b)
 {
 	int			shortlen;
 	int			i;
+	int			start;
 
 	/* Handle cases where either input is NULL */
 	if (a == NULL || b == NULL)
 		return false;
 	/* Check words in common */
 	shortlen = Min(a->nwords, b->nwords);
-	for (i = 0; i < shortlen; i++)
+	start = Min(a->minword, b->minword);
+	for (i = start; i < shortlen; i++)
 	{
 		if ((a->words[i] & b->words[i]) != 0)
 			return true;
@@ -545,7 +557,7 @@ bms_overlap_list(const Bitmapset *a, const List *b)
 			elog(ERROR, "negative bitmapset member not allowed");
 		wordnum = WORDNUM(x);
 		bitnum = BITNUM(x);
-		if (wordnum < a->nwords)
+		if ((wordnum >= a->minword) && (wordnum < a->nwords))
 			if ((a->words[wordnum] & ((bitmapword) 1 << bitnum)) != 0)
 				return true;
 	}
@@ -561,6 +573,7 @@ bms_nonempty_difference(const Bitmapset *a, const Bitmapset *b)
 {
 	int			shortlen;
 	int			i;
+	int			start;
 
 	/* Handle cases where either input is NULL */
 	if (a == NULL)
@@ -569,7 +582,8 @@ bms_nonempty_difference(const Bitmapset *a, const Bitmapset *b)
 		return !bms_is_empty(a);
 	/* Check words in common */
 	shortlen = Min(a->nwords, b->nwords);
-	for (i = 0; i < shortlen; i++)
+	start = Min(a->minword, b->minword);
+	for (i = start; i < shortlen; i++)
 	{
 		if ((a->words[i] & ~b->words[i]) != 0)
 			return true;
@@ -598,7 +612,7 @@ bms_singleton_member(const Bitmapset *a)
 	if (a == NULL)
 		elog(ERROR, "bitmapset is empty");
 	nwords = a->nwords;
-	for (wordnum = 0; wordnum < nwords; wordnum++)
+	for (wordnum = a->minword; wordnum < nwords; wordnum++)
 	{
 		bitmapword	w = a->words[wordnum];
 
@@ -641,7 +655,7 @@ bms_get_singleton_member(const Bitmapset *a, int *member)
 	if (a == NULL)
 		return false;
 	nwords = a->nwords;
-	for (wordnum = 0; wordnum < nwords; wordnum++)
+	for (wordnum = a->minword; wordnum < nwords; wordnum++)
 	{
 		bitmapword	w = a->words[wordnum];
 
@@ -677,7 +691,7 @@ bms_num_members(const Bitmapset *a)
 	if (a == NULL)
 		return 0;
 	nwords = a->nwords;
-	for (wordnum = 0; wordnum < nwords; wordnum++)
+	for (wordnum = a->minword; wordnum < nwords; wordnum++)
 	{
 		bitmapword	w = a->words[wordnum];
 
@@ -734,7 +748,7 @@ bms_is_empty(const Bitmapset *a)
 	if (a == NULL)
 		return true;
 	nwords = a->nwords;
-	for (wordnum = 0; wordnum < nwords; wordnum++)
+	for (wordnum = a->minword; wordnum < nwords; wordnum++)
 	{
 		bitmapword	w = a->words[wordnum];
 
@@ -773,6 +787,9 @@ bms_add_member(Bitmapset *a, int x)
 	wordnum = WORDNUM(x);
 	bitnum = BITNUM(x);
 
+	if (a->minword > wordnum)
+		a->minword = wordnum;
+
 	/* enlarge the set if necessary */
 	if (wordnum >= a->nwords)
 	{
@@ -845,6 +862,8 @@ bms_add_members(Bitmapset *a, const Bitmapset *b)
 	otherlen = other->nwords;
 	for (i = 0; i < otherlen; i++)
 		result->words[i] |= other->words[i];
+	if (result->minword > a->minword)
+		result->minword = a->minword;
 	if (result != a)
 		pfree(a);
 	return result;
@@ -898,6 +917,9 @@ bms_add_range(Bitmapset *a, int lower, int upper)
 	lbitnum = BITNUM(lower);
 	ushiftbits = BITS_PER_BITMAPWORD - (BITNUM(upper) + 1);
 
+	if (a->minword > lwordnum)
+		a->minword = lwordnum;
+
 	/*
 	 * Special case when lwordnum is the same as uwordnum we must perform the
 	 * upper and lower masking on the word.
@@ -931,6 +953,7 @@ bms_int_members(Bitmapset *a, const Bitmapset *b)
 {
 	int			shortlen;
 	int			i;
+	int			start;
 
 	/* Handle cases where either input is NULL */
 	if (a == NULL)
@@ -942,7 +965,8 @@ bms_int_members(Bitmapset *a, const Bitmapset *b)
 	}
 	/* Intersect b into a; we need never copy */
 	shortlen = Min(a->nwords, b->nwords);
-	for (i = 0; i < shortlen; i++)
+	start = Min(a->minword, b->minword);
+	for (i = start; i < shortlen; i++)
 		a->words[i] &= b->words[i];
 	for (; i < a->nwords; i++)
 		a->words[i] = 0;
@@ -957,6 +981,7 @@ bms_del_members(Bitmapset *a, const Bitmapset *b)
 {
 	int			shortlen;
 	int			i;
+	int			start;
 
 	/* Handle cases where either input is NULL */
 	if (a == NULL)
@@ -965,7 +990,8 @@ bms_del_members(Bitmapset *a, const Bitmapset *b)
 		return a;
 	/* Remove b's bits from a; we need never copy */
 	shortlen = Min(a->nwords, b->nwords);
-	for (i = 0; i < shortlen; i++)
+	start = Max(a->minword, b->minword);
+	for (i = start; i < shortlen; i++)
 		a->words[i] &= ~b->words[i];
 	return a;
 }
@@ -997,9 +1023,10 @@ bms_join(Bitmapset *a, Bitmapset *b)
 		result = a;
 		other = b;
 	}
+	result->minword = Min(result->minword, other->minword);
 	/* And union the shorter input into the result */
 	otherlen = other->nwords;
-	for (i = 0; i < otherlen; i++)
+	for (i = other->minword; i < otherlen; i++)
 		result->words[i] |= other->words[i];
 	if (other != result)		/* pure paranoia */
 		pfree(other);
@@ -1029,7 +1056,7 @@ bms_first_member(Bitmapset *a)
 	if (a == NULL)
 		return -1;
 	nwords = a->nwords;
-	for (wordnum = 0; wordnum < nwords; wordnum++)
+	for (wordnum = a->minword; wordnum < nwords; wordnum++)
 	{
 		bitmapword	w = a->words[wordnum];
 
@@ -1158,7 +1185,7 @@ bms_prev_member(const Bitmapset *a, int prevbit)
 
 	ushiftbits = BITS_PER_BITMAPWORD - (BITNUM(prevbit) + 1);
 	mask = (~(bitmapword) 0) >> ushiftbits;
-	for (wordnum = WORDNUM(prevbit); wordnum >= 0; wordnum--)
+	for (wordnum = WORDNUM(prevbit); (wordnum >= 0) && (wordnum >= a->minword); wordnum--)
 	{
 		bitmapword	w = a->words[wordnum];
 
diff --git a/src/include/nodes/bitmapset.h b/src/include/nodes/bitmapset.h
index 433df8a46d..829290bf9b 100644
--- a/src/include/nodes/bitmapset.h
+++ b/src/include/nodes/bitmapset.h
@@ -48,6 +48,7 @@ typedef int32 signedbitmapword; /* must be the matching signed type */
 
 typedef struct Bitmapset
 {
+	int			minword;
 	int			nwords;			/* number of words in array */
 	bitmapword	words[FLEXIBLE_ARRAY_MEMBER];	/* really [nwords] */
 } Bitmapset;

Re: Performance issue in foreign-key-aware join estimation

Reply via email to