Hi Guys,

I’m at a bit of a loss where I can go with the following 2 queries
that are over the same data structure (DDL attached) under postgresql
PostgreSQL 9.5.16 on x86_64-pc-linux-gnu (Debian 9.5.16-1.pgdg90+1),
compiled by gcc (Debian 6.3.0-18+deb9u1) 6.3.0 20170516, 64-bit and
could do with a second set of eyes if someone would oblige.

I’ve attached Query1.txt and Query2.txt along with the DDL for the
tables and indicies and execution plans.

On our production environment we’re running at about 2 seconds (with
the cache warm); I’m getting a comparable speed on my playbox. It
seems to me like the Bitmap Heap Scan on proposal is the issue because
the recheck is throwing away enormous amounts of data. The
has_been_anonymised flag on the proposal is effectively a soft-delete;
so I’ve tried adding something like :

CREATE INDEX ON proposal.proposal (system_id, legacy_organisation_id, reference)
WHERE has_been_anonymised = false;

Which I was hoping would shrink the size of the index significantly
and encourage an index scan rather than bitmap, however it didn’t have
that effect. For reference:

Has_been_anonymised false:    1534790
Has_been_anonymised true:        7072192

Row counts over the whole table in question are :
Proposal.proposal:    8606982    2340 MB
Proposal.note:        2624423      1638 MB

Presumably I could partition proposal on has_been_anonymised, however
the row counts seem low enough that it feels a bit like overkill? We
also need referential integrity so I'll need to wait until that's in
(I think it's coming in PG12?)

If I decrease the number of legacy_organisation_id’s that are being
used then the query performance gets much better, but presumably
that’s because there’s a smaller dataset.

Any thoughts or ideas?

Thanks
Rob

-- 
 <https://codeweavers.net>


A big Get Focused ‘thank you’ 
<https://codeweavers.net/company-blog/a-big-get-focused-thank-you>
Why you 
should partner with an Agile company 
<https://codeweavers.net/company-blog/why-you-should-partner-with-an-agile-company>


*
*
*Phone:* 0800 021 0888   Email: contac...@codeweavers.net 
<mailto:contac...@codeweavers.net>
Codeweavers Ltd | Barn 4 | Dunston 
Business Village | Dunston | ST18 9AB
Registered in England and Wales No. 
04092394 | VAT registration no. 974 9705 63 



 
<https://twitter.com/Codeweavers_Ltd>  
<https://www.facebook.com/Codeweavers.Ltd/>  
<https://www.linkedin.com/company/codeweavers-limited>
CREATE TABLE proposal.proposal
(
  id bigserial NOT NULL,
  reference uuid NOT NULL,
  system_id integer NOT NULL,
  legacy_id integer NOT NULL,
  organisation_id integer,
  legacy_organisation_id integer NOT NULL,
  has_been_anonymised boolean NOT NULL,
  external_source character varying,
  CONSTRAINT pk_proposal PRIMARY KEY (id),
  CONSTRAINT proposal_reference_key UNIQUE (reference),
  CONSTRAINT proposal_system_id_legacy_id_key UNIQUE (system_id, legacy_id)
)
WITH (
  OIDS=FALSE
);

CREATE INDEX proposal_has_been_anonymised_system_id_legacy_organisation__idx
  ON proposal.proposal
  USING btree
  (has_been_anonymised, system_id, legacy_organisation_id);

CREATE INDEX proposal_has_been_anonymised_system_id_legacy_organisation_idx1
  ON proposal.proposal
  USING btree
  (has_been_anonymised, system_id, legacy_organisation_id, reference);

--

CREATE TABLE proposal.note
(
  id bigserial NOT NULL,
  reference uuid NOT NULL,
  proposal_reference uuid NOT NULL,
  entry_time timestamp with time zone NOT NULL,
  legacy_originator_id integer,
  type_id integer NOT NULL,
  content text NOT NULL,
  legacy_read_by integer,
  time_read timestamp with time zone,
  "from" character varying(100),
  "to" character varying(100),
  originator_reference uuid,
  read_by_reference uuid,
  CONSTRAINT pk_note PRIMARY KEY (id),
  CONSTRAINT note_proposal_reference_fkey FOREIGN KEY (proposal_reference)
      REFERENCES proposal.proposal (reference) MATCH SIMPLE
      ON UPDATE NO ACTION ON DELETE NO ACTION,
  CONSTRAINT note_reference_key UNIQUE (reference)
)
WITH (
  OIDS=FALSE
);

CREATE INDEX note_entry_time_type_id_idx
  ON proposal.note
  USING btree
  (entry_time, type_id)
  WHERE legacy_read_by IS NULL;

CREATE INDEX note_proposal_reference_idx
  ON proposal.note
  USING btree
  (proposal_reference);

CREATE INDEX note_proposal_reference_type_id_entry_time_idx
  ON proposal.note
  USING btree
  (proposal_reference, type_id, entry_time)
  WHERE legacy_read_by IS NULL;

CREATE INDEX note_type_id_entry_time_idx
  ON proposal.note
  USING btree
  (type_id, entry_time)
  WHERE legacy_read_by IS NULL;
EXPLAIN (ANALYZE TRUE, VERBOSE TRUE, COSTS TRUE, BUFFERS TRUE, TIMING TRUE) 
SELECT COUNT(1)
FROM proposal.proposal
INNER JOIN proposal.note 
        ON proposal.note.proposal_reference = proposal.proposal.reference
WHERE proposal.proposal.system_id = '11'
        AND proposal.proposal.legacy_organisation_id IN ('2', '6', '7', '8', 
'9', '10', '11', '12', '13', '14', '15', '16', '17', '18', '19', '20', '21', 
'22', '23', '24', '25', '26', '27', '28', '29', '30', '31', '32', '33', '34', 
'35', '36', '37', '38', '39', '40', '42', '43', '44', '45', '46', '47', '48', 
'50', '52', '53', '54', '55', '181', '187', '188', '189', '190', '191', '192', 
'193', '194', '195', '196', '197', '199', '200', '202', '203', '204', '218', 
'221', '222', '223', '225', '226', '227', '228', '229', '230', '231', '232', 
'233', '235', '237', '239', '240', '241', '242', '243', '244', '245', '246', 
'247', '248', '249', '250', '251', '252', '253', '254', '255', '258', '259', 
'260', '261', '262', '263', '264', '265', '266', '267', '41')
        AND proposal.proposal.has_been_anonymised = false
        AND proposal.note.legacy_read_by IS NULL
        AND proposal.note.type_id IN ('1', '4', '9')
        AND proposal.note.entry_time > '2019-07-09 00:00:00';


'Aggregate  (cost=199100.97..199100.98 rows=1 width=0) (actual 
time=2265.665..2265.665 rows=1 loops=1)'
'  Output: count(1)'
'  Buffers: shared hit=303 read=44070, temp read=3401 written=3275'
'  I/O Timings: read=338.284'
'  ->  Hash Join  (cost=95655.49..199099.14 rows=733 width=0) (actual 
time=2166.000..2265.500 rows=961 loops=1)'
'        Hash Cond: (note.proposal_reference = proposal.reference)'
'        Buffers: shared hit=303 read=44070, temp read=3401 written=3275'
'        I/O Timings: read=338.284'
'        ->  Bitmap Heap Scan on proposal.note  (cost=3055.52..105082.81 
rows=72805 width=16) (actual time=26.098..183.887 rows=133338 loops=1)'
'              Output: note.proposal_reference'
'              Recheck Cond: ((note.type_id = ANY ('{1,4,9}'::integer[])) AND 
(note.entry_time > '2019-07-09 00:00:00+01'::timestamp with time zone) AND 
(note.legacy_read_by IS NULL))'
'              Rows Removed by Index Recheck: 155411'
'              Heap Blocks: exact=5730 lossy=3139'
'              Buffers: shared hit=2 read=9873'
'              I/O Timings: read=63.824'
'              ->  Bitmap Index Scan on note_type_id_entry_time_idx  
(cost=0.00..3037.32 rows=72805 width=0) (actual time=25.246..25.246 rows=133338 
loops=1)'
'                    Index Cond: ((note.type_id = ANY ('{1,4,9}'::integer[])) 
AND (note.entry_time > '2019-07-09 00:00:00+01'::timestamp with time zone))'
'                    Buffers: shared hit=2 read=1004'
'                    I/O Timings: read=8.786'
'        ->  Hash  (cost=91092.63..91092.63 rows=86668 width=16) (actual 
time=1960.836..1960.836 rows=465120 loops=1)'
'              Output: proposal.reference'
'              Buckets: 16384 (originally 16384)  Batches: 64 (originally 16)  
Memory Usage: 476kB'
'              Buffers: shared hit=301 read=34197, temp written=1979'
'              I/O Timings: read=274.460'
'              ->  Bitmap Heap Scan on proposal.proposal  
(cost=2871.35..91092.63 rows=86668 width=16) (actual time=73.254..1823.379 
rows=465120 loops=1)'
'                    Output: proposal.reference'
'                    Recheck Cond: ((proposal.system_id = 11) AND 
(proposal.legacy_organisation_id = ANY 
('{2,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,42,43,44,45,46,47,48,50,52,53,54,55,181,187,188,189,190,191,192,193,194,195,196,197,199,200,202,203,204,218,221,222,223,225,226,227,228,229,230,231,232,233,235,237,239,240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255,258,259,260,261,262,263,264,265,266,267,41}'::integer[])))'
'                    Rows Removed by Index Recheck: 1944770'
'                    Filter: (NOT proposal.has_been_anonymised)'
'                    Rows Removed by Filter: 993208'
'                    Heap Blocks: exact=3983 lossy=27989'
'                    Buffers: shared hit=301 read=34197'
'                    I/O Timings: read=274.460'
'                    ->  Bitmap Index Scan on 
proposal_has_been_anonymised_system_id_legacy_organisation__idx  
(cost=0.00..2849.68 rows=86668 width=0) (actual time=72.624..72.624 rows=465120 
loops=1)'
'                          Index Cond: ((proposal.has_been_anonymised = false) 
AND (proposal.system_id = 11) AND (proposal.legacy_organisation_id = ANY 
('{2,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,42,43,44,45,46,47,48,50,52,53,54,55,181,187,188,189,190,191,192,193,194,195,196,197,199,200,202,203,204,218,221,222,223,225,226,227,228,229,230,231,232,233,235,237,239,240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255,258,259,260,261,262,263,264,265,266,267,41}'::integer[])))'
'                          Buffers: shared hit=301 read=2225'
'                          I/O Timings: read=23.251'
'Planning time: 0.920 ms'
'Execution time: 2265.986 ms'

EXPLAIN (ANALYZE TRUE, VERBOSE TRUE, COSTS TRUE, BUFFERS TRUE, TIMING TRUE) 
SELECT proposal.proposal.legacy_id AS "ProposalId"
   , note.entry_time as "EntryTime"
   , note.content as "Note"
FROM proposal.note
INNER JOIN proposal.proposal ON proposal.proposal.reference = 
note.proposal_reference
WHERE legacy_read_by is null
AND proposal.proposal.system_id = '11'
AND proposal.proposal.legacy_organisation_id in ('2', '6', '7', '8', '9', '10', 
'11', '12', '13', '14', '15', '16', '17', '18', '19', '20', '21', '22', '23', 
'24', '25', '26', '27', '28', '29', '30', '31', '32', '33', '34', '35', '36', 
'37', '38', '39', '40', '42', '43', '44', '45', '46', '47', '48', '50', '52', 
'53', '54', '55', '181', '187', '188', '189', '190', '191', '192', '193', 
'194', '195', '196', '197', '199', '200', '202', '203', '204', '218', '221', 
'222', '223', '225', '226', '227', '228', '229', '230', '231', '232', '233', 
'235', '237', '239', '240', '241', '242', '243', '244', '245', '246', '247', 
'248', '249', '250', '251', '252', '253', '254', '255', '258', '259', '260', 
'261', '262', '263', '264', '265', '266', '267', '41')
AND proposal.proposal.has_been_anonymised = false
AND note.type_id = 3;




'Hash Join  (cost=96937.18..210635.27 rows=1056 width=24) (actual 
time=1987.649..2086.253 rows=1065 loops=1)'
'  Output: proposal.legacy_id, note.entry_time, note.content'
'  Hash Cond: (note.proposal_reference = proposal.reference)'
'  Buffers: shared hit=301 read=36610 written=2, temp read=3222 written=3096'
'  I/O Timings: read=347.571 write=0.110'
'  ->  Bitmap Heap Scan on proposal.note  (cost=4253.21..115399.45 rows=104875 
width=36) (actual time=1.198..35.409 rows=2850 loops=1)'
'        Output: note.entry_time, note.content, note.proposal_reference'
'        Recheck Cond: ((note.type_id = 3) AND (note.legacy_read_by IS NULL))'
'        Heap Blocks: exact=2383'
'        Buffers: shared read=2413'
'        I/O Timings: read=20.890'
'        ->  Bitmap Index Scan on note_type_id_entry_time_idx  
(cost=0.00..4226.99 rows=104875 width=0) (actual time=0.886..0.886 rows=2850 
loops=1)'
'              Index Cond: (note.type_id = 3)'
'              Buffers: shared read=30'
'              I/O Timings: read=0.407'
'  ->  Hash  (cost=91092.63..91092.63 rows=86668 width=20) (actual 
time=1976.579..1976.579 rows=465120 loops=1)'
'        Output: proposal.legacy_id, proposal.reference'
'        Buckets: 8192 (originally 8192)  Batches: 64 (originally 16)  Memory 
Usage: 440kB'
'        Buffers: shared hit=301 read=34197 written=2, temp written=2203'
'        I/O Timings: read=326.681 write=0.110'
'        ->  Bitmap Heap Scan on proposal.proposal  (cost=2871.35..91092.63 
rows=86668 width=20) (actual time=77.416..1832.571 rows=465120 loops=1)'
'              Output: proposal.legacy_id, proposal.reference'
'              Recheck Cond: ((proposal.system_id = 11) AND 
(proposal.legacy_organisation_id = ANY 
('{2,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,42,43,44,45,46,47,48,50,52,53,54,55,181,187,188,189,190,191,192,193,194,195,196,197,199,200,202,203,204,218,221,222,223,225,226,227,228,229,230,231,232,233,235,237,239,240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255,258,259,260,261,262,263,264,265,266,267,41}'::integer[])))'
'              Rows Removed by Index Recheck: 1944770'
'              Filter: (NOT proposal.has_been_anonymised)'
'              Rows Removed by Filter: 993208'
'              Heap Blocks: exact=3983 lossy=27989'
'              Buffers: shared hit=301 read=34197 written=2'
'              I/O Timings: read=326.681 write=0.110'
'              ->  Bitmap Index Scan on 
proposal_has_been_anonymised_system_id_legacy_organisation__idx  
(cost=0.00..2849.68 rows=86668 width=0) (actual time=76.850..76.850 rows=465120 
loops=1)'
'                    Index Cond: ((proposal.has_been_anonymised = false) AND 
(proposal.system_id = 11) AND (proposal.legacy_organisation_id = ANY 
('{2,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,42,43,44,45,46,47,48,50,52,53,54,55,181,187,188,189,190,191,192,193,194,195,196,197,199,200,202,203,204,218,221,222,223,225,226,227,228,229,230,231,232,233,235,237,239,240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255,258,259,260,261,262,263,264,265,266,267,41}'::integer[])))'
'                    Buffers: shared hit=301 read=2225'
'                    I/O Timings: read=28.659'
'Planning time: 1.001 ms'
'Execution time: 2086.603 ms'

Reply via email to