[PERFORM] Index Scan Backward Slow

2015-05-01 Thread David Osborne
Hi,

We have a query which finds the latest row_id for a particular code.

We've found a backwards index scan is much slower than a forward one, to
the extent that disabling indexscan altogether actually improves the query
time.

Can anyone suggest why this might be, and what's best to do to improve the
query time?



dev= \d table
   Table public.table
Column|  Type  | Modifiers
--++---
 row_id   | integer|
 code | character(2)   |
Indexes:
table_code_idx btree (code)
table_row_idx btree (row_id)

dev= select count(*) from table;
  count
-
 6090254
(1 row)

dev= select count(distinct(row_id)) from table;
  count
-
 5421022
(1 row)

dev= select n_distinct from pg_stats where tablename='table' and
attname='row_id';
 n_distinct

  -0.762951
(1 row)

dev= show work_mem;
 work_mem
---
 1249105kB
(1 row)

dev= select version();
   version

--
 PostgreSQL 9.3.6 on x86_64-unknown-linux-gnu, compiled by gcc (GCC) 4.6.3
20120306 (Red Hat 4.6.3-2), 64-bit
(1 row)


The query in question:

dev= explain (analyse,buffers)  select row_id as last_row_id from table
where code='XX' order by row_id desc limit 1;

QUERY PLAN

--
 Limit  (cost=0.43..1.67 rows=1 width=4) (actual time=835.281..835.282
rows=1 loops=1)
   Buffers: shared hit=187961
   -  Index Scan Backward using table_row_idx on table
 (cost=0.43..343741.98 rows=278731 width=4) (actual time=835.278..835.278
rows=1 loops=1)
 Filter: (code = 'XX'::bpchar)
 Rows Removed by Filter: 4050971
 Buffers: shared hit=187961
 Total runtime: 835.315 ms
(7 rows)

http://explain.depesz.com/s/uGC


So we can see it's doing a backwards index scan. Out of curiosity I tried a
forward scan and it was MUCH quicker:

dev= explain (analyse,buffers)  select row_id as first_row_id from table
where code='XX' order by row_id asc limit 1;
QUERY
PLAN
---
 Limit  (cost=0.43..1.67 rows=1 width=4) (actual time=19.473..19.474 rows=1
loops=1)
   Buffers: shared hit=26730
   -  Index Scan using table_row_idx on table  (cost=0.43..343741.98
rows=278731 width=4) (actual time=19.470..19.470 rows=1 loops=1)
 Filter: (code = 'XX'::bpchar)
 Rows Removed by Filter: 62786
 Buffers: shared hit=26730
 Total runtime: 19.509 ms
(7 rows)

http://explain.depesz.com/s/ASxD


I thought adding a index on row_id desc might be the answer but it has
little effect:

dev= create index row_id_desc_idx on table(row_id desc);
CREATE INDEX
Time: 5293.812 ms

dev= explain (analyse,buffers)  select row_id as last_row_id from table
where code='XX' order by row_id desc limit 1;

 QUERY PLAN


 Limit  (cost=0.43..1.66 rows=1 width=4) (actual time=944.666..944.667
rows=1 loops=1)
   Buffers: shared hit=176711 read=11071
   -  Index Scan using row_id_desc_idx on table  (cost=0.43..342101.98
rows=278731 width=4) (actual time=944.663..944.663 rows=1 loops=1)
 Filter: (code = 'XX'::bpchar)
 Rows Removed by Filter: 4050971
 Buffers: shared hit=176711 read=11071
 Total runtime: 944.699 ms
(7 rows)

http://explain.depesz.com/s/JStM

In fact, disabling the index scan completely improves matters considerably:

dev= drop index row_id_desc_idx;
DROP INDEX
dev= set enable_indexscan to off;
SET

dev= explain (analyse,buffers)  select row_id as last_row_id from table
where code='XX' order by row_id desc limit 1;

 QUERY PLAN


 Limit  (cost=74006.39..74006.39 rows=1 width=4) (actual
time=183.997..183.998 rows=1 loops=1)
   Buffers: shared hit=14723
   -  Sort  (cost=74006.39..74703.22 rows=278731 width=4) (actual
time=183.995..183.995 rows=1 loops=1)
 Sort Key: row_id
 Sort Method: top-N heapsort  Memory: 25kB
 Buffers: shared hit=14723
 -  Bitmap Heap Scan on table  (cost=5276.60..72612.74 rows=278731
width=4) (actual time=25.533..119.320 rows=275909 loops=1)
   Recheck Cond: (code = 'XX'::bpchar)
   Buffers: shared hit=14723
   -  Bitmap Index Scan on table_code_idx  (cost=0.00..5206.91

Re: [PERFORM] Index Scan Backward Slow

2015-05-01 Thread David Osborne
Simple... that did it... thanks!

dev= create index on table(code,row_id);
CREATE INDEX
Time: 38088.482 ms
dev= explain (analyse,buffers)  select row_id as last_row_id from table
where code='XX' order by row_id desc limit 1;

   QUERY PLAN


 Limit  (cost=0.43..0.46 rows=1 width=4) (actual time=0.070..0.071 rows=1
loops=1)
   Buffers: shared hit=2 read=3
   -  Index Only Scan Backward using table_code_row_id_idx on table
 (cost=0.43..7999.28 rows=278743 width=4) (actual time=0.067..0.067 rows=1
loops=1)
 Index Cond: (code = 'XX'::bpchar)
 Heap Fetches: 1
 Buffers: shared hit=2 read=3
 Total runtime: 0.097 ms
(7 rows)


On 1 May 2015 at 11:59, Evgeniy Shishkin itparan...@gmail.com wrote:


  On 01 May 2015, at 13:54, David Osborne da...@qcode.co.uk wrote:
 
  Hi,
 
  We have a query which finds the latest row_id for a particular code.
 
  We've found a backwards index scan is much slower than a forward one, to
 the extent that disabling indexscan altogether actually improves the query
 time.
 
  Can anyone suggest why this might be, and what's best to do to improve
 the query time?
 
 
 
  dev= \d table
 Table public.table
  Column|  Type  | Modifiers
  --++---
   row_id   | integer|
   code | character(2)   |
  Indexes:
  table_code_idx btree (code)
  table_row_idx btree (row_id)
 
  dev= select count(*) from table;
count
  -
   6090254
  (1 row)
 
  dev= select count(distinct(row_id)) from table;
count
  -
   5421022
  (1 row)
 
  dev= select n_distinct from pg_stats where tablename='table' and
 attname='row_id';
   n_distinct
  
-0.762951
  (1 row)
 
  dev= show work_mem;
   work_mem
  ---
   1249105kB
  (1 row)
 
  dev= select version();
 version
 
 --
   PostgreSQL 9.3.6 on x86_64-unknown-linux-gnu, compiled by gcc (GCC)
 4.6.3 20120306 (Red Hat 4.6.3-2), 64-bit
  (1 row)
 
 
  The query in question:
 
  dev= explain (analyse,buffers)  select row_id as last_row_id from table
 where code='XX' order by row_id desc limit 1;
 
  QUERY PLAN
 
 --
   Limit  (cost=0.43..1.67 rows=1 width=4) (actual time=835.281..835.282
 rows=1 loops=1)
 Buffers: shared hit=187961
 -  Index Scan Backward using table_row_idx on table
 (cost=0.43..343741.98 rows=278731 width=4) (actual time=835.278..835.278
 rows=1 loops=1)
   Filter: (code = 'XX'::bpchar)
   Rows Removed by Filter: 4050971
   Buffers: shared hit=187961
   Total runtime: 835.315 ms
  (7 rows)
 
  http://explain.depesz.com/s/uGC
 
 
  So we can see it's doing a backwards index scan. Out of curiosity I
 tried a forward scan and it was MUCH quicker:
 
  dev= explain (analyse,buffers)  select row_id as first_row_id from
 table where code='XX' order by row_id asc limit 1;
 
  QUERY PLAN
 
 ---
   Limit  (cost=0.43..1.67 rows=1 width=4) (actual time=19.473..19.474
 rows=1 loops=1)
 Buffers: shared hit=26730
 -  Index Scan using table_row_idx on table  (cost=0.43..343741.98
 rows=278731 width=4) (actual time=19.470..19.470 rows=1 loops=1)
   Filter: (code = 'XX'::bpchar)
   Rows Removed by Filter: 62786
   Buffers: shared hit=26730
   Total runtime: 19.509 ms
  (7 rows)
 
  http://explain.depesz.com/s/ASxD
 
 
  I thought adding a index on row_id desc might be the answer but it has
 little effect:
 
  dev= create index row_id_desc_idx on table(row_id desc);
  CREATE INDEX
  Time: 5293.812 ms
 
  dev= explain (analyse,buffers)  select row_id as last_row_id from table
 where code='XX' order by row_id desc limit 1;
 
 QUERY PLAN
 
 
   Limit  (cost=0.43..1.66 rows=1 width=4) (actual time=944.666..944.667
 rows=1 loops=1)
 Buffers: shared hit=176711 read=11071
 -  Index Scan using row_id_desc_idx on table  (cost=0.43..342101.98
 rows=278731 width=4) (actual time=944.663..944.663 rows=1 loops=1)
   Filter: (code = 'XX'::bpchar)
   Rows Removed by Filter: 4050971
   Buffers: shared hit=176711 read=11071
   Total runtime: 944.699 ms
  (7 rows)
 
  

Re: [PERFORM] Index Scan Backward Slow

2015-05-01 Thread Evgeniy Shishkin

 On 01 May 2015, at 13:54, David Osborne da...@qcode.co.uk wrote:
 
 Hi,
 
 We have a query which finds the latest row_id for a particular code.
 
 We've found a backwards index scan is much slower than a forward one, to the 
 extent that disabling indexscan altogether actually improves the query time.
 
 Can anyone suggest why this might be, and what's best to do to improve the 
 query time?
 
 
 
 dev= \d table
Table public.table
 Column|  Type  | Modifiers 
 --++---
  row_id   | integer| 
  code | character(2)   | 
 Indexes:
 table_code_idx btree (code)
 table_row_idx btree (row_id)
 
 dev= select count(*) from table;
   count  
 -
  6090254
 (1 row)
 
 dev= select count(distinct(row_id)) from table;
   count  
 -
  5421022
 (1 row)
 
 dev= select n_distinct from pg_stats where tablename='table' and 
 attname='row_id';
  n_distinct 
 
   -0.762951
 (1 row)
 
 dev= show work_mem;
  work_mem  
 ---
  1249105kB
 (1 row)
 
 dev= select version();
version
 
 --
  PostgreSQL 9.3.6 on x86_64-unknown-linux-gnu, compiled by gcc (GCC) 4.6.3 
 20120306 (Red Hat 4.6.3-2), 64-bit
 (1 row)
 
 
 The query in question:
 
 dev= explain (analyse,buffers)  select row_id as last_row_id from table 
 where code='XX' order by row_id desc limit 1;
   
 QUERY PLAN
   
 --
  Limit  (cost=0.43..1.67 rows=1 width=4) (actual time=835.281..835.282 rows=1 
 loops=1)
Buffers: shared hit=187961
-  Index Scan Backward using table_row_idx on table  
 (cost=0.43..343741.98 rows=278731 width=4) (actual time=835.278..835.278 
 rows=1 loops=1)
  Filter: (code = 'XX'::bpchar)
  Rows Removed by Filter: 4050971
  Buffers: shared hit=187961
  Total runtime: 835.315 ms
 (7 rows)
 
 http://explain.depesz.com/s/uGC
 
 
 So we can see it's doing a backwards index scan. Out of curiosity I tried a 
 forward scan and it was MUCH quicker:
 
 dev= explain (analyse,buffers)  select row_id as first_row_id from table 
 where code='XX' order by row_id asc limit 1;
 QUERY 
 PLAN 
 ---
  Limit  (cost=0.43..1.67 rows=1 width=4) (actual time=19.473..19.474 rows=1 
 loops=1)
Buffers: shared hit=26730
-  Index Scan using table_row_idx on table  (cost=0.43..343741.98 
 rows=278731 width=4) (actual time=19.470..19.470 rows=1 loops=1)
  Filter: (code = 'XX'::bpchar)
  Rows Removed by Filter: 62786
  Buffers: shared hit=26730
  Total runtime: 19.509 ms
 (7 rows)
 
 http://explain.depesz.com/s/ASxD
 
 
 I thought adding a index on row_id desc might be the answer but it has little 
 effect:
 
 dev= create index row_id_desc_idx on table(row_id desc);
 CREATE INDEX
 Time: 5293.812 ms
 
 dev= explain (analyse,buffers)  select row_id as last_row_id from table 
 where code='XX' order by row_id desc limit 1;
  
 QUERY PLAN
  
 
  Limit  (cost=0.43..1.66 rows=1 width=4) (actual time=944.666..944.667 rows=1 
 loops=1)
Buffers: shared hit=176711 read=11071
-  Index Scan using row_id_desc_idx on table  (cost=0.43..342101.98 
 rows=278731 width=4) (actual time=944.663..944.663 rows=1 loops=1)
  Filter: (code = 'XX'::bpchar)
  Rows Removed by Filter: 4050971
  Buffers: shared hit=176711 read=11071
  Total runtime: 944.699 ms
 (7 rows)
 
 http://explain.depesz.com/s/JStM
 
 In fact, disabling the index scan completely improves matters considerably:
 
 dev= drop index row_id_desc_idx;
 DROP INDEX
 dev= set enable_indexscan to off;
 SET   
 
 dev= explain (analyse,buffers)  select row_id as last_row_id from table 
 where code='XX' order by row_id desc limit 1;
QUERY 
 PLAN