I have been interested in a query that returns a batch of results filtered
by a subset of the first column of an index and ordered by the second.

I created a simple (hopefully) reproducible example of the issue, the two
queries describe the same data but have very different costs (explain
output included in the attached file).
server_version 12.8

On slack #pgsql-hackers channel, @sfrost, it was suggested that what I
described is achieved by index skip scan. How can I get a development build
to test this feature?

create table ordered_skip_index_scan_test (id serial not null
    constraint ordered_skip_index_scan_test_pkey primary key, col1 INT4, col2 
INT4);

insert into ordered_skip_index_scan_test (col1, col2)
  select trunc(sqrt(random()*1e6)) as col1, (random()*1e6)::int as col2
from generate_series(1, 11e6);

create index ordered_skip_index_col2 on ordered_skip_index_scan_test using 
btree (col2)
create index ordered_skip_index_col1_col2 on ordered_skip_index_scan_test using 
btree (col1, col2)
analyse ordered_skip_index_scan_test;

select count(*) from ordered_skip_index_scan_test;

explain (analyse, buffers)
select * from ordered_skip_index_scan_test
where col2 > 50000
  and col1 in (1, 999)
order by col2
limit 10;
-- Limit  (cost=0.43..263.30 rows=10 width=12) (actual time=0.083..5.668 
rows=10 loops=1)
--   Buffers: shared hit=5362
--   ->  Index Scan using ordered_skip_index_col2 on 
ordered_skip_index_scan_test  (cost=0.43..492524.49 rows=18737 width=12) 
(actual time=0.082..5.664 rows=10 loops=1)
--         Index Cond: (col2 > 50000)
--         Filter: (col1 = ANY ('{1,999}'::integer[]))
--         Rows Removed by Filter: 5343
--         Buffers: shared hit=5362
-- Planning Time: 0.135 ms
-- Execution Time: 5.693 ms


explain (analyse, buffers)
with t as ((select *
       from ordered_skip_index_scan_test
       where col2 between 50000 and 500000
         and col1 = 13
       limit 10)
union (select *
       from ordered_skip_index_scan_test
       -- Not taking advantage of the partial
       -- results to adjust the bounds of the
       -- subsequent queries
       where col2 between 50000 and 500000
         and col1 = 8
       limit 10)
union (select *
       from ordered_skip_index_scan_test
       where col2 between 50000 and 500000
         and col1 = 5
       limit 10)
union (select *
       from ordered_skip_index_scan_test
       where col2 between 50000 and 500000
         and col1 = 845
       limit 10))
select * from t
order by col2
limit 10;
-- Limit  (cost=159.95..159.97 rows=10 width=12) (actual time=0.225..0.230 
rows=10 loops=1)
--   Buffers: shared hit=52
--   ->  Sort  (cost=159.95..160.05 rows=40 width=12) (actual time=0.224..0.227 
rows=10 loops=1)
--         Sort Key: ordered_skip_index_scan_test.col2
--         Sort Method: top-N heapsort  Memory: 25kB
--         Buffers: shared hit=52
--         ->  HashAggregate  (cost=158.28..158.68 rows=40 width=12) (actual 
time=0.167..0.178 rows=40 loops=1)
--               Group Key: ordered_skip_index_scan_test.id, 
ordered_skip_index_scan_test.col1, ordered_skip_index_scan_test.col2
--               Batches: 1  Memory Usage: 24kB
--               Buffers: shared hit=52
--               ->  Append  (cost=0.43..157.98 rows=40 width=12) (actual 
time=0.026..0.144 rows=40 loops=1)
--                     Buffers: shared hit=52
--                     ->  Limit  (cost=0.43..39.35 rows=10 width=12) (actual 
time=0.026..0.044 rows=10 loops=1)
--                           Buffers: shared hit=13
--                           ->  Index Scan using ordered_skip_index_col1_col2 
on ordered_skip_index_scan_test  (cost=0.43..17214.39 rows=4424 width=12) 
(actual time=0.025..0.042 rows=10 loops=1)
--                                 Index Cond: ((col1 = 13) AND (col2 >= 50000) 
AND (col2 <= 500000))
--                                 Buffers: shared hit=13
--                     ->  Limit  (cost=0.43..39.35 rows=10 width=12) (actual 
time=0.011..0.028 rows=10 loops=1)
--                           Buffers: shared hit=13
--                           ->  Index Scan using ordered_skip_index_col1_col2 
on ordered_skip_index_scan_test ordered_skip_index_scan_test_1  
(cost=0.43..17214.39 rows=4424 width=12) (actual time=0.011..0.026 rows=10 
loops=1)
--                                 Index Cond: ((col1 = 8) AND (col2 >= 50000) 
AND (col2 <= 500000))
--                                 Buffers: shared hit=13
--                     ->  Limit  (cost=0.43..39.35 rows=10 width=12) (actual 
time=0.010..0.025 rows=10 loops=1)
--                           Buffers: shared hit=13
--                           ->  Index Scan using ordered_skip_index_col1_col2 
on ordered_skip_index_scan_test ordered_skip_index_scan_test_2  
(cost=0.43..17214.39 rows=4424 width=12) (actual time=0.010..0.023 rows=10 
loops=1)
--                                 Index Cond: ((col1 = 5) AND (col2 >= 50000) 
AND (col2 <= 500000))
--                                 Buffers: shared hit=13
--                     ->  Limit  (cost=0.43..39.35 rows=10 width=12) (actual 
time=0.026..0.040 rows=10 loops=1)
--                           Buffers: shared hit=13
--                           ->  Index Scan using ordered_skip_index_col1_col2 
on ordered_skip_index_scan_test ordered_skip_index_scan_test_3  
(cost=0.43..17214.39 rows=4424 width=12) (actual time=0.026..0.039 rows=10 
loops=1)
--                                 Index Cond: ((col1 = 845) AND (col2 >= 
50000) AND (col2 <= 500000))
--                                 Buffers: shared hit=13
-- Planning Time: 0.358 ms
-- Execution Time: 0.286 ms

Reply via email to