*Hola Lista*

*Ya que en las últimas versiones de PostgreSQL contamos con
muchas más herramientas para la optimización de las consultas
(estadísticas, paralelismo, JIT, etc, etc,etc ) pues hoy me dio por
probar la cláusula INCLUDE en los índices, con el fin de indagar por su
correcto uso y/o beneficios, hice este pequeño laboratorio:*

drop table test

create table  test(mes text,cantidad int);

insert into  test(mes,cantidad)
select b.mes, trunc(1000000*random())
FROM generate_series(1,300000) as a
cross join
(values('ENE'),('FEB'),('MAR'),('ABR'),('MAY'),('JUN'),('JUL'),('AGO'),('SEP'),('OCT'),('NOV'),('DIC'))
as b(mes);

  analyze test;

*-- sin indices*
explain (ANALYZE,BUFFERS,TIMING)  select sum(cantidad)
from test where mes='JUN';

---
Finalize Aggregate  (cost=23742.83..23742.83 rows=1 width=8) (actual
time=174.405..174.406 rows=1 loops=1)
  Buffers: shared read=15930
  ->  Gather  (cost=23742.62..23742.83 rows=2 width=8) (actual
time=174.299..178.505 rows=3 loops=1)
        Workers Planned: 2
        Workers Launched: 2
        Buffers: shared read=15930
        ->  Partial Aggregate  (cost=22742.62..22742.63 rows=1 width=8)
(actual time=168.675..168.676 rows=1 loops=3)
              Buffers: shared read=15930
              ->  Parallel Seq Scan on test  (cost=0.00..22680.00
rows=125250 width=4) (actual time=0.044..160.487 rows=100000 loops=3)
                    Filter: (mes = 'JUN'::text)
                    Rows Removed by Filter: 1100000
                    Buffers: shared read=15930
Planning Time: 0.149 ms
Execution Time: 178.545 ms


*-- indice solo por mes*
create index concurrently idx_test_mes on test(mes);

explain (ANALYZE,BUFFERS,TIMING) select sum(cantidad)
from test where mes='JUN';


Aggregate  (cost=18830.72..18830.73 rows=1 width=8) (actual
time=238.116..238.116 rows=1 loops=1)
  Buffers: shared read=16752
  ->  Bitmap Heap Scan on test  (cost=1385.12..18679.16 rows=303120
width=4) (actual time=28.007..206.884 rows=300000 loops=1)
        Recheck Cond: (mes = 'JUN'::text)
        Heap Blocks: exact=15930
        Buffers: shared read=16752
        ->  Bitmap Index Scan on idx_test_mes  (cost=0.00..1369.97
rows=303120 width=0) (actual time=25.312..25.312 rows=300000 loops=1)
              Index Cond: (mes = 'JUN'::text)
              Buffers: shared read=822
Planning Time: 0.321 ms
Execution Time: 238.150 ms


*-- indice por mes incluye cantidad*
create index concurrently idx_test_mes2 on test(mes) include (cantidad);


explain (ANALYZE,BUFFERS,TIMING) select sum(cantidad)
from test where mes='JUN';

Aggregate  (cost=18830.72..18830.73 rows=1 width=8) (actual
time=227.678..227.678 rows=1 loops=1)
  Buffers: shared read=16752
  ->  Bitmap Heap Scan on test  (cost=1385.12..18679.16 rows=303120
width=4) (actual time=26.342..198.513 rows=300000 loops=1)
        Recheck Cond: (mes = 'JUN'::text)
        Heap Blocks: exact=15930
        Buffers: shared read=16752
        ->  Bitmap Index Scan on idx_test_mes2  (cost=0.00..1369.97
rows=303120 width=0) (actual time=23.680..23.680 rows=300000 loops=1)
              Index Cond: (mes = 'JUN'::text)
              Buffers: shared read=822
Planning Time: 0.540 ms
Execution Time: 227.710 ms

*-- consulta filtrando mes y cantidad*
explain (ANALYZE,BUFFERS,TIMING) select sum(cantidad)
from test where mes='JUN' and cantidad between 1000 and 40000

Aggregate  (cost=18973.69..18973.69 rows=1 width=8) (actual
time=352.304..352.304 rows=1 loops=1)
  Buffers: shared read=16752
  ->  Bitmap Heap Scan on test  (cost=1370.56..18967.72 rows=11932 width=4)
(actual time=29.374..350.099 rows=11687 loops=1)
        Recheck Cond: (mes = 'JUN'::text)
        Filter: ((cantidad >= 1000) AND (cantidad <= 40000))
        Rows Removed by Filter: 288313
        Heap Blocks: exact=15930
        Buffers: shared read=16752
        ->  Bitmap Index Scan on idx_test_mes2  (cost=0.00..1369.97
rows=303120 width=0) (actual time=26.412..26.412 rows=300000 loops=1)
              Index Cond: (mes = 'JUN'::text)
              Buffers: shared read=822
Planning Time: 0.684 ms
Execution Time: 352.349 ms

*-- indice compuesto por mes y cantidad*
create index concurrently idx_test_mes3 on test(mes,cantidad);

explain (ANALYZE,BUFFERS,TIMING)  select sum(cantidad)
from test where mes='JUN';


Aggregate  (cost=18830.72..18830.73 rows=1 width=8) (actual
time=234.626..234.627 rows=1 loops=1)
  Buffers: shared read=16752
  ->  Bitmap Heap Scan on test  (cost=1385.12..18679.16 rows=303120
width=4) (actual time=31.220..203.525 rows=300000 loops=1)
        Recheck Cond: (mes = 'JUN'::text)
        Heap Blocks: exact=15930
        Buffers: shared read=16752
        ->  Bitmap Index Scan on idx_test_mes3  (cost=0.00..1369.97
rows=303120 width=0) (actual time=28.165..28.165 rows=300000 loops=1)
              Index Cond: (mes = 'JUN'::text)
              Buffers: shared read=822
Planning Time: 0.191 ms
Execution Time: 234.656 ms


*  -- consulta filtrando mes y cantidad *
explain (ANALYZE,BUFFERS,TIMING) select sum(cantidad)
from test where mes='JUN' and cantidad between 1000 and 40000

Aggregate  (cost=9046.67..9046.67 rows=1 width=8) (actual
time=174.658..174.659 rows=1 loops=1)
  Buffers: shared read=8403
  ->  Bitmap Heap Scan on test  (cost=66.81..9040.70 rows=11932 width=4)
(actual time=6.184..171.275 rows=11687 loops=1)
        Recheck Cond: ((mes = 'JUN'::text) AND (cantidad >= 1000) AND
(cantidad <= 40000))
        Heap Blocks: exact=8368
        Buffers: shared read=8403
        ->  Bitmap Index Scan on idx_test_mes3  (cost=0.00..66.22
rows=11932 width=0) (actual time=4.535..4.535 rows=11687 loops=1)
              Index Cond: ((mes = 'JUN'::text) AND (cantidad >= 1000) AND
(cantidad <= 40000))
              Buffers: shared read=35
Planning Time: 0.732 ms
Execution Time: 174.706 ms



*-- tamaños de la tabla e indices creados*
SELECT

 nspname,relname,pg_relation_size(c.oid),pg_size_pretty(pg_relation_size(c.oid))
as "size"
from pg_class c left join pg_namespace n on ( n.oid=c.relnamespace)
where nspname not in ('pg_catalog','information_schema') and relname ilike
'%test%'
order by pg_relation_size(c.oid) desc;

nspname;relname; pg_relation_size;size
public;test;130498560;124 MB
public;idx_test_mes;80887808;77 MB
public;idx_test_mes2;80887808;77 MB
public;idx_test_mes3;80887808;77 MB

*La pregunta es, en qué situaciones se debe emplear o se justifica un
include versus, por ejemplo,  un índice compuesto? de antemano gracias*

-- 
Cordialmente,

Ing. Hellmuth I. Vargas S.

Reply via email to