HOla, Hellmuth / lista

Lo que pasa es que lo que esta en el include, no se indexa.

Es la forma de no salir del indice para buscar ese valor en particular, ahorrando el pase por la tabla

Salu2

On 2020-06-23 12:03, Hellmuth Vargas wrote:
*Hola Lista*
*
*
*Ya que en las últimas versiones de PostgreSQL contamos con  muchas más herramientas para la optimización de las consultas (estadísticas, paralelismo, JIT, etc, etc,etc ) pues hoy me dio por probar la cláusula INCLUDE en los índices, con el fin de indagar por su correcto uso y/o beneficios, hice este pequeño laboratorio:*

drop table test

create table  test(mes text,cantidad int);

insert into  test(mes,cantidad)
select b.mes, trunc(1000000*random())
FROM generate_series(1,300000) as a
cross join (values('ENE'),('FEB'),('MAR'),('ABR'),('MAY'),('JUN'),('JUL'),('AGO'),('SEP'),('OCT'),('NOV'),('DIC')) as b(mes);

  analyze test;

*-- sin indices*
explain (ANALYZE,BUFFERS,TIMING)  select sum(cantidad)
from test where mes='JUN';

---
Finalize Aggregate  (cost=23742.83..23742.83 rows=1 width=8) (actual time=174.405..174.406 rows=1 loops=1)
  Buffers: shared read=15930
  ->  Gather  (cost=23742.62..23742.83 rows=2 width=8) (actual time=174.299..178.505 rows=3 loops=1)
        Workers Planned: 2
        Workers Launched: 2
        Buffers: shared read=15930
        ->  Partial Aggregate  (cost=22742.62..22742.63 rows=1 width=8) (actual time=168.675..168.676 rows=1 loops=3)
              Buffers: shared read=15930
              ->  Parallel Seq Scan on test  (cost=0.00..22680.00 rows=125250 width=4) (actual time=0.044..160.487 rows=100000 loops=3)
                    Filter: (mes = 'JUN'::text)
                    Rows Removed by Filter: 1100000
                    Buffers: shared read=15930
Planning Time: 0.149 ms
Execution Time: 178.545 ms


*-- indice solo por mes*
create index concurrently idx_test_mes on test(mes);

explain (ANALYZE,BUFFERS,TIMING) select sum(cantidad)
from test where mes='JUN';


Aggregate  (cost=18830.72..18830.73 rows=1 width=8) (actual time=238.116..238.116 rows=1 loops=1)
  Buffers: shared read=16752
  ->  Bitmap Heap Scan on test  (cost=1385.12..18679.16 rows=303120 width=4) (actual time=28.007..206.884 rows=300000 loops=1)
        Recheck Cond: (mes = 'JUN'::text)
        Heap Blocks: exact=15930
        Buffers: shared read=16752
        ->  Bitmap Index Scan on idx_test_mes  (cost=0.00..1369.97 rows=303120 width=0) (actual time=25.312..25.312 rows=300000 loops=1)
              Index Cond: (mes = 'JUN'::text)
              Buffers: shared read=822
Planning Time: 0.321 ms
Execution Time: 238.150 ms


*-- indice por mes incluye cantidad*
create index concurrently idx_test_mes2 on test(mes) include (cantidad);


explain (ANALYZE,BUFFERS,TIMING) select sum(cantidad)
from test where mes='JUN';

Aggregate  (cost=18830.72..18830.73 rows=1 width=8) (actual time=227.678..227.678 rows=1 loops=1)
  Buffers: shared read=16752
  ->  Bitmap Heap Scan on test  (cost=1385.12..18679.16 rows=303120 width=4) (actual time=26.342..198.513 rows=300000 loops=1)
        Recheck Cond: (mes = 'JUN'::text)
        Heap Blocks: exact=15930
        Buffers: shared read=16752
        ->  Bitmap Index Scan on idx_test_mes2  (cost=0.00..1369.97 rows=303120 width=0) (actual time=23.680..23.680 rows=300000 loops=1)
              Index Cond: (mes = 'JUN'::text)
              Buffers: shared read=822
Planning Time: 0.540 ms
Execution Time: 227.710 ms

*-- consulta filtrando mes y cantidad*
explain (ANALYZE,BUFFERS,TIMING) select sum(cantidad)
from test where mes='JUN' and cantidad between 1000 and 40000

Aggregate  (cost=18973.69..18973.69 rows=1 width=8) (actual time=352.304..352.304 rows=1 loops=1)
  Buffers: shared read=16752
  ->  Bitmap Heap Scan on test  (cost=1370.56..18967.72 rows=11932 width=4) (actual time=29.374..350.099 rows=11687 loops=1)
        Recheck Cond: (mes = 'JUN'::text)
        Filter: ((cantidad >= 1000) AND (cantidad <= 40000))
        Rows Removed by Filter: 288313
        Heap Blocks: exact=15930
        Buffers: shared read=16752
        ->  Bitmap Index Scan on idx_test_mes2  (cost=0.00..1369.97 rows=303120 width=0) (actual time=26.412..26.412 rows=300000 loops=1)
              Index Cond: (mes = 'JUN'::text)
              Buffers: shared read=822
Planning Time: 0.684 ms
Execution Time: 352.349 ms

*-- indice compuesto por mes y cantidad*
create index concurrently idx_test_mes3 on test(mes,cantidad);

explain (ANALYZE,BUFFERS,TIMING)  select sum(cantidad)
from test where mes='JUN';


Aggregate  (cost=18830.72..18830.73 rows=1 width=8) (actual time=234.626..234.627 rows=1 loops=1)
  Buffers: shared read=16752
  ->  Bitmap Heap Scan on test  (cost=1385.12..18679.16 rows=303120 width=4) (actual time=31.220..203.525 rows=300000 loops=1)
        Recheck Cond: (mes = 'JUN'::text)
        Heap Blocks: exact=15930
        Buffers: shared read=16752
        ->  Bitmap Index Scan on idx_test_mes3  (cost=0.00..1369.97 rows=303120 width=0) (actual time=28.165..28.165 rows=300000 loops=1)
              Index Cond: (mes = 'JUN'::text)
              Buffers: shared read=822
Planning Time: 0.191 ms
Execution Time: 234.656 ms


*  -- consulta filtrando mes y cantidad *
explain (ANALYZE,BUFFERS,TIMING) select sum(cantidad)
from test where mes='JUN' and cantidad between 1000 and 40000

Aggregate  (cost=9046.67..9046.67 rows=1 width=8) (actual time=174.658..174.659 rows=1 loops=1)
  Buffers: shared read=8403
  ->  Bitmap Heap Scan on test  (cost=66.81..9040.70 rows=11932 width=4) (actual time=6.184..171.275 rows=11687 loops=1)         Recheck Cond: ((mes = 'JUN'::text) AND (cantidad >= 1000) AND (cantidad <= 40000))
        Heap Blocks: exact=8368
        Buffers: shared read=8403
        ->  Bitmap Index Scan on idx_test_mes3  (cost=0.00..66.22 rows=11932 width=0) (actual time=4.535..4.535 rows=11687 loops=1)               Index Cond: ((mes = 'JUN'::text) AND (cantidad >= 1000) AND (cantidad <= 40000))
              Buffers: shared read=35
Planning Time: 0.732 ms
Execution Time: 174.706 ms



*-- tamaños de la tabla e indices creados*
SELECT
 nspname,relname,pg_relation_size(c.oid),pg_size_pretty(pg_relation_size(c.oid)) as "size"
from pg_class c left join pg_namespace n on ( n.oid=c.relnamespace)
where nspname not in ('pg_catalog','information_schema') and relname ilike '%test%'
order by pg_relation_size(c.oid) desc;

nspname;relname; pg_relation_size;size
public;test;130498560;124 MB
public;idx_test_mes;80887808;77 MB
public;idx_test_mes2;80887808;77 MB
public;idx_test_mes3;80887808;77 MB

*La pregunta es, en qué situaciones se debe emplear o se justifica un  include versus, por ejemplo,  un índice compuesto? de antemano gracias*

--
Cordialmente,

Ing. Hellmuth I. Vargas S.

Reply via email to