Hi,

I have a problem on 9.3.14 with a query that accesses table:

Size: (retrieved by query 
https://gist.github.com/romank0/74f9d1d807bd3f41c0729d0fc6126749)

 schemaname |    relname    |  size  | toast  | associated_idx_size |  total
------------+---------------+--------+--------+---------------------+---------
 public     | document_head | 275 MB | 630 MB | 439 MB              | 1345 MB


Definition:
                                 Table "public.document_head"
           Column            |           Type           |              Modifiers
-----------------------------+--------------------------+-------------------------------------
 snapshot_id                 | character varying(36)    | not null
 id                          | character varying(36)    | not null
 base_type                   | character varying(50)    | not null
 is_cascade                  | boolean                  | not null default false
 parent_id                   | character varying(36)    |
 fileplan_node_id            | character varying(36)    |
 state                       | character varying(10)    | default 
'ACTIVE'::character varying
 title                       | character varying(4096)  | not null
 properties                  | text                     | not null
 properties_cache            | hstore                   | not null
 serial_number               | integer                  | not null
 major_version               | integer                  | not null
 minor_version               | integer                  | not null
 version_description         | text                     |
 sensitivity                 | integer                  | not null default 10
 id_path                     | ltree                    |
 path_name                   | character varying(4096)  | collate C not null
 ltx_id                      | bigint                   | not null
 created_by                  | integer                  | not null
 created_date                | timestamp with time zone | not null
 modified_by                 | integer                  | not null
 modified_date               | timestamp with time zone | not null
 responsible_user_ids        | integer[]                |
 origin_id                   | character varying(36)    |
 origin_snapshot_id          | character varying(36)    |
 ssn                         | character varying(64)    |
 record_physical_location    | text                     |
 record_physical_location_id | text                     |
 record_created_date         | timestamp with time zone |
 record_aggregated_date      | timestamp with time zone |
 record_last_review_comment  | text                     |
 record_last_review_date     | timestamp with time zone |
 record_next_review_date     | timestamp with time zone |
 record_originated_date      | timestamp with time zone |
 record_is_vital             | boolean                  | not null default false
 storage_plan_state          | text                     | not null default 
'New'::text
 cut_off_date                | timestamp with time zone |
 dispose_date                | timestamp with time zone |
 archive_date                | timestamp with time zone |
Indexes:
    "document_head__id__uniq_key" PRIMARY KEY, btree (id)
    "document_head__parent_id__path_name__unq_idx" UNIQUE, btree (parent_id, 
path_name) WHERE state::text = 'ACTIVE'::text
    "document_head__snapshot_id__unq" UNIQUE, btree (snapshot_id)
    "document_head__base_type__idx" btree (base_type) WHERE state::text <> 
'DELETED'::text
    "document_head__fileplan_node_id__idx" btree (fileplan_node_id)
    "document_head__id__idx" btree (id) WHERE state::text <> 'DELETED'::text
    "document_head__id_path__btree__idx" btree (id_path) WHERE state::text <> 
'DELETED'::text
    "document_head__id_path__gist__idx" gist (id_path)
    "document_head__ltx_id__idx" btree (ltx_id)
    "document_head__origin_id__hotfix__idx" btree (origin_id) WHERE origin_id 
IS NOT NULL
    "document_head__origin_id__idx" btree (origin_id) WHERE state::text <> 
'DELETED'::text AND origin_id IS NOT NULL
    "document_head__parent_id__idx" btree (parent_id)
    "document_head__properties_cache__contact_username_idx" btree 
((properties_cache -> 'person_meta_info.username'::text)) WHERE base_type::text 
= 'Contact'::text AND exist(properties_cache, 'person_meta_info.username'::text)
    "document_head__properties_cache__emailmeta_message_id__idx" btree 
((properties_cache -> 'emailmeta.message_id'::text)) WHERE base_type::text = 
'File'::text AND exist(properties_cache, 'emailmeta.message_id'::text)
    "document_head__properties_cache__idx" gist (properties_cache) WHERE 
state::text <> 'DELETED'::text
    "document_head__properties_cache__project_identifier__idx" btree 
((properties_cache -> 'project.identifier'::text)) WHERE base_type::text = 
'Project'::text AND exist(properties_cache, 'project.identifier'::text)
    "document_head__properties_cache__published_origin__idx" btree 
((properties_cache -> 'file_published_origin_id.origin_id'::text)) WHERE 
base_type::text = 'File'::text AND exist(properties_cache, 
'file_published_origin_id.origin_id'::text)
    "document_head__state__idx" btree (state)
    "document_head__storage_plan_state__idx" btree (storage_plan_state) WHERE 
state::text <> 'DELETED'::text
Check constraints:
    "document_base_storage_plan_state_check" CHECK (storage_plan_state = ANY 
(ARRAY['NEW'::text, 'READY_FOR_CUTOFF'::text, 'CUTOFF'::text, 
'READY_FOR_DISPOSITION'::text, 'DISPOSED'::text]))
    "document_head__sensitivity_check" CHECK (sensitivity = ANY (ARRAY[10, 20, 
30]))
Foreign-key constraints:
    "document_head__created_by__fk" FOREIGN KEY (created_by) REFERENCES 
auth_user(id)
    "document_head__modified_by__fk" FOREIGN KEY (modified_by) REFERENCES 
auth_user(id)
    "document_head__parent_id__fk" FOREIGN KEY (parent_id) REFERENCES 
document(id)

Some notes:
1. properties stores json that for some records may be as large as 300k

select count(*) from document_head  where length(properties) > 100000;
 count
-------
   535
(1 row)

select count(*) from document_head  where length(properties) > 20000;
 count
-------
 13917
(1 row)

select count(*) from document_head  where length(properties) > 1000;
 count
-------
 51708
(1 row)

select count(*) from document_head  where length(properties) > 300000;
 count
-------
     3
(1 row)

select max(length(properties)) from document_head;
  max
--------
 334976
(1 row)

2. properties_cache stores parsed properties: key is jsonpath of a key in json 
and value is a value.
3. all results here are retrieved after running `analyze document_head` and 
`vacuum document_head` manually.
4. I tried different work_mem settings up to 100MB and there's no effect on the 
main issue described below.
5. I haven't tested disks speed as first of all I think it is irrelevant to the 
problem and it is not easy to do as this is production system.

The function that is used in the query:

CREATE OR REPLACE FUNCTION public.get_doc_path(document_id character varying)
 RETURNS ltree
 LANGUAGE plpgsql
 STABLE
AS $function$
DECLARE
    path ltree;
BEGIN
    select id_path into path from document_head where id = document_id;
    RETURN path;
END $function$


The original query is rather big one and the simplified version where the issue 
can still be demonstrated is:

explain (analyze, buffers)
with trees AS (
SELECT d.id, d.snapshot_id , NULL :: text[] AS permissions
  FROM document_head AS d
  WHERE (d.id_path <@ get_doc_path('78157c60-45bc-42c1-9aad-c5651995db5c') 
        AND d.id != '78157c60-45bc-42c1-9aad-c5651995db5c') AND d.state != 
'DELETED'
)
SELECT COUNT(*) FROM trees;

I get this plan https://explain.depesz.com/s/UQX4h
                                                                                
                    QUERY PLAN
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
 Aggregate  (cost=86227.19..86227.20 rows=1 width=0) (actual 
time=3878.775..3878.776 rows=1 loops=1)
   Buffers: shared hit=747698, temp written=1587
   CTE trees
     ->  Seq Scan on document_head d  (cost=0.00..82718.21 rows=155955 
width=74) (actual time=0.211..3620.044 rows=154840 loops=1)
           Filter: (((id)::text <> 
'78157c60-45bc-42c1-9aad-c5651995db5c'::text) AND ((state)::text <> 
'DELETED'::text) AND (id_path <@ 
get_doc_path('78157c60-45bc-42c1-9aad-c5651995db5c'::character varying)))
           Rows Removed by Filter: 23357
           Buffers: shared hit=747698
   ->  CTE Scan on trees  (cost=0.00..3119.10 rows=155955 width=0) (actual 
time=0.215..3828.519 rows=154840 loops=1)
         Buffers: shared hit=747698, temp written=1587
 Total runtime: 3881.781 ms
(10 rows)

If I change the predicate for ltree to use subquery the plan and execution time 
changes:

explain (analyze, buffers)
with trees AS (
SELECT d.id, d.snapshot_id , NULL :: text[] AS permissions
  FROM document_head AS d
  WHERE (d.id_path <@ (select 
get_doc_path('78157c60-45bc-42c1-9aad-c5651995db5c'))
        AND d.id != '78157c60-45bc-42c1-9aad-c5651995db5c') AND d.state != 
'DELETED'
)
SELECT COUNT(*) FROM trees;


https://explain.depesz.com/s/eUR
                                                                         QUERY 
PLAN
------------------------------------------------------------------------------------------------------------------------------------------------------------
 Aggregate  (cost=763.19..763.20 rows=1 width=0) (actual time=430.519..430.519 
rows=1 loops=1)
   Buffers: shared hit=47768, temp written=1587
   CTE trees
     ->  Bitmap Heap Scan on document_head d  (cost=82.05..759.20 rows=177 
width=74) (actual time=70.703..249.419 rows=154840 loops=1)
           Recheck Cond: (id_path <@ $0)
           Rows Removed by Index Recheck: 11698
           Filter: (((id)::text <> 
'78157c60-45bc-42c1-9aad-c5651995db5c'::text) AND ((state)::text <> 
'DELETED'::text))
           Rows Removed by Filter: 23
           Buffers: shared hit=47768
           InitPlan 1 (returns $0)
             ->  Result  (cost=0.00..0.26 rows=1 width=0) (actual 
time=0.083..0.084 rows=1 loops=1)
                   Buffers: shared hit=4
           ->  Bitmap Index Scan on document_head__id_path__gist__idx  
(cost=0.00..81.74 rows=178 width=0) (actual time=68.326..68.326 rows=159979 
loops=1)
                 Index Cond: (id_path <@ $0)
                 Buffers: shared hit=16238
   ->  CTE Scan on trees  (cost=0.00..3.54 rows=177 width=0) (actual 
time=70.707..388.714 rows=154840 loops=1)
         Buffers: shared hit=47768, temp written=1587
 Total runtime: 433.410 ms
(18 rows)

I can see that:
1. both queries return exactly the same data and require the same underlying 
data to produce the result.
2. when I use subquery postgres cannot estimate the number of records that are 
going to be found by <@. 
3. number of buffers processed by the quicker query is 15 times smaller than on 
the slow one. I assume that this is the reason the query is slower. 

My understanding how the planner works is that it evaluates some plans and 
calculates cost based on the settings and and available statistics. 
Settings define relative cost of operations like random vs sequential io vs 
processing in memory data.
I assume that it is better to use queries that allows planner to use correct 
estimations and tune settings so that planner knows correct operations cost.

My questions:
1. how to find out why slow execution requires 15x more buffers.
2. In the slow executin planner either does not consider the plan similar to 
the quick on or estimates it as worse. How to find out which of thes cases (if 
any) is true? And what I can try to make planner use better plan?

I understand that one option to try is to upgrade to more recent version and 
I'm going to test this but this may take a while until the affected production 
system will get an upgrade.


More information:

OS: Ubuntu 14.04.2 LTS (GNU/Linux 3.13.0-125-generic x86_64)
PG version: PostgreSQL 9.3.14 on x86_64-unknown-linux-gnu, compiled by gcc 
(GCC) 4.8.2 20140120 (Red Hat 4.8.2-16), 64-bit
History: no history as this is a new query.
Hardware: aws m4.large (2 vCPUs, RAM 8GB) with gp2 (SSD) storage with 
throughtput up to ~20MB/s. 


Non default server settings:

             name              |                                                
                                                                                
                                current_setting                                 
                                                                                
                                               |       source
--------------------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+--------------------
 application_name               | psql                                          
                                                                                
                                                                                
                                                                                
                                                | client
 archive_command                | /etc/rds/dbbin/pgscripts/rds_wal_archive %p   
                                                                                
                                                                                
                                                                                
                                                | configuration file
 archive_mode                   | on                                            
                                                                                
                                                                                
                                                                                
                                                | configuration file
 archive_timeout                | 5min                                          
                                                                                
                                                                                
                                                                                
                                                | configuration file
 checkpoint_completion_target   | 0.9                                           
                                                                                
                                                                                
                                                                                
                                                | configuration file
 checkpoint_segments            | 16                                            
                                                                                
                                                                                
                                                                                
                                                | configuration file
 client_encoding                | UTF8                                          
                                                                                
                                                                                
                                                                                
                                                | client
 effective_cache_size           | 3987336kB                                     
                                                                                
                                                                                
                                                                                
                                                | configuration file
 fsync                          | on                                            
                                                                                
                                                                                
                                                                                
                                                | configuration file
 hot_standby                    | off                                           
                                                                                
                                                                                
                                                                                
                                                | configuration file
 listen_addresses               | *                                             
                                                                                
                                                                                
                                                                                
                                                | command line
 lo_compat_privileges           | off                                           
                                                                                
                                                                                
                                                                                
                                                | configuration file
 log_checkpoints                | on                                            
                                                                                
                                                                                
                                                                                
                                                | configuration file
 log_destination                | stderr                                        
                                                                                
                                                                                
                                                                                
                                                | configuration file
 log_directory                  | /rdsdbdata/log/error                          
                                                                                
                                                                                
                                                                                
                                                | configuration file
 log_file_mode                  | 0644                                          
                                                                                
                                                                                
                                                                                
                                                | configuration file
 log_filename                   | postgresql.log.%Y-%m-%d-%H                    
                                                                                
                                                                                
                                                                                
                                                | configuration file
 log_hostname                   | on                                            
                                                                                
                                                                                
                                                                                
                                                | configuration file
 log_line_prefix                | %t:%r:%u@%d:[%p]:                             
                                                                                
                                                                                
                                                                                
                                                | configuration file
 log_min_duration_statement     | 1s                                            
                                                                                
                                                                                
                                                                                
                                                | configuration file
 log_rotation_age               | 1h                                            
                                                                                
                                                                                
                                                                                
                                                | configuration file
 log_timezone                   | UTC                                           
                                                                                
                                                                                
                                                                                
                                                | configuration file
 log_truncate_on_rotation       | off                                           
                                                                                
                                                                                
                                                                                
                                                | configuration file
 logging_collector              | on                                            
                                                                                
                                                                                
                                                                                
                                                | configuration file
 maintenance_work_mem           | 127MB                                         
                                                                                
                                                                                
                                                                                
                                                | configuration file
 max_connections                | 1000                                          
                                                                                
                                                                                
                                                                                
                                                | configuration file
 max_locks_per_transaction      | 64                                            
                                                                                
                                                                                
                                                                                
                                                | configuration file
 max_prepared_transactions      | 0                                             
                                                                                
                                                                                
                                                                                
                                                | configuration file
 max_stack_depth                | 6MB                                           
                                                                                
                                                                                
                                                                                
                                                | configuration file
 max_wal_senders                | 5                                             
                                                                                
                                                                                
                                                                                
                                                | configuration file
 port                           | 5432                                          
                                                                                
                                                                                
                                                                                
                                                | configuration file
 rds.extensions                 | 
btree_gin,btree_gist,chkpass,citext,cube,dblink,dict_int,dict_xsyn,earthdistance,fuzzystrmatch,hstore,intagg,intarray,isn,ltree,pgcrypto,pgrowlocks,pg_stat_statements,pg_trgm,plcoffee,plls,plperl,plpgsql,pltcl,plv8,postgis,postgis_tiger_geocoder,postgis_topology,postgres_fdw,sslinfo,tablefunc,test_parser,tsearch2,unaccent,uuid-ossp
 | configuration file
 rds.internal_databases         | rdsadmin,template0                            
                                                                                
                                                                                
                                                                                
                                                | configuration file
 rds.superuser_variables        | session_replication_role                      
                                                                                
                                                                                
                                                                                
                                                | configuration file
 shared_buffers                 | 1993664kB                                     
                                                                                
                                                                                
                                                                                
                                                | configuration file
 shared_preload_libraries       | rdsutils                                      
                                                                                
                                                                                
                                                                                
                                                | configuration file
 ssl                            | on                                            
                                                                                
                                                                                
                                                                                
                                                | configuration file
 ssl_ca_file                    | /rdsdbdata/rds-metadata/ca-cert.pem           
                                                                                
                                                                                
                                                                                
                                                | configuration file
 ssl_cert_file                  | /rdsdbdata/rds-metadata/server-cert.pem       
                                                                                
                                                                                
                                                                                
                                                | configuration file
 ssl_key_file                   | /rdsdbdata/rds-metadata/server-key.pem        
                                                                                
                                                                                
                                                                                
                                                | configuration file
 ssl_renegotiation_limit        | 0                                             
                                                                                
                                                                                
                                                                                
                                                | configuration file
 stats_temp_directory           | /rdsdbdata/db/pg_stat_tmp                     
                                                                                
                                                                                
                                                                                
                                                | configuration file
 superuser_reserved_connections | 3                                             
                                                                                
                                                                                
                                                                                
                                                | configuration file
 synchronous_commit             | on                                            
                                                                                
                                                                                
                                                                                
                                                | configuration file
 TimeZone                       | UTC                                           
                                                                                
                                                                                
                                                                                
                                                | configuration file
 unix_socket_directories        | /tmp                                          
                                                                                
                                                                                
                                                                                
                                                | configuration file
 unix_socket_group              | rdsdb                                         
                                                                                
                                                                                
                                                                                
                                                | configuration file
 unix_socket_permissions        | 0700                                          
                                                                                
                                                                                
                                                                                
                                                | configuration file
 wal_keep_segments              | 32                                            
                                                                                
                                                                                
                                                                                
                                                | configuration file
 wal_level                      | hot_standby                                   
                                                                                
                                                                                
                                                                                
                                                | configuration file
 wal_receiver_timeout           | 30s                                           
                                                                                
                                                                                
                                                                                
                                                | configuration file
 wal_sender_timeout             | 30s                                           
                                                                                
                                                                                
                                                                                
                                                | configuration file
(52 rows)

Reply via email to