Dear all,
I have a query on 3 tables in a database as :-

_*Explain Analyze Output :-*_

explain anayze select c.clause, s.subject ,s.object , s.verb, s.subject_type , s.object_type ,s.doc_id ,s.svo_id from clause2 c, svo2 s ,page_content p where c.clause_id=s.clause_id and s.doc_id=c.source_id and c.sentence_id=s.sentence_id and s.doc_id=p.crawled_page_id order by s.svo_id limit 1000 offset 17929000

"Limit (cost=21685592.91..21686802.44 rows=1000 width=2624) (actual time=414601.802..414622.920 rows=1000 loops=1)" " -> Nested Loop (cost=59.77..320659013645.28 rows=265112018116 width=2624) (actual time=0.422..404902.314 rows=17930000 loops=1)" " -> Nested Loop (cost=0.00..313889654.42 rows=109882338 width=2628) (actual time=0.242..174223.789 rows=17736897 loops=1)" " -> Index Scan using pk_svo_id on svo2 s (cost=0.00..33914955.13 rows=26840752 width=2600) (actual time=0.157..14691.039 rows=14238271 loops=1)" " -> Index Scan using idx_clause2_id on clause2 c (cost=0.00..10.36 rows=4 width=44) (actual time=0.007..0.008 rows=1 loops=14238271)" " Index Cond: ((c.source_id = s.doc_id) AND (c.clause_id = s.clause_id) AND (c.sentence_id = s.sentence_id))" " -> Bitmap Heap Scan on page_content p (cost=59.77..2885.18 rows=2413 width=8) (actual time=0.007..0.008 rows=1 loops=17736897)"
"              Recheck Cond: (p.crawled_page_id = s.doc_id)"
" -> Bitmap Index Scan on idx_crawled_id (cost=0.00..59.17 rows=2413 width=0) (actual time=0.005..0.005 rows=1 loops=17736897)"
"                    Index Cond: (p.crawled_page_id = s.doc_id)"
"Total runtime: 414623.634 ms"

_*My Table & index definitions are as under :-

*_Estimated rows in 3 tables are :-

clause2 10341700
svo2 26008000
page_content 479785

CREATE TABLE clause2
(
 id bigint NOT NULL DEFAULT nextval('clause_id_seq'::regclass),
 source_id integer,
 sentence_id integer,
 clause_id integer,
 tense character varying(30),
 clause text,
 CONSTRAINT pk_clause_id PRIMARY KEY (id)
)WITH ( OIDS=FALSE);
CREATE INDEX idx_clause2_id ON clause2 USING btree (source_id, clause_id, sentence_id);

CREATE TABLE svo2
(
 svo_id bigint NOT NULL DEFAULT nextval('svo_svo_id_seq'::regclass),
 doc_id integer,
 sentence_id integer,
 clause_id integer,
 negation integer,
 subject character varying(3000),
 verb character varying(3000),
 "object" character varying(3000),
 preposition character varying(3000),
 subject_type character varying(3000),
 object_type character varying(3000),
 subject_attribute character varying(3000),
 object_attribute character varying(3000),
 verb_attribute character varying(3000),
 subject_concept character varying(100),
 object_concept character varying(100),
 subject_sense character varying(100),
 object_sense character varying(100),
 subject_chain character varying(5000),
 object_chain character varying(5000),
 sub_type_id integer,
 obj_type_id integer,
 CONSTRAINT pk_svo_id PRIMARY KEY (svo_id)
)WITH (  OIDS=FALSE);
CREATE INDEX idx_svo2_id_dummy ON svo2 USING btree (doc_id, clause_id, sentence_id);

CREATE TABLE page_content
(
content_id integer NOT NULL DEFAULT nextval('page_content_ogc_fid_seq'::regclass),
 wkb_geometry geometry,
 link_level integer,
 isprocessable integer,
 isvalid integer,
 isanalyzed integer,
 islocked integer,
 content_language character(10),
 url_id integer,
 publishing_date character(40),
 heading character(150),
 category character(150),
 crawled_page_url character(500),
 keywords character(500),
 dt_stamp timestamp with time zone,
 "content" character varying,
 crawled_page_id bigint,
 CONSTRAINT page_content_pk PRIMARY KEY (content_id),
 CONSTRAINT enforce_dims_wkb_geometry CHECK (st_ndims(wkb_geometry) = 2),
 CONSTRAINT enforce_srid_wkb_geometry CHECK (st_srid(wkb_geometry) = (-1))
)WITH (  OIDS=FALSE);
CREATE INDEX idx_crawled_id ON page_content USING btree (crawled_page_id); CREATE INDEX pgweb_idx ON page_content USING gin (to_tsvector('english'::regconfig, content::text));

If possible, Please let me know if I am something wrong or any alternate query to run it faster.


Thanks

Reply via email to