----- Original Message -----
Sent: Thursday, February 05, 2004
10:48 AM
Subject: [SQL] Slow sub-selects,
max and count(*)
I have a procedure ( included below with
table definition) to import end of day quotes into a table
fsechist.
The issue is with the speed ( or lackof it) that
the procedure proceeds. Apparently it is the subselects that are the worst
issue and I have tried to replace these. Also max() and count() refuse to use
indexes. I have replaced max() with (select ... as mx from ... order by ...
desc limit 1) and this is much faster but appear to have to avoid count() as I
can not find a way to point this at an index.
Please, I am sure that there is a better way to
do this. And 5 to 7 minutes to insert a day is really too slow. Each days
import table contains about 3200 records. Total table size for fsechist is
about 2.5 million records.
If you could point me in the right direction here
it would be much appreciated.
thanks
Richard Sydney-Smith
--------------------------------------------------------------------------------------------------------
CREATE OR REPLACE FUNCTION
public.import_sharedata(varchar)
RETURNS bool AS
'
declare
filnam alias for
$1;
alldone bool := true;
/* cnt integer := 0;
*/ /* count of number inserted not used */
begin
/*
Imports a text file
containing end of day price quotes
filnam should be in double
backslash format e.g. c:\\data\\sharequotes\\quotes.dat
Ensures that it only brings in quotes for companies we have
listed in our database
and ensures only the latest copy of the
quote is kept for each day
Import table has 7 columns in CSV
tab delimited ascii format.
EzyChart format
has prices in cents,
METASTOCK in
dollars
MetaStock +100 has the volume
divided by 100.
Depending on your import data
source you may need to process the temp_shareprices so that the
values are in Dollars , not cents and the volume is per unit
not per 100 units.
Present calculation is for EzyChart
format.
Both tables have to have a 4
digit year yyyymmdd
Warning : If you have the wrong
format this procedure will import garbage into your database !!!
*/
perform
drop_table(\'temp_shareprices\');
create table
temp_shareprices(
ticker char(10)
null,
dte char(8),
v1
numeric(19,4) not null default 0,
v2 numeric(19,4) not
null default 0,
v3 numeric(19,4) not null default
0,
v4 numeric(19,4) not null default
0,
v5 numeric(19,4) not null default
0
);
execute \'copy temp_shareprices from
\'||quote_literal(filnam)||\' delimiter \'||quote_literal(\',\');
-- extend yr to 4 digits, alter quotes to dollars
update temp_shareprices set dte =
\'20\'||dte,v1=v1/100,v2=v2/100,v3=v3/100,v4=v4/100;
-- add in any new company tickers
insert into fsecurity (sec_cde)
select ticker from temp_shareprices where length(trim(ticker))=3 and ticker
not in ( select sec_cde from fsecurity);
-- ====================
-- remove rows from temp_shareprices where we do not have a corresponding
security
-- using sub-select is slow
-- delete from temp_shareprices where
ticker not in (select sec_cde from fsecurity);
-- can just delete ones where length<>3 as insert above ensures all
length 3 are inserted. All ASX codes are 3char long
delete from
temp_shareprices where length(trim(ticker))<>3;
-- Attempt to remove without limiting to length and not using sub select
does not work
--update temp_shareprices set ticker = s.sec_cde from
temp_shareprices t left outer join fsecurity s on t.ticker =
s.sec_cde;
--delete from temp_shareprices where ticker is null;
-- put indexes on temp table
CREATE INDEX temphist_tick ON
temp_shareprices USING btree (ticker);
CREATE INDEX
temphist_dte ON temp_shareprices USING btree (dte);
-- ====================
-- Scan the input table
and delete as appropriate from fsechist
-- Using sub select to remove any pre-existing records in fsechist very
slow
-- delete from fsechist where hist_q in (select hist_q from fsechist,
temp_shareprices where hist_tick = ticker and dte = hist_date);
-- Tried to replace subselect
update fsechist set hist_tick = \'@@\'
from fsechist h, temp_shareprices t where t.ticker = h.hist_tick and
h.hist_date = t.dte;
delete from fsechist where hist_tick = \'@@\';
-- insert newvalues
insert into
fsechist(hist_tick,hist_date,hist_v1,hist_v2,hist_v3,hist_v4,hist_vol)
select ticker,dte,v1,v2,v3,v4,v5 from temp_shareprices;
-- count the number inserted
-- select count(*) into cnt
from temp_shareprices;
-- perform drop_table(\'temp_shareprices\');
return alldone;
end;'
LANGUAGE 'plpgsql' VOLATILE;
Note : drop_table is a simple procedure that
drops a procedure by name iff it exists.
==============================================
Definition of Fsechist
CREATE TABLE public.fsechist
(
hist_q int4 NOT NULL DEFAULT nextval('hist_q'::text),
hist_v1
numeric(19,4),
hist_v2 numeric(19,4),
hist_v3
numeric(19,4),
hist_v4 numeric(19,4),
hist_vol
numeric(19,4),
hist_deltar numeric(19,4),
hist_deltag
numeric(19,4),
hist_date char(8),
hist_tick
char(10),
hist_announce char(1),
CONSTRAINT fsechist_pkey
PRIMARY KEY (hist_q)
) WITH OIDS;
CREATE INDEX fsechist_date ON
public.fsechist USING btree (hist_date);
CREATE UNIQUE INDEX fsechist_q ON
public.fsechist USING btree (hist_q);
CREATE INDEX fsechist_tick ON
public.fsechist USING btree (hist_tick);