Some quick notes:
- Using a side effect of a function to update the
database feels bad to me
- how long does the SELECT into varQueryRecord
md5(upc.keyp....
function take / what does it's explain look
like?
- There are a lot of non-indexed columns on that
delta master table, such as keyf_upc.
I'm guessing you're doing
90,000 x {a lot of slow scans}
- My temptation would be to rewrite the processing
to do a pass of updates, a pass of inserts,
and then the SELECT
----- Original Message -----
Sent: Friday, October 01, 2004 2:14
PM
Subject: [PERFORM] Slow update/insert
process
Pg: 7.4.5 RH
7.3 8g Ram 200 g drive space RAID0+1 Tables vacuum on a
nightly basis
The following
process below takes 8 hours to run on 90k records and I'm not sure where to
being to look for the bottleneck. This isn't the only updating on this
database that seems to take a long time to complete. Is there something
I should be looking for in my conf settings?
TIA Patrick
SQL:
---Bring back only selected records to run
through the update process. --Without
the function the SQL takes < 10secs to return 90,000 records
SELECT
count(pm.pm_delta_function_amazon(upc.keyp_upc,'amazon')) FROM mdc_upc upc JOIN public.mdc_products prod ON upc.keyf_products =
prod.keyp_products JOIN
public.mdc_price_post_inc price ON prod.keyp_products =
price.keyf_product JOIN
public.mdc_attribute_product ap on ap.keyf_products = prod.keyp_products and
keyf_attribute=22 WHERE
upper(trim(ap.attributevalue)) NOT IN
('ESTEE LAUDER',
'CLINIQUE','ORGINS','PRESCRIPTIVES','LANC?ME','CHANEL','ARAMIS','M.A.C','TAG
HEUER') AND
keyf_producttype<>222 AND
prod.action_publish = 1;
Function:
CREATE OR
REPLACE FUNCTION pm.pm_delta_function_amazon(int4, "varchar") RETURNS
bool AS 'DECLARE varkeyf_upc
ALIAS FOR $1; varPassword
ALIAS FOR $2;
varRealMD5
varchar; varDeltaMD5
varchar; varLastTouchDate
date; varQuery
text; varQuery1
text; varQueryMD5
text;
varQueryRecord record;
varFuncStatus boolean := false;
BEGIN
-- Check the password IF varPassword <>
\'amazon\' THEN Return false; END IF;
--
Get the md5 hash for this product SELECT into varQueryRecord
md5(upc.keyp_upc || prod.description ||
pm.pm_price_post_inc(prod.keyp_products)) AS md5 FROM
public.mdc_upc upc JOIN public.mdc_products prod ON
upc.keyf_products = prod.keyp_products JOIN
public.mdc_price_post_inc price ON price.keyf_product =
prod.keyp_products WHERE upc.keyp_upc = varkeyf_upc LIMIT 1
;
IF NOT FOUND THEN RAISE EXCEPTION
\'varRealMD5 is NULL. UPC ID is %\', varkeyf_upc; ELSE
varRealMD5:=varQueryRecord.md5; END IF;
-- Check
that the product is in the delta table and return its hash for comparison
SELECT into varQueryRecord md5_hash,last_touch_date
FROM pm.pm_delta_master_amazon WHERE keyf_upc =
varkeyf_upc LIMIT 1;
IF NOT FOUND THEN -- ADD and
exit INSERT INTO pm.pm_delta_master_amazon
(keyf_upc,status,md5_hash,last_touch_date) values
(varkeyf_upc,\'add\',varRealMD5,CURRENT_DATE);
varFuncStatus:=true; RETURN
varFuncStatus; ELSE --Update the record
--- If the hash matches then set the record to HOLD
IF varRealMD5 = varQueryRecord.md5_hash THEN
UPDATE pm.pm_delta_master_amazon
SET status= \'hold\',
last_touch_date = CURRENT_DATE WHERE
keyf_upc = varkeyf_upc AND last_touch_date <> CURRENT_DATE;
varFuncStatus:=true;
ELSE -- ELSE mark the item as
ADD UPDATE pm.pm_delta_master_amazon
SET status= \'add\',
last_touch_date = CURRENT_DATE WHERE
keyf_upc = varkeyf_upc;
varFuncStatus:=true; END IF; END
IF;
RETURN varFuncStatus; END;' LANGUAGE 'plpgsql'
IMMUTABLE;
TableDef
CREATE TABLE pm.pm_delta_master_amazon (
keyf_upc
int4 , status
varchar(6) ,
md5_hash varchar(40) ,
last_touch_date
date
) GO
CREATE INDEX status_idx ON
pm.pm_delta_master_amazon(status) GO
CONF -------- # WRITE AHEAD
LOG #---------------------------------------------------------------------------
# - Settings -
#fsync = true
# turns forced synchronization on or off #wal_sync_method = fsync #
the default varies across platforms:
# fsync, fdatasync, open_sync, or
open_datasync wal_buffers = 32
# min 4, 8KB each
# - Checkpoints -
checkpoint_segments = 50 #
in logfile segments, min 1, 16MB each checkpoint_timeout = 600 # range 30-3600, in
seconds #checkpoint_warning = 30
# 0 is off, in seconds #commit_delay = 0
# range 0-100000, in microseconds #commit_siblings = 5
# range 1-1000
Patrick
Hatcher Macys.Com
|