Re: [PERFORM] Slow update/insert process

2004-10-04 Thread Patrick Hatcher

Thanks for the help.
I found the culprit. The user
had created a function within the function (
pm.pm_price_post_inc(prod.keyp_products)).
Once this was fixed the time dropped dramatically.


Patrick Hatcher
Macys.Com
Legacy Integration Developer
415-422-1610 office
HatcherPT - AIM





Patrick Hatcher [EMAIL PROTECTED]

Sent by: [EMAIL PROTECTED]
10/01/04 11:14 AM




To
[EMAIL PROTECTED]


cc



Subject
[PERFORM] Slow update/insert
process









Pg: 7.4.5 
RH 7.3 
8g Ram 
200 g drive space 
RAID0+1 
Tables vacuum on a nightly basis 

The following process below takes 8 hours to run on 90k records and I'm
not sure where to being to look for the bottleneck. This isn't the
only updating on this database that seems to take a long time to complete.
Is there something I should be looking for in my conf settings? 


TIA 
Patrick 


SQL: 
---Bring back only selected records to run through the update process.

--Without the function the SQL takes  10secs to return 90,000 records

SELECT count(pm.pm_delta_function_amazon(upc.keyp_upc,'amazon'))

FROM mdc_upc upc 
JOIN public.mdc_products prod ON upc.keyf_products = prod.keyp_products

JOIN public.mdc_price_post_inc price ON prod.keyp_products = price.keyf_product

JOIN public.mdc_attribute_product ap on ap.keyf_products = prod.keyp_products
and keyf_attribute=22 
WHERE 
upper(trim(ap.attributevalue)) NOT IN ('ESTEE LAUDER', 'CLINIQUE','ORGINS','PRESCRIPTIVES','LANC?ME','CHANEL','ARAMIS','M.A.C','TAG
HEUER') 
AND keyf_producttype222 
AND prod.action_publish = 1; 


Function: 

CREATE OR REPLACE FUNCTION pm.pm_delta_function_amazon(int4, varchar)
 RETURNS bool AS
'DECLARE
  varkeyf_upcALIAS
FOR $1;
  varPasswordALIAS
FOR $2;
  varRealMD5varchar;
  varDeltaMD5varchar;
  varLastTouchDatedate;
  varQuery
text;
  varQuery1
text;
  varQueryMD5text;
  varQueryRecordrecord;
  varFuncStatusboolean := false;
  
BEGIN

-- Check the password
 IF varPassword  \'amazon\' THEN
  Return false;
 END IF;


-- Get the md5 hash for this product
 SELECT into varQueryRecord md5(upc.keyp_upc || prod.description || pm.pm_price_post_inc(prod.keyp_products))
AS md5
  FROM public.mdc_upc upc
  JOIN public.mdc_products prod ON upc.keyf_products = prod.keyp_products
  JOIN public.mdc_price_post_inc price ON price.keyf_product = prod.keyp_products
  WHERE upc.keyp_upc = varkeyf_upc LIMIT 1 ;
 

 IF NOT FOUND THEN
  RAISE EXCEPTION \'varRealMD5 is NULL. UPC ID is %\', varkeyf_upc;
 ELSE
  varRealMD5:=varQueryRecord.md5;
 END IF;

-- Check that the product is in the delta table and return its hash
for comparison 
 SELECT into varQueryRecord md5_hash,last_touch_date 
  FROM pm.pm_delta_master_amazon
  WHERE keyf_upc = varkeyf_upc LIMIT 1;

 IF NOT FOUND THEN
  -- ADD and exit
  INSERT INTO pm.pm_delta_master_amazon (keyf_upc,status,md5_hash,last_touch_date)
  values (varkeyf_upc,\'add\',varRealMD5,CURRENT_DATE);
  varFuncStatus:=true;
  RETURN varFuncStatus;
 ELSE
  --Update the record 
   --- If the hash matches then set the record to HOLD
  IF varRealMD5 = varQueryRecord.md5_hash THEN
UPDATE pm.pm_delta_master_amazon
 SET status= \'hold\',
 last_touch_date = CURRENT_DATE
WHERE keyf_upc = varkeyf_upc AND last_touch_date
 CURRENT_DATE; 
varFuncStatus:=true;
  ELSE
-- ELSE mark the item as ADD
UPDATE pm.pm_delta_master_amazon
 SET status= \'add\',
 last_touch_date = CURRENT_DATE
WHERE keyf_upc = varkeyf_upc;
varFuncStatus:=true;
  END IF;
 END IF;

 RETURN varFuncStatus;
END;'
 LANGUAGE 'plpgsql' IMMUTABLE;



TableDef 
CREATE TABLE pm.pm_delta_master_amazon ( 
  keyf_upc   
int4 , 
  status
varchar(6) , 
  md5_hash   
varchar(40) , 
  last_touch_datedate 
  ) 
GO 

CREATE INDEX status_idx 
  ON pm.pm_delta_master_amazon(status)

GO 




CONF 
 
# WRITE AHEAD LOG 
#---


# - Settings - 

#fsync = true 
# turns forced synchronization on or off 
#wal_sync_method = fsync# the default varies
across platforms: 
  
 # fsync, fdatasync, open_sync,
or open_datasync 
wal_buffers = 32#
min 4, 8KB each 

# - Checkpoints - 

checkpoint_segments = 50# in logfile segments,
min 1, 16MB each 
checkpoint_timeout = 600# range 30-3600, in
seconds 
#checkpoint_warning = 30# 0 is off, in seconds

#commit_delay = 0# range
0-10, in microseconds 
#commit_siblings = 5  # range 1-1000



Patrick Hatcher
Macys.Com


[PERFORM] Slow update/insert process

2004-10-01 Thread Patrick Hatcher

Pg: 7.4.5
RH 7.3
8g Ram
200 g drive space
RAID0+1
Tables vacuum on a nightly basis

The following process below takes 8
hours to run on 90k records and I'm not sure where to being to look for
the bottleneck. This isn't the only updating on this database that
seems to take a long time to complete. Is there something I should
be looking for in my conf settings? 

TIA
Patrick


SQL:
---Bring back only selected records
to run through the update process.
--Without the function the SQL takes
 10secs to return 90,000 records
SELECT count(pm.pm_delta_function_amazon(upc.keyp_upc,'amazon'))
FROM mdc_upc upc
JOIN public.mdc_products prod ON upc.keyf_products
= prod.keyp_products
JOIN public.mdc_price_post_inc price
ON prod.keyp_products = price.keyf_product
JOIN public.mdc_attribute_product ap
on ap.keyf_products = prod.keyp_products and keyf_attribute=22
WHERE 
upper(trim(ap.attributevalue)) NOT IN
('ESTEE LAUDER', 'CLINIQUE','ORGINS','PRESCRIPTIVES','LANC?ME','CHANEL','ARAMIS','M.A.C','TAG
HEUER')
AND keyf_producttype222
AND prod.action_publish = 1;


Function:

CREATE OR REPLACE FUNCTION pm.pm_delta_function_amazon(int4, varchar)
 RETURNS bool AS
'DECLARE
  varkeyf_upc  
 ALIAS FOR $1;
  varPassword  
 ALIAS FOR $2;
  varRealMD5  
 varchar;
  varDeltaMD5  
 varchar;
  varLastTouchDatedate;
  varQuery  
  text;
  varQuery1  
  text;
  varQueryMD5  
 text;
  varQueryRecordrecord;
  varFuncStatusboolean
:= false;
  
BEGIN

-- Check the password
 IF varPassword  \'amazon\' THEN
  Return false;
 END IF;


-- Get the md5 hash for this product
 SELECT into varQueryRecord md5(upc.keyp_upc || prod.description
|| pm.pm_price_post_inc(prod.keyp_products)) AS md5
  FROM public.mdc_upc upc
  JOIN public.mdc_products prod ON upc.keyf_products = prod.keyp_products
  JOIN public.mdc_price_post_inc price ON price.keyf_product
= prod.keyp_products
  WHERE upc.keyp_upc = varkeyf_upc LIMIT 1 ;
 
 
 IF NOT FOUND THEN
  RAISE EXCEPTION \'varRealMD5 is NULL. UPC ID is %\', varkeyf_upc;
 ELSE
  varRealMD5:=varQueryRecord.md5;
 END IF;

-- Check that the product is in the delta table and return its hash
for comparison 
 SELECT into varQueryRecord md5_hash,last_touch_date 
  FROM pm.pm_delta_master_amazon
  WHERE keyf_upc = varkeyf_upc LIMIT 1;

 IF NOT FOUND THEN
  -- ADD and exit
  INSERT INTO pm.pm_delta_master_amazon (keyf_upc,status,md5_hash,last_touch_date)
  values (varkeyf_upc,\'add\',varRealMD5,CURRENT_DATE);
  varFuncStatus:=true;
  RETURN varFuncStatus;
 ELSE
  --Update the record 
   --- If the hash matches then set the record to HOLD
  IF varRealMD5 = varQueryRecord.md5_hash THEN
UPDATE pm.pm_delta_master_amazon
 SET status= \'hold\',
 last_touch_date = CURRENT_DATE
WHERE keyf_upc = varkeyf_upc
AND last_touch_date  CURRENT_DATE; 
varFuncStatus:=true;
  ELSE
-- ELSE mark the item as ADD
UPDATE pm.pm_delta_master_amazon
 SET status= \'add\',
 last_touch_date = CURRENT_DATE
WHERE keyf_upc = varkeyf_upc;
varFuncStatus:=true;
  END IF;
  END IF;

 RETURN varFuncStatus;
END;'
 LANGUAGE 'plpgsql' IMMUTABLE;



TableDef
CREATE TABLE pm.pm_delta_master_amazon
( 
  keyf_upc  
 int4 ,
  status   
 varchar(6) ,
  md5_hash  
 varchar(40) ,
  last_touch_date 
  date 
  )
GO

CREATE INDEX status_idx
  ON pm.pm_delta_master_amazon(status)
GO





CONF

# WRITE AHEAD LOG
#---

# - Settings -

#fsync = true
 # turns forced synchronization on or
off
#wal_sync_method = fsync  
 # the default varies across platforms:
 
  #
fsync, fdatasync, open_sync, or open_datasync
wal_buffers = 32   
# min 4, 8KB each

# - Checkpoints -

checkpoint_segments = 50  
 # in logfile segments, min 1, 16MB each
checkpoint_timeout = 600  
 # range 30-3600, in seconds
#checkpoint_warning = 30  
 # 0 is off, in seconds
#commit_delay = 0   
# range 0-10, in microseconds
#commit_siblings = 5   
  # range 1-1000


Patrick Hatcher
Macys.Com


Re: [PERFORM] Slow update/insert process

2004-10-01 Thread Aaron Werman



Some quick notes:

- Using a side effect of a function to update the 
database feels bad to me
- how long does theSELECT into varQueryRecord 
md5(upc.keyp
 function take / what does it's explain look 
like?
- There are a lot of non-indexed columns on that 
delta master table, such as keyf_upc. 

 I'm guessing you're doing 
90,000 x {a lot of slow scans}
- My temptation would be to rewrite the processing 
to do a pass of updates, a pass of inserts, 
 and then the SELECT

  - Original Message - 
  From: 
  Patrick 
  Hatcher 
  To: [EMAIL PROTECTED] 
  
  Sent: Friday, October 01, 2004 2:14 
  PM
  Subject: [PERFORM] Slow update/insert 
  process
  Pg: 7.4.5 RH 
  7.3 8g Ram 200 g drive space RAID0+1 Tables vacuum on a 
  nightly basis The following 
  process below takes 8 hours to run on 90k records and I'm not sure where to 
  being to look for the bottleneck. This isn't the only updating on this 
  database that seems to take a long time to complete. Is there something 
  I should be looking for in my conf settings?  TIA Patrick SQL: 
  ---Bring back only selected records to run 
  through the update process. --Without 
  the function the SQL takes  10secs to return 90,000 records 
  SELECT 
  count(pm.pm_delta_function_amazon(upc.keyp_upc,'amazon')) FROM mdc_upc upc JOIN public.mdc_products prod ON upc.keyf_products = 
  prod.keyp_products JOIN 
  public.mdc_price_post_inc price ON prod.keyp_products = 
  price.keyf_product JOIN 
  public.mdc_attribute_product ap on ap.keyf_products = prod.keyp_products and 
  keyf_attribute=22 WHERE 
  upper(trim(ap.attributevalue)) NOT IN 
  ('ESTEE LAUDER', 
  'CLINIQUE','ORGINS','PRESCRIPTIVES','LANC?ME','CHANEL','ARAMIS','M.A.C','TAG 
  HEUER') AND 
  keyf_producttype222 AND 
  prod.action_publish = 1; Function: CREATE OR 
  REPLACE FUNCTION pm.pm_delta_function_amazon(int4, "varchar")RETURNS 
  bool AS'DECLARE varkeyf_upc 
 ALIAS FOR $1; varPassword  
ALIAS FOR $2; 
  varRealMD5
  varchar; varDeltaMD5  
varchar; varLastTouchDate   
   date; varQuery  
 text; varQuery1
   text; varQueryMD5  
text; 
  varQueryRecordrecord; 
  varFuncStatusboolean := false; 
  BEGIN-- Check the passwordIF varPassword  
  \'amazon\' THEN Return false;END IF;-- 
  Get the md5 hash for this productSELECT into varQueryRecord 
  md5(upc.keyp_upc || prod.description || 
  pm.pm_price_post_inc(prod.keyp_products)) AS md5 FROM 
  public.mdc_upc upc JOIN public.mdc_products prod ON 
  upc.keyf_products = prod.keyp_products JOIN 
  public.mdc_price_post_inc price ON price.keyf_product = 
  prod.keyp_products WHERE upc.keyp_upc = varkeyf_upc LIMIT 1 
  ;IF NOT FOUND THEN RAISE EXCEPTION 
  \'varRealMD5 is NULL. UPC ID is %\', varkeyf_upc;ELSE 
  varRealMD5:=varQueryRecord.md5;END IF;-- Check 
  that the product is in the delta table and return its hash for comparison 
  SELECT into varQueryRecord md5_hash,last_touch_date  
  FROM pm.pm_delta_master_amazon WHERE keyf_upc = 
  varkeyf_upc LIMIT 1;IF NOT FOUND THEN -- ADD and 
  exit INSERT INTO pm.pm_delta_master_amazon 
  (keyf_upc,status,md5_hash,last_touch_date) values 
  (varkeyf_upc,\'add\',varRealMD5,CURRENT_DATE); 
  varFuncStatus:=true; RETURN 
  varFuncStatus;ELSE --Update the record  
   --- If the hash matches then set the record to HOLD 
  IF varRealMD5 = varQueryRecord.md5_hash THEN   
  UPDATE pm.pm_delta_master_amazon
  SET status= \'hold\',
  last_touch_date = CURRENT_DATE   WHERE 
  keyf_upc = varkeyf_upc AND last_touch_date  CURRENT_DATE; 
 varFuncStatus:=true; 
  ELSE   -- ELSE mark the item as 
  ADD   UPDATE pm.pm_delta_master_amazon 
 SET status= \'add\',
  last_touch_date = CURRENT_DATE   WHERE 
  keyf_upc = varkeyf_upc;   
  varFuncStatus:=true; END IF; END 
  IF;RETURN varFuncStatus;END;'LANGUAGE 'plpgsql' 
  IMMUTABLE;TableDef 
  CREATE TABLE pm.pm_delta_master_amazon ( 
keyf_upc   
   int4 ,   status
   varchar(6) ,   
  md5_hashvarchar(40) , 
last_touch_date   
   date   
  ) GO CREATE INDEX status_idx   ON 
  pm.pm_delta_master_amazon(status) GO CONF  # WRITE AHEAD 
  LOG #--- 
  # - Settings - #fsync = true   
 # turns forced synchronization on or off #wal_sync_method = fsync# 
  the default varies across platforms:   
# fsync, fdatasync, open_sync, or 
  open_datasync wal_buffers = 32  
# min 4, 8KB each 
  # - Checkpoints - checkpoint_segments = 50# 
  in logfile segments, min 1, 16MB each checkpoint_timeout = 600# range 30-3600, in 
  seconds #checkpoint_warning = 30 
 # 0 is off, in seconds #commit_delay = 0  
# range 0-10, in microseconds #commit_siblings = 5  
  # range 1-1000 Patrick 
  HatcherMacys.Com