Mwalker has uploaded a new change for review.
https://gerrit.wikimedia.org/r/72741
Change subject: Adding Silverpop Email Export Script
......................................................................
Adding Silverpop Email Export Script
Change-Id: Iba9334cd2ca57a57d252835b5053f2133ce80dc0
---
A silverpop_export.sql
1 file changed, 259 insertions(+), 0 deletions(-)
git pull ssh://gerrit.wikimedia.org:29418/wikimedia/fundraising/tools
refs/changes/41/72741/1
diff --git a/silverpop_export.sql b/silverpop_export.sql
new file mode 100644
index 0000000..b49c12a
--- /dev/null
+++ b/silverpop_export.sql
@@ -0,0 +1,259 @@
+DROP TABLE IF EXISTS silverpop_export;
+CREATE TABLE silverpop_export(
+ id int unsigned PRIMARY KEY AUTO_INCREMENT,
+
+ -- Step 1 exported fields
+ contact_id int unsigned,
+ first_name varchar(128),
+ last_name varchar(128),
+ preferred_language varchar(5),
+ email varchar(255),
+
+ -- Step 5 lifetime statistics
+ has_recurred_donation tinyint(1),
+ highest_usd_amount decimal(20,2),
+ lifetime_usd_total decimal(20,2),
+ donation_count int,
+ is_2006_donor tinyint(1),
+ is_2007_donor tinyint(1),
+ is_2008_donor tinyint(1),
+ is_2009_donor tinyint(1),
+ is_2010_donor tinyint(1),
+ is_2011_donor tinyint(1),
+ is_2012_donor tinyint(1),
+ is_2013_donor tinyint(1),
+
+ -- Step 6 latest contribution
+ last_ctid int unsigned,
+ latest_currency varchar(3),
+ latest_native_amount decimal(20,2),
+ latest_usd_amount decimal(20,2),
+ latest_donation datetime,
+
+ -- Step 7 Address information
+ city varchar(64),
+ country varchar(2),
+ postal_code varchar(16),
+
+ -- Step 8 Geonames lookup of timezone
+ tzoffset float,
+
+ -- Step 10 Unsubcribe hash generation
+ unsub_hash varchar(255)
+);
+CREATE INDEX spex_contact_id ON silverpop_export(contact_id);
+CREATE INDEX spex_email ON silverpop_export(email);
+CREATE INDEX spex_city ON silverpop_export(city);
+CREATE INDEX spex_country ON silverpop_export(country);
+CREATE INDEX spex_postal ON silverpop_export(postal_code);
+
+-- STEP 1: Populate the temporary table with all contacts that have an
+-- email address
+INSERT INTO silverpop_export
+ (contact_id, email, first_name, last_name, preferred_language)
+ SELECT
+ e.contact_id, e.email, c.first_name, c.last_name,
+ SUBSTRING(1, 2, c.preferred_language)
+ FROM civicrm.civicrm_email e, civicrm.civicrm_contact c
+ WHERE
+ e.email IS NOT NULL AND e.email != '' AND
+ e.contact_id = c.id AND
+ c.is_deleted = 0 AND c.is_opt_out = 0;
+
+-- STEP 2: Deduplicate rows that have the same email address, we will
+-- have to merge in more data later, but this is >500k rows we're
+-- getting rid of here which is more better than taking them all the way
+-- through.
+CREATE TABLE silverpop_export_dedupe_email
+ (id INT PRIMARY KEY AUTO_INCREMENT, email varchar(255), maxid int);
+
+INSERT INTO silverpop_export_dedupe_email (email, maxid)
+ SELECT email, max(id) maxid FROM silverpop_export
+ FORCE INDEX (spex_email)
+ GROUP BY email
+ HAVING count(*) > 1;
+
+DELETE silverpop_export FROM silverpop_export, silverpop_export_dedupe_email
+ WHERE
+ silverpop_export.email = silverpop_export_dedupe_email.email AND
+ silverpop_export.id != silverpop_export_dedupe_email.maxid;
+
+DROP TABLE silverpop_export_dedupe_email;
+
+-- STEP 3: Deduplicate rows that have the same contact ID because they'll
+-- generate the same result (> 50k rows)
+CREATE TABLE silverpop_export_dedupe_contact
+ (id int PRIMARY KEY AUTO_INCREMENT, contact_id int, maxid int);
+
+INSERT INTO silverpop_export_dedupe_contact (contact_id, maxid)
+ SELECT contact_id, max(id) maxid FROM silverpop_export
+ FORCE INDEX (spex_contact_id)
+ GROUP BY contact_id
+ HAVING count(*) > 1;
+
+DELETE silverpop_export FROM silverpop_export, silverpop_export_dedupe_contact
+ WHERE
+ silverpop_export.contact_id = silverpop_export_dedupe_contact.contact_id
AND
+ silverpop_export.id != silverpop_export_dedupe_contact.maxid;
+
+DROP TABLE silverpop_export_dedupe_contact;
+
+-- STEP 4 Update every email address with every contact and opt them out
+DELETE silverpop_export ex
+FROM silverpop_export ex, civicrm.civicrm_email e USE INDEX(UI_email),
civicrm.civicrm_contact c
+WHERE
+ ex.email = e.email AND e.contact_id = c.id AND c.is_opt_out = 1;
+
+-- STEP 5: Create an aggregate table from a full contribution table scan
+DROP TABLE IF EXISTS silverpop_export_stat;
+CREATE TABLE silverpop_export_stat (
+ id INT PRIMARY KEY AUTO_INCREMENT,
+ email varchar(255),
+ exid INT, -- STEP 5
+ max_ctid INT, -- STEP 5
+ max_amount_ctid INT, -- STEP 5
+ max_amount_usd decimal(20,2), -- STEP 5
+ max_amount_currency varchar(3), -- STEP 5
+ has_recurred_donation tinyint(1),
+ total_usd decimal(20,2), -- STEP 5
+ cnt_total int, -- STEP 5
+ cnt_2006 tinyint, -- STEP 5
+ cnt_2007 tinyint,
+ cnt_2008 tinyint,
+ cnt_2009 tinyint,
+ cnt_2010 tinyint,
+ cnt_2011 tinyint,
+ cnt_2012 tinyint,
+ cnt_2013 tinyint
+);
+
+INSERT INTO silverpop_export_stat
+ (email, exid, max_ctid, max_amount_usd, total_usd, cnt_total,
has_recurred_donation,
+ cnt_2006, cnt_2007, cnt_2008, cnt_2009, cnt_2010, cnt_2011, cnt_2012,
cnt_2013)
+ SELECT
+ e.email, ex.id, MAX(ct.id), MAX(ct.total_amount), SUM(ct.total_amount),
+ count(*),
+ SUM(IF(SUBSTRING(ct.trxn_id, 1, 9) = 'RECURRING', 1, 0), 1, 0),
+ SUM(IF('2006-07-1' < ct.receive_date AND ct.receive_date < '2007-07-01',
1, 0)),
+ SUM(IF('2007-07-1' < ct.receive_date AND ct.receive_date < '2008-07-01',
1, 0)),
+ SUM(IF('2008-07-1' < ct.receive_date AND ct.receive_date < '2009-07-01',
1, 0)),
+ SUM(IF('2009-07-1' < ct.receive_date AND ct.receive_date < '2010-07-01',
1, 0)),
+ SUM(IF('2010-07-1' < ct.receive_date AND ct.receive_date < '2011-07-01',
1, 0)),
+ SUM(IF('2011-07-1' < ct.receive_date AND ct.receive_date < '2012-07-01',
1, 0)),
+ SUM(IF('2012-07-1' < ct.receive_date AND ct.receive_date < '2013-07-01',
1, 0)),
+ SUM(IF('2013-07-1' < ct.receive_date AND ct.receive_date < '2014-07-01',
1, 0))
+ FROM silverpop_export ex, civicrm.civicrm_email e,
civicrm.civicrm_contribution ct
+ WHERE e.email=ex.email AND e.contact_id=ct.contact_id
+ GROUP BY e.email;
+
+UPDATE silverpop_export ex, silverpop_export_stat exs
+ SET
+ ex.last_ctid = exs.max_ctid,
+ ex.highest_usd_amount = exs.max_amount_usd,
+ ex.lifetime_usd_total = exs.total_usd,
+ ex.donation_count = exs.cnt_total,
+ ex.has_recurred_donation = IF(exs.has_recurred_donation > 0, 1, 0),
+ ex.is_2006_donor = IF(exs.cnt_2006 > 0, 1, 0),
+ ex.is_2007_donor = IF(exs.cnt_2007 > 0, 1, 0),
+ ex.is_2008_donor = IF(exs.cnt_2008 > 0, 1, 0),
+ ex.is_2009_donor = IF(exs.cnt_2009 > 0, 1, 0),
+ ex.is_2010_donor = IF(exs.cnt_2010 > 0, 1, 0),
+ ex.is_2011_donor = IF(exs.cnt_2011 > 0, 1, 0),
+ ex.is_2012_donor = IF(exs.cnt_2012 > 0, 1, 0),
+ ex.is_2013_donor = IF(exs.cnt_2013 > 0, 1, 0)
+ WHERE
+ ex.id = exs.exid;
+
+DROP TABLE silverpop_export_stat;
+
+-- STEP 6: Populate information about the most recent contribution
+UPDATE silverpop_export ex, civicrm.civicrm_contribution ct
+SET
+ latest_currency = SUBSTRING(ct.source, 1, 3),
+ latest_native_amount = CONVERT(SUBSTRING(ct.source, 5), decimal(20,2)),
+ latest_usd_amount = ct.total_amount,
+ latest_donation = ct.receive_date,
+ latest_is_recurring = IF(SUBSTRING(ct.trxn_id, 1, 9) = 'RECURRING', 1, 0)
+WHERE
+ ex.last_ctid = ct.id;
+
+-- STEP 7: Join on address
+UPDATE silverpop_export ex, civicrm.civicrm_address addr,
civicrm.civicrm_country ctry
+ SET
+ ex.city = addr.city,
+ ex.country = ctry.iso_code,
+ ex.postal_code = addr.postal_code
+ WHERE ex.contact_id = addr.contact_id AND addr.country_id = ctry.id;
+
+-- STEP 8: Geonames lookup of timezone
+-- 8.1 Lookup by post code and country
+UPDATE silverpop_export ex, geonames.geonames g, geonames.altnames a,
geonames.timezones tz
+ SET ex.tzoffset = tz.offset
+ WHERE
+ tz.tzid IS NULL AND
+ ex.postal_code IS NOT NULL AND
+ ex.country IN ('FR', 'US', 'RU', 'AU', 'GB', 'CA', 'NZ', 'BR', 'ID', 'MX',
'PT', 'ES') AND
+ a.format='post' AND
+ ex.country = g.country_code AND
+ a.altname = ex.postal_code AND
+ a.geonameid = g.geonameid AND
+ tz.tzid=g.tzid;
+
+-- 8.2 Otherwise just by country
+UPDATE
+ silverpop_export ex,
+ (SELECT g.country_code country_code, tz.offset offset
+ FROM geonames.geonames g, geonames.timezones tz
+ WHERE g.tzid=tz.tzid
+ GROUP BY g.country_code
+ ) tz
+ SET ex.tzoffset = tz.offset
+ WHERE
+ ex.tzoffset is NULL AND
+ tz.country_code=ex.country;
+
+-- 8.3 And really otherwise, just set it to 0
+UPDATE silverpop_export ex
+ SET ex.tzoffset = 0
+ WHERE ex.tzoffset is NULL;
+
+-- STEP 9 Normalize some data
+UPDATE silverpop_export SET preferred_language='en' WHERE preferred_langage IS
NULL;
+UPDATE silverpop_export SET
+ last_ctid = 0,
+ highest_usd_amount = 0,
+ lifetime_usd_total = 0,
+ donation_count = 0,
+ is_2007_donor = 0,
+ is_2008_donor = 0,
+ is_2009_donor = 0,
+ is_2010_donor = 0,
+ is_2011_donor = 0,
+ is_2012_donor = 0,
+ is_2013_donor = 0,
+ latest_currency = 'USD',
+ latest_native_amount = 0,
+ latest_usd_amount = 0,
+ latest_donation = NOW(),
+ has_recurred_donation = 0
+ WHERE donation_count IS NULL;
+UPDATE silverpop_export SET country='US' where country IS NULL;
+
+-- STEP 10 Create the unsub hash
+UPDATE silverpop_export ex SET
+ unsub_hash = SHA1(CONCAT(last_ctid, email, XXX));
+
+-- Export some random rows
+SELECT contact_id ContactID, email, first_name firstname, last_name lastname,
+ last_ctid ContributionID, country,
+ SUBSTRING(preferred_language, 1, 2) IsoLang, has_recurred_donation,
highest_usd_amount,
+ lifetime_usd_total, latest_donation latest_donation_date, latest_usd_amount,
+ latest_currency, latest_native_amount, tzoffset timezone, donation_count,
+ is_2006_donor, is_2007_donor, is_2008_donor, is_2009_donor, is_2010_donor,
+ is_2011_donor, is_2012_donor, is_2013_donor, unsub_hash
+FROM silverpop_export AS r1 JOIN
+ (SELECT (RAND() * (SELECT MAX(id) FROM silverpop_export)) AS id) AS r2
+ WHERE r1.id >= r2.id
+ ORDER BY r1.id ASC
+ LIMIT 200;
+
--
To view, visit https://gerrit.wikimedia.org/r/72741
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: Iba9334cd2ca57a57d252835b5053f2133ce80dc0
Gerrit-PatchSet: 1
Gerrit-Project: wikimedia/fundraising/tools
Gerrit-Branch: master
Gerrit-Owner: Mwalker <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits