Part 3, the last part, materialized views with joins
I am going to create two new tables for this test:
mysql> create table dim_uid (uid_id int auto_increment primary key ,
url_hash char(32),
site_code char(2)
) engine=innodb;
Query OK, 0 rows affected (0.03 sec)
mysql> create table fact(fact_id int auto_increment primary key,
uid_id int not null,
imp_count int,
click_count int,
cpc decimal(10,6)
) engine=innodb;
Query OK, 0 rows affected (0.08 sec)
*Materialized view logs are created on both tables.*
mysql> show tables where Tables_in_test like 'fact%' or
Tables_in_test like
'dim_uid%';
+----------------+
| Tables_in_test |
+----------------+
| dim_uid |
| dim_uid_mvlog |
| fact |
| fact_mvlog |
+----------------+
4 rows in set (0.00 sec)
*Data initially in the tables:*
mysql> select * from dim_uid;
+--------+----------------------------------+-----------+
| uid_id | url_hash | site_code |
+--------+----------------------------------+-----------+
| 1 | 0bfe2e352a363bdeaeee3323d6465b1c | XA |
| 2 | 07cd93d5d23a60e4e825769783b11b9d | XA |
| 3 | cc8a8d7b824bd7f5a5eb16bddc35de62 | XB |
| 4 | 050ef350ac164f18e26ed19ced9370dd | XC |
+--------+----------------------------------+-----------+
4 rows in set (0.00 sec)
mysql> select * from fact;
+---------+--------+-----------+-------------+----------
+------------+
| fact_id | uid_id | imp_count | click_count | cpc |
the_date |
+---------+--------+-----------+-------------+----------
+------------+
| 7 | 1 | 10 | 2 | 0.000001 |
2008-01-02 |
| 8 | 1 | 25 | 15 | 0.000200 |
2008-01-02 |
| 9 | 2 | 10 | 5 | 0.000500 |
2008-01-01 |
| 10 | 3 | 75682 | 1750 | 4.242321 |
2008-01-01 |
| 11 | 3 | 114232 | 15323 | 4.123212 |
2008-02-01 |
| 12 | 4 | 512 | 64 | 0.000210 |
2008-01-01 |
| 13 | 4 | 1024 | 256 | 0.000330 |
2008-01-02 |
+---------+--------+-----------+-------------+----------
+------------+
7 rows in set (0.00 sec)
*I am going to materialize a view for the following SQL:*
SELECT f.the_date click_date,
d.site_code site_code,
count(*) site_count,
sum(f.imp_count) total_imp,
sum(f.click_count) total_clicks,
sum(f.click_count * f.cpc) click_revenue
FROM dim_uid d
JOIN fact f on d.uid_id = f.uid_id
WHERE f.cpc > 0.0001
GROUP BY f.the_date, site_code
ORDER BY click_revenue desc;
+------------+-----------+------------+-----------+--------------
+---------------+-----+
| click_date | site_code | site_count | total_imp | total_clicks |
click_revenue | CNT |
+------------+-----------+------------+-----------+--------------
+---------------+-----+
| 2008-02-01 | XB | 1 | 114232 | 15323 |
63179.977476 | 1 |
| 2008-01-01 | XB | 1 | 75682 | 1750 |
7424.061750 | 1 |
| 2008-01-02 | XC | 1 | 1024 | 256 |
0.084480 | 1 |
| 2008-01-01 | XC | 1 | 512 | 64 |
0.013440 | 1 |
| 2008-01-02 | XA | 1 | 25 | 15 |
0.003000 | 1 |
| 2008-01-01 | XA | 1 | 10 | 5 |
0.002500 | 1 |
+------------+-----------+------------+-----------+--------------
+---------------+-----+
6 rows in set (0.01 sec)
*
API calls to build the view:*
mysql> call flexviews.create('test','example2','INCREMENTAL');
Query OK, 1 row affected (0.03 sec)
mysql> set @mvid=last_insert_id();
Query OK, 0 rows affected (0.00 sec)
mysql> call flexviews.add_table(@mvid, 'test','dim_uid', 'd', NULL);
Query OK, 1 row affected (0.01 sec)
mysql> call flexviews.add_table(@mvid, 'test','fact', 'f', ' ON
f.uid_id =
d.uid_id ');
Query OK, 1 row affected (0.01 sec)
mysql> call flexviews.add_expr(@mvid, 'GROUP', 'f.the_date',
'click_date');
Query OK, 0 rows affected (0.03 sec)
mysql> call flexviews.add_expr(@mvid, 'GROUP', 'd.site_code',
'site_code');
Query OK, 0 rows affected (0.00 sec)
mysql> call flexviews.add_expr(@mvid, 'COUNT', '*', 'site_count');
Query OK, 0 rows affected (0.01 sec)
mysql> call flexviews.add_expr(@mvid, 'SUM', 'f.imp_count',
'total_imp');
Query OK, 0 rows affected (0.00 sec)
mysql> call flexviews.add_expr(@mvid, 'SUM', 'f.click_count',
'total_clicks');
Query OK, 0 rows affected (0.00 sec)
mysql> call flexviews.add_expr(@mvid, 'SUM', 'f.click_count * f.cpc',
'click_revenue');
Query OK, 0 rows affected (0.01 sec)
mysql> call flexviews.add_expr(@mvid, 'WHERE', 'f.cpc > .0001',
'fact_cpc_where1');
Query OK, 0 rows affected (0.01 sec)
mysql> call flexviews.enable(@mvid);
Query OK, 0 rows affected (0.10 sec)
mysql> select * from example2 order by click_revenue desc;
+------------+-----------+------------+-----------+--------------
+---------------+-----+
| click_date | site_code | site_count | total_imp | total_clicks |
click_revenue | CNT |
+------------+-----------+------------+-----------+--------------
+---------------+-----+
| 2008-02-01 | XB | 1 | 114232 | 15323 |
63179.977476 | 1 |
| 2008-01-01 | XB | 1 | 75682 | 1750 |
7424.061750 | 1 |
| 2008-01-02 | XC | 1 | 1024 | 256 |
0.084480 | 1 |
| 2008-01-01 | XC | 1 | 512 | 64 |
0.013440 | 1 |
| 2008-01-02 | XA | 1 | 25 | 15 |
0.003000 | 1 |
| 2008-01-01 | XA | 1 | 10 | 5 |
0.002500 | 1 |
+------------+-----------+------------+-----------+--------------
+---------------+-----+
6 rows in set (0.00 sec)
*-- Fix the date transposition error, and add rows to tables
*mysql> update fact
set the_date = '2008-01-02'
where the_date = '2008-02-01';
Query OK, 1 row affected (0.04 sec)
Rows matched: 1 Changed: 1 Warnings: 0
--Add a row to the dimension table.
mysql> insert into dim_uid (uid_id, url_hash, site_code) values (5,
md5(now()), 'AA');
Query OK, 1 row affected (0.00 sec)
--Add rows to the fact table
mysql> insert into fact values (null, 5, 100, 75, .005,
'2008-01-01');
Query OK, 1 row affected (0.00 sec)
mysql> insert into fact values (null, 5, 250, 100, .00752,
'2008-01-02');
Query OK, 1 row affected (0.01 sec
*What is in the materialized view logs?
*mysql> select * from dim_uid_mvlog;
+----------+--------+--------+----------------------------------
+-----------+
| dml_type | uow_id | uid_id | url_hash |
site_code
|
+----------+--------+--------+----------------------------------
+-----------+
| 1 | 82 | 5 | 0c9a2a45a83d776b5bb0887e9a3cee7e | AA
|
+----------+--------+--------+----------------------------------
+-----------+
1 row in set (0.00 sec)
mysql> select * from fact_mvlog;
+----------+--------+---------+--------+-----------+-------------
+----------+------------+
| dml_type | uow_id | fact_id | uid_id | imp_count | click_count |
cpc
| the_date |
+----------+--------+---------+--------+-----------+-------------
+----------+------------+
| -1 | 81 | 11 | 3 | 114232 | 15323 |
4.123212
| 2008-02-01 |
| 1 | 81 | 11 | 3 | 114232 | 15323 |
4.123212
| 2008-01-02 |
| 1 | 83 | 19 | 5 | 100 | 75 |
0.005000
| 2008-01-01 |
| 1 | 84 | 20 | 5 | 250 | 100 |
0.007520
| 2008-01-02 |
+----------+--------+---------+--------+-----------+-------------
+----------+------------+
4 rows in set (0.01 sec)
*
Here are the compensation queries for calculating the delta records:*
mysql> call flexviews.refresh(2,'COMPUTE');
Query OK, 1 row affected, 2 warnings (0.61 sec)
mysql> select * from flexviews.refresh_log where message like
'%INSERT%'\G
*************************** 1. row ***************************
tstamp: 2009-07-17 02:43:46
usec: NULL
message: Query:
INSERT INTO test.example2_delta
SELECT (d.dml_type * 1) as dml_type,d.uow_id as uow_id,
(f.the_date) as `click_date`,
(d.site_code) as `site_code`,
SUM((d.dml_type * 1)) as `site_count`,
SUM((d.dml_type * 1) * f.imp_count) as `total_imp`,
SUM((d.dml_type * 1) * f.click_count) as `total_clicks`,
SUM((d.dml_type * 1) * f.click_count * f.cpc) as
`click_revenue`,
SUM((d.dml_type * 1)) as `CNT`
FROM test.dim_uid_mvlog as d
JOIN test.fact as f ON f.uid_id = d.uid_id
WHERE f.cpc > .0001
AND d.uow_id >58
AND d.uow_id <=67
AND (d.dml_type * 1 = 1)
GROUP BY (f.the_date), (d.site_code)
UNION ALL
SELECT
(d.dml_type * 1) as dml_type,d.uow_id as uow_id,
(f.the_date) as `click_date`,
(d.site_code) as `site_code`,
SUM((d.dml_type * 1)) as `site_count`,
SUM((d.dml_type * 1) * f.imp_count) as `total_imp`,
SUM((d.dml_type * 1) * f.click_count) as `total_clicks`,
SUM((d.dml_type * 1) * f.click_count * f.cpc) as
`click_revenue`,
SUM((d.dml_type * 1)) as `CNT`
FROM test.dim_uid_mvlog as d
JOIN test.fact as f ON f.uid_id = d.uid_id
WHERE f.cpc > .0001
AND d.uow_id >58
AND d.uow_id <=67
AND (d.dml_type * 1 = -1)
GROUP BY (f.the_date), (d.site_code)
*************************** 2. row ***************************
tstamp: 2009-07-17 02:43:46
usec: NULL
message: Query:
INSERT INTO test.example2_delta
SELECT (f.dml_type * -1) as dml_type,
LEAST(d.uow_id,f.uow_id) as uow_id,
(f.the_date) as `click_date`,
(d.site_code) as `site_code`,
SUM((f.dml_type * -1)) as `site_count`,
SUM((f.dml_type * -1) * f.imp_count) as `total_imp`,
SUM((f.dml_type * -1) * f.click_count) as `total_clicks`,
SUM((f.dml_type * -1) * f.click_count * f.cpc) as
`click_revenue`,
SUM((f.dml_type * -1)) as `CNT`
FROM test.dim_uid_mvlog as d
JOIN test.fact_mvlog as f ON f.uid_id = d.uid_id
WHERE f.cpc > .0001
AND d.uow_id >58
AND d.uow_id <=67
AND f.uow_id >67
AND f.uow_id <=68
AND (f.dml_type * -1 = 1)
GROUP BY (f.the_date), (d.site_code)
UNION ALL
SELECT (f.dml_type * -1) as dml_type,
LEAST(d.uow_id,f.uow_id) as uow_id,
(f.the_date) as `click_date`,
(d.site_code) as `site_code`,
SUM((f.dml_type * -1)) as `site_count`,
SUM((f.dml_type * -1) * f.imp_count) as `total_imp`,
SUM((f.dml_type * -1) * f.click_count) as `total_clicks`,
SUM((f.dml_type * -1) * f.click_count * f.cpc) as
`click_revenue`,
SUM((f.dml_type * -1)) as `CNT`
FROM test.dim_uid_mvlog as d
JOIN test.fact_mvlog as f ON f.uid_id = d.uid_id
WHERE f.cpc > .0001
AND d.uow_id >58
AND d.uow_id <=67
AND f.uow_id >67
AND f.uow_id <=68
AND (f.dml_type * -1 = -1)
GROUP BY (f.the_date), (d.site_code)
*************************** 3. row ***************************
tstamp: 2009-07-17 02:43:46
usec: NULL
message: Query:
INSERT INTO test.example2_delta
SELECT (f.dml_type * 1) as dml_type,
f.uow_id as uow_id,
(f.the_date) as `click_date`,
(d.site_code) as `site_code`,
SUM((f.dml_type * 1)) as `site_count`,
SUM((f.dml_type * 1) * f.imp_count) as `total_imp`,
SUM((f.dml_type * 1) * f.click_count) as `total_clicks`,
SUM((f.dml_type * 1) * f.click_count * f.cpc) as
`click_revenue`,
SUM((f.dml_type * 1)) as `CNT`
FROM test.dim_uid as d JOIN test.fact_mvlog as f ON f.uid_id =
d.uid_id
WHERE f.cpc > .0001
AND f.uow_id >58
AND f.uow_id <=67
AND (f.dml_type * 1 = 1)
GROUP BY (f.the_date), (d.site_code)
UNION ALL
SELECT (f.dml_type * 1) as dml_type,
f.uow_id as uow_id, (f.the_date) as `click_date`,
(d.site_code) as `site_code`,
SUM((f.dml_type * 1)) as `site_count`,
SUM((f.dml_type * 1) * f.imp_count) as `total_imp`,
SUM((f.dml_type * 1) * f.click_count) as `total_clicks`,
SUM((f.dml_type * 1) * f.click_count * f.cpc) as
`click_revenue`,
SUM((f.dml_type * 1)) as `CNT`
FROM test.dim_uid as d
JOIN test.fact_mvlog as f ON f.uid_id = d.uid_id
WHERE f.cpc > .0001
AND f.uow_id >58
AND f.uow_id <=67
AND (f.dml_type * 1 = -1)
GROUP BY (f.the_date), (d.site_code)
*************************** 4. row ***************************
tstamp: 2009-07-17 02:43:46
usec: NULL
message: Query:
INSERT INTO test.example2_delta
SELECT (d.dml_type * -1) as dml_type,
LEAST(d.uow_id,f.uow_id) as uow_id,
(f.the_date) as `click_date`,
(d.site_code) as `site_code`,
SUM((d.dml_type * -1)) as `site_count`,
SUM((d.dml_type * -1) * f.imp_count) as `total_imp`,
SUM((d.dml_type * -1) * f.click_count) as `total_clicks`,
SUM((d.dml_type * -1) * f.click_count * f.cpc) as
`click_revenue`,
SUM((d.dml_type * -1)) as `CNT`
FROM test.dim_uid_mvlog as d
JOIN test.fact_mvlog as f ON f.uid_id = d.uid_id
WHERE f.cpc > .0001
AND d.uow_id >58
AND d.uow_id <=70
AND f.uow_id >58
AND f.uow_id <=67
AND (d.dml_type * -1 = 1)
GROUP BY (f.the_date), (d.site_code)
UNION ALL
SELECT (d.dml_type * -1) as dml_type,
LEAST(d.uow_id,f.uow_id) as uow_id,
(f.the_date) as `click_date`,
(d.site_code) as `site_code`,
SUM((d.dml_type * -1)) as `site_count`,
SUM((d.dml_type * -1) * f.imp_count) as `total_imp`,
SUM((d.dml_type * -1) * f.click_count) as `total_clicks`,
SUM((d.dml_type * -1) * f.click_count * f.cpc) as
`click_revenue`,
SUM((d.dml_type * -1)) as `CNT`
FROM test.dim_uid_mvlog as d
JOIN test.fact_mvlog as f ON f.uid_id = d.uid_id
WHERE f.cpc > .0001
AND d.uow_id >58
AND d.uow_id <=70
AND f.uow_id >58
AND f.uow_id <=67
AND (d.dml_type * -1 = -1)
GROUP BY (f.the_date), (d.site_code)
4 rows in set (0.00 sec)
*Which produce the following rows in the materialized view delta log:
**
mysql> select * from example2_delta;
*
+----------+--------+------------+-----------+------------
+-----------+--------------+---------------+-----+
| dml_type | uow_id | click_date | site_code | site_count |
total_imp |
total_clicks | click_revenue | CNT |
+----------+--------+------------+-----------+------------
+-----------+--------------+---------------+-----+
| 1 | 82 | 2008-01-01 | AA | 1 | 100
| 75 | 0.375000 | 1 |
| 1 | 82 | 2008-01-02 | AA | 1 | 250
| 100 | 0.752000 | 1 |
| 1 | 83 | 2008-01-01 | AA | 1 | 100
| 75 | 0.375000 | 1 |
| 1 | 84 | 2008-01-02 | AA | 1 | 250
| 100 | 0.752000 | 1 |
| 1 | 81 | 2008-01-02 | XB | 1 | 114232
| 15323 | 63179.977476 | 1 |
| -1 | 81 | 2008-02-01 | XB | -1 | -114232
| -15323 | -63179.977476 | -1 |
| -1 | 82 | 2008-01-01 | AA | -1 | -100
| -75 | -0.375000 | -1 |
| -1 | 82 | 2008-01-02 | AA | -1 | -250
| -100 | -0.752000 | -1 |
+----------+--------+------------+-----------+------------
+-----------+--------------+---------------+-----+
8 rows in set (0.00 sec)*
Then the delta log is applied to the view:
*mysql> call flexviews.refresh(2, 'APPLY')
-> ;
Query OK, 1 row affected, 1 warning (0.08 sec)
*
which runs the following SQL:
*
DELETE test.example2_delta.*,
test.example2.*
FROM test.example2_delta
JOIN test.example2
USING(click_date, site_code)
WHERE example2.CNT + test.example2_delta.CNT=0
INSERT INTO test.example2
SELECT * FROM
(SELECT click_date,
site_code,
site_count,
total_imp,
total_clicks,
click_revenue,
CNT
FROM test.example2_delta
WHERE uow_id > 58
AND uow_id <= 71) x_select_
ON DUPLICATE KEY UPDATE
`click_revenue` = example2.`click_revenue` +
x_select_.`click_revenue`
,`CNT` = example2.`CNT` + x_select_.`CNT`
,`site_count` = example2.`site_count` + x_select_.`site_count`
,`total_clicks` = example2.`total_clicks` + x_select_.`total_clicks`
,`total_imp` = example2.`total_imp` + x_select_.`total_imp`
*After refresh:
real query:
*
SELECT f.the_date click_date,
d.site_code site_code,
count(*) site_count,
sum(f.imp_count) total_imp,
sum(f.click_count) total_clicks,
sum(f.click_count * f.cpc) click_revenue
FROM dim_uid d
JOIN fact f on d.uid_id = f.uid_id
WHERE f.cpc > 0.0001
GROUP BY f.the_date, site_code
ORDER BY click_revenue desc;
+------------+-----------+------------+-----------+--------------
+---------------+
| click_date | site_code | site_count | total_imp | total_clicks |
click_revenue |
+------------+-----------+------------+-----------+--------------
+---------------+
| 2008-01-02 | XB | 1 | 114232 | 15323 |
63179.977476 |
| 2008-01-01 | XB | 1 | 75682 | 1750 |
7424.061750 |
| 2008-01-02 | AA | 1 | 250 | 100 |
0.752000 |
| 2008-01-01 | AA | 1 | 100 | 75 |
0.375000 |
| 2008-01-02 | XC | 1 | 1024 | 256 |
0.084480 |
| 2008-01-01 | XC | 1 | 512 | 64 |
0.013440 |
| 2008-01-02 | XA | 1 | 25 | 15 |
0.003000 |
| 2008-01-01 | XA | 1 | 10 | 5 |
0.002500 |
+------------+-----------+------------+-----------+--------------
+---------------+
8 rows in set (0.00 sec)
*materialized view result:
*mysql> select * from example2 order by click_revenue desc;
+------------+-----------+------------+-----------+--------------
+---------------+-----+
| click_date | site_code | site_count | total_imp | total_clicks |
click_revenue | CNT |
+------------+-----------+------------+-----------+--------------
+---------------+-----+
| 2008-01-02 | XB | 1 | 114232 | 15323 |
63179.977476 | 1 |
| 2008-01-01 | XB | 1 | 75682 | 1750 |
7424.061750 | 1 |
| 2008-01-02 | AA | 1 | 250 | 100 |
0.752000 | 1 |
| 2008-01-01 | AA | 1 | 100 | 75 |
0.375000 | 1 |
| 2008-01-02 | XC | 1 | 1024 | 256 |
0.084480 | 1 |
| 2008-01-01 | XC | 1 | 512 | 64 |
0.013440 | 1 |
| 2008-01-02 | XA | 1 | 25 | 15 |
0.003000 | 1 |
| 2008-01-01 | XA | 1 | 10 | 5 |
0.002500 | 1 |
+------------+-----------+------------+-----------+--------------
+---------------+-----+
8 rows in set (0.00 sec)
Tada. View materialization with joins, and you can change fact and
dimension tables. That was simple wasn't it? :D
------------------------------------------------------------------------
_______________________________________________
Mailing list: https://launchpad.net/~drizzle-discuss
Post to : [email protected]
Unsubscribe : https://launchpad.net/~drizzle-discuss
More help : https://help.launchpad.net/ListHelp