http://www.mediawiki.org/wiki/Special:Code/MediaWiki/89288
Revision: 89288
Author: ezachte
Date: 2011-06-01 21:09:12 +0000 (Wed, 01 Jun 2011)
Log Message:
-----------
added view and prepared statements
Modified Paths:
--------------
trunk/wikistats/analytics/create_and_use_db_analytics.txt
Modified: trunk/wikistats/analytics/create_and_use_db_analytics.txt
===================================================================
--- trunk/wikistats/analytics/create_and_use_db_analytics.txt 2011-06-01
20:55:27 UTC (rev 89287)
+++ trunk/wikistats/analytics/create_and_use_db_analytics.txt 2011-06-01
21:09:12 UTC (rev 89288)
@@ -1,4 +1,5 @@
-- invoke this file with "mysql --user=root --password=[...] <
create_and_use_db_analytics.txt"
+-- for test queries make sure to delete output files *test*.csv first (MySQL
on purpose forbids overwrite)
-- tables implemented:
-- comscore
@@ -22,6 +23,7 @@
-- Create database and two tables from scratch
DROP DATABASE IF EXISTS `analytics` ;
+
CREATE DATABASE `analytics` ;
USE `analytics` ;
@@ -51,9 +53,9 @@
CREATE TABLE `comscore_regions` (
`region_code` char (2),
- `target_language` char (10),
+ `report_language` char (10),
`region_name` char (18),
- PRIMARY KEY (target_language,region_code)
+ PRIMARY KEY (report_language,region_code)
) ;
CREATE TABLE `wikistats` (
@@ -95,10 +97,10 @@
) ;
CREATE TABLE `language_names` (
- `target_language` char (15),
+ `report_language` char (15),
`language_code` char (15),
`language_name` char (50),
- PRIMARY KEY (target_language,language_code)
+ PRIMARY KEY (report_language,language_code)
) ;
-- SHOW TABLES ;
@@ -111,90 +113,117 @@
-- Database Manipulation
-- Obviously in real world this is a separate script
--- show contents (debugging only)
--- SELECT *
--- FROM comscore_regions ;
-
-
-LOAD DATA INFILE 'c:/MySQL/analytics/analytics_in_comscore.csv'
+LOAD DATA LOCAL INFILE 'c:/MySQL/analytics/analytics_in_comscore.csv'
INTO TABLE comscore
FIELDS TERMINATED BY ','
OPTIONALLY ENCLOSED BY '"'
(yyyymm,country_code,region_code,property,project,reach,visitors) ;
-LOAD DATA INFILE 'c:/MySQL/analytics/analytics_in_comscore_regions.csv'
+LOAD DATA LOCAL INFILE 'c:/MySQL/analytics/analytics_in_comscore_regions.csv'
INTO TABLE comscore_regions
FIELDS TERMINATED BY ','
OPTIONALLY ENCLOSED BY '"'
- (target_language,region_code,region_name) ;
+ (report_language,region_code,region_name) ;
-LOAD DATA INFILE 'c:/MySQL/analytics/analytics_in_wikistats.csv'
+-- show contents (debugging only)
+SELECT *
+ FROM comscore_regions ;
+
+LOAD DATA LOCAL INFILE 'analytics_in_wikistats.csv'
INTO TABLE wikistats
FIELDS TERMINATED BY ','
OPTIONALLY ENCLOSED BY '"'
(project,language_code,yyyymm,editors_all_time,editors_new,editors_ge_5,editors_ge_25,editors_ge_100,articles,articles_new_per_day,articles_over_bytes_500,articles_over_bytes_2000,edits_per_article,bytes_per_article,edits,size_in_bytes,size_in_words,links_internal,links_interwiki,links_image,links_external,redirects)
;
-LOAD DATA INFILE 'c:/MySQL/analytics/analytics_in_page_views.csv'
+LOAD DATA LOCAL INFILE 'analytics_in_page_views.csv'
INTO TABLE page_views
FIELDS TERMINATED BY ','
OPTIONALLY ENCLOSED BY '"'
(project,language_code,yyyymm,views_non_mobile_raw,views_mobile_raw,views_non_mobile_normalized,views_mobile_normalized)
;
-LOAD DATA INFILE 'c:/MySQL/analytics/analytics_in_language_names.csv'
+LOAD DATA LOCAL INFILE 'analytics_in_language_names.csv'
INTO TABLE language_names
FIELDS TERMINATED BY ','
OPTIONALLY ENCLOSED BY '"'
- (target_language,language_code,language_name) ;
+ (report_language,language_code,language_name) ;
--- show contents (debugging only)
-SELECT *
- FROM comscore
- ORDER BY yyyymm,country_code,region_code,property,project
- INTO OUTFILE 'c:/MySQL/analytics/analytics_out_comscore_full_table.csv'
- FIELDS TERMINATED BY ',' ;
-
--- test query (to be described in detail)
--- make sure to delete output file first
-SELECT yyyymm, project, page_views.language_code, language_name,
views_non_mobile_normalized, views_mobile_normalized
+-- on views procedures and functions see
http://www.paragoncorporation.com/ArticleDetail.aspx?ArticleID=28
+-- view makes join implicit and adds aggregated fields (and provides default
order, can be overidden)
+CREATE VIEW page_views_v AS
+ SELECT yyyymm, project, page_views.language_code, language_name,
+ views_non_mobile_raw, views_mobile_raw ,
+ views_non_mobile_normalized, views_mobile_normalized,
+ views_non_mobile_raw+views_mobile_raw AS views_raw,
+ views_non_mobile_normalized+views_mobile_normalized AS
views_normalized,
+ language_names.report_language
FROM page_views LEFT JOIN language_names
ON page_views.language_code = language_names.language_code
+ ORDER BY yyyymm,project,language_name ;
+
+PREPARE page_views_s1
+ FROM
+ "SELECT yyyymm,project,language_code,language_name,views_normalized
+ FROM page_views_v
WHERE (project = 'wp') AND
(yyyymm BETWEEN '2011-03' AND '2011-05') AND
- (page_views.language_code = 'nl') AND
- (language_names.target_language = 'en')
- ORDER BY project,language_name,yyyymm
+ (language_code = 'nl') AND
+ (report_language=?) -- parameter needed
INTO OUTFILE 'c:/MySQL/analytics/analytics_out_page_views_test1.csv'
- FIELDS TERMINATED BY ',' ;
-
--- expected result:
--- 2011-03,wp,nl,Dutch,164544225,4994050
--- 2011-04,wp,nl,Dutch,153550168,5427629
--- 2011-05,wp,nl,Dutch,1957423,73106
+ FIELDS TERMINATED BY ','" ;
--- test query (to be described in detail)
--- make sure to delete output file first
-SELECT yyyymm, project, page_views.language_code, language_name,
views_non_mobile_normalized, views_mobile_normalized
- FROM page_views LEFT JOIN language_names
- ON page_views.language_code = language_names.language_code
- WHERE (project = 'wp') AND
- (yyyymm BETWEEN '2011-03' AND '2011-05') AND
- (page_views.language_code = 'nl') AND
- (language_names.target_language = 'de')
- ORDER BY project,language_name,yyyymm
+SET @report_language = 'en' ; -- used as parameter in prepared statements ;
for tests databasee contains translations for 'en' and 'de'
+
+EXECUTE page_views_s1 USING @report_language ;
+
+SET @project = 'wp' ;
+SET @yyyymm_first = '2011-03' ;
+SET @yyyymm_last = '2011-05' ;
+SET @language_code = 'nl' ;
+SET @report_language = 'de' ;
+
+PREPARE page_views_s2
+ FROM
+ "SELECT yyyymm,project,language_code,language_name,views_normalized
+ FROM page_views_v
+ WHERE (project = ?) AND
+ (yyyymm BETWEEN ? AND ?) AND
+ (language_code = ?) AND
+ (report_language = ?)
INTO OUTFILE 'c:/MySQL/analytics/analytics_out_page_views_test2.csv'
- FIELDS TERMINATED BY ',' ;
-
--- expected result:
--- 2011-03,wp,nl,Niederländisch,164544225,4994050
--- 2011-04,wp,nl,Niederländisch,153550168,5427629
--- 2011-05,wp,nl,Niederländisch,1957423,73106
+ FIELDS TERMINATED BY ','" ;
--- export all relevant non-meta data from comsCore's reach by region (and
comScore treats India and China as regions in this context)
--- make sure to delete output file first
-SELECT yyyymm,region_name,reach
- FROM comscore LEFT JOIN comscore_regions
- ON comscore.region_code = comscore_regions.region_code AND
comscore_regions.target_language = 'en'
- WHERE (region_name != '') AND (yyyymm BETWEEN '2011-03' AND '2011-05')
- ORDER BY yyyymm,region_name
- INTO OUTFILE
'c:/MySQL/analytics/analytics_out_comscore_reach_test1.csv'
- FIELDS TERMINATED BY ',' ;
+EXECUTE page_views_s2 USING @project, @yyyymm_first, @yyyymm_last,
@language_code, @report_language ;
+
+
+/*
+-- in progress: tests with using procedure to make invocation more flexible
than with prepared statement
+-- prep stats expects all parameters predefined
+
+-- could the following be done in MySQL: a variable where clause with variable
number of variables ? (seems not exactly like in this example for other DMRS)
+-- http://www.sqlteam.com/article/implementing-a-dynamic-where-clause
+
+DROP PROCEDURE IF EXISTS page_views_p1 ;
+DELIMITER //
+CREATE PROCEDURE page_views_p1 (parm_project VARCHAR(30))
+ SELECT yyyymm,project,language_code,language_name,views_normalized
+ FROM page_views_v
+ WHERE (project = parm_project) ;
+ //
+DELIMITER ;
+
+-- CALL page_views_p1 ('wk') ;
+
+delimiter //
+CREATE PROCEDURE dynamic(IN tbl CHAR(64), IN col CHAR(64))
+BEGIN
+ SET @s = CONCAT('SELECT ',col,' FROM ',tbl) ;
+ PREPARE stmt FROM @s;
+ EXECUTE stmt;
+END
+//
+delimiter ;
+CALL dynamic ('comscore', 'yyyymm')
+*/
+
+
+
_______________________________________________
MediaWiki-CVS mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs