http://www.mediawiki.org/wiki/Special:Code/MediaWiki/89288

Revision: 89288
Author:   ezachte
Date:     2011-06-01 21:09:12 +0000 (Wed, 01 Jun 2011)
Log Message:
-----------
added view and prepared statements

Modified Paths:
--------------
    trunk/wikistats/analytics/create_and_use_db_analytics.txt

Modified: trunk/wikistats/analytics/create_and_use_db_analytics.txt
===================================================================
--- trunk/wikistats/analytics/create_and_use_db_analytics.txt   2011-06-01 
20:55:27 UTC (rev 89287)
+++ trunk/wikistats/analytics/create_and_use_db_analytics.txt   2011-06-01 
21:09:12 UTC (rev 89288)
@@ -1,4 +1,5 @@
 -- invoke this file with "mysql --user=root --password=[...] < 
create_and_use_db_analytics.txt"
+-- for test queries make sure to delete output files *test*.csv first (MySQL 
on purpose forbids overwrite)
 
 -- tables implemented:
 --   comscore
@@ -22,6 +23,7 @@
 
 -- Create database and two tables from scratch 
 DROP DATABASE IF EXISTS `analytics` ;
+
 CREATE DATABASE `analytics` ;
 
 USE `analytics` ;
@@ -51,9 +53,9 @@
 
 CREATE TABLE `comscore_regions` (
   `region_code`     char (2), 
-  `target_language` char (10),
+  `report_language` char (10),
   `region_name`     char (18),
-  PRIMARY KEY (target_language,region_code)
+  PRIMARY KEY (report_language,region_code)
 ) ;  
 
 CREATE TABLE `wikistats` (
@@ -95,10 +97,10 @@
 ) ;
 
 CREATE TABLE `language_names` (
-  `target_language`             char (15),   
+  `report_language`             char (15),   
   `language_code`               char (15),
   `language_name`               char (50),
-  PRIMARY KEY (target_language,language_code)
+  PRIMARY KEY (report_language,language_code)
 ) ;
 
 -- SHOW TABLES ;
@@ -111,90 +113,117 @@
 -- Database Manipulation
 -- Obviously in real world this is a separate script
 
--- show contents (debugging only) 
--- SELECT * 
---      FROM comscore_regions ;
-
-
-LOAD DATA INFILE 'c:/MySQL/analytics/analytics_in_comscore.csv' 
+LOAD DATA LOCAL INFILE 'c:/MySQL/analytics/analytics_in_comscore.csv' 
      INTO TABLE comscore 
      FIELDS TERMINATED BY ',' 
      OPTIONALLY ENCLOSED BY '"' 
      (yyyymm,country_code,region_code,property,project,reach,visitors) ;
 
-LOAD DATA INFILE 'c:/MySQL/analytics/analytics_in_comscore_regions.csv' 
+LOAD DATA LOCAL INFILE 'c:/MySQL/analytics/analytics_in_comscore_regions.csv' 
      INTO TABLE comscore_regions 
      FIELDS TERMINATED BY ',' 
      OPTIONALLY ENCLOSED BY '"' 
-     (target_language,region_code,region_name) ;
+     (report_language,region_code,region_name) ;
 
-LOAD DATA INFILE 'c:/MySQL/analytics/analytics_in_wikistats.csv' 
+-- show contents (debugging only) 
+SELECT * 
+     FROM comscore_regions ;
+
+LOAD DATA LOCAL INFILE 'analytics_in_wikistats.csv' 
      INTO TABLE wikistats 
      FIELDS TERMINATED BY ',' 
      OPTIONALLY ENCLOSED BY '"' 
      
(project,language_code,yyyymm,editors_all_time,editors_new,editors_ge_5,editors_ge_25,editors_ge_100,articles,articles_new_per_day,articles_over_bytes_500,articles_over_bytes_2000,edits_per_article,bytes_per_article,edits,size_in_bytes,size_in_words,links_internal,links_interwiki,links_image,links_external,redirects)
 ;
 
-LOAD DATA INFILE 'c:/MySQL/analytics/analytics_in_page_views.csv' 
+LOAD DATA LOCAL INFILE 'analytics_in_page_views.csv' 
      INTO TABLE page_views 
      FIELDS TERMINATED BY ',' 
      OPTIONALLY ENCLOSED BY '"' 
      
(project,language_code,yyyymm,views_non_mobile_raw,views_mobile_raw,views_non_mobile_normalized,views_mobile_normalized)
 ;
 
-LOAD DATA INFILE 'c:/MySQL/analytics/analytics_in_language_names.csv' 
+LOAD DATA LOCAL INFILE 'analytics_in_language_names.csv' 
      INTO TABLE language_names 
      FIELDS TERMINATED BY ',' 
      OPTIONALLY ENCLOSED BY '"' 
-     (target_language,language_code,language_name) ;
+     (report_language,language_code,language_name) ;
 
--- show contents (debugging only) 
-SELECT * 
-       FROM comscore 
-       ORDER BY yyyymm,country_code,region_code,property,project 
-       INTO OUTFILE 'c:/MySQL/analytics/analytics_out_comscore_full_table.csv' 
-       FIELDS TERMINATED BY ',' ; 
-
--- test query (to be described in detail)
--- make sure to delete output file first 
-SELECT yyyymm, project, page_views.language_code, language_name, 
views_non_mobile_normalized, views_mobile_normalized 
+-- on views procedures and functions see 
http://www.paragoncorporation.com/ArticleDetail.aspx?ArticleID=28
+-- view makes join implicit and adds aggregated fields (and provides default 
order, can be overidden)
+CREATE VIEW page_views_v AS  
+       SELECT yyyymm, project, page_views.language_code, language_name, 
+              views_non_mobile_raw, views_mobile_raw , 
+              views_non_mobile_normalized, views_mobile_normalized, 
+              views_non_mobile_raw+views_mobile_raw AS views_raw, 
+              views_non_mobile_normalized+views_mobile_normalized AS 
views_normalized,
+              language_names.report_language 
        FROM page_views LEFT JOIN language_names 
        ON page_views.language_code = language_names.language_code 
+       ORDER BY yyyymm,project,language_name ;
+
+PREPARE page_views_s1
+       FROM
+       "SELECT yyyymm,project,language_code,language_name,views_normalized
+       FROM page_views_v 
        WHERE (project = 'wp')  AND 
              (yyyymm BETWEEN '2011-03' AND '2011-05') AND 
-             (page_views.language_code = 'nl') AND 
-             (language_names.target_language = 'en') 
-       ORDER BY project,language_name,yyyymm
+             (language_code = 'nl') AND
+             (report_language=?)                     -- parameter needed
        INTO OUTFILE 'c:/MySQL/analytics/analytics_out_page_views_test1.csv' 
-       FIELDS TERMINATED BY ',' ; 
-       
--- expected result:       
--- 2011-03,wp,nl,Dutch,164544225,4994050
--- 2011-04,wp,nl,Dutch,153550168,5427629
--- 2011-05,wp,nl,Dutch,1957423,73106 
+       FIELDS TERMINATED BY ','" ; 
 
--- test query (to be described in detail)
--- make sure to delete output file first 
-SELECT yyyymm, project, page_views.language_code, language_name, 
views_non_mobile_normalized, views_mobile_normalized 
-       FROM page_views LEFT JOIN language_names 
-       ON page_views.language_code = language_names.language_code 
-       WHERE (project = 'wp')  AND 
-             (yyyymm BETWEEN '2011-03' AND '2011-05') AND 
-             (page_views.language_code = 'nl') AND 
-             (language_names.target_language = 'de') 
-       ORDER BY project,language_name,yyyymm
+SET @report_language = 'en' ; -- used as parameter in prepared statements ; 
for tests databasee contains translations for 'en' and 'de'
+
+EXECUTE page_views_s1 USING @report_language ;
+
+SET @project = 'wp' ;
+SET @yyyymm_first = '2011-03' ;
+SET @yyyymm_last  = '2011-05' ;
+SET @language_code = 'nl' ;
+SET @report_language = 'de' ; 
+
+PREPARE page_views_s2
+       FROM
+       "SELECT yyyymm,project,language_code,language_name,views_normalized
+       FROM page_views_v 
+       WHERE (project = ?) AND  
+             (yyyymm BETWEEN ? AND ?) AND
+             (language_code = ?) AND
+             (report_language = ?)      
        INTO OUTFILE 'c:/MySQL/analytics/analytics_out_page_views_test2.csv' 
-       FIELDS TERMINATED BY ',' ; 
-       
--- expected result:       
--- 2011-03,wp,nl,Niederländisch,164544225,4994050
--- 2011-04,wp,nl,Niederländisch,153550168,5427629
--- 2011-05,wp,nl,Niederländisch,1957423,73106 
+       FIELDS TERMINATED BY ','" ; 
 
--- export all relevant non-meta data from comsCore's reach by region (and 
comScore treats India and China as regions in this context)
--- make sure to delete output file first 
-SELECT yyyymm,region_name,reach
-       FROM comscore LEFT JOIN comscore_regions 
-       ON comscore.region_code = comscore_regions.region_code AND 
comscore_regions.target_language = 'en'
-       WHERE (region_name != '') AND (yyyymm BETWEEN '2011-03' AND '2011-05') 
-       ORDER BY yyyymm,region_name 
-       INTO OUTFILE 
'c:/MySQL/analytics/analytics_out_comscore_reach_test1.csv' 
-       FIELDS TERMINATED BY ',' ; 
+EXECUTE page_views_s2 USING @project, @yyyymm_first, @yyyymm_last, 
@language_code, @report_language ;
+
+
+/*
+-- in progress: tests with using procedure to make invocation more flexible 
than with prepared statement
+-- prep stats expects all parameters predefined 
+
+-- could the following be done in MySQL: a variable where clause with variable 
number of variables ? (seems not exactly like in this example for other DMRS)
+-- http://www.sqlteam.com/article/implementing-a-dynamic-where-clause
+
+DROP PROCEDURE IF EXISTS page_views_p1 ;
+DELIMITER // 
+CREATE PROCEDURE page_views_p1 (parm_project VARCHAR(30))
+       SELECT yyyymm,project,language_code,language_name,views_normalized
+       FROM page_views_v 
+       WHERE (project = parm_project) ; 
+       //
+DELIMITER ;
+
+-- CALL page_views_p1 ('wk') ;       
+
+delimiter // 
+CREATE PROCEDURE dynamic(IN tbl CHAR(64), IN col CHAR(64))
+BEGIN
+    SET @s = CONCAT('SELECT ',col,' FROM ',tbl) ;
+    PREPARE stmt FROM @s;
+    EXECUTE stmt;
+END    
+//
+delimiter ;
+CALL dynamic ('comscore', 'yyyymm')
+*/
+
+
+


_______________________________________________
MediaWiki-CVS mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs

Reply via email to