https://www.mediawiki.org/wiki/Special:Code/MediaWiki/110549

Revision: 110549
Author:   ezachte
Date:     2012-02-02 02:57:43 +0000 (Thu, 02 Feb 2012)
Log Message:
-----------
Reactivated hourly counts; updated page view counts (mobile page views are
now also included in the totals for the report card sortable array).

Modified Paths:
--------------
    trunk/wikistats/dumps/WikiCountsSummarizeProjectCounts.pl

Modified: trunk/wikistats/dumps/WikiCountsSummarizeProjectCounts.pl
===================================================================
--- trunk/wikistats/dumps/WikiCountsSummarizeProjectCounts.pl   2012-02-02 
02:52:22 UTC (rev 110548)
+++ trunk/wikistats/dumps/WikiCountsSummarizeProjectCounts.pl   2012-02-02 
02:57:43 UTC (rev 110549)
@@ -14,7 +14,7 @@
 
 # to do: AdjustForMissingFilesAndUndercountedMonths for week and day level 
files
 
-# Added May 2001: 
+# Added May 2001:
 # For analytics database one file is written for all projects and languages 
combined,
 # with per month, not normalized and normalized page view counts in one row.
 
@@ -27,8 +27,14 @@
   ez_lib_version (4) ;
 
   # set defaults mainly for tests on local machine
-  default_argv "-i 'w:/# In Dammit.lt/projectcounts'|-o 'w:/# out test'" ;
+  default_argv "-i 'w:/# In Dammit.lt/projectcounts/test_in'|-o 'w:/# In 
Dammit.lt/projectcounts/test_out'" ;
 
+  # by default process up to and including last completed month,
+  # to recreate older stats, set following variables which will be used 
instead of system time
+  # $assume_current_year  = 2012 ;
+  # $assume_current_month = 1 ;
+  # (to do: make this scripts parameters)
+
   $| = 1; # flush screen output
   $true  = 1 ;
   $false = 0 ;
@@ -102,7 +108,7 @@
       # $totals_project_month_split {$project2} {$date} += $count ;
       # if ($totals_project_month_split {$project2} {$date} > 
$totals_project_month_split_max {$project2})
       # { $totals_project_month_split_max {$project2} = 
$totals_project_month_split {$project2} {$date} ; }
-#qqq
+
       $totals_project_month_combined {"$project3"} {$date} += $count ;
       if ($totals_project_month_combined {$project3} {$date} > 
$totals_project_month_combined_max {$project3})
       { $totals_project_month_combined_max {$project3} = 
$totals_project_month_combined {$project3} {$date} ; }
@@ -315,8 +321,11 @@
   my ($month,$year) = (localtime(time))[4,5] ;
   my @months = qw(Xxx Jan Feb Mar Apr May Jun Jul Aug Sept Oct Nov Dec) ;
 
-# $year  = 111 ;
-# $month = 3 ;
+  # by default process up to and including last completed month, may be 
overruled here
+  if ($assume_current_year ne '')
+  { $year = $assume_current_year - 1900 ; }
+  if ($assume_current_month ne '')
+  { $month = $assume_current_month - 1 ; }
 
   $year_now  = $year + 1900 ;
   $month_now = $month + 1 ;
@@ -331,7 +340,7 @@
   ($year,$month) = $month > 0 ? ($year,$month-1) : ($year-1,11) ;
   $month_0_minus_1 = sprintf ("%04d/%02d",$year+1900,$month+1) ;
 
-  print "\nWrite trend data up till month: $month_0\n\n" ;
+  print "\nWrite trend data up till month: $month_0 to $month_0_file\n\n" ;
   print "Compare with previous month: $month_0_minus_1, previous year: 
$month_0_minus_12\n\n" ;
 
 #  $csv_recent_months = "project," ;
@@ -446,6 +455,8 @@
 
       next if $file ge "projectcounts-20100611-000000" and $file lt 
"projectcounts-20100617-000000" ; # bad measurements on these dates
       next if $file ge "projectcounts-20100627-000000" and $file lt 
"projectcounts-20100628-000000" ; # bad measurements on these dates
+      next if $file ge "projectcounts-20110908-000000" and $file lt 
"projectcounts-20110915-000000" ; # bad measurements on these dates
+      next if $file ge "projectcounts-20111223-010000" and $file lt 
"projectcounts-20111226-160000" ; # bad measurements on these dates
 
       push @files, $file ;
       $file_in_tar {$file} = $file_in ;
@@ -556,13 +567,14 @@
         print "Month $period: $processed processed, $missing missing -> 
rescale * $rescale\n" ;
       }
 
+      # summer 2010: correct for data loss (percentages derived from widened 
gaps in squid log sequence numbers)
          if ($period eq '2010/04') { $rescale2 = 1.241 ; }
       elsif ($period eq '2010/05') { $rescale2 = 1.310 ; }
       elsif ($period eq '2010/06') { $rescale2 = 1.328 ; }
       elsif ($period eq '2010/07') { $rescale2 = 1.295 ; }
 
       if ($rescale2 != 1)
-      { print "Month $period: rescale * $rescale2 to compensate for missed UDP 
messages at locke\n" ; }
+      { print "Month $period: rescale * $rescale2 to compensate for missed UDP 
messages at squid log processing servers\n" ; }
 
       next if $rescale == 1 and $rescale2 == 1 ;
 
@@ -650,6 +662,7 @@
     $hours_processed {"weekday"} {"$weekday"} ++ ;
 
     $tar_file = $file_in_tar {$file} ;
+
     if ($tar_file ne $tar_file_prev)
     {
       $tar->read($tar_file);
@@ -717,7 +730,7 @@
 # print "$project $language $year/$month: " . $totals {"month"} 
{$project}{"$language,$year/$month"} . "\n" ;
       $totals {"week"}    {$project} {"$language,$year,$week"} += $count ;
       $totals {"day"}     {$project} {"$language,$year/$month/$day"} += $count 
;
-    # $totals {"hour"}    {$project} {"$language,$year/$month/$day,$hour"} = 
$count ; # huge file, reactivate when really used
+      $totals {"hour"}    {$project} {"$language,$year/$month/$day,$hour"} = 
$count ; # huge file, reactivate when really used
       $totals {"weekday"} {$project} {"$language,$weekday"} += $count ;
 
       if ("$year/$month" eq $month_0) # determines sort order, no need to 
rescale for missing projectcount files
@@ -798,11 +811,11 @@
       { $file_csv =~ s/\.csv/Normalized.csv/ ; }
       &Log ("File out: $file_out\n") ;
 
-      if (-e "$dir_out/PageViewsPerHourAll.csv") # huge file, remove for now, 
reactivate when really used
-      {
-        print "unlink $dir_out/PageViewsPerHourAll.csv (reactivate when really 
used)\n" ;
-        unlink "$dir_out/PageViewsPerHourAll.csv" ;
-      }
+      # if (-e "$dir_out/PageViewsPerHourAll.csv") # huge file, remove for 
now, reactivate when really used
+      # {
+      #   print "unlink $dir_out/PageViewsPerHourAll.csv (reactivate when 
really used)\n" ;
+      #   unlink "$dir_out/PageViewsPerHourAll.csv" ;
+      # }
 
       &Log ("File csv: $file_csv\n") ;
       open CSV, ">", $file_csv ;
@@ -938,12 +951,13 @@
 
 # %test = %{$totals {"month"} {"wp"} };
 # %test2 = @recent_months ;
+
     for ($m = 0 ; $m < $months_recent ; $m++)
     {
       print CSV ($totals {"month"} {$project} {"$language,${recent_months 
[$m]}"} +
                  $totals {"month"} {$project} {"$language\.m,${recent_months 
[$m]}"}) . "," ;
-      print     ($totals {"month"} {$project} {"$language,${recent_months 
[$m]}"} +
-                 $totals {"month"} {$project} {"$language\.m,${recent_months 
[$m]}"}) . "," ;
+      print      $totals {"month"} {$project} {"$language,${recent_months 
[$m]}"} +
+                 $totals {"month"} {$project} {"$language\.m,${recent_months 
[$m]}"} . "," ;
     }
 
     if (($project ne "wp") && ($project ne "wx"))
@@ -1215,6 +1229,8 @@
 
   close CSV ;
 
+  my (%growth_figures_text,%growth_figures_html) ;
+
   # write ready made table rows for report card: page views top 25 movers 
shakers
   foreach $key (keys %largest_projects)
   {
@@ -1222,9 +1238,9 @@
 
     next if $language =~ /\.m/ ; # skip mobile for now
 
-    $total_lastmonth = $totals {"month"} {$project} {"$language,$month_0"} ;
-    $total_prevmonth = $totals {"month"} {$project} 
{"$language,$month_0_minus_1"} ;
-    $total_prevyear  = $totals {"month"} {$project} 
{"$language,$month_0_minus_12"} ;
+    $total_lastmonth = $totals {"month"} {$project} {"$language,$month_0"}     
     + $totals {"month"} {$project} {"$language\.m,$month_0"} ;
+    $total_prevmonth = $totals {"month"} {$project} 
{"$language,$month_0_minus_1"}  + $totals {"month"} {$project} 
{"$language\.m,$month_0_minus_1"};
+    $total_prevyear  = $totals {"month"} {$project} 
{"$language,$month_0_minus_12"} + $totals {"month"} {$project} 
{"$language\.m,$month_0_minus_12"};
 
     $perc_month = "no data" ;
     $perc_year  = "no data" ;


_______________________________________________
MediaWiki-CVS mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs

Reply via email to