Erik Zachte has uploaded a new change for review.
https://gerrit.wikimedia.org/r/88973
Change subject: use different input/output folders
......................................................................
use different input/output folders
Change-Id: I31a12b92f0e8bc11f59cd0a886e8bec93f9ea0d3
---
M dumps/bash/sort_dblists.sh
M dumps/perl/WikiCountsSortDblist.pl
2 files changed, 64 insertions(+), 18 deletions(-)
git pull ssh://gerrit.wikimedia.org:29418/analytics/wikistats refs/changes/73/88973/1
diff --git a/dumps/bash/sort_dblists.sh b/dumps/bash/sort_dblists.sh
index d750c96..c02721d 100755
--- a/dumps/bash/sort_dblists.sh
+++ b/dumps/bash/sort_dblists.sh
@@ -3,15 +3,59 @@
wikistats=/a/wikistats_git
dumps=$wikistats/dumps
perl=$dumps/perl
+perl=/home/ezachte/wikistats/dumps/perl # tests
csv=$dumps/csv
dblists=$dumps/dblists
+# dblists are
+
+# dblists are maintained manually:
+# Once a private wiki got added to original dblist file inadvertently,
+# which required major cleanup operation across csv files,
+# so I'd rather vet new wiki codes myself (EZ).
+
+# Update Jan 2012:
+# As these files are updated by script (to sort wikis by size on each run)
+# which causes git warnings, there is now a folder 'master copy'
+
+# Update Jan 2013:
+# Script now uses master copy as input and writes sorted list to folder where wikistats reads them
+# Oops, I shouldn't have used a space in folder name 'master copy', oh well
+
cd $perl
-perl WikiCountsSortDblist.pl -c $csv/csv_wb/StatisticsLog.csv -d
$dblists/wikibooks.dblist -s wikibooks
-perl WikiCountsSortDblist.pl -c $csv/csv_wk/StatisticsLog.csv -d
$dblists/wiktionary.dblist -s wiktionary
-perl WikiCountsSortDblist.pl -c $csv/csv_wn/StatisticsLog.csv -d
$dblists/wikinews.dblist -s wikinews
-perl WikiCountsSortDblist.pl -c $csv/csv_wp/StatisticsLog.csv -d
$dblists/wikipedia.dblist -s wiki
-perl WikiCountsSortDblist.pl -c $csv/csv_wq/StatisticsLog.csv -d
$dblists/wikiquote.dblist -s wikiquote
-perl WikiCountsSortDblist.pl -c $csv/csv_ws/StatisticsLog.csv -d
$dblists/wikisource.dblist -s wikisource
-perl WikiCountsSortDblist.pl -c $csv/csv_wv/StatisticsLog.csv -d
$dblists/wikiversity.dblist -s wikiversity
-perl WikiCountsSortDblist.pl -c $csv/csv_wx/StatisticsLog.csv -d
$dblists/special.dblist -s wiki
+perl WikiCountsSortDblist.pl -c $csv/csv_wb/StatisticsLog.csv \
+ -i $dblists/master\ copy/wikibooks.dblist \
+ -o $dblists/wikibooks.dblist \
+ -s wikibooks
+perl WikiCountsSortDblist.pl -c $csv/csv_wk/StatisticsLog.csv \
+ -i $dblists/master\ copy/wiktionary.dblist \
+ -o $dblists/wiktionary.dblist \
+ -s wiktionary
+perl WikiCountsSortDblist.pl -c $csv/csv_wn/StatisticsLog.csv \
+ -i $dblists/master\ copy/wikinews.dblist \
+ -o $dblists/wikinews.dblist \
+ -s wikinews
+perl WikiCountsSortDblist.pl -c $csv/csv_wo/StatisticsLog.csv \
+ -i $dblists/master\ copy/wikivoyage.dblist \
+ -o $dblists/wikivoyage.dblist \
+ -s wikivoyage
+perl WikiCountsSortDblist.pl -c $csv/csv_wp/StatisticsLog.csv \
+ -i $dblists/master\ copy/wikipedia.dblist \
+ -o $dblists/wikipedia.dblist \
+ -s wiki
+perl WikiCountsSortDblist.pl -c $csv/csv_wq/StatisticsLog.csv \
+ -i $dblists/master\ copy/wikiquote.dblist \
+ -o $dblists/wikiquote.dblist \
+ -s wikiquote
+perl WikiCountsSortDblist.pl -c $csv/csv_ws/StatisticsLog.csv \
+ -i $dblists/master\ copy/wikisource.dblist \
+ -o $dblists/wikisource.dblist \
+ -s wikisource
+perl WikiCountsSortDblist.pl -c $csv/csv_wv/StatisticsLog.csv \
+ -i $dblists/master\ copy/wikiversity.dblist \
+ -o $dblists/wikiversity.dblist \
+ -s wikiversity
+perl WikiCountsSortDblist.pl -c $csv/csv_wx/StatisticsLog.csv \
+ -i $dblists/master\ copy/special.dblist \
+ -o $dblists/special.dblist \
+ -s wiki
diff --git a/dumps/perl/WikiCountsSortDblist.pl b/dumps/perl/WikiCountsSortDblist.pl
old mode 100755
new mode 100644
index e27bdd0..08c3598
--- a/dumps/perl/WikiCountsSortDblist.pl
+++ b/dumps/perl/WikiCountsSortDblist.pl
@@ -2,26 +2,28 @@
use Getopt::Std ;
- getopt ("cds", \%options) ;
+ getopt ("cios", \%options) ;
- die ("Specify dblist file as: -d path") if (! defined (@options {"d"})) ;
+ die ("Specify dblist file in as: -i path") if (! defined (@options
{"i"})) ;
+ die ("Specify dblist file out as: -o path") if (! defined (@options
{"o"})) ;
die ("Specify path for StatisticsLog.csv: -c path") if (! defined (@options
{"c"})) ;
- die ("Specify suffix: -s suffix") if (! defined (@options {"s"})) ;
+ die ("Specify suffix: -s suffix") if (! defined (@options
{"s"})) ;
- $file_csv = @options {"c"} ;
- $file_dblist = @options {"d"} ;
- $suffix = @options {"s"} ;
+ $file_csv = @options {"c"} ;
+ $file_dblist_in = @options {"i"} ;
+ $file_dblist_out = @options {"o"} ;
+ $suffix = @options {"s"} ;
# local test only:
# $file_csv = "dblists/StatisticsLog.csv" ;
# $file_dblist = "dblists/wikinews.dblist" ;
# $suffix = "wikinews" ;
- if (! -e $file_csv) { die "csv file '$file_csv' not found" ; }
- if (! -e $file_dblist) { die "dblist file '$file_dblist' not found" ; }
+ if (! -e $file_csv) { die "csv file '$file_csv' not found" ; }
+ if (! -e $file_dblist_in) { die "dblist file '$file_dblist_in' not found" ; }
print "\n\nSort dblist $file_dblist\nProcessing last dump took x
seconds:\n\n" ;
- open DBLIST, '<', $file_dblist ;
+ open DBLIST, '<', $file_dblist_in ;
@dblist = <DBLIST> ;
foreach $db (@dblist)
{
@@ -66,7 +68,7 @@
rename $file_dblist, $file_dblist.".bak" ;
$lines = 0 ;
- open DBLIST, '>', $file_dblist ;
+ open DBLIST, '>', $file_dblist_out or die "File '$file_dblist_out' could not be written" ; # 'or', not '||': '||' binds to the filename (always true), so die would never run when open fails
foreach $db (@dblist)
{
$rank = $wiki_rank {$db} ;
--
To view, visit https://gerrit.wikimedia.org/r/88973
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: I31a12b92f0e8bc11f59cd0a886e8bec93f9ea0d3
Gerrit-PatchSet: 1
Gerrit-Project: analytics/wikistats
Gerrit-Branch: master
Gerrit-Owner: Erik Zachte <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits