#!/usr/bin/perl -w

#
# A script to attempt to determine OS versions from user-agent strings.
#
# It walks the current working directory recursively, decompresses every
# download.documentfoundation.org*access*.bz2 log found in each directory,
# classifies each request by the operating system named in its user-agent,
# and prints tab-separated totals plus a per-ISO-week breakdown on stdout.
# Progress and unparseable lines are reported on stderr.
#
# Origin: scripts/os-stats.pl, commit 6f1d1f6212bff5576822e94de6f8d6e695e07377
# by Michael Meeks, 2015-10-14, "Script to crunch os statistics."
#

use strict;
use warnings;
use POSIX qw(strftime);
use Cwd qw(getcwd);

# Cache of "year-month-day" => ISO "year-week" strings, so strftime runs
# only once per distinct date.
my %global_date_to_epoch;

# Map a calendar date to its ISO 8601 week, formatted as "%G-%V"
# (e.g. "2015-42").  Despite the historical name, no epoch value is
# returned.
#
#   $year  - four-digit year
#   $month - month number, 1..12 (zero-padded strings are fine)
#   $day   - day of month
#
# Returns the ISO week-based year and week number joined by '-'.
sub ymd_to_epoch
{
    my ($year, $month, $day) = @_;

    my $key = "$year-$month-$day";

    if ( ! defined $global_date_to_epoch{$key} ) {
        # Noon is used so the result does not depend on the time of day.
        $global_date_to_epoch{$key} =
            strftime( "%G-%V", 0, 0, 12, $day, $month - 1, $year - 1900 );
    }
    return $global_date_to_epoch{$key};
}

# de-mangle windows user-agents
my %win_ver_hash = (
    'NT 10.0' => 'Windows 10',
    'NT 9.0' => 'Windows 9',
    'NT 6.3' => 'Windows 8.1',
    'NT 6.2' => 'Windows 8',
    'NT 7.0' => 'Windows 7',
    'NT 6.1' => 'Windows 7',
    'NT 6.0' => 'Windows Vista',
    'NT 5.2' => 'Windows Server 2003',
    'NT 5.1' => 'Windows XP',
    'NT 5.01' => 'Windows 2000 SP1',
    'NT 5.0' => 'Windows 2000',
    'NT 4.0' => 'Windows NT 4.0',
    '98; Win 9x 4.90' => 'Windows ME',
    '98' => 'Windows 98',
    '95' => 'Windows 95',
    'CE' => 'Windows CE',
    );

# Translate the version token that follows "Windows " in a user-agent
# (e.g. "NT 6.1") into a marketing name.
#
#   $vin - the raw token; surrounding whitespace is ignored
#
# Returns the friendly name, or 'Windows other' when the token is unknown.
sub win_real_ver
{
    my $vin = shift;

    # The capturing regex in classify_useragent can leave trailing blanks
    # on the token, which would otherwise defeat the lookup.
    $vin =~ s/^\s+//;
    $vin =~ s/\s+$//;

    my $vout = $win_ver_hash{$vin};
    if (!defined $vout) {
        $vout = 'Windows other';
    }
    return $vout;
}

# Month abbreviations as they appear in the access-log timestamp.
my %month_to_num = (
    'Jan' => '01',
    'Feb' => '02',
    'Mar' => '03',
    'Apr' => '04',
    'May' => '05',
    'Jun' => '06',
    'Jul' => '07',
    'Aug' => '08',
    'Sep' => '09',
    'Oct' => '10',
    'Nov' => '11',
    'Dec' => '12',
);

# Hit counters: per OS key overall, and per ISO week per OS key.
my %totals;
my %breakdown_by_week;

# Literal, case-sensitive substrings identifying download tools, bots and
# other noise whose requests must not be counted.  Trailing blanks and
# slashes are significant.
my @skipped_agent_substrings = (
    # download tools & bots
    'Wget',               'chocolatey',
    'lftp/',              'curl/',
    'FPS-DAV-Client/',    'Deluge/',
    'FPS-GET-Client/',    'SoftonicDownloader/',
    'CCBot/',             'AhrefsBot/',
    'SputnikBot/',        'YandexBot/',
    'MojeekBot/',         'Webmon /',
    'bingbot/',           'Baiduspider/',
    'Yahoo! Slurp',       'portscout/',
    'CRAZYWEBCRAWLER ',   'FDM ',
    'YisouSpider',        'ABCdatos BotLink/',
    'ia_archiver ',       'BTWebClient/',
    'portroach/',         'Java/',
    'Googlebot',          'escan ',
    'Python-urllib/',     'PycURL/',
    'fetch ',             'WWWC/',
    'Xovibot/',           'Dolphin ',
    'Megaindex.ru/',      'idmarch ',
    'coccoc/',            'WebMon ',
    'Download Master',    'Downloader ',
    # Misc. foo to reduce noise
    'GetRedirect',        'GetLength',
    'xbps-src-update-check',
);

# One precompiled alternation instead of dozens of separate matches per
# log line; "setup_<digit>" is the only entry that is a real pattern.
my $skipped_agent_re = do {
    my $alternation = join '|', map { quotemeta } @skipped_agent_substrings;
    qr/(?:$alternation|setup_\d)/;
};

# Shape of one access-log line; captures day, month name, year and the
# final quoted field (the user-agent).
my $log_line_re =
    qr/^[^ ]+ - - \[([^\/]+)\/([^\/]+)\/([^:]+):[0-9][0-9][^\]]*\] "[^"]*" [^ ]+ [^ ]+ "[^"]*" "(.*)"/;

# Decide whether a user-agent belongs to a download tool, bot or other
# noise source that should be left out of the statistics.
#
#   $useragent - the user-agent string from the log line
#
# Returns 1 when the request must be ignored, 0 otherwise.
sub is_skipped_useragent
{
    my $useragent = shift;

    return 1 if $useragent eq '-' || $useragent eq 'setup';
    return $useragent =~ $skipped_agent_re ? 1 : 0;
}

# Derive the OS bucket for a user-agent.
#
#   $useragent - the user-agent string from the log line
#
# Returns a Windows name from win_real_ver, "OSX <major>_<minor>",
# "Linux" or "other"; returns nothing for agents that merely look like a
# bot ("bot/" or "Bot/") so that they are not counted at all.
sub classify_useragent
{
    my $useragent = shift;

    if ($useragent =~ m/Windows \s*([^;\)]+)\s*[;\)]/) {
        return win_real_ver($1);
    }

    # Versions appear both as 10_10_5 and as 10.10; accept either
    # separator and keep only major_minor so both land in one bucket.
    if ($useragent =~ m/Macintosh;.*Intel Mac OS X\s*([0-9_.]+)/) {
        my $version = $1;
        $version =~ tr/./_/;
        my $short = $version =~ m/^([0-9]+(?:_[0-9]+)?)/ ? $1 : $version;
        return "OSX $short";
    }

    if ($useragent =~ m/X11; Linux/ ||
        $useragent =~ m/X11; Ubuntu/ ||
        $useragent =~ m/Linux; /) {
        return "Linux";
    }

    return if $useragent =~ m|[Bb]ot/|;

    return "other";
}

# Read every matching compressed access log directly inside $dirname (not
# its subdirectories) and add its hits to %totals and %breakdown_by_week.
#
#   $dirname - directory to look in
#
# Dies when the decompression pipeline cannot be started.  Lines that do
# not look like access-log entries are echoed to stderr, except for the
# known Wget/CCBot noise.
sub analyze_dir
{
    my $dirname = shift;

    # List-form pipe open: the directory name reaches the shell as "$1",
    # so blanks or metacharacters in it are harmless.  "&&" ensures bzcat
    # never runs in the wrong directory if the cd fails.  The shell is
    # still needed to expand the file-name glob.
    open(my $log, '-|', 'sh', '-c',
         'cd -- "$1" && exec bzcat download.documentfoundation.org*access*.bz2 2>/dev/null',
         'sh', $dirname)
        or die "Cannot open the logs in $dirname: $!";

    while (my $line = <$log>) {
        if ($line =~ $log_line_re) {
            my ($day, $month, $year, $useragent) =
                ($1, $month_to_num{$2}, $3, $4);

            if (!defined $month) {
                # Unknown month abbreviation: the date cannot be bucketed.
                print STDERR "bad line: '$line'\n";
                next;
            }

            next if is_skipped_useragent($useragent);

            my $key = classify_useragent($useragent);
            next if !defined $key;

            my $year_week = ymd_to_epoch($year, $month, $day);
            $totals{$key}++;
            $breakdown_by_week{$year_week}{$key}++;
        } else {
            print STDERR "bad line: '$line'\n"
                unless $line =~ m|Wget/| || $line =~ m|CCBot/|;
        }
    }

    # Most directories hold no logs, in which case bzcat exits non-zero;
    # that is expected, so the close status is deliberately ignored.
    close $log;
    return;
}

# Analyze $dirname and then every non-hidden subdirectory below it,
# depth-first.
#
#   $dirname - directory to start from
#
# Dies when a directory cannot be opened.
# NOTE(review): -d follows symbolic links, so a link pointing back up the
# tree would make this recurse without end.
sub scan_dirs
{
    my $dirname = shift;

    print STDERR "analyzing: $dirname\n";
    analyze_dir($dirname);

    opendir(my $dirh, $dirname) || die "Can't open $dirname: $!";
    my @subdirs;
    while (my $subdir = readdir($dirh)) {
        next if ($subdir =~ m/^\./);
        push @subdirs, $subdir if -d "$dirname/$subdir";
    }
    closedir $dirh;

    for my $subdir (@subdirs) {
        scan_dirs("$dirname/$subdir");
    }
    return;
}

# Print the collected statistics to stdout: a timestamp, the overall
# totals, and one tab-separated row per ISO week with a column per OS.
sub print_report
{
    my @os_list = sort keys %totals;

    # Same layout as `date --rfc-3339=seconds`, without requiring GNU
    # date: %z yields +hhmm, so the colon is inserted afterwards.
    my $stamp = strftime("%Y-%m-%d %H:%M:%S%z", localtime);
    $stamp =~ s/([+-][0-9][0-9])([0-9][0-9])$/$1:$2/;
    print "Generated on: $stamp\n\n";

    print "Totals:\n";
    for my $os (@os_list) {
        print "$os\t".$totals{$os}."\n";
    }

    print "By week:\n";

    print "year/week\t";
    for my $os (@os_list) {
        print "$os\t";
    }
    print "\n";

    for my $week (sort keys %breakdown_by_week) {
        print "$week\t";
        for my $os (@os_list) {
            my $count = $breakdown_by_week{$week}{$os};
            print( (defined $count ? $count : 0) . "\t" );
        }
        print "\n";
    }
    return;
}

# Entry point: scan the current working directory and print the report.
sub main
{
    my $toplevel = getcwd();
    defined $toplevel or die "Cannot determine the current directory: $!";

    scan_dirs($toplevel);
    print_report();
    return;
}

# Run only when executed as a script, so the helpers above can be loaded
# from another file without triggering a scan.
main() unless caller;

1;
