Author: hdu Date: Wed May 28 15:04:50 2014 New Revision: 1598032 URL: http://svn.apache.org/r1598032 Log: split logic and data to make the script more maintainable
the logic itself and the resulting outputs remain unchanged Modified: openoffice/devtools/aoo-stats/detail-by-day.py Modified: openoffice/devtools/aoo-stats/detail-by-day.py URL: http://svn.apache.org/viewvc/openoffice/devtools/aoo-stats/detail-by-day.py?rev=1598032&r1=1598031&r2=1598032&view=diff ============================================================================== --- openoffice/devtools/aoo-stats/detail-by-day.py (original) +++ openoffice/devtools/aoo-stats/detail-by-day.py Wed May 28 15:04:50 2014 @@ -1,290 +1,119 @@ -################################################################ -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# -################################################################ - - -# This script queries the SourceForce REST API for download statistics for -# sets of files on SourceForge, for a range of dates, in ISO format (YYYY-MM-DD) -# passed in as a command line argument. The data, in CSV format is written to stdout - - -import urllib -import json -import sys -import datetime - -from urllib import urlencode - -def getSourceForgeStats(download, startDate, endDate): - - url = download + "/stats/json?start_date=" + startDate + "&" "end_date=" + endDate - - #print >> sys.stderr, url - - attempts = 0 - - while attempts < 3: - try: - conn = urllib.urlopen(url) - data = conn.read() - - return data - - except: - attempts += 1 - print url - print >> sys.stderr, "error " + download + "(" + str(attempts) + ")" - - return "" - - -if len(sys.argv) != 4: - print "syntax: python detail-by-day.py <urls.lst> <start-date> <end-date>" - print "where <file.list> is a list of files URL's to gather stats on, and <start-date> and <end-date> are in YYYY-MM-DD format." - exit(-1) - - -downloads = [line.strip() for line in open(sys.argv[1])] -start_date = datetime.datetime.strptime(sys.argv[2], '%Y-%m-%d') -end_date = datetime.datetime.strptime(sys.argv[3], '%Y-%m-%d') - -print '"date","count_total","count_340","count_341","count_400","count_401","count_410","windows","mac","linux","linux32","linux64","deb","rpm","ar","ast","eu","zh_TW","zh_CN","cs","da","nl","en_GB","en_US","fi","fr","gd","gl","de","hu","it","ja","km","ko","lt","nb","pl","pt_BR","ru","sk","sl","es","sv","el","pt","ta","sr","tr","vi","he","bg","hi","th"' - -today = start_date - -while today <= end_date: - - linux32 = 0 - linux64 = 0 - windows = 0 - mac = 0 - linux = 0 - count_total = 0 - count_340 = 0 - count_341 = 0 - count_400 = 0 - count_401 = 0 - count_410 = 0 - deb = 0 - rpm = 0 - - ar = 0 - ast = 0 - eu = 0 - zh_TW = 0 - zh_CN = 0 - cs = 0 - da = 0 - nl = 0 - el = 0 - en_GB = 0 - en_US = 0 - fi = 0 - fr = 0 - gd = 0 - gl = 0 - de = 0 - hu = 0 - it = 0 - ja = 0 - km = 0 - ko = 0 - lt = 0 - nb = 0 - pl = 0 - pt = 0 - pt_BR = 0 - ru = 0 - sk = 0 - sl = 0 - es = 0 - sv = 0 - ta = 0 - sr = 0 - tr = 0 - vi = 0 - he = 0 - bg = 0 - hi = 0 - th = 0 - - date_string = today.strftime("%Y-%m-%d") - - print >> sys.stderr, date_string - - for download in downloads : - - try: - data = json.loads(getSourceForgeStats(download,date_string,date_string)) - day_count = data["total"] - except ValueError: - data = "" - day_count = 0 - - count_total = count_total + day_count - -#versions - - if download.find("3.4.0") != -1: - count_340 = count_340 + day_count - - if download.find("3.4.1") != -1: - count_341 = count_341 + day_count - - if download.find("4.0.0") != -1: - count_400 = count_400 + day_count - - if download.find("4.0.1") != -1: - count_401 = count_401 + day_count - - if download.find("4.1.0") != -1: - count_410 = count_410 + day_count - - - -#platforms - - if download.find("Win_x86") != -1: - windows = windows + day_count - - if download.find("MacOS") != -1: - mac = mac + day_count - - if download.find("Linux") != -1: - linux = linux + day_count - -#architecture - - if download.find("Linux_x86_") != -1: - linux32 = linux32 + day_count - - if download.find("Linux_x86-64_") != -1: - linux64 = linux64 + day_count - -#packaging - - if download.find("install-deb_") != -1: - deb = deb + day_count - - if download.find("install-rpm_") != -1: - rpm = rpm + day_count - -#languages - - if download.find("_ar.") != -1: - ar = ar + day_count - if download.find("_ast.") != -1: - ast = ast+ day_count - if download.find("_eu.") != -1: - eu = eu + day_count - if download.find("_zh-TW.") != -1: - zh_TW = zh_TW + day_count - if download.find("_zh-CN.") != -1: - zh_CN = zh_CN + day_count - if download.find("_cs.") != -1: - cs = cs + day_count - if download.find("_da.") != -1: - da = da + day_count - if download.find("_nl.") != -1: - nl = nl + day_count - if download.find("_el.") != -1: - el = el + day_count - if download.find("_en-GB.") != -1: - en_GB = en_GB + day_count - if download.find("_en-US.") != -1: - en_US = en_US + day_count - if download.find("_fi.") != -1: - fi = fi + day_count - if download.find("_fr.") != -1: - fr = fr + day_count - if download.find("_gd.") != -1: - gd = gd + day_count - if download.find("_gl.") != -1: - gl = gl + day_count - if download.find("_de.") != -1: - de = de + day_count - if download.find("_hu.") != -1: - hu = hu + day_count - if download.find("_it.") != -1: - it = it + day_count - if download.find("_ja.") != -1: - ja = ja + day_count - if download.find("_km.") != -1: - km = km + day_count - if download.find("_ko.") != -1: - ko = ko + day_count - if download.find("_lt.") != -1: - lt = lt + day_count - if download.find("_nb.") != -1: - nb = nb + day_count - if download.find("_pl.") != -1: - pl = pl + day_count - if download.find("_pt.") != -1: - pt = pt + day_count - if download.find("_pt-BR.") != -1: - pt_BR = pt_BR + day_count - if download.find("_ru.") != -1: - ru = ru + day_count - if download.find("_sk.") != -1: - sk = sk + day_count - if download.find("_sl.") != -1: - sl = sl + day_count - if download.find("_es.") != -1: - es = es + day_count - if download.find("_sv.") != -1: - sv = sv + day_count - if download.find("_ta.") != -1: - ta = ta + day_count - if download.find("_sr.") != -1: - sr = sr + day_count - if download.find("_tr.") != -1: - tr = tr + day_count - if download.find("_vi.") != -1: - vi = vi + day_count - if download.find("_he.") != -1: - he = he + day_count - if download.find("_bg.") != -1: - bg = bg + day_count - if download.find("_hi.") != -1: - hi = hi + day_count - if download.find("_th.") != -1: - th = th + day_count - - print date_string + "," + str(count_total) + "," + str(count_340) + "," + str(count_341) + "," + str(count_400) + "," + str(count_401) + "," + str(count_410) + "," + \ - str(windows) + "," + str(mac) + "," + str(linux) + "," + str(linux32) + "," + str(linux64) + "," + \ - str(deb) + "," + str(rpm) + "," + \ - str(ar) + "," + str(ast) + "," + str(eu) + "," + str(zh_TW) + "," + \ - str(zh_CN) + "," + str(cs) + "," + str(da) + "," + \ - str(nl) + "," + str(en_GB) + "," + str(en_US) + "," + \ - str(fi) + "," + str(fr) + "," + str(gd) + "," + \ - str(gl) + "," + str(de) + "," + str(hu) + "," + \ - str(it) + "," + str(ja) + "," + str(km) + "," + \ - str(ko) + "," + str(lt) + "," + str(nb) + "," + str(pl) + "," + str(pt_BR) + "," + \ - str(ru) + "," + str(sk) + "," + str(sl) + "," + \ - str(es) + "," + str(sv) + "," + str(el) + "," + \ - str(pt) + "," + str(ta) + "," + str(sr) + "," + str(tr) + "," + str(vi) + "," + str(he) + "," + str(bg)+ "," + str(hi)+ "," + str(th) - - - today += datetime.timedelta(days=1) - - - - - +################################################################ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +################################################################ + + +# This script queries the SourceForce REST API for download statistics for +# sets of files on SourceForge, for a range of dates, in ISO format (YYYY-MM-DD) +# passed in as a command line argument. The data, in CSV format is written to stdout + + +import urllib +import json +import sys +import datetime + +from urllib import urlencode + +def getSourceForgeStats(download, startDate, endDate): + + url = download + "/stats/json?start_date=" + startDate + "&" "end_date=" + endDate + + #print >> sys.stderr, url + + attempts = 0 + + while attempts < 3: + try: + conn = urllib.urlopen(url) + data = conn.read() + + return data + + except: + attempts += 1 + print url + print >> sys.stderr, "error " + download + "(" + str(attempts) + ")" + + return "" + + +if len(sys.argv) != 4: + print "syntax: python detail-by-day.py <urls.lst> <start-date> <end-date>" + print "where <file.list> is a list of files URL's to gather stats on, and <start-date> and <end-date> are in YYYY-MM-DD format." + exit(-1) + + +downloads = [line.strip() for line in open(sys.argv[1])] +start_date = datetime.datetime.strptime(sys.argv[2], '%Y-%m-%d') +end_date = datetime.datetime.strptime(sys.argv[3], '%Y-%m-%d') + +# columns of interest +columns = [ "count_total","count_340","count_341","count_400","count_401","count_410","windows","mac","linux","linux32","linux64","deb","rpm","ar","ast","eu","zh-TW","zh-CN","cs","da","nl","en-GB","en-US","fi","fr","gd","gl","de","hu","it","ja","km","ko","lt","nb","pl","pt-BR","ru","sk","sl","es","sv","el","pt","ta","sr","tr","vi","he","bg","hi","th"] + +# column counters are updated if the download name contains a matching pattern +# The dictionary below maps the column names to these search patterns. +# If there is no entry for a column then the pattern for language columns is assumed +patternDict = { + "count_total" : "", + "count_340" : "3.4.0", + "count_341" : "3.4.1", + "count_400" : "4.0.0", + "count_401" : "4.0.1", + "count_410" : "4.1.0", + "windows" : "Win_x86", + "mac" : "MacOS", + "linux" : "Linux", + "linux32" : "Linux_x86_", + "linux64" : "Linux_x86-64_", + "deb" : "install-deb_", + "rpm" : "install-rpm_" +} + + +print( '"date","' + '","'.join(columns) + '"') + +today = start_date + +while today <= end_date: + + counts = dict( [(c,0) for c in columns]) + + date_string = today.strftime("%Y-%m-%d") + print >> sys.stderr, date_string + + for download in downloads : + + try: + data = json.loads(getSourceForgeStats(download,date_string,date_string)) + day_count = data["total"] + except ValueError: + continue + + # update the per column counts + for c in columns: + pattern = patternDict[c] if c in patternDict else ("_%s." % c) + if download.find(pattern) != -1: + counts[c] += day_count + + print( date_string + ',' + ','.join( [str(counts[c]) for c in columns])) + + today += datetime.timedelta(days=1) +