On Sun, Aug 20, 2017 at 10:02:20PM +0200, Karsten Loesing wrote:
> Okay. Maybe we could do something with archive.org in that case. It's
> not that we do have a complete history for MaxMind's files, except that
> we could probably create our own history from Tor's Git repository which
> contains files based on MaxMind's files.
I have a script that walks through the history of tor's git geoip files.
#!/usr/bin/env python

# Counts the size of per-country geoip allocations in the tor source code.
#
# Usage: ./scrape-geoip.py ~/src/tor > tor-geoip.csv
#
# ~/src/tor (or whatever the path is) must be a tor source repo; i.e. a clone of
# https://git.torproject.org/tor.git.
#
# Output is CSV with the header "date,ipv,country,count": one row per
# (commit date, IP version, lowercased country code), where count is the
# number of addresses covered by that country's ranges in that revision of
# the geoip file.
#
# Runs under both Python 2.6+ and Python 3.

import binascii
import datetime
import getopt
import os.path
import socket
import subprocess
import sys


def usage(f=sys.stdout):
    """Write the usage message, followed by a blank line, to the stream f."""
    f.write("Usage: %s /path/to/tor\n\n" % sys.argv[0])


def _git_lines(dirname, args):
    """Run "git <args>" in dirname and yield its stdout line by line.

    The pipe is closed and the child process reaped even when the consumer
    stops iterating early. If the output is read to the end and git exited
    with a non-zero status, subprocess.CalledProcessError is raised so that a
    failed command is not mistaken for empty output.
    """
    cmd = ["git"] + list(args)
    # universal_newlines=True yields text (not bytes) lines on Python 3.
    proc = subprocess.Popen(cmd, cwd=dirname, stdout=subprocess.PIPE,
                            universal_newlines=True)
    try:
        for line in proc.stdout:
            yield line
    finally:
        proc.stdout.close()
        returncode = proc.wait()
    # Only reached on normal exhaustion, not when the generator is closed
    # early (GeneratorExit propagates out of the finally clause above).
    if returncode != 0:
        raise subprocess.CalledProcessError(returncode, cmd)


def history(dirname, filename):
    """Iterate over "<commit hash> <YYYY-MM-DD>" lines, oldest first, for
    every commit in the repo at dirname that git log reports for filename."""
    return _git_lines(dirname, ["log", "--reverse", "--date=short",
                                "--pretty=%H %ad", filename])


def git_show(dirname, filename, commithash):
    """Iterate over the lines of filename as of commit commithash."""
    return _git_lines(dirname, ["show", commithash + ":" + filename])


def _sum_ranges(f, to_int):
    """Sum the sizes of "start,end,CC" ranges per lowercased country code.

    f is an iterable of text lines; to_int converts an address field to an
    integer. Blank lines and lines starting with "#" are ignored. Ranges are
    inclusive, hence the "+ 1". Returns a dict mapping country code to the
    total number of addresses. A malformed line raises (IndexError,
    ValueError, or whatever to_int raises).
    """
    ccs = {}
    for line in f:
        line = line.strip()
        if not line or line.startswith("#"):
            continue
        parts = line.split(",")
        start = to_int(parts[0])
        end = to_int(parts[1])
        cc = parts[2].lower()
        ccs[cc] = ccs.get(cc, 0) + end - start + 1
    return ccs


def parse_geoip(f):
    """Parse an IPv4 geoip file whose addresses are decimal integers.

    Returns a dict mapping lowercased country code to address count.
    """
    return _sum_ranges(f, int)


def ipv6_to_int(ipstr):
    """Convert a textual IPv6 address to its 128-bit integer value."""
    packed = socket.inet_pton(socket.AF_INET6, ipstr)
    return int(binascii.hexlify(packed).decode("ascii"), 16)


def parse_geoip6(f):
    """Parse an IPv6 geoip file whose addresses are textual IPv6 addresses.

    Returns a dict mapping lowercased country code to address count.
    """
    return _sum_ranges(f, ipv6_to_int)


def _report(tor_path, filename, ipv, parse):
    """Print one CSV row per country for every revision of filename.

    ipv is the string written in the "ipv" column; parse turns the file's
    lines into a {country: count} dict. A revision that cannot be read or
    parsed is reported on stderr and skipped.
    """
    for line in history(tor_path, filename):
        parts = line.strip().split()
        commithash = parts[0]
        date = datetime.datetime.strptime(parts[1], "%Y-%m-%d")
        try:
            ccs = parse(git_show(tor_path, filename, commithash))
        except Exception as e:
            # Deliberately broad: one unreadable revision should not abort
            # the whole scrape.
            sys.stderr.write("Skipping %s %s: %s\n"
                             % (filename, commithash, e))
            continue
        for cc, count in sorted(ccs.items()):
            print(",".join([date.strftime("%Y-%m-%d"), ipv, cc, str(count)]))


def main(argv=None):
    """Command-line entry point; returns the process exit status."""
    if argv is None:
        argv = sys.argv[1:]
    try:
        opts, args = getopt.gnu_getopt(argv, "h", ["help"])
    except getopt.GetoptError as e:
        sys.stderr.write("%s\n" % e)
        usage(sys.stderr)
        return 1
    for o, _ in opts:
        if o in ("-h", "--help"):
            usage()
            return 0
    if len(args) != 1:
        usage(sys.stderr)
        return 1
    tor_path = args[0]

    print("date,ipv,country,count")
    status = 0
    for filename, ipv, parse in (("src/config/geoip", "4", parse_geoip),
                                 ("src/config/geoip6", "6", parse_geoip6)):
        try:
            _report(tor_path, filename, ipv, parse)
        except (OSError, subprocess.CalledProcessError) as e:
            # git could not be run, or "git log" itself failed; keep going
            # with the other file but signal the failure in the exit status.
            sys.stderr.write("Error reading history of %s: %s\n"
                             % (filename, e))
            status = 1
    return status


if __name__ == "__main__":
    sys.exit(main())
_______________________________________________ tor-dev mailing list tor-dev@lists.torproject.org https://lists.torproject.org/cgi-bin/mailman/listinfo/tor-dev