Сценарий во вложении выводит пакеты с непереведёнными описаниями (непереведённые начинаются после 474-го места) в порядке рейтинга popcon:
xserver-xorg-video-modesetting id 132923 https://ddtp2.debian.net/ddt.cgi?desc_id=132923&getuntrans=ru
libpython3.4-minimal id 217824 https://ddtp2.debian.net/ddt.cgi?desc_id=217824&getuntrans=ru
python3.4-minimal id 217852 https://ddtp2.debian.net/ddt.cgi?desc_id=217852&getuntrans=ru
libtotem-plparser18 id 63754 https://ddtp2.debian.net/ddt.cgi?desc_id=63754&getuntrans=ru
gir1.2-gnomedesktop-3.0 id 82307 https://ddtp2.debian.net/ddt.cgi?desc_id=82307&getuntrans=ru

Странно, но с | tee, похоже, не работает.
#!/usr/bin/python2
"""Report popular Debian packages whose description is not yet translated.

The script walks the popcon "by_vote" list in order, looks every package up
in the DDTP web interface and prints one line per package whose active
description has no translation for the requested language:

    <package> id <desc_id>\t<url of the "translate this" page>

The code is written so that it runs unchanged on Python 2.7 and Python 3.
"""

import argparse
import re
import sys
import urllib

try:  # Python 3 layout of the urllib package
    import urllib.parse
    import urllib.request
    _urlopen = urllib.request.urlopen
    _quote_plus = urllib.parse.quote_plus
except ImportError:  # Python 2: everything lives directly in ``urllib``
    _urlopen = urllib.urlopen
    _quote_plus = urllib.quote_plus

desc = 'Checking untranslated packages description for language'
popcon_addr = 'http://popcon.debian.org/by_vote'
ddtp_addr = 'https://ddtp2.debian.net'

# Defaults, overridable with -l / -n on the command line.
check_lang = 'ru'
total_packages_num = 1000

# Matches a line such as
#   Description: <a href="ddt.cgi?desc_id=237239">237239</a><br>
# group(2) is the numeric description id.
desc_re = re.compile(r'(Description: <a href="ddt\.cgi\?desc_id=)([0-9]+?)"')


def build_not_translated_re(lang):
    """Return the compiled pattern for the "not yet translated" line.

    The pattern matches a line such as
        This Description is not yet translated to ru <a href="ddt.cgi?...">
    and captures the href in group(2). *lang* is escaped so that a language
    code can never be interpreted as regex syntax.
    """
    return re.compile(r'(.*Description is not yet translated to '
                      + re.escape(lang) + r') <a href="(.*?)">')


def parse_package_name(line):
    """Return the package name from one popcon line, or None to skip it.

    Comment lines (starting with '#'), blank lines and lines with fewer than
    two whitespace-separated fields are skipped instead of raising
    IndexError.
    """
    fields = line.split()
    if len(fields) < 2 or fields[0].startswith('#'):
        return None
    return fields[1]


def find_desc_id(lines):
    """Return the first description id found in *lines*, or None.

    Only the first match is used: it is the active description.
    """
    for line in lines:
        found = desc_re.match(line)
        if found is not None:
            return found.group(2)
    return None


def find_untranslated_links(lines, not_trans_re):
    """Return the hrefs of every "not yet translated" line in *lines*."""
    links = []
    for line in lines:
        found = not_trans_re.match(line)
        if found is not None:
            links.append(found.group(2))
    return links


def _to_text(data):
    """Return *data* as native ``str`` (decodes bytes on Python 3 only)."""
    if isinstance(data, str):
        return data
    return data.decode('utf-8', 'replace')


def _fetch_lines(url):
    """Download *url* and return its content as a list of text lines.

    The response is always closed, even when reading fails.
    """
    resp = _urlopen(url)
    try:
        data = resp.read()
    finally:
        resp.close()
    return _to_text(data).splitlines()


def _emit(text):
    """Write one line to stdout and flush immediately.

    stdout is block-buffered when it is a pipe (for example ``| tee``), so
    without the flush nothing shows up until the buffer fills or the script
    exits.
    """
    sys.stdout.write(text + '\n')
    sys.stdout.flush()


def check_package(pkg, not_trans_re, verbose_prefix=None):
    """Look up *pkg* in DDTP and report it if its description is untranslated.

    verbose_prefix -- when not None, a progress line
                      "<prefix><pkg> id <desc_id>" is printed for every
                      package that has a description id.
    """
    # Percent-encode the name: a bare '+' (e.g. "libstdc++6") is
    # conventionally decoded as a space in a query string.
    pkg_page = _fetch_lines(ddtp_addr + '/ddt.cgi?package=' + _quote_plus(pkg))
    desc_id = find_desc_id(pkg_page)
    if desc_id is None:
        return

    if verbose_prefix is not None:
        _emit(verbose_prefix + pkg + ' id ' + desc_id)

    desc_page = _fetch_lines(ddtp_addr + '/ddt.cgi?desc_id=' + desc_id)
    for href in find_untranslated_links(desc_page, not_trans_re):
        _emit(pkg + ' id ' + desc_id + '\t' + ddtp_addr + '/' + href)


def main(argv=None):
    """Parse the command line and run the check; return the exit status."""
    arg = argparse.ArgumentParser(description=desc)
    arg.add_argument('-n', '--number', type=int, default=total_packages_num,
                     help='check number of package')
    arg.add_argument('-l', '--language', default=check_lang,
                     help='check for language')
    arg.add_argument('-v', '--verbose', help='be verbose',
                     action='store_true')
    args = arg.parse_args(argv)

    lang = args.language
    limit = args.number

    _emit('Checking for "' + lang + '" from ' + ddtp_addr + ' by '
          + popcon_addr + ' ...')

    not_trans_re = build_not_translated_re(lang)

    popcon = _urlopen(popcon_addr)
    try:
        count = 0
        # readline() returns an empty (byte) string at EOF, which ends the
        # loop; the list is streamed so only the needed prefix is downloaded.
        for raw in iter(popcon.readline, b''):
            pkg = parse_package_name(_to_text(raw))
            if pkg is None:
                continue
            count += 1
            if count > limit:
                break
            prefix = None
            if args.verbose:
                prefix = str(count) + ' of ' + str(limit) + ' '
            check_package(pkg, not_trans_re, prefix)
    finally:
        popcon.close()
    return 0


if __name__ == '__main__':
    sys.exit(main())