Merge authors: Jim Popovitch (jimpop) Related merge proposals: https://code.launchpad.net/~jimpop/mailman/contrib-sitemapgen/+merge/347941 proposed by: Jim Popovitch (jimpop) ------------------------------------------------------------ revno: 1771 [merge] committer: Mark Sapiro <m...@msapiro.net> branch nick: 2.1 timestamp: Thu 2018-06-14 10:04:45 -0700 message: Added contrib/sitemapgen. added: contrib/README.sitemapgen contrib/sitemapgen modified: NEWS
-- lp:mailman/2.1 https://code.launchpad.net/~mailman-coders/mailman/2.1 Your team Mailman Checkins is subscribed to branch lp:mailman/2.1. To unsubscribe from this branch go to https://code.launchpad.net/~mailman-coders/mailman/2.1/+edit-subscription
=== modified file 'NEWS' --- NEWS 2018-06-12 13:55:49 +0000 +++ NEWS 2018-06-14 17:04:45 +0000 @@ -87,6 +87,11 @@ - Quoting in the mailman-config command has been changed from double to single quotes to allow double-quoted parameters. (LP:1774986) + Miscellaneous + + - Added to the contrib directory, a script from Jim Popovitch to generate + Sitemap files for a list's archive. + 2.1.26 (04-Feb-2018) Security === added file 'contrib/README.sitemapgen' --- contrib/README.sitemapgen 1970-01-01 00:00:00 +0000 +++ contrib/README.sitemapgen 2018-06-14 01:59:15 +0000 @@ -0,0 +1,11 @@ +Hacked from bin/sync_members + +Copy mailman/contrib/sitemapgen to mailman/bin/ and execute it like so: + + /path/to/mailman/bin/sitemapgen <listname> + +Alternatively add it to cron like so: + +5 0 * * * for l in `/path/to/mailman/bin/list_lists -apb`; do + /path/to/mailman/bin/sitemapgen $l; done + === added file 'contrib/sitemapgen' --- contrib/sitemapgen 1970-01-01 00:00:00 +0000 +++ contrib/sitemapgen 2018-06-14 01:59:15 +0000 @@ -0,0 +1,164 @@ +#! @PYTHON@ + +# For a given listname, this script generates sitemap.xml.gz files +# under archives/private/<listname>/ +# +# Copyright (C) 1998-2018 by the Free Software Foundation, Inc. +# +# graciously hacked from bin/sync_members +# + +"""Build Sitemap files for an archive + +Usage: %(program)s [options] listname + +Where `options' are: + + --help + -h + Print this message. + + listname + Required. This specifies the list to generate sitemaps for. 
+""" + +import os +import sys +import paths +# Import this /after/ paths so that the sys.path is properly hacked +import email.Utils +from Mailman import MailList +from Mailman import Errors +from Mailman import Utils +from Mailman.UserDesc import UserDesc +from Mailman import mm_cfg +from Mailman.i18n import _ +import getopt +import re +import time +from stat import * +from datetime import datetime, timedelta +import gzip + + +# sitemap priorities in age-in-weeks/priority/changefreq tuples +priorities = ([1, 1.0, "daily"], + [4, 1.0, "weekly"], + [30, 1.0, "monthly"], + [52, 0.9, "never"], + [100, 0.8, "never"], + [200, 0.7, "never"], + [300, 0.6, "never"], + [400, 0.5, "never"]) + + +program = sys.argv[0] + +def usage(code, msg=''): + if code: + fd = sys.stderr + else: + fd = sys.stdout + print >> fd, _(__doc__) + if msg: + print >> fd, msg + sys.exit(code) + + + +def main(): + listname = None + + # TBD: can't use getopt with this command line syntax, which is broken and + # should be changed to be getopt compatible. 
+ i = 1 + while i < len(sys.argv): + opt = sys.argv[i] + if opt in ('-h', '--help'): + usage(0) + else: + try: + listname = sys.argv[i].lower() + i += 1 + except IndexError: + usage(1, _('No listname given')) + break + + if listname is None: + usage(1, _('Must have a listname')) + + # get the locked list object + try: + mlist = MailList.MailList(listname, lock=0) + except Errors.MMListError, e: + print _('No such list: %(listname)s') + sys.exit(1) + + rootdir = mlist.archive_dir() + rooturl = mlist.GetBaseArchiveURL() + + reArcPath = re.compile(r'^\d+') + reArcFile = re.compile(r'\d+\.html') + + sitemaps = [] + + now = datetime.now() + + for folder in os.listdir(rootdir): + path = os.path.join(rootdir,folder) + if not os.path.isdir(path) or not reArcPath.search(folder): + continue + + dirtime = os.path.getmtime(path) + + os.umask(0022) + sitemap = os.path.join(rootdir,folder,"sitemap.xml.gz") + f = gzip.open(sitemap, 'wb') + + f.write('<?xml version="1.0" encoding="UTF-8"?>\n') + f.write('<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">\n') + + for file in os.listdir(path): + if not reArcFile.search(file): + continue + + # get timestamp of file + st = os.stat(os.path.join(rootdir,folder,file)) + mtime = st[ST_MTIME] #modification time + + ts = datetime.fromtimestamp(mtime) + for weeks, priority, changefreq in priorities: + if ts > now - timedelta(weeks = weeks): + break + + f.write(' <url>\n <loc>' + os.path.join(rooturl,folder,file) + '</loc>\n') + f.write(' <lastmod>' + time.strftime("%Y-%m-%d",time.gmtime(mtime)) + '</lastmod>\n') + f.write(' <changefreq>' + changefreq + '</changefreq>\n') + f.write(' <priority>' + str(priority) + '</priority>\n') + f.write(' </url>\n') + + f.write('</urlset>\n') + f.close() + + sitemaps.append((os.path.join(rooturl,folder,"sitemap.xml.gz"))) + + + # write out the sitemapindex file + sitemapindex = os.path.join(rootdir,"sitemap.xml.gz") + f = gzip.open(sitemapindex, 'wb') + + f.write('<?xml version="1.0" 
encoding="UTF-8"?>\n') + f.write('<sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">\n') + + for sitemap in sitemaps: + f.write(' <sitemap>\n <loc>' + sitemap + '</loc>\n') + f.write(' <lastmod>' + time.strftime("%Y-%m-%d", now.timetuple()) + '</lastmod>\n') + f.write(' </sitemap>\n') + + f.write('</sitemapindex>\n') + f.close() + + +if __name__ == '__main__': + main() +
_______________________________________________ Mailman-checkins mailing list Mailman-checkins@python.org Unsubscribe: https://mail.python.org/mailman/options/mailman-checkins/archive%40jab.org