Merge authors:
  Jim Popovitch (jimpop)
Related merge proposals:
  https://code.launchpad.net/~jimpop/mailman/contrib-sitemapgen/+merge/347941
  proposed by: Jim Popovitch (jimpop)
------------------------------------------------------------
revno: 1771 [merge]
committer: Mark Sapiro <m...@msapiro.net>
branch nick: 2.1
timestamp: Thu 2018-06-14 10:04:45 -0700
message:
  Added contrib/sitemapgen.
added:
  contrib/README.sitemapgen
  contrib/sitemapgen
modified:
  NEWS


--
lp:mailman/2.1
https://code.launchpad.net/~mailman-coders/mailman/2.1

Your team Mailman Checkins is subscribed to branch lp:mailman/2.1.
To unsubscribe from this branch go to 
https://code.launchpad.net/~mailman-coders/mailman/2.1/+edit-subscription
=== modified file 'NEWS'
--- NEWS	2018-06-12 13:55:49 +0000
+++ NEWS	2018-06-14 17:04:45 +0000
@@ -87,6 +87,11 @@
     - Quoting in the mailman-config command has been changed from double to
       single quotes to allow double-quoted parameters.  (LP:1774986)
 
+  Miscellaneous
+
+    - Added to the contrib directory, a script from Jim Popovitch to generate
+      Sitemap files for a list's archive.
+
 2.1.26 (04-Feb-2018)
 
   Security

=== added file 'contrib/README.sitemapgen'
--- contrib/README.sitemapgen	1970-01-01 00:00:00 +0000
+++ contrib/README.sitemapgen	2018-06-14 01:59:15 +0000
@@ -0,0 +1,11 @@
+Hacked from bin/sync_members
+
+Copy mailman/contrib/sitemapgen to mailman/bin/ and run it like so:
+
+  /path/to/mailman/bin/sitemapgen <listname>
+
+Alternatively add it to cron like so:
+
+5 0 * * *  for l in `/path/to/mailman/bin/list_lists -apb`; do 
+		/path/to/mailman/bin/sitemapgen $l; done
+

=== added file 'contrib/sitemapgen'
--- contrib/sitemapgen	1970-01-01 00:00:00 +0000
+++ contrib/sitemapgen	2018-06-14 01:59:15 +0000
@@ -0,0 +1,164 @@
+#! @PYTHON@
+
+# For a given listname, this script generates sitemap.xml.gz files
+# under archives/private/<listname>/
+#
+# Copyright (C) 1998-2018 by the Free Software Foundation, Inc.
+#
+# graciously hacked from bin/sync_members
+#
+
+"""Build Sitemap files for an archive
+
+Usage: %(program)s [options] listname
+
+Where `options' are:
+
+    --help
+    -h
+        Print this message.
+
+    listname
+        Required.  This specifies the list to generate sitemaps for.
+"""
+
+import os
+import sys
+import paths
+# Import this /after/ paths so that the sys.path is properly hacked
+import email.Utils
+from Mailman import MailList
+from Mailman import Errors
+from Mailman import Utils
+from Mailman.UserDesc import UserDesc
+from Mailman import mm_cfg
+from Mailman.i18n import _
+import getopt
+import re
+import time
+from stat import *
+from datetime import datetime, timedelta
+import gzip
+
+
+# sitemap priorities in age-in-weeks/priority/changefreq tuples
+priorities = ([1, 1.0, "daily"],
+              [4, 1.0, "weekly"],
+              [30, 1.0, "monthly"],
+              [52, 0.9, "never"],
+              [100, 0.8, "never"],
+              [200, 0.7, "never"],
+              [300, 0.6, "never"],
+              [400, 0.5, "never"])
+
+
+program = sys.argv[0]
+
+def usage(code, msg=''):
+    if code:
+        fd = sys.stderr
+    else:
+        fd = sys.stdout
+    print >> fd, _(__doc__)
+    if msg:
+        print >> fd, msg
+    sys.exit(code)
+
+
+
+def main():
+    listname = None
+
+    # TBD: can't use getopt with this command line syntax, which is broken and
+    # should be changed to be getopt compatible.
+    i = 1
+    while i < len(sys.argv):
+        opt = sys.argv[i]
+        if opt in ('-h', '--help'):
+            usage(0)
+        else:
+            try:
+                listname = sys.argv[i].lower()
+                i += 1
+            except IndexError:
+                usage(1, _('No listname given'))
+            break
+
+    if listname is None:
+        usage(1, _('Must have a listname'))
+
+    # get the locked list object
+    try:
+        mlist = MailList.MailList(listname, lock=0)
+    except Errors.MMListError, e:
+        print _('No such list: %(listname)s')
+        sys.exit(1)
+
+    rootdir = mlist.archive_dir()
+    rooturl = mlist.GetBaseArchiveURL()
+
+    reArcPath = re.compile(r'^\d+')
+    reArcFile = re.compile(r'\d+\.html')
+
+    sitemaps = []
+
+    now = datetime.now()
+
+    for folder in os.listdir(rootdir):
+        path = os.path.join(rootdir,folder)
+        if not os.path.isdir(path) or not reArcPath.search(folder):
+            continue
+    
+        dirtime = os.path.getmtime(path)
+    
+        os.umask(0022)
+        sitemap = os.path.join(rootdir,folder,"sitemap.xml.gz")
+        f = gzip.open(sitemap, 'wb')
+    
+        f.write('<?xml version="1.0" encoding="UTF-8"?>\n')
+        f.write('<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9";>\n')
+    
+        for file in os.listdir(path):
+            if not reArcFile.search(file):
+                continue
+    
+            # get timestamp of file
+            st = os.stat(os.path.join(rootdir,folder,file))
+            mtime = st[ST_MTIME] #modification time
+    
+            ts = datetime.fromtimestamp(mtime)
+            for weeks, priority, changefreq in priorities:
+                if ts > now - timedelta(weeks = weeks):
+                    break
+    
+            f.write('  <url>\n  <loc>' + os.path.join(rooturl,folder,file) + '</loc>\n')
+            f.write('    <lastmod>' + time.strftime("%Y-%m-%d",time.gmtime(mtime)) + '</lastmod>\n')
+            f.write('    <changefreq>' + changefreq + '</changefreq>\n')
+            f.write('    <priority>' + str(priority) + '</priority>\n')
+            f.write('  </url>\n')
+    
+        f.write('</urlset>\n')
+        f.close()
+    
+        sitemaps.append((os.path.join(rooturl,folder,"sitemap.xml.gz")))
+    
+    
+    # write out the sitemapindex file
+    sitemapindex = os.path.join(rootdir,"sitemap.xml.gz")
+    f = gzip.open(sitemapindex, 'wb')
+    
+    f.write('<?xml version="1.0" encoding="UTF-8"?>\n')
+    f.write('<sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9";>\n')
+    
+    for sitemap in sitemaps:
+        f.write('  <sitemap>\n  <loc>' + sitemap + '</loc>\n')
+        f.write('    <lastmod>' + time.strftime("%Y-%m-%d", now.timetuple()) + '</lastmod>\n')
+        f.write('  </sitemap>\n')
+    
+    f.write('</sitemapindex>\n')
+    f.close()
+
+
+if __name__ == '__main__':  # true only when run as a script, not on import
+    main()
+

_______________________________________________
Mailman-checkins mailing list
Mailman-checkins@python.org
Unsubscribe: 
https://mail.python.org/mailman/options/mailman-checkins/archive%40jab.org

Reply via email to