20after4 has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/312016

Change subject: WIP: `scap scrape` plugin split out from change 306259
......................................................................

WIP: `scap scrape` plugin split out from change 306259

This scrapes the deployment calendar page to find patches for the
current SWAT window.
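
Usage (per the plugin's argument help):

  scap scrape --start   # enqueue patches from the calendar and begin SWAT
  scap scrape --list    # list patches in the queue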

Change-Id: I4aa48c752369f09e7e4c674d82e67ef98f5e0828
---
A scap/plugins/scrape.py
1 file changed, 110 insertions(+), 0 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/operations/mediawiki-config 
refs/changes/16/312016/1

diff --git a/scap/plugins/scrape.py b/scap/plugins/scrape.py
new file mode 100644
index 0000000..b537d53
--- /dev/null
+++ b/scap/plugins/scrape.py
@@ -0,0 +1,110 @@
+from __future__ import division, absolute_import
+from __future__ import print_function, unicode_literals
+from bs4 import BeautifulSoup
+from datetime import datetime
+import dateutil.parser
+from dateutil.tz import tzlocal
+from dateutil.relativedelta import relativedelta
+import io
+import os.path
+import requests
+import scap.cli as cli
+
+
+@cli.command('scrape', help='Scrape deployment calendar for changeids.')
+class scrape(cli.Application):
+    '''Scrape the deployment calendar.'''
+
+    gerrit_uri = 'https://gerrit.wikimedia.org'
+
+    @cli.argument('--start', action='store_true',
+                  help='Enqueue patches from the deployment calendar and begin'
+                  + ' SWAT deployment.')
+    @cli.argument('--list', action='store_true',
+                  help='List patches in the queue.')
+    def main(self, *extra_args):
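+        # the queue lives in ~/.swat: one file per patch, named by its
+        # gerrit link text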
+        queuedir = os.path.expanduser('~/.swat')
+        if not os.path.isdir(queuedir):
+            os.mkdir(queuedir)
+
+        if self.arguments.start:
+            swat_patches = self.scrape_deployment_calendar()
+            for patch in swat_patches:
+                patchfile = os.path.join(queuedir, patch[0])
+                with io.open(patchfile, mode='w', encoding='utf-8') as f:
+                    for line in patch:
+                        f.write(line)
+                        f.write('\n')
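+            # print a summary row per queued patch: link text, owner, url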
+            for patch in swat_patches:
+                line = '{:<12}{:<35}{:<40}'.format(
+                    patch[0], patch[1], patch[2])
+                print(line)
+            return
+
+        if self.arguments.list:
+            for patch in os.listdir(queuedir):
+                patchfile = os.path.join(queuedir, patch)
+                with io.open(patchfile, mode='r', encoding='utf-8') as f:
+                    print(f.read())
+            return
+
+    def scrape_deployment_calendar(self):
+        '''Scrape the deployment calendar at
+        wikitech.wikimedia.org/wiki/Deployments, returning a list of patches
+        for the current SWAT window.
+        '''
+        def match_gerrit_link(tag):
+            return (
+                tag.name == 'a'
+                and tag.has_attr('href')
+                and tag['href'].startswith(self.gerrit_uri))
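+        # timezone-aware "now", so it compares cleanly with the parsed
+        # (timezone-aware) window timestamps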
+        now = datetime.now(tzlocal())
+
+        patchlist = []
+
+        r = requests.get('https://wikitech.wikimedia.org/wiki/Deployments')
+        soup = BeautifulSoup(r.text, 'lxml')
+        # find all swat deploy windows on the deployment calendar
+        for tag in soup.find_all(title="SWAT deploys"):
+            # look up 3 levels to find the TR tag
+            row = tag.parent.parent.parent
+            if row.name != 'tr':
+                continue
+            # get the timestamp from the TR tag's id attribute
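+            # (assumes the id's last "-"-separated field is a timestamp in
+            #  UTC, hence the appended "Z" before parsing)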
+            datestring = row['id'].rsplit('-', 1)[1] + "Z"
+            # parse the date string
+            window_start = dateutil.parser.parse(datestring)
+            window_end = window_start + relativedelta(hours=+1)
+            # if the current time is not within this window, continue to next
+            if now < window_start or now > window_end:
+                continue
+
+            # get all the names for developers with patches to deploy
+            people = row.find_all(class_='ircnick-container')
+
+            for person in people:
+                name = person.text
+
+                if person.parent.name == 'p':
+                    # names after the first are nested inside a <p> tag
+                    container = person.parent
+                else:
+                    container = person
+                # get the list immediately following the developer name
+                next_tag = container.find_next_sibling('ul')
+                if not next_tag:
+                    continue
+                # find all gerrit links in this list
+                links = next_tag.find_all(match_gerrit_link)
+                if not links:
+                    continue
+
+                for link in links:
+                    patchlist.append((link.text, name, link['href']))
+
+        return patchlist

-- 
To view, visit https://gerrit.wikimedia.org/r/312016
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I4aa48c752369f09e7e4c674d82e67ef98f5e0828
Gerrit-PatchSet: 1
Gerrit-Project: operations/mediawiki-config
Gerrit-Branch: master
Gerrit-Owner: 20after4 <mmod...@wikimedia.org>
