Nuria has submitted this change and it was merged.

Change subject: Fix script after Phabricator layout changes
......................................................................


Fix script after Phabricator layout changes

This change includes:
- Fix the parsing of the tasks that was broken since Phab changed
  its layout (tasks are not html any more and can not be scraped).
- Add support to get the task points from the point 'field'.
- Use a global for config.

Bug: T130543
Change-Id: Idda3f7f2c0f792c423de32760812e110f2f4373f
---
M README.md
M analytics/phab-stats
M analytics/phab-stats-config.json
M requirements.txt
4 files changed, 72 insertions(+), 72 deletions(-)

Approvals:
  Nuria: Verified; Looks good to me, approved



diff --git a/README.md b/README.md
index fa073bf..465eec8 100644
--- a/README.md
+++ b/README.md
@@ -1,6 +1,6 @@
 #Usage
 ```
- ./phab-stats --config ./phab-stats-config.json  2015-09-01 2015-12-31
+ ./phab-stats --config ./phab-stats-config.json  2015-09-01 2016-01-01
 ```
 #To install 
 
@@ -15,7 +15,7 @@
 ```
     sudo pip  install -r ./requirements.txt
 ```
-If lxml gives trouble, uninstall and install again. 
+If lxml gives trouble, uninstall and install again.
 
 Please see issue: https://github.com/disqus/python-phabricator/issues/22
 You need phabricator 0.0.5 to workarround it
@@ -30,7 +30,7 @@
 ```
 >arc install-certificate https://phabricator.wikimedia.org
 
- CONNECT  Connecting to "https://phabricator.wikimedia.org/api/";...
+CONNECT  Connecting to "https://phabricator.wikimedia.org/api/";...
 LOGIN TO PHABRICATOR
 Open this page in your browser and login to Phabricator if necessary:
 
@@ -43,4 +43,3 @@
  SUCCESS!  API Token installed.
 ```
 DONE! You should be able to run the script
-
diff --git a/analytics/phab-stats b/analytics/phab-stats
index e8868af..9ac2ba6 100755
--- a/analytics/phab-stats
+++ b/analytics/phab-stats
@@ -17,7 +17,6 @@
 import phabricator
 import requests
 from datetime import datetime
-from bs4 import BeautifulSoup
 
 
 WORKBOARD_URL_TEMPLATE = 
'https://phabricator.wikimedia.org/project/board/%s/query/all/'
@@ -66,22 +65,36 @@
     # The pattern that matches point definitions in task names.
     POINTS_REGEXP = re.compile(r'\[([0-9]+)\s*pts\]')
 
-    def __init__(self, id, name, column):
+    def __init__(self, id, name):
         self.id = id
         self.name = name
-        self.column = column
-        self.points = self.parse_points(name)
         # The transaction list is asigned separately for performance reasons.
-        # All task transactions for all the tasks are requested together in
-        # the same api call. So, after that, all tasks are decorated with it.
         self.transactions = None
+        # The following fields depend on the transactions to be calculated.
+        self.column = None
+        self.points = None
 
-    def parse_points(self, name):
-        match = Task.POINTS_REGEXP.search(name, re.I)
+    def set_transactions(self, transactions):
+        """
+        Sets the task's transactions and triggers the
+        calculation of the fields that depend on them.
+        """
+        self.transactions = transactions
+        self.column = self.column_at('now')
+        self.points = self.parse_points()
+
+    def parse_points(self):
+        """
+        Sets the task's points by trying to parse them from its title,
+        or otherwise looking at the transactions to calculate the point field.
+        """
+        match = Task.POINTS_REGEXP.search(self.name, re.I)
         if match:
             return int(match.group(1))
-        else:
-            return None
+        for transaction in self.transactions:
+            if transaction['transactionType'] == 'points':
+                return transaction['newValue']
+        return None
 
     def resolved_between(self, start_date, end_date):
         """
@@ -98,26 +111,28 @@
                 )
         return False
 
-    def column_at(self, date, config):
+    def column_at(self, date):
         """
         Returns the value of the column in which the task was at the given 
date.
+        If the date is 'now', returns the column where the task is currently.
         """
-        current_column = self.column
+        current_column = 0  # Board's initial column.
         # Transactions are sorted by timestamp, newest to oldest.
-        for transaction in self.transactions:
+        # So they need to be reversed.
+        for transaction in reversed(self.transactions):
             if transaction['transactionType'] == 'projectcolumn':
                 timestamp = int(transaction['dateCreated'])
                 transaction_date = datetime.fromtimestamp(timestamp)
-                if transaction_date >= date:
-                    columns = self.get_transaction_columns(transaction, config)
-                    current_column = columns['oldValue']
+                if date == 'now' or transaction_date <= date:
+                    columns = self.get_transaction_columns(transaction)
+                    current_column = columns['newValue']
                 else:
-                    # First column change before the given date.
+                    # First column change after the given date.
                     # This means the current_column is the requested value.
                     break
         return current_column
 
-    def steps_between(self, start_date, end_date, config):
+    def steps_between(self, start_date, end_date):
         """
         Returns the number of column changes that a task has suffered,
         regardless if they are forwards or backwards. Only specified
@@ -127,11 +142,11 @@
         steps = 0
         for transaction in self.transactions:
             if transaction['transactionType'] == 'projectcolumn':
-                columns = self.get_transaction_columns(transaction, config)
+                columns = self.get_transaction_columns(transaction)
                 steps += abs(columns['newValue'] - columns['oldValue'])
         return steps
 
-    def get_transaction_columns(self, transaction, config):
+    def get_transaction_columns(self, transaction):
         """
         Assumes the transaction is of type 'projectcolumn'.
         Returns the transaction's normalized old and new column values.
@@ -149,36 +164,15 @@
                 column_values[kind] = 0 # Outside of workboard.
                 continue
             # Translate Phabricator column phab-ids into column values.
-            if column_phid in config['column_names']:
-                column_name = config['column_names'][column_phid]
-                column_values[kind] = config['column_values'][column_name]
+            if column_phid in CONFIG['column_names']:
+                column_name = CONFIG['column_names'][column_phid]
+                column_values[kind] = CONFIG['column_values'][column_name]
             else:
                 column_values[kind] = 0 # Outside of workboard.
         return column_values
 
 
-def parse_workboard_html(workboard_html, config):
-    """
-    Parses workboard html looking for columns, tasks,
-    their titles and ids. Returns a list of task objects.
-    """
-    soup = BeautifulSoup(workboard_html, 'lxml')
-    columns = soup.find_all(class_='phui-workpanel-view')
-    tasks = []
-    for column in columns:
-        column_name = column.find(class_='phui-header-header').strings.next()
-        if column_name in config['column_values']:
-            column_value = config['column_values'][column_name]
-            column_tasks = column.find_all(class_='phui-object-item-name')
-            for column_task in column_tasks:
-                task_id = 
int(column_task.find(class_='phui-object-item-objname').string[1:])
-                task_name = 
column_task.find(class_='phui-object-item-link').string
-                task = Task(task_id, task_name, column_value)
-                tasks.append(task)
-    return tasks
-
-
-def calculate_points_resolved(tasks, args, config):
+def calculate_points_resolved(tasks, args):
     """
     Returns the sum of task points in all tasks that have been
     marked as resolved between args.start_date and args.end_date.
@@ -186,11 +180,11 @@
     points_resolved = 0
     for task in tasks:
         if task.resolved_between(args.start_date, args.end_date):
-            points_resolved += task.points or config['default_points']
+            points_resolved += task.points or CONFIG['default_points']
     return points_resolved
 
 
-def calculate_points_moved_to_the_right(tasks, args, config):
+def calculate_points_moved_to_the_right(tasks, args):
     """
     For each task, gets the column it was placed at args.start_date and
     at args.end_date, and calculates how many columns the task has moved
@@ -198,18 +192,18 @@
     the total number of steps defined in the workboard. Returns the sum
     of it all.
     """
-    total_columns = float(max([c['value'] for c in config['columns']]))
+    total_columns = float(max([c['value'] for c in CONFIG['columns']]))
     points_moved_right = 0
     for task in tasks:
-        initial_column = task.column_at(args.start_date, config)
-        final_column = task.column_at(args.end_date, config)
+        initial_column = task.column_at(args.start_date)
+        final_column = task.column_at(args.end_date)
         columns_moved = final_column - initial_column
-        task_points = task.points or config['default_points']
+        task_points = task.points or CONFIG['default_points']
         points_moved_right += columns_moved / total_columns * task_points
     return points_moved_right
 
 
-def calculate_average_steps(tasks, args, config):
+def calculate_average_steps(tasks, args):
     """
     Returns the average number of column changes that the tasks marked
     as resolved between args.start_date and args.end_date have seen.
@@ -219,32 +213,40 @@
     resolved_count = 0
     for task in tasks:
         if task.resolved_between(args.start_date, args.end_date):
-            steps_count += task.steps_between(args.start_date, args.end_date, 
config)
+            steps_count += task.steps_between(args.start_date, args.end_date)
             resolved_count += 1
     return steps_count / float(resolved_count) if resolved_count > 0 else 0
 
 
 if __name__ == '__main__':
     args = parse_arguments()
-    config = get_config(args)
+    CONFIG = get_config(args)
 
-    # Parse the workboard for tasks and their positions.
-    response = requests.get(WORKBOARD_URL_TEMPLATE % config['workboard'])
-    tasks = parse_workboard_html(response.text, config)
-
-    # Get the task transactions from Phabricator.
+    # Create the Phabricator API wrapper.
     phab = phabricator.Phabricator(timeout=500)
-    task_ids = [t.id for t in tasks]
-    transactions = phab.maniphest.gettasktransactions(ids=task_ids)
 
-    # Decorate the tasks witht their transactions.
+    # Get the tasks belonging to the given board.
+    api_tasks = phab.maniphest.query(projectPHIDs=[CONFIG['workboard']])
+    tasks = [
+        Task(api_tasks[task_id]['id'], api_tasks[task_id]['title'])
+        for task_id in api_tasks  # .values() not implemented
+    ]
+
+    # Decorate the tasks with their transactions.
+    task_ids = [int(task.id) for task in tasks]
+    transactions = phab.maniphest.gettasktransactions(ids=task_ids)
     for task in tasks:
-        task.transactions = transactions[str(task.id)]
+        task.set_transactions(transactions[task.id])
+
+    # Print the interval tasks.
+    for task in tasks:
+        if task.resolved_between(args.start_date, args.end_date):
+            print "<%d>\t%s" % (task.points or CONFIG['default_points'], 
task.name)
 
     # Calculate the metrics.
-    points_resolved = calculate_points_resolved(tasks, args, config)
-    points_moved = calculate_points_moved_to_the_right(tasks, args, config)
-    average_steps = calculate_average_steps(tasks, args, config)
+    points_resolved = calculate_points_resolved(tasks, args)
+    points_moved = calculate_points_moved_to_the_right(tasks, args)
+    average_steps = calculate_average_steps(tasks, args)
 
     # Write output.
     print('Date\tPoints resolved\tPoints moved to the right\tAverage steps')
diff --git a/analytics/phab-stats-config.json b/analytics/phab-stats-config.json
index a5b64fe..a6ec945 100644
--- a/analytics/phab-stats-config.json
+++ b/analytics/phab-stats-config.json
@@ -1,5 +1,5 @@
 {
-    "workboard": "1030",
+    "workboard": "PHID-PROJ-mseu6whr6z4kqtzmoahl",
     "columns": [{
             "name": "Next Up",
             "phid": "PHID-PCOL-gj52fgl7wdrdkoazrt6z",
diff --git a/requirements.txt b/requirements.txt
index 5f85296..e9f9373 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,4 +1,3 @@
 phabricator >= 0.4.0
-beautifulsoup4 >= 4.4.0
 lxml
 requests

-- 
To view, visit https://gerrit.wikimedia.org/r/279177
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: Idda3f7f2c0f792c423de32760812e110f2f4373f
Gerrit-PatchSet: 1
Gerrit-Project: analytics/limn-analytics-data
Gerrit-Branch: master
Gerrit-Owner: Mforns <[email protected]>
Gerrit-Reviewer: Nuria <[email protected]>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to