Nuria has submitted this change and it was merged.
Change subject: Fix script after Phabricator layout changes
......................................................................
Fix script after Phabricator layout changes
This change includes:
- Fix the parsing of the tasks that was broken since Phab changed
its layout (tasks are not html any more and can not be scraped).
- Add support to get the task points from the point 'field'.
- Use a global for config.
Bug: T130543
Change-Id: Idda3f7f2c0f792c423de32760812e110f2f4373f
---
M README.md
M analytics/phab-stats
M analytics/phab-stats-config.json
M requirements.txt
4 files changed, 72 insertions(+), 72 deletions(-)
Approvals:
Nuria: Verified; Looks good to me, approved
diff --git a/README.md b/README.md
index fa073bf..465eec8 100644
--- a/README.md
+++ b/README.md
@@ -1,6 +1,6 @@
#Usage
```
- ./phab-stats --config ./phab-stats-config.json 2015-09-01 2015-12-31
+ ./phab-stats --config ./phab-stats-config.json 2015-09-01 2016-01-01
```
#To install
@@ -15,7 +15,7 @@
```
sudo pip install -r ./requirements.txt
```
-If lxml gives trouble, uninstall and install again.
+If lxml gives trouble, uninstall and install again.
Please see issue: https://github.com/disqus/python-phabricator/issues/22
You need phabricator 0.0.5 to workarround it
@@ -30,7 +30,7 @@
```
>arc install-certificate https://phabricator.wikimedia.org
- CONNECT Connecting to "https://phabricator.wikimedia.org/api/"...
+CONNECT Connecting to "https://phabricator.wikimedia.org/api/"...
LOGIN TO PHABRICATOR
Open this page in your browser and login to Phabricator if necessary:
@@ -43,4 +43,3 @@
SUCCESS! API Token installed.
```
DONE! You should be able to run the script
-
diff --git a/analytics/phab-stats b/analytics/phab-stats
index e8868af..9ac2ba6 100755
--- a/analytics/phab-stats
+++ b/analytics/phab-stats
@@ -17,7 +17,6 @@
import phabricator
import requests
from datetime import datetime
-from bs4 import BeautifulSoup
WORKBOARD_URL_TEMPLATE =
'https://phabricator.wikimedia.org/project/board/%s/query/all/'
@@ -66,22 +65,36 @@
# The pattern that matches point definitions in task names.
POINTS_REGEXP = re.compile(r'\[([0-9]+)\s*pts\]')
- def __init__(self, id, name, column):
+ def __init__(self, id, name):
self.id = id
self.name = name
- self.column = column
- self.points = self.parse_points(name)
# The transaction list is asigned separately for performance reasons.
- # All task transactions for all the tasks are requested together in
- # the same api call. So, after that, all tasks are decorated with it.
self.transactions = None
+ # The following fields depend on the transactions to be calculated.
+ self.column = None
+ self.points = None
- def parse_points(self, name):
- match = Task.POINTS_REGEXP.search(name, re.I)
+ def set_transactions(self, transactions):
+ """
+ Sets the task's transactions and triggers the
+ calculation of the fields that depend on them.
+ """
+ self.transactions = transactions
+ self.column = self.column_at('now')
+ self.points = self.parse_points()
+
+ def parse_points(self):
+ """
+ Sets the task's points by trying to parse them from its title,
+ or otherwise looking at the transactions to calculate the point field.
+ """
+ match = Task.POINTS_REGEXP.search(self.name, re.I)
if match:
return int(match.group(1))
- else:
- return None
+ for transaction in self.transactions:
+ if transaction['transactionType'] == 'points':
+ return transaction['newValue']
+ return None
def resolved_between(self, start_date, end_date):
"""
@@ -98,26 +111,28 @@
)
return False
- def column_at(self, date, config):
+ def column_at(self, date):
"""
Returns the value of the column in which the task was at the given
date.
+ If the date is 'now', returns the column where the task is currently.
"""
- current_column = self.column
+ current_column = 0 # Board's initial column.
# Transactions are sorted by timestamp, newest to oldest.
- for transaction in self.transactions:
+ # So they need to be reversed.
+ for transaction in reversed(self.transactions):
if transaction['transactionType'] == 'projectcolumn':
timestamp = int(transaction['dateCreated'])
transaction_date = datetime.fromtimestamp(timestamp)
- if transaction_date >= date:
- columns = self.get_transaction_columns(transaction, config)
- current_column = columns['oldValue']
+ if date == 'now' or transaction_date <= date:
+ columns = self.get_transaction_columns(transaction)
+ current_column = columns['newValue']
else:
- # First column change before the given date.
+ # First column change after the given date.
# This means the current_column is the requested value.
break
return current_column
- def steps_between(self, start_date, end_date, config):
+ def steps_between(self, start_date, end_date):
"""
Returns the number of column changes that a task has suffered,
regardless if they are forwards or backwards. Only specified
@@ -127,11 +142,11 @@
steps = 0
for transaction in self.transactions:
if transaction['transactionType'] == 'projectcolumn':
- columns = self.get_transaction_columns(transaction, config)
+ columns = self.get_transaction_columns(transaction)
steps += abs(columns['newValue'] - columns['oldValue'])
return steps
- def get_transaction_columns(self, transaction, config):
+ def get_transaction_columns(self, transaction):
"""
Assumes the transaction is of type 'projectcolumn'.
Returns the transaction's normalized old and new column values.
@@ -149,36 +164,15 @@
column_values[kind] = 0 # Outside of workboard.
continue
# Translate Phabricator column phab-ids into column values.
- if column_phid in config['column_names']:
- column_name = config['column_names'][column_phid]
- column_values[kind] = config['column_values'][column_name]
+ if column_phid in CONFIG['column_names']:
+ column_name = CONFIG['column_names'][column_phid]
+ column_values[kind] = CONFIG['column_values'][column_name]
else:
column_values[kind] = 0 # Outside of workboard.
return column_values
-def parse_workboard_html(workboard_html, config):
- """
- Parses workboard html looking for columns, tasks,
- their titles and ids. Returns a list of task objects.
- """
- soup = BeautifulSoup(workboard_html, 'lxml')
- columns = soup.find_all(class_='phui-workpanel-view')
- tasks = []
- for column in columns:
- column_name = column.find(class_='phui-header-header').strings.next()
- if column_name in config['column_values']:
- column_value = config['column_values'][column_name]
- column_tasks = column.find_all(class_='phui-object-item-name')
- for column_task in column_tasks:
- task_id =
int(column_task.find(class_='phui-object-item-objname').string[1:])
- task_name =
column_task.find(class_='phui-object-item-link').string
- task = Task(task_id, task_name, column_value)
- tasks.append(task)
- return tasks
-
-
-def calculate_points_resolved(tasks, args, config):
+def calculate_points_resolved(tasks, args):
"""
Returns the sum of task points in all tasks that have been
marked as resolved between args.start_date and args.end_date.
@@ -186,11 +180,11 @@
points_resolved = 0
for task in tasks:
if task.resolved_between(args.start_date, args.end_date):
- points_resolved += task.points or config['default_points']
+ points_resolved += task.points or CONFIG['default_points']
return points_resolved
-def calculate_points_moved_to_the_right(tasks, args, config):
+def calculate_points_moved_to_the_right(tasks, args):
"""
For each task, gets the column it was placed at args.start_date and
at args.end_date, and calculates how many columns the task has moved
@@ -198,18 +192,18 @@
the total number of steps defined in the workboard. Returns the sum
of it all.
"""
- total_columns = float(max([c['value'] for c in config['columns']]))
+ total_columns = float(max([c['value'] for c in CONFIG['columns']]))
points_moved_right = 0
for task in tasks:
- initial_column = task.column_at(args.start_date, config)
- final_column = task.column_at(args.end_date, config)
+ initial_column = task.column_at(args.start_date)
+ final_column = task.column_at(args.end_date)
columns_moved = final_column - initial_column
- task_points = task.points or config['default_points']
+ task_points = task.points or CONFIG['default_points']
points_moved_right += columns_moved / total_columns * task_points
return points_moved_right
-def calculate_average_steps(tasks, args, config):
+def calculate_average_steps(tasks, args):
"""
Returns the average number of column changes that the tasks marked
as resolved between args.start_date and args.end_date have seen.
@@ -219,32 +213,40 @@
resolved_count = 0
for task in tasks:
if task.resolved_between(args.start_date, args.end_date):
- steps_count += task.steps_between(args.start_date, args.end_date,
config)
+ steps_count += task.steps_between(args.start_date, args.end_date)
resolved_count += 1
return steps_count / float(resolved_count) if resolved_count > 0 else 0
if __name__ == '__main__':
args = parse_arguments()
- config = get_config(args)
+ CONFIG = get_config(args)
- # Parse the workboard for tasks and their positions.
- response = requests.get(WORKBOARD_URL_TEMPLATE % config['workboard'])
- tasks = parse_workboard_html(response.text, config)
-
- # Get the task transactions from Phabricator.
+ # Create the Phabricator API wrapper.
phab = phabricator.Phabricator(timeout=500)
- task_ids = [t.id for t in tasks]
- transactions = phab.maniphest.gettasktransactions(ids=task_ids)
- # Decorate the tasks witht their transactions.
+ # Get the tasks belonging to the given board.
+ api_tasks = phab.maniphest.query(projectPHIDs=[CONFIG['workboard']])
+ tasks = [
+ Task(api_tasks[task_id]['id'], api_tasks[task_id]['title'])
+ for task_id in api_tasks # .values() not implemented
+ ]
+
+ # Decorate the tasks with their transactions.
+ task_ids = [int(task.id) for task in tasks]
+ transactions = phab.maniphest.gettasktransactions(ids=task_ids)
for task in tasks:
- task.transactions = transactions[str(task.id)]
+ task.set_transactions(transactions[task.id])
+
+ # Print the interval tasks.
+ for task in tasks:
+ if task.resolved_between(args.start_date, args.end_date):
+ print "<%d>\t%s" % (task.points or CONFIG['default_points'],
task.name)
# Calculate the metrics.
- points_resolved = calculate_points_resolved(tasks, args, config)
- points_moved = calculate_points_moved_to_the_right(tasks, args, config)
- average_steps = calculate_average_steps(tasks, args, config)
+ points_resolved = calculate_points_resolved(tasks, args)
+ points_moved = calculate_points_moved_to_the_right(tasks, args)
+ average_steps = calculate_average_steps(tasks, args)
# Write output.
print('Date\tPoints resolved\tPoints moved to the right\tAverage steps')
diff --git a/analytics/phab-stats-config.json b/analytics/phab-stats-config.json
index a5b64fe..a6ec945 100644
--- a/analytics/phab-stats-config.json
+++ b/analytics/phab-stats-config.json
@@ -1,5 +1,5 @@
{
- "workboard": "1030",
+ "workboard": "PHID-PROJ-mseu6whr6z4kqtzmoahl",
"columns": [{
"name": "Next Up",
"phid": "PHID-PCOL-gj52fgl7wdrdkoazrt6z",
diff --git a/requirements.txt b/requirements.txt
index 5f85296..e9f9373 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,4 +1,3 @@
phabricator >= 0.4.0
-beautifulsoup4 >= 4.4.0
lxml
requests
--
To view, visit https://gerrit.wikimedia.org/r/279177
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: Idda3f7f2c0f792c423de32760812e110f2f4373f
Gerrit-PatchSet: 1
Gerrit-Project: analytics/limn-analytics-data
Gerrit-Branch: master
Gerrit-Owner: Mforns <[email protected]>
Gerrit-Reviewer: Nuria <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits