Repository: libcloud Updated Branches: refs/heads/trunk c20f5fe31 -> 743259ec4
Update scrape ec2 pricing script so it can also handle the new pricing format (JavaScript JSONP) and fix it so it correctly (recursively) sorts the pricing data. Project: http://git-wip-us.apache.org/repos/asf/libcloud/repo Commit: http://git-wip-us.apache.org/repos/asf/libcloud/commit/51d0a7fa Tree: http://git-wip-us.apache.org/repos/asf/libcloud/tree/51d0a7fa Diff: http://git-wip-us.apache.org/repos/asf/libcloud/diff/51d0a7fa Branch: refs/heads/trunk Commit: 51d0a7faf0b5a199dc034c9a2e30bc529ee3ad0c Parents: c20f5fe Author: Tomaz Muraus <to...@apache.org> Authored: Sat Sep 6 21:05:53 2014 +0200 Committer: Tomaz Muraus <to...@apache.org> Committed: Sat Sep 6 21:09:01 2014 +0200 ---------------------------------------------------------------------- contrib/scrape-ec2-prices.py | 63 +++++++++++++++++++++++++++++---------- 1 file changed, 48 insertions(+), 15 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/libcloud/blob/51d0a7fa/contrib/scrape-ec2-prices.py ---------------------------------------------------------------------- diff --git a/contrib/scrape-ec2-prices.py b/contrib/scrape-ec2-prices.py index 0013232..e8c2b99 100755 --- a/contrib/scrape-ec2-prices.py +++ b/contrib/scrape-ec2-prices.py @@ -18,13 +18,20 @@ # under the License. 
import os +import re import json import time from collections import defaultdict, OrderedDict import requests +import demjson -ON_DEMAND_LINUX_URL = 'http://aws.amazon.com/ec2/pricing/json/linux-od.json' +LINUX_PRICING_URLS = [ + # Deprecated instances, JSON format + 'http://aws.amazon.com/ec2/pricing/json/linux-od.json', + # Instancances JavaScript files + 'https://a0.awsstatic.com/pricing/1/ec2/linux-od.min.js' +] EC2_REGIONS = [ 'us-east-1', @@ -96,23 +103,34 @@ PRICING_FILE_PATH = os.path.abspath(PRICING_FILE_PATH) def scrape_ec2_pricing(): - response = requests.get(ON_DEMAND_LINUX_URL) - data = response.json() + result = defaultdict(OrderedDict) - regions = data['config']['regions'] + for url in LINUX_PRICING_URLS: + response = requests.get(url) - result = defaultdict(OrderedDict) - for region_data in regions: - region_name = region_data['region'] - libcloud_region_name = REGION_NAME_MAP[region_name] - instance_types = region_data['instanceTypes'] + if re.match('.*?\.json$', url): + data = response.json() + elif re.match('.*?\.js$', url): + data = response.content + match = re.match('^.*callback\((.*?)\);?$', data, + re.MULTILINE | re.DOTALL) + data = match.group(1) + # demjson supports non-strict mode and can parse unquoted objects + data = demjson.decode(data) + + regions = data['config']['regions'] + + for region_data in regions: + region_name = region_data['region'] + libcloud_region_name = REGION_NAME_MAP[region_name] + instance_types = region_data['instanceTypes'] - for instance_type in instance_types: - sizes = instance_type['sizes'] + for instance_type in instance_types: + sizes = instance_type['sizes'] - for size in sizes: - price = size['valueColumns'][0]['prices']['USD'] - result[libcloud_region_name][size['size']] = price + for size in sizes: + price = size['valueColumns'][0]['prices']['USD'] + result[libcloud_region_name][size['size']] = price return result @@ -126,7 +144,7 @@ def update_pricing_file(pricing_file_path, pricing_data): 
data['compute'].update(pricing_data) # Always sort the pricing info - data = OrderedDict(sorted(data.items())) + data = sort_nested_dict(data) content = json.dumps(data, indent=4) lines = content.splitlines() @@ -137,6 +155,21 @@ def update_pricing_file(pricing_file_path, pricing_data): fp.write(content) +def sort_nested_dict(value): + """ + Recursively sort a nested dict. + """ + result = OrderedDict() + + for key, value in sorted(value.items()): + if isinstance(value, (dict, OrderedDict)): + result[key] = sort_nested_dict(value) + else: + result[key] = value + + return result + + def main(): print('Scraping EC2 pricing data')