Repository: libcloud
Updated Branches:
  refs/heads/trunk c20f5fe31 -> 743259ec4


Update the scrape ec2 pricing script so it can also handle the new pricing
format (JavaScript JSONP) and fix it so it correctly (recursively) sorts the
pricing data.


Project: http://git-wip-us.apache.org/repos/asf/libcloud/repo
Commit: http://git-wip-us.apache.org/repos/asf/libcloud/commit/51d0a7fa
Tree: http://git-wip-us.apache.org/repos/asf/libcloud/tree/51d0a7fa
Diff: http://git-wip-us.apache.org/repos/asf/libcloud/diff/51d0a7fa

Branch: refs/heads/trunk
Commit: 51d0a7faf0b5a199dc034c9a2e30bc529ee3ad0c
Parents: c20f5fe
Author: Tomaz Muraus <to...@apache.org>
Authored: Sat Sep 6 21:05:53 2014 +0200
Committer: Tomaz Muraus <to...@apache.org>
Committed: Sat Sep 6 21:09:01 2014 +0200

----------------------------------------------------------------------
 contrib/scrape-ec2-prices.py | 63 +++++++++++++++++++++++++++++----------
 1 file changed, 48 insertions(+), 15 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/libcloud/blob/51d0a7fa/contrib/scrape-ec2-prices.py
----------------------------------------------------------------------
diff --git a/contrib/scrape-ec2-prices.py b/contrib/scrape-ec2-prices.py
index 0013232..e8c2b99 100755
--- a/contrib/scrape-ec2-prices.py
+++ b/contrib/scrape-ec2-prices.py
@@ -18,13 +18,20 @@
 #  under the License.
 
 import os
+import re
 import json
 import time
 from collections import defaultdict, OrderedDict
 
 import requests
+import demjson
 
-ON_DEMAND_LINUX_URL = 'http://aws.amazon.com/ec2/pricing/json/linux-od.json'
+LINUX_PRICING_URLS = [
+    # Deprecated instances, JSON format
+    'http://aws.amazon.com/ec2/pricing/json/linux-od.json',
+    # Instances JavaScript files
+    'https://a0.awsstatic.com/pricing/1/ec2/linux-od.min.js'
+]
 
 EC2_REGIONS = [
     'us-east-1',
@@ -96,23 +103,34 @@ PRICING_FILE_PATH = os.path.abspath(PRICING_FILE_PATH)
 
 
 def scrape_ec2_pricing():
-    response = requests.get(ON_DEMAND_LINUX_URL)
-    data = response.json()
+    result = defaultdict(OrderedDict)
 
-    regions = data['config']['regions']
+    for url in LINUX_PRICING_URLS:
+        response = requests.get(url)
 
-    result = defaultdict(OrderedDict)
-    for region_data in regions:
-        region_name = region_data['region']
-        libcloud_region_name = REGION_NAME_MAP[region_name]
-        instance_types = region_data['instanceTypes']
+        if re.match('.*?\.json$', url):
+            data = response.json()
+        elif re.match('.*?\.js$', url):
+            data = response.content
+            match = re.match('^.*callback\((.*?)\);?$', data,
+                             re.MULTILINE | re.DOTALL)
+            data = match.group(1)
+            # demjson supports non-strict mode and can parse unquoted objects
+            data = demjson.decode(data)
+
+        regions = data['config']['regions']
+
+        for region_data in regions:
+            region_name = region_data['region']
+            libcloud_region_name = REGION_NAME_MAP[region_name]
+            instance_types = region_data['instanceTypes']
 
-        for instance_type in instance_types:
-            sizes = instance_type['sizes']
+            for instance_type in instance_types:
+                sizes = instance_type['sizes']
 
-            for size in sizes:
-                price = size['valueColumns'][0]['prices']['USD']
-                result[libcloud_region_name][size['size']] = price
+                for size in sizes:
+                    price = size['valueColumns'][0]['prices']['USD']
+                    result[libcloud_region_name][size['size']] = price
 
     return result
 
@@ -126,7 +144,7 @@ def update_pricing_file(pricing_file_path, pricing_data):
     data['compute'].update(pricing_data)
 
     # Always sort the pricing info
-    data = OrderedDict(sorted(data.items()))
+    data = sort_nested_dict(data)
 
     content = json.dumps(data, indent=4)
     lines = content.splitlines()
@@ -137,6 +155,21 @@ def update_pricing_file(pricing_file_path, pricing_data):
         fp.write(content)
 
 
+def sort_nested_dict(value):
+    """
+    Recursively sort a nested dict.
+    """
+    result = OrderedDict()
+
+    for key, value in sorted(value.items()):
+        if isinstance(value, (dict, OrderedDict)):
+            result[key] = sort_nested_dict(value)
+        else:
+            result[key] = value
+
+    return result
+
+
 def main():
     print('Scraping EC2 pricing data')
 

Reply via email to