Revision: 337
Author: bslatkin
Date: Fri Feb 26 13:52:21 2010
Log: Hub: add url to domain mapping exceptions for shared hosts
http://code.google.com/p/pubsubhubbub/source/detail?r=337

Modified:
 /trunk/hub/dos.py
 /trunk/hub/dos_test.py

=======================================
--- /trunk/hub/dos.py   Fri Feb 26 11:54:31 2010
+++ /trunk/hub/dos.py   Fri Feb 26 13:52:21 2010
@@ -182,23 +182,37 @@
 # needed for domains like 'appspot.com' that are shared across totally
 # different developers.

-# Matches three groups: 1) an IP, 2) a domain, 3) other (eg, localhost)
+# Matches four groups:
+# 1) an IP, 2) a domain prefix, 3) a domain suffix, 4) other (eg, localhost)
 URL_DOMAIN_RE = re.compile(
     r'https?://(?:'
     r'([0-9]+\.[0-9]+\.[0-9]+\.[0-9]+)|'  # IP address
-    r'(?:(?:[a-zA-Z0-9-]+\.)*([a-zA-Z0-9-]+\.[a-zA-Z0-9-]+))|'  # Domain
+    r'(?:([a-zA-Z0-9-]+\.)*([a-zA-Z0-9-]+\.[a-zA-Z0-9-]+))|'  # Domain
     r'([^/]+)'  # Anything else
     r')(?:/.*)?')  # The rest of the URL

+# Domains where the full domain should be used for any rate limiting or
+# statistics instead of just the suffix due to different developers being
+# present on different URLs.
+DOMAIN_EXCEPTIONS = frozenset([
+  'amazonaws.com',
+  'appspot.com',
+  'heroku.com',
+])
+

 def get_url_domain(url):
   """Returns the domain for a URL or 'bad_url' if it's not a valid URL."""
   match = URL_DOMAIN_RE.match(url)
   if match:
-    groups = filter(bool, match.groups())
+    groups = list(match.groups())
+    if groups[1] and groups[2] in DOMAIN_EXCEPTIONS:
+      groups[2] = groups[1] + groups[2]
+    groups[1] = None
+    groups = filter(bool, groups)
   else:
-    groups = tuple()
-  return (groups + ('bad_url',))[0]
+    groups = []
+  return (groups + ['bad_url'])[0]

################################################################################

=======================================
--- /trunk/hub/dos_test.py      Fri Feb 26 11:54:31 2010
+++ /trunk/hub/dos_test.py      Fri Feb 26 13:52:21 2010
@@ -495,6 +495,12 @@
         'example.com',
         dos.get_url_domain('http://www.example.com'))

+  def testDomainExceptions(self):
+    """Tests that some URLs may use more than the domain suffix."""
+    self.assertEquals(
+        'example.appspot.com',
+        dos.get_url_domain('http://example.appspot.com/this-is?some=test'))
+
   def testIP(self):
     """Tests IP addresses."""
     self.assertEquals(

Reply via email to