Revision: 337
Author: bslatkin
Date: Fri Feb 26 13:52:21 2010
Log: Hub: add url to domain mapping exceptions for shared hosts
http://code.google.com/p/pubsubhubbub/source/detail?r=337
Modified:
/trunk/hub/dos.py
/trunk/hub/dos_test.py
=======================================
--- /trunk/hub/dos.py Fri Feb 26 11:54:31 2010
+++ /trunk/hub/dos.py Fri Feb 26 13:52:21 2010
@@ -182,23 +182,37 @@
# needed for domains like 'appspot.com' that are shared across totally
# different developers.
-# Matches three groups: 1) an IP, 2) a domain, 3) other (eg, localhost)
+# Matches four groups:
+# 1) an IP, 2) a domain prefix, 3) a domain suffix, 4) other (eg, localhost)
URL_DOMAIN_RE = re.compile(
r'https?://(?:'
r'([0-9]+\.[0-9]+\.[0-9]+\.[0-9]+)|' # IP address
- r'(?:(?:[a-zA-Z0-9-]+\.)*([a-zA-Z0-9-]+\.[a-zA-Z0-9-]+))|' # Domain
+ r'(?:([a-zA-Z0-9-]+\.)*([a-zA-Z0-9-]+\.[a-zA-Z0-9-]+))|' # Domain
r'([^/]+)' # Anyting else
r')(?:/.*)?') # The rest of the URL
+# Domains where the full domain should be used for any rate limiting or
+# statistics instead of just the suffix due to different developers being
+# present on different URLs.
+DOMAIN_EXCEPTIONS = frozenset([
+ 'amazonaws.com',
+ 'appspot.com',
+ 'heroku.com',
+])
+
def get_url_domain(url):
"""Returns the domain for a URL or 'bad_url if it's not a valid URL."""
match = URL_DOMAIN_RE.match(url)
if match:
- groups = filter(bool, match.groups())
+ groups = list(match.groups())
+ if groups[1] and groups[2] in DOMAIN_EXCEPTIONS:
+ groups[2] = groups[1] + groups[2]
+ groups[1] = None
+ groups = filter(bool, groups)
else:
- groups = tuple()
- return (groups + ('bad_url',))[0]
+ groups = []
+ return (groups + ['bad_url'])[0]
################################################################################
=======================================
--- /trunk/hub/dos_test.py Fri Feb 26 11:54:31 2010
+++ /trunk/hub/dos_test.py Fri Feb 26 13:52:21 2010
@@ -495,6 +495,12 @@
'example.com',
dos.get_url_domain('http://www.example.com'))
+ def testDomainExceptions(self):
+ """Tests that some URLs may use more than the domain suffix."""
+ self.assertEquals(
+ 'example.appspot.com',
+ dos.get_url_domain('http://example.appspot.com/this-is?some=test'))
+
def testIP(self):
"""Tests IP addresses."""
self.assertEquals(