Revision: 342
Author: bslatkin
Date: Mon Mar 1 12:24:09 2010
Log: hub: do single-key stats differently, template tweaks, subdomain exceptions
http://code.google.com/p/pubsubhubbub/source/detail?r=342
Modified:
/trunk/hub/dos.py
/trunk/hub/dos_test.py
/trunk/hub/stats_table.html
=======================================
--- /trunk/hub/dos.py Mon Mar 1 00:07:24 2010
+++ /trunk/hub/dos.py Mon Mar 1 12:24:09 2010
@@ -187,19 +187,29 @@
URL_DOMAIN_RE = re.compile(
r'https?://(?:'
r'([0-9]+\.[0-9]+\.[0-9]+\.[0-9]+)|' # IP address
- r'((?:[a-zA-Z0-9-]+\.)+[a-zA-Z0-9-]+)|' # Domain
+ r'(?:((?:[a-zA-Z0-9-]+\.)*)([a-zA-Z0-9-]+\.[a-zA-Z0-9-]+))|' # Domain
r'([^/]+)' # Anyting else
r')(?:/.*)?') # The rest of the URL
+# Domains where only the suffix is important.
+DOMAIN_SUFFIX_EXCEPTIONS = frozenset([
+ 'blogspot.com',
+ 'livejournal.com',
+])
+
def get_url_domain(url):
"""Returns the domain for a URL or 'bad_url if it's not a valid URL."""
match = URL_DOMAIN_RE.match(url)
if match:
- groups = filter(bool, match.groups())
+ groups = list(match.groups())
+ if groups[1] and groups[2] and groups[2] not in DOMAIN_SUFFIX_EXCEPTIONS:
+ groups[2] = groups[1] + groups[2]
+ groups[1] = None
+ groups = filter(bool, groups)
else:
- groups = tuple()
- return (groups + ('bad_url',))[0]
+ groups = []
+ return (groups + ['bad_url'])[0]
################################################################################
@@ -642,6 +652,14 @@
"""
return self.sample_dict.get(key, [])
+ def set_single_sample(self, key):
+ """Sets that this result is for a single key.
+
+ Args:
+ key: The sampling key.
+ """
+ self.total_samples = self.get_count(key)
+
def sample_objects(self):
"""Gets the contents of this result object for use in template rendering.
@@ -811,9 +829,6 @@
key, when_encoded, value_encoded = (
combined_value.rsplit(':', 2) + ['', '', ''])[:3]
if single_key is not None and single_key != key:
- # Must decement the overall count in the result to prevent us from
- # leaking the total number of samples made thus far.
- results.total_samples -= 1
continue
if len(when_encoded) != 4:
@@ -828,6 +843,11 @@
(start_time + config.period + config.tolerance)):
results.add(key, when, value)
+ # For a single sample we need to set the counter to the number of unique
+ # samples so we don't leak the overall QPS being pushed for this event.
+ if single_key is not None:
+ results.set_single_sample(single_key)
+
return results
################################################################################
=======================================
--- /trunk/hub/dos_test.py Mon Mar 1 00:07:24 2010
+++ /trunk/hub/dos_test.py Mon Mar 1 12:24:09 2010
@@ -498,8 +498,8 @@
def testDomainExceptions(self):
"""Tests that some URLs may use more than the domain suffix."""
self.assertEquals(
- 'example.appspot.com',
- dos.get_url_domain('http://example.appspot.com/this-is?some=test'))
+ 'blogspot.com',
+ dos.get_url_domain('http://example.blogspot.com/this-is?some=test'))
def testIP(self):
"""Tests IP addresses."""
@@ -1042,7 +1042,7 @@
def testGetSingleKey(self):
"""Tests getting the stats for a single key."""
config = dos.ReservoirConfig(
- 'always',
+ 'single-sample',
period=300,
rate=1,
samples=10000,
@@ -1053,23 +1053,27 @@
reporter.set(self.url1, config)
reporter.set(self.url2, config)
reporter.set(self.url3, config)
+ reporter.set(self.url3 + '&okay=1', config)
+ reporter.set(self.url3 + '&okay=2', config)
+ reporter.set(self.url3 + '&okay=3', config)
+ reporter.set(self.url3 + '&okay=4', config)
reporter.set(self.url4, config)
reporter.set(self.url5, config)
self.gettime_results.extend([0, 10, 10])
sampler.sample(reporter)
results = sampler.get(config)
- self.assertEquals(5, results.total_samples)
- self.assertEquals(5, results.unique_samples)
+ self.assertEquals(9, results.total_samples)
+ self.assertEquals(9, results.unique_samples)
self.verify_sample(results, self.domainA, 1, 0.1)
- self.verify_sample(results, self.domainB, 2, 0.2)
+ self.verify_sample(results, self.domainB, 6, 0.6)
self.verify_sample(results, self.domainC, 1, 0.1)
self.verify_sample(results, self.domainD, 1, 0.1)
- results = sampler.get(config, self.domainA)
- self.assertEquals(1, results.total_samples)
- self.assertEquals(1, results.unique_samples)
- self.verify_sample(results, self.domainA, 1, 0.1)
- self.verify_no_sample(results, self.domainB)
+ results = sampler.get(config, self.domainB)
+ self.assertEquals(6, results.total_samples)
+ self.assertEquals(6, results.unique_samples)
+ self.verify_sample(results, self.domainB, 6, 0.6)
+ self.verify_no_sample(results, self.domainA)
self.verify_no_sample(results, self.domainC)
self.verify_no_sample(results, self.domainD)
=======================================
--- /trunk/hub/stats_table.html Sun Feb 28 23:22:07 2010
+++ /trunk/hub/stats_table.html Mon Mar 1 12:24:09 2010
@@ -22,10 +22,10 @@
<tr align="right">
<td align="left">{{sample.key|escape}}</td>
<td>{{sample.count}}</td>
- <td>{{sample.frequency|floatformat:"-4"}}/sec</td>
- <td>{{sample.min|floatformat:"-4"}}</td>
- <td>{{sample.max|floatformat:"-4"}}</td>
- <td>{{sample.average|floatformat:"-4"}} {{result.value_units}}</td>
+ <td>{{sample.frequency|floatformat:"-2"}}/sec</td>
+ <td>{{sample.min|floatformat:"-2"}}</td>
+ <td>{{sample.max|floatformat:"-2"}}</td>
+ <td>{{sample.average|floatformat:"-2"}} {{result.value_units}}</td>
</tr>
{% endfor %}
</table>