Revision: 342
Author: bslatkin
Date: Mon Mar  1 12:24:09 2010
Log: hub: do single-key stats differently, template tweaks, subdomain exceptions
http://code.google.com/p/pubsubhubbub/source/detail?r=342

Modified:
 /trunk/hub/dos.py
 /trunk/hub/dos_test.py
 /trunk/hub/stats_table.html

=======================================
--- /trunk/hub/dos.py   Mon Mar  1 00:07:24 2010
+++ /trunk/hub/dos.py   Mon Mar  1 12:24:09 2010
@@ -187,19 +187,29 @@
 URL_DOMAIN_RE = re.compile(
     r'https?://(?:'
     r'([0-9]+\.[0-9]+\.[0-9]+\.[0-9]+)|'  # IP address
-    r'((?:[a-zA-Z0-9-]+\.)+[a-zA-Z0-9-]+)|'  # Domain
+    r'(?:((?:[a-zA-Z0-9-]+\.)*)([a-zA-Z0-9-]+\.[a-zA-Z0-9-]+))|'  # Domain
     r'([^/]+)'  # Anyting else
     r')(?:/.*)?')  # The rest of the URL

+# Domains where only the suffix is important.
+DOMAIN_SUFFIX_EXCEPTIONS = frozenset([
+  'blogspot.com',
+  'livejournal.com',
+])
+

 def get_url_domain(url):
   """Returns the domain for a URL or 'bad_url if it's not a valid URL."""
   match = URL_DOMAIN_RE.match(url)
   if match:
-    groups = filter(bool, match.groups())
+    groups = list(match.groups())
+    if groups[1] and groups[2] and groups[2] not in DOMAIN_SUFFIX_EXCEPTIONS:
+      groups[2] = groups[1] + groups[2]
+    groups[1] = None
+    groups = filter(bool, groups)
   else:
-    groups = tuple()
-  return (groups + ('bad_url',))[0]
+    groups = []
+  return (groups + ['bad_url'])[0]

################################################################################

@@ -642,6 +652,14 @@
     """
     return self.sample_dict.get(key, [])

+  def set_single_sample(self, key):
+    """Sets that this result is for a single key.
+
+    Args:
+      key: The sampling key.
+    """
+    self.total_samples = self.get_count(key)
+
   def sample_objects(self):
     """Gets the contents of this result object for use in template rendering.

@@ -811,9 +829,6 @@
       key, when_encoded, value_encoded = (
           combined_value.rsplit(':', 2) + ['', '', ''])[:3]
       if single_key is not None and single_key != key:
-        # Must decement the overall count in the result to prevent us from
-        # leaking the total number of samples made thus far.
-        results.total_samples -= 1
         continue

       if len(when_encoded) != 4:
@@ -828,6 +843,11 @@
           (start_time + config.period + config.tolerance)):
         results.add(key, when, value)

+    # For a single sample we need to set the counter to the number of unique
+    # samples so we don't leak the overall QPS being pushed for this event.
+    if single_key is not None:
+      results.set_single_sample(single_key)
+
     return results

################################################################################
=======================================
--- /trunk/hub/dos_test.py      Mon Mar  1 00:07:24 2010
+++ /trunk/hub/dos_test.py      Mon Mar  1 12:24:09 2010
@@ -498,8 +498,8 @@
   def testDomainExceptions(self):
     """Tests that some URLs may use more than the domain suffix."""
     self.assertEquals(
-        'example.appspot.com',
-        dos.get_url_domain('http://example.appspot.com/this-is?some=test'))
+        'blogspot.com',
+        dos.get_url_domain('http://example.blogspot.com/this-is?some=test'))

   def testIP(self):
     """Tests IP addresses."""
@@ -1042,7 +1042,7 @@
   def testGetSingleKey(self):
     """Tests getting the stats for a single key."""
     config = dos.ReservoirConfig(
-        'always',
+        'single-sample',
         period=300,
         rate=1,
         samples=10000,
@@ -1053,23 +1053,27 @@
     reporter.set(self.url1, config)
     reporter.set(self.url2, config)
     reporter.set(self.url3, config)
+    reporter.set(self.url3 + '&okay=1', config)
+    reporter.set(self.url3 + '&okay=2', config)
+    reporter.set(self.url3 + '&okay=3', config)
+    reporter.set(self.url3 + '&okay=4', config)
     reporter.set(self.url4, config)
     reporter.set(self.url5, config)
     self.gettime_results.extend([0, 10, 10])
     sampler.sample(reporter)
     results = sampler.get(config)
-    self.assertEquals(5, results.total_samples)
-    self.assertEquals(5, results.unique_samples)
+    self.assertEquals(9, results.total_samples)
+    self.assertEquals(9, results.unique_samples)
     self.verify_sample(results, self.domainA, 1, 0.1)
-    self.verify_sample(results, self.domainB, 2, 0.2)
+    self.verify_sample(results, self.domainB, 6, 0.6)
     self.verify_sample(results, self.domainC, 1, 0.1)
     self.verify_sample(results, self.domainD, 1, 0.1)

-    results = sampler.get(config, self.domainA)
-    self.assertEquals(1, results.total_samples)
-    self.assertEquals(1, results.unique_samples)
-    self.verify_sample(results, self.domainA, 1, 0.1)
-    self.verify_no_sample(results, self.domainB)
+    results = sampler.get(config, self.domainB)
+    self.assertEquals(6, results.total_samples)
+    self.assertEquals(6, results.unique_samples)
+    self.verify_sample(results, self.domainB, 6, 0.6)
+    self.verify_no_sample(results, self.domainA)
     self.verify_no_sample(results, self.domainC)
     self.verify_no_sample(results, self.domainD)

=======================================
--- /trunk/hub/stats_table.html Sun Feb 28 23:22:07 2010
+++ /trunk/hub/stats_table.html Mon Mar  1 12:24:09 2010
@@ -22,10 +22,10 @@
   <tr align="right">
     <td align="left">{{sample.key|escape}}</td>
     <td>{{sample.count}}</td>
-    <td>{{sample.frequency|floatformat:"-4"}}/sec</td>
-    <td>{{sample.min|floatformat:"-4"}}</td>
-    <td>{{sample.max|floatformat:"-4"}}</td>
-    <td>{{sample.average|floatformat:"-4"}} {{result.value_units}}</td>
+    <td>{{sample.frequency|floatformat:"-2"}}/sec</td>
+    <td>{{sample.min|floatformat:"-2"}}</td>
+    <td>{{sample.max|floatformat:"-2"}}</td>
+    <td>{{sample.average|floatformat:"-2"}} {{result.value_units}}</td>
   </tr>
   {% endfor %}
 </table>

Reply via email to