Revision: 294
Author: bslatkin
Date: Mon Oct 26 13:36:06 2009
Log: fixed handling of multi-line feed IDs
http://code.google.com/p/pubsubhubbub/source/detail?r=294

Modified:
 /trunk/hub/feed_identifier.py
 /trunk/hub/feed_identifier_test.py
 /trunk/hub/main.py
 /trunk/hub/main_test.py

=======================================
--- /trunk/hub/feed_identifier.py       Mon Oct 26 09:29:16 2009
+++ /trunk/hub/feed_identifier.py       Mon Oct 26 13:36:06 2009
@@ -50,7 +50,7 @@
       parser: Instance of the xml.sax parser being used with this handler.
     """
     self.parser = parser
-    self.link = None
+    self.link = []
     self.tag_stack = []
     self.capture_next_element = False

@@ -69,14 +69,21 @@
           self.capture_next_element = True

   def endElement(self, name):
-    if not self.link:
+    if self.link:
+      self.capture_next_element = False
+    else:
       if DEBUG: logging.debug('End stack level %r', name)
       self.tag_stack.pop()

   def characters(self, content):
     if self.capture_next_element:
-      self.capture_next_element = False
-      self.link = content
+      self.link.append(content)
+
+  def get_link(self):
+    if not self.link:
+      return None
+    else:
+      return ''.join(self.link).strip()


 class AtomFeedIdentifier(FeedIdentifier):
@@ -121,7 +128,7 @@
   parser.setEntityResolver(TrivialEntityResolver())
   parser.parse(data_stream)

-  return handler.link
+  return handler.get_link()


 __all__ = ['identify', 'DEBUG']
=======================================
--- /trunk/hub/feed_identifier_test.py  Mon Sep 21 15:18:21 2009
+++ /trunk/hub/feed_identifier_test.py  Mon Oct 26 13:36:06 2009
@@ -52,6 +52,10 @@
     feed_id = feed_identifier.identify(self.load('rss_rdf.xml'), 'atom')
     self.assertTrue(feed_id is None)

+  def testWhitespace(self):
+ feed_id = feed_identifier.identify(self.load('whitespace_id.xml'), 'atom')
+    self.assertEquals('my feed id here', feed_id)
+
   def testBadFormat(self):
     self.assertRaises(xml.sax.SAXParseException,
                       feed_identifier.identify,
=======================================
--- /trunk/hub/main.py  Mon Oct 26 09:49:07 2009
+++ /trunk/hub/main.py  Mon Oct 26 13:36:06 2009
@@ -1543,10 +1543,14 @@
       if feed is None:
         continue

+      fix_feed_id = feed.feed_id
+      if fix_feed_id is not None:
+        fix_feed_id = fix_feed_id.strip()
+
# No expansion for feeds that have no known topic -> feed_id relation, but # record those with KnownFeed as having a mapping from topic -> topic for
       # backwards compatibility with existing production data.
-      if feed.feed_id:
+      if fix_feed_id:
         topics.append(feed.topic)
         feed_ids.append(feed.feed_id)
       else:
@@ -2558,7 +2562,7 @@
     for feed_type in order:
       try:
         feed_id = feed_identifier.identify(response.content, feed_type)
-        if feed_id:
+        if feed_id is not None:
           break
         else:
           parse_failures += 1
=======================================
--- /trunk/hub/main_test.py     Mon Oct 26 09:47:02 2009
+++ /trunk/hub/main_test.py     Mon Oct 26 13:36:06 2009
@@ -408,15 +408,49 @@

     expected = {
       'http://example.com/foobar1':
-          set(['http://example.com/foobar1', u'http://example.com/meep2']),
+          set(['http://example.com/foobar1', 'http://example.com/meep2']),
       'http://example.com/meep2':
-          set([u'http://example.com/foobar1', 'http://example.com/meep2']),
+          set(['http://example.com/foobar1', 'http://example.com/meep2']),
       'http://example.com/blah4':
-          set(['http://example.com/blah4', u'http://example.com/stuff3']),
+          set(['http://example.com/blah4', 'http://example.com/stuff3']),
       'http://example.com/neehaw6':
           set(['http://example.com/neehaw6']),
       'http://example.com/stuff3':
-          set([u'http://example.com/blah4', 'http://example.com/stuff3'])
+          set(['http://example.com/blah4', 'http://example.com/stuff3'])
+    }
+    self.assertEquals(expected, result)
+
+  def testDeriveAdditionalTopicsWhitespace(self):
+    """Tests when the feed ID contains whitespace it is handled correctly.
+
+    This test is only required because the 'feed_identifier' module did not
+    properly strip whitespace in its initial version.
+    """
+    # topic -> feed_id with whitespace
+    feed = KnownFeed.create(self.topic)
+    feed.feed_id = self.feed_id
+    feed.put()
+    KnownFeedIdentity.update(self.feed_id, self.topic)
+
+    # topic2 -> feed_id without whitespace
+    feed = KnownFeed.create(self.topic2)
+    feed.feed_id = '\n  %s  \n \n' % self.feed_id
+    feed.put()
+    KnownFeedIdentity.update(self.feed_id, self.topic2)
+
+    # topic3 -> KnownFeed where feed_id = all whitespace
+    feed = KnownFeed.create(self.topic3)
+    feed.feed_id = '\n  \n \n'
+    feed.put()
+
+    result = KnownFeedIdentity.derive_additional_topics([
+        self.topic, self.topic2, self.topic3])
+
+    expected = {
+        'http://example.com/foobar1':
+ set(['http://example.com/foobar1', 'http://example.com/meep2']),
+        'http://example.com/stuff3':
+            set(['http://example.com/stuff3']),
     }
     self.assertEquals(expected, result)

Reply via email to