Revision: 294
Author: bslatkin
Date: Mon Oct 26 13:36:06 2009
Log: fixed handling of multi-line feed IDs
http://code.google.com/p/pubsubhubbub/source/detail?r=294
Modified:
/trunk/hub/feed_identifier.py
/trunk/hub/feed_identifier_test.py
/trunk/hub/main.py
/trunk/hub/main_test.py
=======================================
--- /trunk/hub/feed_identifier.py Mon Oct 26 09:29:16 2009
+++ /trunk/hub/feed_identifier.py Mon Oct 26 13:36:06 2009
@@ -50,7 +50,7 @@
parser: Instance of the xml.sax parser being used with this handler.
"""
self.parser = parser
- self.link = None
+ self.link = []
self.tag_stack = []
self.capture_next_element = False
@@ -69,14 +69,21 @@
self.capture_next_element = True
def endElement(self, name):
- if not self.link:
+ if self.link:
+ self.capture_next_element = False
+ else:
if DEBUG: logging.debug('End stack level %r', name)
self.tag_stack.pop()
def characters(self, content):
if self.capture_next_element:
- self.capture_next_element = False
- self.link = content
+ self.link.append(content)
+
+ def get_link(self):
+ if not self.link:
+ return None
+ else:
+ return ''.join(self.link).strip()
class AtomFeedIdentifier(FeedIdentifier):
@@ -121,7 +128,7 @@
parser.setEntityResolver(TrivialEntityResolver())
parser.parse(data_stream)
- return handler.link
+ return handler.get_link()
__all__ = ['identify', 'DEBUG']
=======================================
--- /trunk/hub/feed_identifier_test.py Mon Sep 21 15:18:21 2009
+++ /trunk/hub/feed_identifier_test.py Mon Oct 26 13:36:06 2009
@@ -52,6 +52,10 @@
feed_id = feed_identifier.identify(self.load('rss_rdf.xml'), 'atom')
self.assertTrue(feed_id is None)
+ def testWhitespace(self):
+ feed_id =
feed_identifier.identify(self.load('whitespace_id.xml'), 'atom')
+ self.assertEquals('my feed id here', feed_id)
+
def testBadFormat(self):
self.assertRaises(xml.sax.SAXParseException,
feed_identifier.identify,
=======================================
--- /trunk/hub/main.py Mon Oct 26 09:49:07 2009
+++ /trunk/hub/main.py Mon Oct 26 13:36:06 2009
@@ -1543,10 +1543,14 @@
if feed is None:
continue
+ fix_feed_id = feed.feed_id
+ if fix_feed_id is not None:
+ fix_feed_id = fix_feed_id.strip()
+
# No expansion for feeds that have no known topic -> feed_id
relation, but
# record those with KnownFeed as having a mapping from topic ->
topic for
# backwards compatibility with existing production data.
- if feed.feed_id:
+ if fix_feed_id:
topics.append(feed.topic)
feed_ids.append(feed.feed_id)
else:
@@ -2558,7 +2562,7 @@
for feed_type in order:
try:
feed_id = feed_identifier.identify(response.content, feed_type)
- if feed_id:
+ if feed_id is not None:
break
else:
parse_failures += 1
=======================================
--- /trunk/hub/main_test.py Mon Oct 26 09:47:02 2009
+++ /trunk/hub/main_test.py Mon Oct 26 13:36:06 2009
@@ -408,15 +408,49 @@
expected = {
'http://example.com/foobar1':
- set(['http://example.com/foobar1', u'http://example.com/meep2']),
+ set(['http://example.com/foobar1', 'http://example.com/meep2']),
'http://example.com/meep2':
- set([u'http://example.com/foobar1', 'http://example.com/meep2']),
+ set(['http://example.com/foobar1', 'http://example.com/meep2']),
'http://example.com/blah4':
- set(['http://example.com/blah4', u'http://example.com/stuff3']),
+ set(['http://example.com/blah4', 'http://example.com/stuff3']),
'http://example.com/neehaw6':
set(['http://example.com/neehaw6']),
'http://example.com/stuff3':
- set([u'http://example.com/blah4', 'http://example.com/stuff3'])
+ set(['http://example.com/blah4', 'http://example.com/stuff3'])
+ }
+ self.assertEquals(expected, result)
+
+ def testDeriveAdditionalTopicsWhitespace(self):
+ """Tests when the feed ID contains whitespace it is handled correctly.
+
+ This test is only required because the 'feed_identifier' module did not
+ properly strip whitespace in its initial version.
+ """
+ # topic -> feed_id with whitespace
+ feed = KnownFeed.create(self.topic)
+ feed.feed_id = self.feed_id
+ feed.put()
+ KnownFeedIdentity.update(self.feed_id, self.topic)
+
+ # topic2 -> feed_id without whitespace
+ feed = KnownFeed.create(self.topic2)
+ feed.feed_id = '\n %s \n \n' % self.feed_id
+ feed.put()
+ KnownFeedIdentity.update(self.feed_id, self.topic2)
+
+ # topic3 -> KnownFeed where feed_id = all whitespace
+ feed = KnownFeed.create(self.topic3)
+ feed.feed_id = '\n \n \n'
+ feed.put()
+
+ result = KnownFeedIdentity.derive_additional_topics([
+ self.topic, self.topic2, self.topic3])
+
+ expected = {
+ 'http://example.com/foobar1':
+
set(['http://example.com/foobar1', 'http://example.com/meep2']),
+ 'http://example.com/stuff3':
+ set(['http://example.com/stuff3']),
}
self.assertEquals(expected, result)