Revision: 291
Author: bslatkin
Date: Mon Oct 26 09:29:16 2009
Log: small runtime error fixes
http://code.google.com/p/pubsubhubbub/source/detail?r=291
Added:
/trunk/hub/feed_diff_testdata/xhtml_entities.xml
Modified:
/trunk/hub/feed_identifier.py
/trunk/hub/main.py
/trunk/hub/main_test.py
=======================================
--- /dev/null
+++ /trunk/hub/feed_diff_testdata/xhtml_entities.xml Mon Oct 26 09:29:16 2009
@@ -0,0 +1,3 @@
+<?xml version="1.0" encoding="ascii"?>
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "does_not_exist">
+<html><body>©</body></html>
=======================================
--- /trunk/hub/feed_identifier.py Mon Sep 21 21:38:43 2009
+++ /trunk/hub/feed_identifier.py Mon Oct 26 09:29:16 2009
@@ -31,6 +31,13 @@
DEBUG = False
+class TrivialEntityResolver(xml.sax.handler.EntityResolver):
+ """Pass-through entity resolver."""
+
+ def resolveEntity(self, publicId, systemId):
+ return cStringIO.StringIO()
+
+
class FeedIdentifier(xml.sax.handler.ContentHandler):
"""Base SAX content handler for identifying feeds."""
@@ -111,6 +118,7 @@
assert False, 'Invalid feed format "%s"' % format
parser.setContentHandler(handler)
+ parser.setEntityResolver(TrivialEntityResolver())
parser.parse(data_stream)
return handler.link
=======================================
--- /trunk/hub/main.py Wed Sep 23 23:32:10 2009
+++ /trunk/hub/main.py Mon Oct 26 09:29:16 2009
@@ -1536,14 +1536,15 @@
output_dict = {}
known_feeds = KnownFeed.get([KnownFeed.create_key(t) for t in topics])
- # No expansion for feeds that have no known topic -> feed_id relation, but
- # record those with KnownFeed as having a mapping from topic -> topic for
- # backwards compatibility with existing production data.
topics = []
feed_ids = []
for feed in known_feeds:
if feed is None:
continue
+
+ # No expansion for feeds that have no known topic -> feed_id relation, but
+ # record those with KnownFeed as having a mapping from topic -> topic for
+ # backwards compatibility with existing production data.
if feed.feed_id:
topics.append(feed.topic)
feed_ids.append(feed.feed_id)
@@ -2150,8 +2151,10 @@
parse_failures += 1
if parse_failures == len(order):
- logging.error('Could not parse feed content:\n%s', error_traceback)
- return False
+ logging.error('Could not parse feed; giving up:\n%s', error_traceback)
+ # That's right, we return True. This will cause the fetch to be
+ # abandoned on parse failures because the feed is beyond hope!
+ return True
# If we have more entities than we'd like to handle, only save a subset of
# them and force this task to retry as if it failed. This will cause two
# them and force this task to retry as if it failed. This will cause two
@@ -2543,7 +2546,7 @@
else:
parse_failures += 1
error_traceback = 'Could not determine feed_id'
- except xml.sax.SAXException:
+ except Exception:
error_traceback = traceback.format_exc()
logging.debug(
'Could not parse feed for content of %d bytes in format "%s":\n%s',
=======================================
--- /trunk/hub/main_test.py Wed Sep 23 22:30:26 2009
+++ /trunk/hub/main_test.py Mon Oct 26 09:29:16 2009
@@ -1643,12 +1643,12 @@
self.handle('post', ('topic', self.topic))
feed = FeedToFetch.get_by_key_name(get_hash_key_name(self.topic))
- self.assertEquals(1, feed.fetching_failures)
+ self.assertTrue(feed is None)
testutil.get_tasks(main.EVENT_QUEUE, expected_count=0)
tasks = testutil.get_tasks(main.FEED_QUEUE, expected_count=1)
- tasks.extend(testutil.get_tasks(main.FEED_RETRIES_QUEUE, expected_count=1))
- self.assertEquals([self.topic] * 2, [t['params']['topic'] for t in tasks])
+ tasks.extend(testutil.get_tasks(main.FEED_RETRIES_QUEUE, expected_count=0))
+ self.assertEquals([self.topic], [t['params']['topic'] for t in tasks])
def testCacheHit(self):
"""Tests when the fetched feed matches the last cached version of
it."""
@@ -1954,7 +1954,7 @@
'get', topic, 200, 'this does not parse')
self.handle('post', ('topic', topic))
feed = FeedToFetch.get_by_key_name(get_hash_key_name(topic))
- self.assertEquals(1, feed.fetching_failures)
+ self.assertTrue(feed is None)
def testPullBadFeed(self):
"""Tests when the content parses, but is not a good Atom document."""
@@ -1967,7 +1967,7 @@
urlfetch_test_stub.instance.expect('get', topic, 200, data)
self.handle('post', ('topic', topic))
feed = FeedToFetch.get_by_key_name(get_hash_key_name(topic))
- self.assertEquals(1, feed.fetching_failures)
+ self.assertTrue(feed is None)
def testPullGoodAtom(self):
"""Tests when the Atom XML can parse just fine."""