Revision: 291
Author: bslatkin
Date: Mon Oct 26 09:29:16 2009
Log: small runtime error fixes
http://code.google.com/p/pubsubhubbub/source/detail?r=291

Added:
 /trunk/hub/feed_diff_testdata/xhtml_entities.xml
Modified:
 /trunk/hub/feed_identifier.py
 /trunk/hub/main.py
 /trunk/hub/main_test.py

=======================================
--- /dev/null
+++ /trunk/hub/feed_diff_testdata/xhtml_entities.xml Mon Oct 26 09:29:16 2009
@@ -0,0 +1,3 @@
+<?xml version="1.0" encoding="ascii"?>
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "does_not_exist">
+<html><body>&copy;</body></html>
=======================================
--- /trunk/hub/feed_identifier.py       Mon Sep 21 21:38:43 2009
+++ /trunk/hub/feed_identifier.py       Mon Oct 26 09:29:16 2009
@@ -31,6 +31,13 @@
 DEBUG = False


+class TrivialEntityResolver(xml.sax.handler.EntityResolver):
+  """Pass-through entity resolver."""
+
+  def resolveEntity(self, publicId, systemId):
+    return cStringIO.StringIO()
+
+
 class FeedIdentifier(xml.sax.handler.ContentHandler):
   """Base SAX content handler for identifying feeds."""

@@ -111,6 +118,7 @@
     assert False, 'Invalid feed format "%s"' % format

   parser.setContentHandler(handler)
+  parser.setEntityResolver(TrivialEntityResolver())
   parser.parse(data_stream)

   return handler.link
=======================================
--- /trunk/hub/main.py  Wed Sep 23 23:32:10 2009
+++ /trunk/hub/main.py  Mon Oct 26 09:29:16 2009
@@ -1536,14 +1536,15 @@
     output_dict = {}
     known_feeds = KnownFeed.get([KnownFeed.create_key(t) for t in topics])

-    # No expansion for feeds that have no known topic -> feed_id relation, but
-    # record those with KnownFeed as having a mapping from topic -> topic for
-    # backwards compatibility with existing production data.
     topics = []
     feed_ids = []
     for feed in known_feeds:
       if feed is None:
         continue
+
+      # No expansion for feeds that have no known topic -> feed_id relation, but
+      # record those with KnownFeed as having a mapping from topic -> topic for
+      # backwards compatibility with existing production data.
       if feed.feed_id:
         topics.append(feed.topic)
         feed_ids.append(feed.feed_id)
@@ -2150,8 +2151,10 @@
       parse_failures += 1

   if parse_failures == len(order):
-    logging.error('Could not parse feed content:\n%s', error_traceback)
-    return False
+    logging.error('Could not parse feed; giving up:\n%s', error_traceback)
+    # That's right, we return True. This will cause the fetch to be
+    # abandoned on parse failures because the feed is beyond hope!
+    return True

   # If we have more entities than we'd like to handle, only save a subset of
   # them and force this task to retry as if it failed. This will cause two
@@ -2543,7 +2546,7 @@
         else:
           parse_failures += 1
           error_traceback = 'Could not determine feed_id'
-      except xml.sax.SAXException:
+      except Exception:
         error_traceback = traceback.format_exc()
         logging.debug(
             'Could not parse feed for content of %d bytes in format "%s":\n%s',
=======================================
--- /trunk/hub/main_test.py     Wed Sep 23 22:30:26 2009
+++ /trunk/hub/main_test.py     Mon Oct 26 09:29:16 2009
@@ -1643,12 +1643,12 @@
     self.handle('post', ('topic', self.topic))

     feed = FeedToFetch.get_by_key_name(get_hash_key_name(self.topic))
-    self.assertEquals(1, feed.fetching_failures)
+    self.assertTrue(feed is None)

     testutil.get_tasks(main.EVENT_QUEUE, expected_count=0)
     tasks = testutil.get_tasks(main.FEED_QUEUE, expected_count=1)
-    tasks.extend(testutil.get_tasks(main.FEED_RETRIES_QUEUE, expected_count=1))
-    self.assertEquals([self.topic] * 2, [t['params']['topic'] for t in tasks])
+    tasks.extend(testutil.get_tasks(main.FEED_RETRIES_QUEUE, expected_count=0))
+    self.assertEquals([self.topic], [t['params']['topic'] for t in tasks])

   def testCacheHit(self):
     """Tests when the fetched feed matches the last cached version of it."""
@@ -1954,7 +1954,7 @@
         'get', topic, 200, 'this does not parse')
     self.handle('post', ('topic', topic))
     feed = FeedToFetch.get_by_key_name(get_hash_key_name(topic))
-    self.assertEquals(1, feed.fetching_failures)
+    self.assertTrue(feed is None)

   def testPullBadFeed(self):
     """Tests when the content parses, but is not a good Atom document."""
@@ -1967,7 +1967,7 @@
     urlfetch_test_stub.instance.expect('get', topic, 200, data)
     self.handle('post', ('topic', topic))
     feed = FeedToFetch.get_by_key_name(get_hash_key_name(topic))
-    self.assertEquals(1, feed.fetching_failures)
+    self.assertTrue(feed is None)

   def testPullGoodAtom(self):
     """Tests when the Atom XML can parse just fine."""

Reply via email to