Revision: 325
Author: bslatkin
Date: Wed Feb  3 14:44:17 2010
Log: hub: removed indexed property from FeedEntryRecord model-- save space
http://code.google.com/p/pubsubhubbub/source/detail?r=325

Modified:
 /trunk/hub/main.py
 /trunk/hub/main_test.py

=======================================
--- /trunk/hub/main.py  Wed Feb  3 10:35:39 2010
+++ /trunk/hub/main.py  Wed Feb  3 14:44:17 2010
@@ -926,16 +926,14 @@
     return headers


-class FeedEntryRecord(db.Model):
+class FeedEntryRecord(db.Expando):
   """Represents a feed entry that has been seen.

  The key name of this entity is a get_hash_key_name() hash of the combination
   of the topic URL and the entry_id.
   """
-
-  entry_id = db.TextProperty(required=True)  # To allow 500+ length entry IDs.
-  entry_id_hash = db.StringProperty(required=True)
-  entry_content_hash = db.StringProperty()
+  entry_id_hash = db.StringProperty(required=True, indexed=False)
+  entry_content_hash = db.StringProperty(indexed=False)
   update_time = db.DateTimeProperty(auto_now=True)

   @classmethod
@@ -992,7 +990,6 @@
     key = cls.create_key(topic, entry_id)
     return cls(key_name=key.name(),
                parent=key.parent(),
-               entry_id=entry_id,
                entry_id_hash=sha1_hash(entry_id),
                entry_content_hash=content_hash)

@@ -1904,7 +1901,7 @@
     existing_entries.extend(FeedEntryRecord.get_entries_for_topic(
         topic, key_set))

-  existing_dict = dict((e.entry_id, e.entry_content_hash)
+  existing_dict = dict((e.entry_id_hash, e.entry_content_hash)
                        for e in existing_entries if e)
  logging.debug('Retrieved %d feed entries, %d of which have been seen before',
                 len(entries_map), len(existing_dict))
@@ -1913,9 +1910,10 @@
   entry_payloads = []
   for entry_id, new_content in entries_map.iteritems():
     new_content_hash = sha1_hash(new_content)
+    new_entry_id_hash = sha1_hash(entry_id)
     # Mark the entry as new if the sha1 hash is different.
     try:
-      old_content_hash = existing_dict[entry_id]
+      old_content_hash = existing_dict[new_entry_id_hash]
       if old_content_hash == new_content_hash:
         continue
     except KeyError:
=======================================
--- /trunk/hub/main_test.py     Wed Feb  3 10:26:36 2010
+++ /trunk/hub/main_test.py     Wed Feb  3 14:44:17 2010
@@ -1376,13 +1376,14 @@
   @staticmethod
   def get_entry(entry_id, entry_list):
     """Finds the entry with the given ID in the list of entries."""
-    return [e for e in entry_list if e.entry_id == entry_id][0]
+    return [e for e in entry_list if e.entry_id_hash == sha1_hash(entry_id)][0]

   def testAllNewContent(self):
     """Tests when al pulled feed content is new."""
     entry_list, entry_payloads = self.run_test()
-    entry_id_set = set(f.entry_id for f in entry_list)
-    self.assertEquals(set(self.entries_map.keys()), entry_id_set)
+    entry_id_hash_set = set(f.entry_id_hash for f in entry_list)
+    self.assertEquals(set(sha1_hash(k) for k in self.entries_map.keys()),
+                      entry_id_hash_set)
     self.assertEquals(self.entries_map.values(), entry_payloads)

   def testSomeExistingEntries(self):
@@ -1393,8 +1394,8 @@
         self.topic, 'id2', sha1_hash('content2')).put()

     entry_list, entry_payloads = self.run_test()
-    entry_id_set = set(f.entry_id for f in entry_list)
-    self.assertEquals(set(['id3']), entry_id_set)
+    entry_id_hash_set = set(f.entry_id_hash for f in entry_list)
+    self.assertEquals(set(sha1_hash(k) for k in ['id3']), entry_id_hash_set)
     self.assertEquals(['content3'], entry_payloads)

   def testPulledEntryNewer(self):
@@ -1406,8 +1407,9 @@
     self.entries_map['id1'] = 'newcontent1'

     entry_list, entry_payloads = self.run_test()
-    entry_id_set = set(f.entry_id for f in entry_list)
-    self.assertEquals(set(['id1', 'id3']), entry_id_set)
+    entry_id_hash_set = set(f.entry_id_hash for f in entry_list)
+    self.assertEquals(set(sha1_hash(k) for k in ['id1', 'id3']),
+                      entry_id_hash_set)

     # Verify the old entry would be overwritten.
     entry1 = self.get_entry('id1', entry_list)
@@ -1418,8 +1420,9 @@
     """Tests when the content contains unicode characters."""
     self.entries_map['id2'] = u'\u2019 asdf'
     entry_list, entry_payloads = self.run_test()
-    entry_id_set = set(f.entry_id for f in entry_list)
-    self.assertEquals(set(self.entries_map.keys()), entry_id_set)
+    entry_id_hash_set = set(f.entry_id_hash for f in entry_list)
+    self.assertEquals(set(sha1_hash(k) for k in self.entries_map.keys()),
+                      entry_id_hash_set)

   def testMultipleParallelBatches(self):
"""Tests that retrieving FeedEntryRecords is done in multiple batches."""
@@ -1435,8 +1438,9 @@
     main.MAX_FEED_ENTRY_RECORD_LOOKUPS = 1
     try:
       entry_list, entry_payloads = self.run_test()
-      entry_id_set = set(f.entry_id for f in entry_list)
-      self.assertEquals(set(self.entries_map.keys()), entry_id_set)
+      entry_id_hash_set = set(f.entry_id_hash for f in entry_list)
+      self.assertEquals(set(sha1_hash(k) for k in self.entries_map.keys()),
+                        entry_id_hash_set)
       self.assertEquals(self.entries_map.values(), entry_payloads)
       self.assertEquals(3, calls[0])
     finally:
@@ -1510,7 +1514,9 @@
     # EventToDeliver and FeedRecord.
     feed_entries = FeedEntryRecord.get_entries_for_topic(
         self.topic, self.all_ids)
-    self.assertEquals(self.all_ids, [e.entry_id for e in feed_entries])
+    self.assertEquals(
+        [sha1_hash(k) for k in self.all_ids],
+        [e.entry_id_hash for e in feed_entries])

     work = EventToDeliver.all().get()
     event_key = work.key()
@@ -1543,7 +1549,9 @@

     feed_entries = FeedEntryRecord.get_entries_for_topic(
         self.topic, self.all_ids)
-    self.assertEquals(self.all_ids, [e.entry_id for e in feed_entries])
+    self.assertEquals(
+        [sha1_hash(k) for k in self.all_ids],
+        [e.entry_id_hash for e in feed_entries])

     work = EventToDeliver.all().get()
     event_key = work.key()
@@ -1576,7 +1584,9 @@

     feed_entries = FeedEntryRecord.get_entries_for_topic(
         self.topic, self.all_ids)
-    self.assertEquals(self.all_ids, [e.entry_id for e in feed_entries])
+    self.assertEquals(
+        [sha1_hash(k) for k in self.all_ids],
+        [e.entry_id_hash for e in feed_entries])

     work = EventToDeliver.all().get()
     event_key = work.key()
@@ -1798,7 +1808,9 @@
     # Verify that all feed entry records have been written along with the
     # EventToDeliver and FeedRecord.
     feed_entries = list(FeedEntryRecord.all())
-    self.assertEquals(set(self.all_ids), set(e.entry_id for e in feed_entries))
+    self.assertEquals(
+        set(sha1_hash(k) for k in self.all_ids),
+        set(e.entry_id_hash for e in feed_entries))

     work = EventToDeliver.all().get()
     event_key = work.key()
@@ -1820,7 +1832,7 @@
   def testPutSplittingFails(self):
"""Tests when splitting put() calls still doesn't help and we give up."""
     # Make the content way too big.
-    content_template = ('content' * 100 + '%s')
+    content_template = ('content' * 150 + '%s')
     self.all_ids = [str(i) for i in xrange(1000)]
     self.entry_payloads = [
       (content_template % entry_id) for entry_id in self.all_ids
@@ -1895,8 +1907,9 @@
     feed_entries = FeedEntryRecord.get_entries_for_topic(
         self.topic, self.all_ids)
     expected_records = main.MAX_NEW_FEED_ENTRY_RECORDS
-    self.assertEquals(self.all_ids[:expected_records],
-                      [e.entry_id for e in feed_entries])
+    self.assertEquals(
+        [sha1_hash(k) for k in self.all_ids[:expected_records]],
+        [e.entry_id_hash for e in feed_entries])

     work = EventToDeliver.all().get()
     event_key = work.key()

Reply via email to