Revision: 325
Author: bslatkin
Date: Wed Feb 3 14:44:17 2010
Log: hub: removed indexed property from FeedEntryRecord model -- save space
http://code.google.com/p/pubsubhubbub/source/detail?r=325
Modified:
/trunk/hub/main.py
/trunk/hub/main_test.py
=======================================
--- /trunk/hub/main.py Wed Feb 3 10:35:39 2010
+++ /trunk/hub/main.py Wed Feb 3 14:44:17 2010
@@ -926,16 +926,14 @@
return headers
-class FeedEntryRecord(db.Model):
+class FeedEntryRecord(db.Expando):
"""Represents a feed entry that has been seen.
The key name of this entity is a get_hash_key_name() hash of the combination
of the topic URL and the entry_id.
"""
-
- entry_id = db.TextProperty(required=True) # To allow 500+ length entry IDs.
- entry_id_hash = db.StringProperty(required=True)
- entry_content_hash = db.StringProperty()
+ entry_id_hash = db.StringProperty(required=True, indexed=False)
+ entry_content_hash = db.StringProperty(indexed=False)
update_time = db.DateTimeProperty(auto_now=True)
@classmethod
@@ -992,7 +990,6 @@
key = cls.create_key(topic, entry_id)
return cls(key_name=key.name(),
parent=key.parent(),
- entry_id=entry_id,
entry_id_hash=sha1_hash(entry_id),
entry_content_hash=content_hash)
@@ -1904,7 +1901,7 @@
existing_entries.extend(FeedEntryRecord.get_entries_for_topic(
topic, key_set))
- existing_dict = dict((e.entry_id, e.entry_content_hash)
+ existing_dict = dict((e.entry_id_hash, e.entry_content_hash)
for e in existing_entries if e)
logging.debug('Retrieved %d feed entries, %d of which have been seen before',
len(entries_map), len(existing_dict))
@@ -1913,9 +1910,10 @@
entry_payloads = []
for entry_id, new_content in entries_map.iteritems():
new_content_hash = sha1_hash(new_content)
+ new_entry_id_hash = sha1_hash(entry_id)
# Mark the entry as new if the sha1 hash is different.
try:
- old_content_hash = existing_dict[entry_id]
+ old_content_hash = existing_dict[new_entry_id_hash]
if old_content_hash == new_content_hash:
continue
except KeyError:
=======================================
--- /trunk/hub/main_test.py Wed Feb 3 10:26:36 2010
+++ /trunk/hub/main_test.py Wed Feb 3 14:44:17 2010
@@ -1376,13 +1376,14 @@
@staticmethod
def get_entry(entry_id, entry_list):
"""Finds the entry with the given ID in the list of entries."""
- return [e for e in entry_list if e.entry_id == entry_id][0]
+ return [e for e in entry_list if e.entry_id_hash == sha1_hash(entry_id)][0]
def testAllNewContent(self):
"""Tests when al pulled feed content is new."""
entry_list, entry_payloads = self.run_test()
- entry_id_set = set(f.entry_id for f in entry_list)
- self.assertEquals(set(self.entries_map.keys()), entry_id_set)
+ entry_id_hash_set = set(f.entry_id_hash for f in entry_list)
+ self.assertEquals(set(sha1_hash(k) for k in self.entries_map.keys()),
+ entry_id_hash_set)
self.assertEquals(self.entries_map.values(), entry_payloads)
def testSomeExistingEntries(self):
@@ -1393,8 +1394,8 @@
self.topic, 'id2', sha1_hash('content2')).put()
entry_list, entry_payloads = self.run_test()
- entry_id_set = set(f.entry_id for f in entry_list)
- self.assertEquals(set(['id3']), entry_id_set)
+ entry_id_hash_set = set(f.entry_id_hash for f in entry_list)
+ self.assertEquals(set(sha1_hash(k) for k in ['id3']), entry_id_hash_set)
self.assertEquals(['content3'], entry_payloads)
def testPulledEntryNewer(self):
@@ -1406,8 +1407,9 @@
self.entries_map['id1'] = 'newcontent1'
entry_list, entry_payloads = self.run_test()
- entry_id_set = set(f.entry_id for f in entry_list)
- self.assertEquals(set(['id1', 'id3']), entry_id_set)
+ entry_id_hash_set = set(f.entry_id_hash for f in entry_list)
+ self.assertEquals(set(sha1_hash(k) for k in ['id1', 'id3']),
+ entry_id_hash_set)
# Verify the old entry would be overwritten.
entry1 = self.get_entry('id1', entry_list)
@@ -1418,8 +1420,9 @@
"""Tests when the content contains unicode characters."""
self.entries_map['id2'] = u'\u2019 asdf'
entry_list, entry_payloads = self.run_test()
- entry_id_set = set(f.entry_id for f in entry_list)
- self.assertEquals(set(self.entries_map.keys()), entry_id_set)
+ entry_id_hash_set = set(f.entry_id_hash for f in entry_list)
+ self.assertEquals(set(sha1_hash(k) for k in self.entries_map.keys()),
+ entry_id_hash_set)
def testMultipleParallelBatches(self):
"""Tests that retrieving FeedEntryRecords is done in multiple
batches."""
@@ -1435,8 +1438,9 @@
main.MAX_FEED_ENTRY_RECORD_LOOKUPS = 1
try:
entry_list, entry_payloads = self.run_test()
- entry_id_set = set(f.entry_id for f in entry_list)
- self.assertEquals(set(self.entries_map.keys()), entry_id_set)
+ entry_id_hash_set = set(f.entry_id_hash for f in entry_list)
+ self.assertEquals(set(sha1_hash(k) for k in self.entries_map.keys()),
+ entry_id_hash_set)
self.assertEquals(self.entries_map.values(), entry_payloads)
self.assertEquals(3, calls[0])
finally:
@@ -1510,7 +1514,9 @@
# EventToDeliver and FeedRecord.
feed_entries = FeedEntryRecord.get_entries_for_topic(
self.topic, self.all_ids)
- self.assertEquals(self.all_ids, [e.entry_id for e in feed_entries])
+ self.assertEquals(
+ [sha1_hash(k) for k in self.all_ids],
+ [e.entry_id_hash for e in feed_entries])
work = EventToDeliver.all().get()
event_key = work.key()
@@ -1543,7 +1549,9 @@
feed_entries = FeedEntryRecord.get_entries_for_topic(
self.topic, self.all_ids)
- self.assertEquals(self.all_ids, [e.entry_id for e in feed_entries])
+ self.assertEquals(
+ [sha1_hash(k) for k in self.all_ids],
+ [e.entry_id_hash for e in feed_entries])
work = EventToDeliver.all().get()
event_key = work.key()
@@ -1576,7 +1584,9 @@
feed_entries = FeedEntryRecord.get_entries_for_topic(
self.topic, self.all_ids)
- self.assertEquals(self.all_ids, [e.entry_id for e in feed_entries])
+ self.assertEquals(
+ [sha1_hash(k) for k in self.all_ids],
+ [e.entry_id_hash for e in feed_entries])
work = EventToDeliver.all().get()
event_key = work.key()
@@ -1798,7 +1808,9 @@
# Verify that all feed entry records have been written along with the
# EventToDeliver and FeedRecord.
feed_entries = list(FeedEntryRecord.all())
- self.assertEquals(set(self.all_ids), set(e.entry_id for e in feed_entries))
+ self.assertEquals(
+ set(sha1_hash(k) for k in self.all_ids),
+ set(e.entry_id_hash for e in feed_entries))
work = EventToDeliver.all().get()
event_key = work.key()
@@ -1820,7 +1832,7 @@
def testPutSplittingFails(self):
"""Tests when splitting put() calls still doesn't help and we give
up."""
# Make the content way too big.
- content_template = ('content' * 100 + '%s')
+ content_template = ('content' * 150 + '%s')
self.all_ids = [str(i) for i in xrange(1000)]
self.entry_payloads = [
(content_template % entry_id) for entry_id in self.all_ids
@@ -1895,8 +1907,9 @@
feed_entries = FeedEntryRecord.get_entries_for_topic(
self.topic, self.all_ids)
expected_records = main.MAX_NEW_FEED_ENTRY_RECORDS
- self.assertEquals(self.all_ids[:expected_records],
- [e.entry_id for e in feed_entries])
+ self.assertEquals(
+ [sha1_hash(k) for k in self.all_ids[:expected_records]],
+ [e.entry_id_hash for e in feed_entries])
work = EventToDeliver.all().get()
event_key = work.key()