# I'd like to use memcache to store db.Model instances but I'd like to
# guarantee that memcache entries do not stay stale for very long. At
# the same time, I'd like to use long memcache timeouts, significantly
# longer than the staleness constraint.
# For the purposes of discussion, I've defined five models, T1-T5,
# with different implementations of upd(), the method that updates the
# datastore for a given instance, and latest(), which is supposed to
# get the latest version of such an instance from either memcache or
# the datastore.
# I defined these models because they're useful in describing the
# problems that I've run into. Each one comes with some questions
# about how transactions work.
# T1 and T3 do not satisfy my "not too stale" requirement due to race
# conditions which I describe in-line. T2, T4, and T5 are my attempts
# to eliminate those races.
# Note - caching all db.get() results violates my "not too stale"
# requirement. The discussion for T3 below shows why/how.
# I'm reasonably confident that T2 satisfies my "not too stale"
# requirement but it isn't as effective at keeping entries in memcache
# as T4 or T5. However, I don't know if T4 satisfies that
# requirement. If T4 doesn't satisfy that requirement, I don't know
# if it's possible to have something that both satisfies that
# requirement and is more effective than T2 without doing something
# like T5.
# As I discuss below, T5 may not work either and isn't applicable in
# many circumstances.
# FWIW, the specific data types in these examples are just to help me
# describe the issues. Hacks which use characteristics of said data
# types to meet the requirement are cool and everything, but the data
# types that I care about are different, so ....
from google.appengine.ext import db
from google.appengine.api import memcache
# Common code for T1-T5.
class TBase(db.Model):
    """Shared datastore/memcache plumbing for the example models.

    Subclasses supply upd() and latest(); this base class only provides
    the transaction runner and the memcache helpers they build on.
    """

    # Counter that the subclasses' upd() methods increment by one.
    num_updates = db.IntegerProperty(default=0)

    @classmethod
    def run_txn(cls, fn):
        """Run fn inside a datastore transaction and return its result.

        Up to 1 << 20 retries are allowed, so this either succeeds or a
        deadline error happens. The latter is ignored for the purposes
        of this discussion.
        """
        return db.run_in_transaction_custom_retries(1 << 20, fn)

    @classmethod
    def memcache_key(cls, key):
        """Return the memcache key (a string) used for datastore key."""
        return str(key)

    @classmethod
    def from_cache(cls, key):
        """Return whatever memcache holds for datastore key."""
        return memcache.get(cls.memcache_key(key))

    @property
    def _memcache_key(self):
        """The memcache key for this instance's own datastore key."""
        return self.memcache_key(self.key())

    def from_ds(self):
        """Re-read this instance from the datastore by its key."""
        return self.get(self.key())

    def cache(self):
        """Store this instance in memcache under its own key.

        memcache.set() reports failure through its return value rather
        than by raising. It is not assumed here that a failed set
        removes any previous entry, so the key is deleted explicitly in
        that case to avoid leaving an older instance behind.
        """
        if not memcache.set(self._memcache_key, self):
            self.flush()

    def flush(self):
        """Delete this instance's entry from memcache."""
        memcache.delete(self._memcache_key)
# T1 doesn't work because there's a race in upd() after the
# transaction.
class T1(TBase):
    """Variant that writes to memcache after the transaction returns.

    Yes, the instance upd() was called on is stale after upd().
    """

    def upd(self):
        """Increment num_updates transactionally, then cache the result."""

        def bump():
            # self may be stale even before upd(), so re-read it inside
            # the transaction instead of trusting it.
            current = self.from_ds()
            current.num_updates += 1
            current.put()
            return current

        committed = self.run_txn(bump)
        # Calls to cache() don't necessarily complete in the same order
        # as calls to the corresponding put().
        committed.cache()

    @classmethod
    def latest(cls, key):
        """Return the instance for key; we want the latest num_updates.

        Memcache is consulted first and the datastore is the fallback.
        A datastore read is not written back to memcache.
        """
        hit = cls.from_cache(key)
        if hit:
            return hit
        return cls.get(key)
# T2 works if consistency related exceptions only occur during
# datastore operations and continue to occur until a transaction
# succeeds. In other words, T2 works if any transaction that
# completes its datastore put() is guaranteed to complete the cache()
# before any other instance completes its datastore put().
# However, T2 only writes newly updated entries into memcache. Once
# entries expire, they're not cached again.
class T2(TBase):
    """Variant that writes to memcache inside the update transaction.

    The instance upd() was called on is still stale after upd().
    """

    def upd(self):
        """Increment num_updates and cache the result in one transaction."""

        def bump_and_cache():
            current = self.from_ds()
            current.num_updates += 1
            current.put()
            # Are these calls to cache() guaranteed to occur in the
            # same order as successful calls to put()?
            current.cache()

        self.run_txn(bump_and_cache)

    @classmethod
    def latest(cls, key):
        """Return the instance for key from memcache, else the datastore.

        A datastore read is not written back to memcache, so only
        upd() ever populates the cache.
        """
        hit = cls.from_cache(key)
        if hit:
            return hit
        return cls.get(key)
# Suppose that T2 works, but we want to cache only those entries that
# are being actively read. (Refreshing the cache from a datastore
# read also helps with entries that fall out of memcache for some
# reason.) One way to satisfy that "want" is to memcache instances
# that were read from the datastore outside of upd().
# T3 is a naive attempt to implement that "want". It doesn't work
# because there's a race between upd() and latest().
# We could use timeouts for the memcache write in latest() to limit
# the lifetime of potentially stale entries. (This assumes that
# memcache timeouts actually work.) However, the tighter the limit on
# staleness, the less benefit we get from memcache.
class T3(TBase):
    """Naive variant that also caches instances read in latest().

    The instance upd() was called on is still stale after upd().
    """

    def upd(self):
        """Increment num_updates and flush memcache in one transaction."""

        def bump_and_flush():
            current = self.from_ds()
            current.num_updates += 1
            current.put()
            # Using current.cache() instead of current.flush() does not
            # solve the problem with latest().
            current.flush()

        self.run_txn(bump_and_flush)

    @classmethod
    def latest(cls, key):
        """Return the instance for key, caching it after a datastore read."""
        hit = cls.from_cache(key)
        if hit:
            return hit

        entity = cls.get(key)
        if entity:
            # There may have been an upd() between the get() above and
            # this cache(), so entity may be stale.
            entity.cache()
        return entity
# If T2 works, T4 will also work provided that either (a) a datastore
# get() raises a consistency exception when another transaction has
# started a put(), or (b) a put() raises one when another transaction
# has done a get().
class T4(TBase):
    """Variant whose latest() reads and caches inside a transaction.

    The instance upd() was called on is still stale after upd().
    """

    def upd(self):
        """Increment num_updates and flush memcache in one transaction."""

        def bump_and_flush():
            # Will either the get or the put cause a retry if the
            # transaction in latest() is active with the same key?
            current = self.from_ds()
            current.num_updates += 1
            current.put()
            # If latest() doesn't work with current.flush(), it doesn't
            # work with current.cache() either.
            current.flush()

        self.run_txn(bump_and_flush)

    @classmethod
    def latest(cls, key):
        """Return the instance for key, reading transactionally on a miss."""
        hit = cls.from_cache(key)
        if hit:
            return hit

        def read_and_cache():
            # Does the following get() succeed only if there aren't any
            # active upd() transactions wrt key?
            entity = cls.get(key)
            if entity:
                entity.cache()
            return entity

        return cls.run_txn(read_and_cache)
# If T4 doesn't work because transaction retries are not triggered by
# datastore reads but T2 does work because retries are triggered by
# datastore writes for changed instances, it's worth asking whether
# transaction retries are triggered for datastore writes of unchanged
# instances. If they are, T5 should work.
# Note that T5 uses a datastore write that only succeeds when it
# writes exactly what it read. That "unnecessary" write costs
# something. More important, it isn't always possible. Consider
# db.Model instances with auto_now properties - it will change them.
class T5(TBase):
    """Variant whose latest() rewrites the entity before caching it.

    The instance upd() was called on is still stale after upd().
    """

    def upd(self):
        """Increment num_updates and flush memcache in one transaction."""

        def bump_and_flush():
            # Will either the get or the put cause a retry if the
            # transaction in latest() is active with the same key?
            current = self.from_ds()
            current.num_updates += 1
            current.put()
            # If latest() doesn't work with current.flush(), it doesn't
            # work with current.cache() either.
            current.flush()

        self.run_txn(bump_and_flush)

    @classmethod
    def latest(cls, key):
        """Return the instance for key, re-putting it on a cache miss."""
        hit = cls.from_cache(key)
        if hit:
            return hit

        def read_rewrite_and_cache():
            entity = cls.get(key)
            if entity:
                # If there's another active transaction for the same
                # key, does this put(), which doesn't change anything,
                # cause an appropriate retry?
                entity.put()
                entity.cache()
            return entity

        return cls.run_txn(read_rewrite_and_cache)
--~--~---------~--~----~------------~-------~--~----~
You received this message because you are subscribed to the Google Groups
"Google App Engine" group.
To post to this group, send email to [email protected]
To unsubscribe from this group, send email to
[email protected]
For more options, visit this group at
http://groups.google.com/group/google-appengine?hl=en
-~----------~----~----~----~------~----~------~--~---