Author: stefan2
Date: Sat Apr 27 20:30:11 2013
New Revision: 1476664
URL: http://svn.apache.org/r1476664
Log:
Cherry-pick merge r458643-1476567 from branches/cache-server to /trunk.
These patches minimize the membuffer cache addressing overhead and
implement a more sophisticated caching scheme. Both are essential for
fsfs-format7 performance.
Not intended for 1.8 back-port.
Modified:
subversion/trunk/ (props changed)
subversion/trunk/subversion/libsvn_subr/cache-membuffer.c
Propchange: subversion/trunk/
------------------------------------------------------------------------------
Merged /subversion/branches/cache-server:r1458644-1476567
Modified: subversion/trunk/subversion/libsvn_subr/cache-membuffer.c
URL:
http://svn.apache.org/viewvc/subversion/trunk/subversion/libsvn_subr/cache-membuffer.c?rev=1476664&r1=1476663&r2=1476664&view=diff
==============================================================================
--- subversion/trunk/subversion/libsvn_subr/cache-membuffer.c (original)
+++ subversion/trunk/subversion/libsvn_subr/cache-membuffer.c Sat Apr 27
20:30:11 2013
@@ -45,6 +45,8 @@
*
* 1. A linear data buffer containing cached items in a serialized
* representation. There may be arbitrary gaps between entries.
+ * This buffer is sub-devided into (currently two) cache levels.
+ *
* 2. A directory of cache entries. This is organized similar to CPU
* data caches: for every possible key, there is exactly one group
* of entries that may contain the header info for an item with
@@ -56,23 +58,30 @@
* between different processes and / or to persist them on disk. These
* out-of-process features have not been implemented, yet.
*
+ * Superficially, cache levels are being used as usual: insertion happens
+ * into L1 and evictions will promote items to L2. But their whole point
+ * is a different one. L1 uses a circular buffer, i.e. we have perfect
+ * caching for the last N bytes where N is the size of L1. L2 uses a more
+ * elaborate scheme based on priorities and hit counts as described below.
+ *
* The data buffer usage information is implicitly given by the directory
* entries. Every USED entry has a reference to the previous and the next
* used dictionary entry and this double-linked list is ordered by the
* offsets of their item data within the data buffer. So removing data,
* for instance, is done simply by unlinking it from the chain, implicitly
* marking the entry as well as the data buffer section previously
- * associated to it as unused.
+ * associated to it as unused. First and last element of that chain are
+ * being referenced from the respective cache level.
*
- * Insertion can occur at only one, sliding position. It is marked by its
- * offset in the data buffer plus the index of the first used entry at or
- * behind that position. If this gap is too small to accommodate the new
- * item, the insertion window is extended as described below. The new entry
- * will always be inserted at the bottom end of the window and since the
- * next used entry is known, properly sorted insertion is possible.
+ * Insertion can occur at only one, sliding position per cache level. It is
+ * marked by its offset in the data buffer and the index of the first used
+ * entry at or behind that position. If this gap is too small to accommodate
+ * the new item, the insertion window is extended as described below. The new
+ * entry will always be inserted at the bottom end of the window and since
+ * the next used entry is known, properly sorted insertion is possible.
*
* To make the cache perform robustly in a wide range of usage scenarios,
- * a randomized variant of LFU is used (see ensure_data_insertable for
+ * L2 uses a randomized variant of LFU (see ensure_data_insertable_l2 for
* details). Every item holds a read hit counter and there is a global read
* hit counter. The more hits an entry has in relation to the average, the
* more it is likely to be kept using a rand()-based condition. The test is
@@ -86,7 +95,7 @@
* they get not used for a while. Also, even a cache thrashing situation
* about 50% of the content survives every 50% of the cache being re-written
* with new entries. For details on the fine-tuning involved, see the
- * comments in ensure_data_insertable().
+ * comments in ensure_data_insertable_l2().
*
* To limit the entry size and management overhead, not the actual item keys
* but only their MD5 checksums will not be stored. This is reasonably safe
@@ -313,7 +322,7 @@ static svn_error_t* assert_equal_tags(co
/* A single dictionary entry. Since all entries will be allocated once
* during cache creation, those entries might be either used or unused.
* An entry is used if and only if it is contained in the doubly-linked
- * list of used entries.
+ * list of used entries per cache level.
*/
typedef struct entry_t
{
@@ -321,7 +330,8 @@ typedef struct entry_t
*/
entry_key_t key;
- /* The offset of the cached item's serialized data within the data buffer.
+ /* The offset of the cached item's serialized data within the caches
+ * DATA buffer.
*/
apr_uint64_t offset;
@@ -337,15 +347,15 @@ typedef struct entry_t
/* Reference to the next used entry in the order defined by offset.
* NO_INDEX indicates the end of the list; this entry must be referenced
- * by the caches membuffer_cache_t.last member. NO_INDEX also implies
- * that the data buffer is not used beyond offset+size.
+ * by the caches cache_level_t.last member. NO_INDEX also implies that
+ * the data buffer is not used beyond offset+size.
* Only valid for used entries.
*/
apr_uint32_t next;
/* Reference to the previous used entry in the order defined by offset.
* NO_INDEX indicates the end of the list; this entry must be referenced
- * by the caches membuffer_cache_t.first member.
+ * by the caches cache_level_t.first member.
* Only valid for used entries.
*/
apr_uint32_t previous;
@@ -368,28 +378,12 @@ typedef struct entry_group_t
entry_t entries[GROUP_SIZE];
} entry_group_t;
-/* The cache header structure.
+/* Per-cache level header structure. Instances of this are members of
+ * svn_membuffer_t and will use non-overlapping sections of its DATA buffer.
+ * All offset values are global / absolute to that whole buffer.
*/
-struct svn_membuffer_t
+typedef struct cache_level_t
{
- /* Number of cache segments. Must be a power of 2.
- Please note that this structure represents only one such segment
- and that all segments must / will report the same values here. */
- apr_uint32_t segment_count;
-
- /* The dictionary, GROUP_SIZE * group_count entries long. Never NULL.
- */
- entry_group_t *directory;
-
- /* Flag array with group_count / GROUP_INIT_GRANULARITY _bit_ elements.
- * Allows for efficiently marking groups as "not initialized".
- */
- unsigned char *group_initialized;
-
- /* Size of dictionary in groups. Must be > 0.
- */
- apr_uint32_t group_count;
-
/* Reference to the first (defined by the order content in the data
* buffer) dictionary entry used by any data item.
* NO_INDEX for an empty cache.
@@ -410,18 +404,46 @@ struct svn_membuffer_t
apr_uint32_t next;
- /* Pointer to the data buffer, data_size bytes long. Never NULL.
+ /* First offset in the caches DATA buffer that belongs to this level.
*/
- unsigned char *data;
+ apr_uint64_t start_offset;
- /* Size of data buffer in bytes. Must be > 0.
+ /* Size of data buffer allocated to this level in bytes. Must be > 0.
*/
- apr_uint64_t data_size;
+ apr_uint64_t size;
/* Offset in the data buffer where the next insertion shall occur.
*/
apr_uint64_t current_data;
+} cache_level_t;
+
+/* The cache header structure.
+ */
+struct svn_membuffer_t
+{
+ /* Number of cache segments. Must be a power of 2.
+ Please note that this structure represents only one such segment
+ and that all segments must / will report the same values here. */
+ apr_uint32_t segment_count;
+
+ /* The dictionary, GROUP_SIZE * group_count entries long. Never NULL.
+ */
+ entry_group_t *directory;
+
+ /* Flag array with group_count / GROUP_INIT_GRANULARITY _bit_ elements.
+ * Allows for efficiently marking groups as "not initialized".
+ */
+ unsigned char *group_initialized;
+
+ /* Size of dictionary in groups. Must be > 0.
+ */
+ apr_uint32_t group_count;
+
+ /* Pointer to the data buffer, data_size bytes long. Never NULL.
+ */
+ unsigned char *data;
+
/* Total number of data buffer bytes in use. This is for statistics only.
*/
apr_uint64_t data_used;
@@ -431,6 +453,24 @@ struct svn_membuffer_t
*/
apr_uint64_t max_entry_size;
+ /* The cache levels, organized as sub-buffers. Since entries in the
+ * DIRECTORY use offsets in DATA for addressing, a cache lookup does
+ * not need to know the cache level of a specific item. Cache levels
+ * are only used to implement a hybrid insertion / eviction strategy.
+ */
+
+ /* First cache level, i.e. most insertions happen here. Very large
+ * items might get inserted directly into L2. L1 is a strict FIFO
+ * ring buffer that does not care about item priorities. All evicted
+ * items get a chance to be promoted to L2.
+ */
+ cache_level_t l1;
+
+ /* Second cache level, i.e. data evicted from L1 will be added here
+ * if the item is "important" enough or the L2 insertion window is large
+ * enough.
+ */
+ cache_level_t l2;
/* Number of used dictionary entries, i.e. number of cached items.
* In conjunction with hit_count, this is used calculate the average
@@ -621,6 +661,96 @@ get_index(svn_membuffer_t *cache, entry_
+ (apr_uint32_t)(entry - cache->directory[group_index].entries);
}
+/* Return the cache level of ENTRY in CACHE.
+ */
+static cache_level_t *
+get_cache_level(svn_membuffer_t *cache, entry_t *entry)
+{
+ return entry->offset < cache->l1.size ? &cache->l1
+ : &cache->l2;
+}
+
+/* Insert ENTRY to the chain of items that belong to LEVEL in CACHE. IDX
+ * is ENTRY's item index and is only given for efficiency. The insertion
+ * takes place just before LEVEL->NEXT. *CACHE will not be modified.
+ */
+static void
+chain_entry(svn_membuffer_t *cache,
+ cache_level_t *level,
+ entry_t *entry,
+ apr_uint32_t idx)
+{
+ /* insert ENTRY before this item */
+ entry_t *next = level->next == NO_INDEX
+ ? NULL
+ : get_entry(cache, level->next);
+ assert(idx == get_index(cache, entry));
+
+ /* update entry chain
+ */
+ entry->next = level->next;
+ if (level->first == NO_INDEX)
+ {
+ /* insert as the first entry and only in the chain
+ */
+ entry->previous = NO_INDEX;
+ level->last = idx;
+ level->first = idx;
+ }
+ else if (next == NULL)
+ {
+ /* insert as the last entry in the chain.
+ * Note that it cannot also be at the beginning of the chain.
+ */
+ entry->previous = level->last;
+ get_entry(cache, level->last)->next = idx;
+ level->last = idx;
+ }
+ else
+ {
+ /* insert either at the start of a non-empty list or
+ * somewhere in the middle
+ */
+ entry->previous = next->previous;
+ next->previous = idx;
+
+ if (entry->previous != NO_INDEX)
+ get_entry(cache, entry->previous)->next = idx;
+ else
+ level->first = idx;
+ }
+}
+
+/* Remove ENTRY from the chain of items that belong to LEVEL in CACHE. IDX
+ * is ENTRY's item index and is only given for efficiency. Please note
+ * that neither *CACHE nor *ENTRY will not be modified.
+ */
+static void
+unchain_entry(svn_membuffer_t *cache,
+ cache_level_t *level,
+ entry_t *entry,
+ apr_uint32_t idx)
+{
+ assert(idx == get_index(cache, entry));
+
+ /* update
+ */
+ if (level->next == idx)
+ level->next = entry->next;
+
+ /* unlink it from the chain of used entries
+ */
+ if (entry->previous == NO_INDEX)
+ level->first = entry->next;
+ else
+ get_entry(cache, entry->previous)->next = entry->next;
+
+ if (entry->next == NO_INDEX)
+ level->last = entry->previous;
+ else
+ get_entry(cache, entry->next)->previous = entry->previous;
+}
+
/* Remove the used ENTRY from the CACHE, i.e. make it "unused".
* In contrast to insertion, removal is possible for any entry.
*/
@@ -633,6 +763,7 @@ drop_entry(svn_membuffer_t *cache, entry
apr_uint32_t group_index = idx / GROUP_SIZE;
entry_group_t *group = &cache->directory[group_index];
apr_uint32_t last_in_group = group_index * GROUP_SIZE + group->used - 1;
+ cache_level_t *level = get_cache_level(cache, entry);
/* Only valid to be called for used entries.
*/
@@ -646,39 +777,31 @@ drop_entry(svn_membuffer_t *cache, entry
/* extend the insertion window, if the entry happens to border it
*/
- if (idx == cache->next)
- cache->next = entry->next;
+ if (idx == level->next)
+ level->next = entry->next;
else
- if (entry->next == cache->next)
+ if (entry->next == level->next)
{
/* insertion window starts right behind the entry to remove
*/
if (entry->previous == NO_INDEX)
{
/* remove the first entry -> insertion may start at pos 0, now */
- cache->current_data = 0;
+ level->current_data = level->start_offset;
}
else
{
/* insertion may start right behind the previous entry */
entry_t *previous = get_entry(cache, entry->previous);
- cache->current_data = ALIGN_VALUE( previous->offset
+ level->current_data = ALIGN_VALUE( previous->offset
+ previous->size);
}
}
/* unlink it from the chain of used entries
*/
- if (entry->previous == NO_INDEX)
- cache->first = entry->next;
- else
- get_entry(cache, entry->previous)->next = entry->next;
-
- if (entry->next == NO_INDEX)
- cache->last = entry->previous;
- else
- get_entry(cache, entry->next)->previous = entry->previous;
-
+ unchain_entry(cache, level, entry, idx);
+
/* Move last entry into hole (if the removed one is not the last used).
* We need to do this since all used entries are at the beginning of
* the group's entries array.
@@ -689,18 +812,22 @@ drop_entry(svn_membuffer_t *cache, entry
*/
*entry = group->entries[group->used-1];
+ /* this ENTRY may belong to a different cache level than the entry
+ * we have just removed */
+ level = get_cache_level(cache, entry);
+
/* update foreign links to new index
*/
- if (last_in_group == cache->next)
- cache->next = idx;
+ if (last_in_group == level->next)
+ level->next = idx;
if (entry->previous == NO_INDEX)
- cache->first = idx;
+ level->first = idx;
else
get_entry(cache, entry->previous)->next = idx;
if (entry->next == NO_INDEX)
- cache->last = idx;
+ level->last = idx;
else
get_entry(cache, entry->next)->previous = idx;
}
@@ -722,16 +849,14 @@ insert_entry(svn_membuffer_t *cache, ent
apr_uint32_t idx = get_index(cache, entry);
apr_uint32_t group_index = idx / GROUP_SIZE;
entry_group_t *group = &cache->directory[group_index];
- entry_t *next = cache->next == NO_INDEX
- ? NULL
- : get_entry(cache, cache->next);
+ cache_level_t *level = get_cache_level(cache, entry);
/* The entry must start at the beginning of the insertion window.
* It must also be the first unused entry in the group.
*/
- assert(entry->offset == cache->current_data);
+ assert(entry->offset == level->current_data);
assert(idx == group_index * GROUP_SIZE + group->used);
- cache->current_data = ALIGN_VALUE(entry->offset + entry->size);
+ level->current_data = ALIGN_VALUE(entry->offset + entry->size);
/* update usage counters
*/
@@ -742,42 +867,12 @@ insert_entry(svn_membuffer_t *cache, ent
/* update entry chain
*/
- entry->next = cache->next;
- if (cache->first == NO_INDEX)
- {
- /* insert as the first entry and only in the chain
- */
- entry->previous = NO_INDEX;
- cache->last = idx;
- cache->first = idx;
- }
- else if (next == NULL)
- {
- /* insert as the last entry in the chain.
- * Note that it cannot also be at the beginning of the chain.
- */
- entry->previous = cache->last;
- get_entry(cache, cache->last)->next = idx;
- cache->last = idx;
- }
- else
- {
- /* insert either at the start of a non-empty list or
- * somewhere in the middle
- */
- entry->previous = next->previous;
- next->previous = idx;
-
- if (entry->previous != NO_INDEX)
- get_entry(cache, entry->previous)->next = idx;
- else
- cache->first = idx;
- }
+ chain_entry(cache, level, entry, idx);
/* The current insertion position must never point outside our
* data buffer.
*/
- assert(cache->current_data <= cache->data_size);
+ assert(level->current_data <= level->start_offset + level->size);
}
/* Map a KEY of 16 bytes to the CACHE and group that shall contain the
@@ -950,6 +1045,7 @@ static void
move_entry(svn_membuffer_t *cache, entry_t *entry)
{
apr_size_t size = ALIGN_VALUE(entry->size);
+ cache_level_t *level = get_cache_level(cache, entry);
/* This entry survived this cleansing run. Reset half of its
* hit count so that its removal gets more likely in the next
@@ -963,41 +1059,75 @@ move_entry(svn_membuffer_t *cache, entry
* Size-aligned moves tend to be faster than non-aligned ones
* because no "odd" bytes at the end need to special treatment.
*/
- if (entry->offset != cache->current_data)
+ if (entry->offset != level->current_data)
{
- memmove(cache->data + cache->current_data,
+ memmove(cache->data + level->current_data,
cache->data + entry->offset,
size);
- entry->offset = cache->current_data;
+ entry->offset = level->current_data;
}
/* The insertion position is now directly behind this entry.
*/
- cache->current_data = entry->offset + size;
- cache->next = entry->next;
+ level->current_data = entry->offset + size;
+ level->next = entry->next;
/* The current insertion position must never point outside our
* data buffer.
*/
- assert(cache->current_data <= cache->data_size);
+ assert(level->current_data <= level->start_offset + level->size);
}
-/* If necessary, enlarge the insertion window until it is at least
- * SIZE bytes long. SIZE must not exceed the data buffer size.
- * Return TRUE if enough room could be found or made. A FALSE result
+/* Move ENTRY in CACHE from L1 to L2.
+ */
+static void
+promote_entry(svn_membuffer_t *cache, entry_t *entry)
+{
+ apr_uint32_t idx = get_index(cache, entry);
+ apr_size_t size = ALIGN_VALUE(entry->size);
+ assert(get_cache_level(cache, entry) == &cache->l1);
+
+ /* copy item from the current location in L1 to the start of L2's
+ * insertion window */
+ memmove(cache->data + cache->l2.current_data,
+ cache->data + entry->offset,
+ size);
+ entry->offset = cache->l2.current_data;
+
+ /* The insertion position is now directly behind this entry.
+ */
+ cache->l2.current_data += size;
+
+ /* remove ENTRY from chain of L1 entries and put it into L2
+ */
+ unchain_entry(cache, &cache->l1, entry, idx);
+ chain_entry(cache, &cache->l2, entry, idx);
+}
+
+/* This function implements the cache insertion / eviction strategy for L2.
+ *
+ * If necessary, enlarge the insertion window of CACHE->L2 until it is at
+ * least TO_FIT_IN->SIZE bytes long. TO_FIT_IN->SIZE must not exceed the
+ * data buffer size allocated to CACHE->L2. IDX is the item index of
+ * TO_FIT_IN and is given for performance reasons.
+ *
+ * Return TRUE if enough room could be found or made. A FALSE result
* indicates that the respective item shall not be added.
*/
static svn_boolean_t
-ensure_data_insertable(svn_membuffer_t *cache, apr_size_t size)
+ensure_data_insertable_l2(svn_membuffer_t *cache,
+ entry_t *to_fit_in,
+ apr_uint32_t idx)
{
entry_t *entry;
apr_uint64_t average_hit_value;
apr_uint64_t threshold;
- /* accumulated size of the entries that have been removed to make
- * room for the new one.
- */
- apr_size_t drop_size = 0;
+ /* accumulated "worth" of items dropped so far */
+ apr_size_t drop_hits = 0;
+
+ /* verify parameters */
+ assert(idx == get_index(cache, to_fit_in));
/* This loop will eventually terminate because every cache entry
* would get dropped eventually:
@@ -1015,41 +1145,37 @@ ensure_data_insertable(svn_membuffer_t *
{
/* first offset behind the insertion window
*/
- apr_uint64_t end = cache->next == NO_INDEX
- ? cache->data_size
- : get_entry(cache, cache->next)->offset;
+ apr_uint64_t end = cache->l2.next == NO_INDEX
+ ? cache->l2.start_offset + cache->l2.size
+ : get_entry(cache, cache->l2.next)->offset;
/* leave function as soon as the insertion window is large enough
*/
- if (end >= size + cache->current_data)
+ if (end >= to_fit_in->size + cache->l2.current_data)
return TRUE;
- /* Don't be too eager to cache data. Smaller items will fit into
- * the cache after dropping a single item. Of the larger ones, we
- * will only accept about 50%. They are also likely to get evicted
- * soon due to their notoriously low hit counts.
- *
- * As long as enough similarly or even larger sized entries already
- * exist in the cache, much less insert requests will be rejected.
+ /* if the net worth (in hits) of items removed is already larger
+ * than what we want to insert, reject TO_FIT_IN because it still
+ * does not fit in.
*/
- if (2 * drop_size > size)
+ if (drop_hits > to_fit_in->hit_count)
return FALSE;
/* try to enlarge the insertion window
*/
- if (cache->next == NO_INDEX)
+ if (cache->l2.next == NO_INDEX)
{
/* We reached the end of the data buffer; restart at the beginning.
* Due to the randomized nature of our LFU implementation, very
* large data items may require multiple passes. Therefore, SIZE
* should be restricted to significantly less than data_size.
*/
- cache->current_data = 0;
- cache->next = cache->first;
+ cache->l2.current_data = cache->l2.start_offset;
+ cache->l2.next = cache->l2.first;
}
else
{
- entry = get_entry(cache, cache->next);
+ entry = get_entry(cache, cache->l2.next);
/* Keep entries that are very small. Those are likely to be data
* headers or similar management structures. So, they are probably
@@ -1061,14 +1187,24 @@ ensure_data_insertable(svn_membuffer_t *
{
move_entry(cache, entry);
}
+ else if (cache->l2.next / GROUP_SIZE == idx / GROUP_SIZE)
+ {
+ /* Special case: we cannot drop entries that are in the same
+ * group as TO_FIT_IN because that might the latter to become
+ * invalidated it it happens to be the highest used entry in
+ * the group. So, we must keep ENTRY unconditionally.
+ * (this is a very rare condition)
+ */
+ move_entry(cache, entry);
+ }
else
{
svn_boolean_t keep;
if (cache->hit_count > cache->used_entries)
{
- /* Roll the dice and determine a threshold somewhere from 0
up
- * to 2 times the average hit count.
+ /* Roll the dice and determine a threshold somewhere from
+ * 0 up to 2 times the average hit count.
*/
average_hit_value = cache->hit_count / cache->used_entries;
threshold = (average_hit_value+1) * (rand() % 4096) / 2048;
@@ -1077,9 +1213,9 @@ ensure_data_insertable(svn_membuffer_t *
}
else
{
- /* general hit count is low. Keep everything that got hit
- * at all and assign some 50% survival chance to everything
- * else.
+ /* general hit count is low. Keep everything that got
+ * hit at all and assign some 50% survival chance to
+ * everything else.
*/
keep = (entry->hit_count > 0) || (rand() & 1);
}
@@ -1087,15 +1223,16 @@ ensure_data_insertable(svn_membuffer_t *
/* keepers or destroyers? */
if (keep)
{
+ /* Keep ENTRY and move the insertion window.
+ */
move_entry(cache, entry);
}
else
{
- /* Drop the entry from the end of the insertion window, if it
- * has been hit less than the threshold. Otherwise, keep it
and
- * move the insertion window one entry further.
+ /* Drop the entry from the end of the insertion window,
+ * because it had been hit less than the threshold.
*/
- drop_size += entry->size;
+ drop_hits += entry->hit_count;
drop_entry(cache, entry);
}
}
@@ -1106,6 +1243,70 @@ ensure_data_insertable(svn_membuffer_t *
* right answer. */
}
+/* This function implements the cache insertion / eviction strategy for L1.
+ *
+ * If necessary, enlarge the insertion window of CACHE->L1 by promoting
+ * entries to L2 until it is at least SIZE bytes long.
+ *
+ * Return TRUE if enough room could be found or made. A FALSE result
+ * indicates that the respective item shall not be added because it is
+ * too large.
+ */
+static svn_boolean_t
+ensure_data_insertable_l1(svn_membuffer_t *cache, apr_size_t size)
+{
+ entry_t *entry;
+
+ /* Guarantees that the while loop will terminate. */
+ if (size > cache->l1.size)
+ return FALSE;
+
+ /* This loop will eventually terminate because every cache entry
+ * would get dropped eventually.
+ */
+ while (1)
+ {
+ /* first offset behind the insertion window
+ */
+ apr_uint64_t end = cache->l1.next == NO_INDEX
+ ? cache->l1.start_offset + cache->l1.size
+ : get_entry(cache, cache->l1.next)->offset;
+
+ /* leave function as soon as the insertion window is large enough
+ */
+ if (end >= size + cache->l1.current_data)
+ return TRUE;
+
+ /* Enlarge the insertion window
+ */
+ if (cache->l1.next == NO_INDEX)
+ {
+ /* We reached the end of the data buffer; restart at the beginning.
+ * Due to the randomized nature of our LFU implementation, very
+ * large data items may require multiple passes. Therefore, SIZE
+ * should be restricted to significantly less than data_size.
+ */
+ cache->l1.current_data = cache->l1.start_offset;
+ cache->l1.next = cache->l1.first;
+ }
+ else
+ {
+ /* Remove the entry from the end of insertion window and promote
+ * it to L2, if it is important enough.
+ */
+ entry = get_entry(cache, cache->l1.next);
+
+ if (ensure_data_insertable_l2(cache, entry, cache->l1.next))
+ promote_entry(cache, entry);
+ else
+ drop_entry(cache, entry);
+ }
+ }
+
+ /* This will never be reached. But if it was, "can't insert" was the
+ * right answer. */
+}
+
/* Mimic apr_pcalloc in APR_POOL_DEBUG mode, i.e. handle failed allocations
* (e.g. OOM) properly: Allocate at least SIZE bytes from POOL and zero
* the content of the allocated memory if ZERO has been set. Return NULL
@@ -1225,13 +1426,13 @@ svn_cache__membuffer_cache_create(svn_me
*/
data_size = ALIGN_VALUE(total_size - directory_size + 1) - ITEM_ALIGNMENT;
- /* For cache sizes > 4TB, individual cache segments will be larger
- * than 16GB allowing for >4GB entries. But caching chunks larger
- * than 4GB is simply not supported.
+ /* For cache sizes > 16TB, individual cache segments will be larger
+ * than 32GB allowing for >4GB entries. But caching chunks larger
+ * than 4GB are simply not supported.
*/
- max_entry_size = data_size / 4 > MAX_ITEM_SIZE
+ max_entry_size = data_size / 8 > MAX_ITEM_SIZE
? MAX_ITEM_SIZE
- : data_size / 4;
+ : data_size / 8;
/* to keep the entries small, we use 32 bit indexes only
* -> we need to ensure that no more then 4G entries exist.
@@ -1259,13 +1460,25 @@ svn_cache__membuffer_cache_create(svn_me
hence "unused" */
c[seg].group_initialized = apr_pcalloc(pool, group_init_size);
- c[seg].first = NO_INDEX;
- c[seg].last = NO_INDEX;
- c[seg].next = NO_INDEX;
+ /* Allocate 1/4th of the data buffer to L1
+ */
+ c[seg].l1.first = NO_INDEX;
+ c[seg].l1.last = NO_INDEX;
+ c[seg].l1.next = NO_INDEX;
+ c[seg].l1.start_offset = 0;
+ c[seg].l1.size = ALIGN_VALUE(data_size / 4);
+ c[seg].l1.current_data = 0;
+
+ /* The remaining 3/4th will be used as L2
+ */
+ c[seg].l2.first = NO_INDEX;
+ c[seg].l2.last = NO_INDEX;
+ c[seg].l2.next = NO_INDEX;
+ c[seg].l2.start_offset = c[seg].l1.size;
+ c[seg].l2.size = data_size - c[seg].l1.size;
+ c[seg].l2.current_data = c[seg].l2.start_offset;
- c[seg].data_size = data_size;
c[seg].data = secure_aligned_alloc(pool, (apr_size_t)data_size, FALSE);
- c[seg].current_data = 0;
c[seg].data_used = 0;
c[seg].max_entry_size = max_entry_size;
@@ -1397,7 +1610,7 @@ membuffer_cache_set_internal(svn_membuff
*/
if ( buffer != NULL
&& cache->max_entry_size >= size
- && ensure_data_insertable(cache, size))
+ && ensure_data_insertable_l1(cache, size))
{
/* Remove old data for this key, if that exists.
* Get an unused entry for the key and and initialize it with
@@ -1405,7 +1618,7 @@ membuffer_cache_set_internal(svn_membuff
*/
entry = find_entry(cache, group_index, to_find, TRUE);
entry->size = size;
- entry->offset = cache->current_data;
+ entry->offset = cache->l1.current_data;
#ifdef SVN_DEBUG_CACHE_MEMBUFFER
@@ -1758,13 +1971,13 @@ membuffer_cache_set_partial_internal(svn
*/
drop_entry(cache, entry);
if ( (cache->max_entry_size >= size)
- && ensure_data_insertable(cache, size))
+ && ensure_data_insertable_l1(cache, size))
{
/* Write the new entry.
*/
entry = find_entry(cache, group_index, to_find, TRUE);
entry->size = size;
- entry->offset = cache->current_data;
+ entry->offset = cache->l1.current_data;
if (size)
memcpy(cache->data + entry->offset, data, size);
@@ -2112,9 +2325,9 @@ static svn_error_t *
svn_membuffer_get_segment_info(svn_membuffer_t *segment,
svn_cache__info_t *info)
{
- info->data_size += segment->data_size;
+ info->data_size += segment->l1.size + segment->l2.size;
info->used_size += segment->data_used;
- info->total_size += segment->data_size +
+ info->total_size += segment->l1.size + segment->l2.size +
segment->group_count * GROUP_SIZE * sizeof(entry_t);
info->used_entries += segment->used_entries;