Re: [PR] Add S3-FIFO RAM cache eviction algorithm (ram_cache.algorithm = 2) [trafficserver]

via GitHub Wed, 10 Jun 2026 18:57:53 -0700


Copilot commented on code in PR #13255:
URL: https://github.com/apache/trafficserver/pull/13255#discussion_r3392790640



##########
src/iocore/cache/RamCacheS3FIFO.cc:
##########
@@ -0,0 +1,418 @@
+/** @file
+
+  A brief file description
+
+  @section license License
+
+  Licensed to the Apache Software Foundation (ASF) under one
+  or more contributor license agreements.  See the NOTICE file
+  distributed with this work for additional information
+  regarding copyright ownership.  The ASF licenses this file
+  to you under the Apache License, Version 2.0 (the
+  "License"); you may not use this file except in compliance
+  with the License.  You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+ */
+
+// S3-FIFO RAM cache replacement policy.
+//
+// The design follows Yang, Zhang, Qiu, Yue & Rashmi, "FIFO queues are all you 
need for cache
+// eviction" (SOSP 2023) and https://s3fifo.com, mirroring the libCacheSim 
reference (S3FIFO.c).
+//
+// EXPERIMENTAL: three FIFO queues -- a small admission queue S (~10% of 
capacity), a main queue M
+// (~90%, a 2-bit CLOCK), and a ghost queue G that remembers the keys of 
recently evicted objects
+// (no data). New objects enter S; on eviction from S an object that was 
reused (frequency >= 2) is
+// promoted to M, otherwise it is demoted to G. A subsequent miss whose key is 
in G enters M
+// directly. The small queue + ghost give S3-FIFO admission control that 
filters one-hit-wonders --
+// exactly the CDN "quick demotion" property -- at FIFO cost (no per-hit 
reordering). The ghost
+// holds keys, so it costs per-entry metadata at high object cardinality; that 
metadata is bounded
+// and counted against ram_cache.size so total memory stays within the 
configured budget. Selectable
+// as proxy.config.cache.ram_cache.algorithm = 2; see the admin guide 
(records.yaml.en.rst).
+
+#include "P_RamCache.h"
+#include "P_CacheInternal.h"
+#include "StripeSM.h"
+#include "iocore/eventsystem/IOBuffer.h"
+#include "tscore/CryptoHash.h"
+#include "tscore/List.h"
+
+#define ENTRY_OVERHEAD         128 // per-entry metadata counted against 
ram_cache.size
+#define MAIN_PERCENT           90  // main queue target size, percent of 
capacity
+#define GHOST_SIZE_PERCENT     90  // ghost remembers keys for ~this percent 
of capacity by object size
+#define GHOST_MEM_PERCENT      25  // but never more ghost metadata than this 
percent of size (OOM-safe)
+#define MOVE_TO_MAIN_THRESHOLD 2   // an object in S is promoted to M once 
reused this many times
+#define FREQ_MAX               3   // 2-bit saturating frequency counter
+
+enum { SEG_SMALL = 0, SEG_MAIN = 1, SEG_GHOST = 2 };
+
+struct RamCacheS3FIFOEntry {
+  CryptoHash key;
+  uint64_t   auxkey;
+  uint32_t   size; // object bytes; resident entries account size + 
ENTRY_OVERHEAD against the budget
+  uint8_t    seg;  // SEG_SMALL / SEG_MAIN / SEG_GHOST
+  uint8_t    freq; // 0..FREQ_MAX
+  LINK(RamCacheS3FIFOEntry, lru_link);
+  LINK(RamCacheS3FIFOEntry, hash_link);
+  Ptr<IOBufferData> data; // null for ghost entries
+};
+
+struct RamCacheS3FIFO : public RamCache {
+  int     get(CryptoHash *key, Ptr<IOBufferData> *ret_data, uint64_t auxkey = 
0) override;
+  int     put(CryptoHash *key, IOBufferData *data, uint32_t len, bool copy = 
false, uint64_t auxkey = 0) override;
+  int     fixup(const CryptoHash *key, uint64_t old_auxkey, uint64_t 
new_auxkey) override;
+  int64_t size() const override;
+  void    init(int64_t max_bytes, StripeSM *stripe) override;
+
+private:
+  int64_t _max_bytes        = 0;
+  int64_t _ghost_size_limit = 0; // ghost object-size bound (keeps it 
proportional for big objects)
+  int64_t _ghost_max        = 0; // ghost entry-count bound (caps metadata to 
GHOST_MEM_PERCENT)
+  int64_t _s_bytes          = 0; // resident bytes in the small queue (incl. 
ENTRY_OVERHEAD per entry)
+  int64_t _m_bytes          = 0; // resident bytes in the main queue
+  int64_t _g_bytes          = 0; // ghost object-size sum (vs 
_ghost_size_limit)
+  int64_t _g_count          = 0; // ghost entries; each costs ~ENTRY_OVERHEAD, 
counted in the budget
+  int64_t _objects          = 0; // resident objects (S + M)
+  int64_t _nentries         = 0; // hash entries (resident + ghost), for 
sizing the hash table
+
+  Que(RamCacheS3FIFOEntry, lru_link) _seg[3]; // head = oldest, tail = newest, 
per segment
+  DList(RamCacheS3FIFOEntry, hash_link) *_bucket = nullptr;
+  int       _nbuckets                            = 0;
+  int       _ibuckets                            = 0;
+  StripeSM *_stripe                              = nullptr;
+
+  void                 _resize_hashtable();
+  RamCacheS3FIFOEntry *_lookup(const CryptoHash *key, uint64_t auxkey);
+  void                 _unlink_hash(RamCacheS3FIFOEntry *e);
+  void                 _remove(RamCacheS3FIFOEntry *e);
+  void                 _to_main(RamCacheS3FIFOEntry *e);
+  void                 _to_ghost(RamCacheS3FIFOEntry *e);
+  bool                 _evict_ghost_one();
+  void                 _enforce_ghost();
+  void                 _evict_small();
+  void                 _evict_main();
+  void                 _evict();
+};
+
+ClassAllocator<RamCacheS3FIFOEntry, false> 
ramCacheS3FIFOEntryAllocator("RamCacheS3FIFOEntry");
+
+static const int bucket_sizes[] = {8191,    16381,   32749,    65521,    
131071,   262139,    524287,    1048573,   2097143,
+                                   4194301, 8388593, 16777213, 33554393, 
67108859, 134217689, 268435399, 536870909, 1073741827};
+
+void
+RamCacheS3FIFO::_resize_hashtable()
+{
+  ink_release_assert(_ibuckets < static_cast<int>(sizeof(bucket_sizes) / 
sizeof(bucket_sizes[0])));
+  int     anbuckets = bucket_sizes[_ibuckets];
+  int64_t s         = anbuckets * sizeof(DList(RamCacheS3FIFOEntry, 
hash_link));
+
+  DList(RamCacheS3FIFOEntry, hash_link) *new_bucket = 
static_cast<DList(RamCacheS3FIFOEntry, hash_link) *>(ats_malloc(s));
+  memset(static_cast<void *>(new_bucket), 0, s);
+  if (_bucket) {
+    for (int64_t i = 0; i < _nbuckets; i++) {
+      RamCacheS3FIFOEntry *e = nullptr;
+      while ((e = _bucket[i].pop())) {
+        new_bucket[e->key.slice32(3) % anbuckets].push(e);
+      }
+    }
+    ats_free(_bucket);
+  }
+  _bucket   = new_bucket;
+  _nbuckets = anbuckets;
+}
+
+void
+RamCacheS3FIFO::init(int64_t abytes, StripeSM *astripe)
+{
+  _stripe    = astripe;
+  _max_bytes = abytes;
+  if (!_max_bytes) {
+    return;
+  }
+  _ghost_size_limit = _max_bytes / 100 * GHOST_SIZE_PERCENT;
+  // The ghost stores keys only, but each key still costs ~ENTRY_OVERHEAD of 
real memory. Bound the
+  // ghost by both its object-size sum (keeps it proportional for large 
objects) and an entry count
+  // that caps its metadata at GHOST_MEM_PERCENT of the configured size; the 
metadata is counted
+  // against the budget (see put) so total memory never exceeds ram_cache.size.
+  _ghost_max = (_max_bytes / 100 * GHOST_MEM_PERCENT) / ENTRY_OVERHEAD;

Review Comment:
   The percent-of-capacity calculations use `(_max_bytes / 100) * PERCENT`, 
which truncates before scaling. This slightly underestimates the ghost 
size/count limits (e.g., 25% can become 24.99%), and it’s easy to avoid by 
multiplying first and dividing last (keeping it in int64_t).



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Re: [PR] Add S3-FIFO RAM cache eviction algorithm (ram_cache.algorithm = 2) [trafficserver]

Reply via email to