Github user jpeach commented on a diff in the pull request:

    https://github.com/apache/trafficserver/pull/653#discussion_r64931897
  
    --- Diff: iocore/hostdb/P_RefCountCache.h ---
    @@ -0,0 +1,572 @@
    +#ifndef _P_RefCountCache_h_
    +#define _P_RefCountCache_h_
    +
    +#include "I_EventSystem.h"
    +
    +#include <ts/Map.h>
    +#include <ts/List.h>
    +
    +#include <cstdint>
    +// TODO: no vector?
    +#include <vector>
    +#include <unistd.h>
    +#include <string.h>
    +#include <sys/stat.h>
    +#include <fcntl.h>
    +#include <sys/mman.h>
    +#include <cmath>
    +#include <fstream>
    +
    +#include "ts/I_Version.h"
    +
    +#define REFCOUNT_CACHE_EVENT_SYNC REFCOUNT_CACHE_EVENT_EVENTS_START
    +
    +#define REFCOUNTCACHE_MAGIC_NUMBER 0x0BAD2D9
    +#define REFCOUNTCACHE_MAJOR_VERSION 1
    +#define REFCOUNTCACHE_MINOR_VERSION 0
    +
    +// TODO: elsewhere?
    +#define PtrMutex Ptr<ProxyMutex>
    +
    +// In the cache we want to recount our items
    +// This class will handle memory allocations (on the freelist) and
    +// take care of implementing RefCountObj-- so that the items we want in
    +// the cache don't have to inherit from it (which makes 
serialization/deserialization much easier)
    +class RefCountCacheItemBase : public RefCountObj
    +{
    +public:
    +  RefCountCacheItemBase(uint64_t key = 0, unsigned int size = 0, char 
*iobuf = NULL, int iobuffer_index = 0)
    +  {
    +    this->key = key;
    +    this->size = size;
    +    this->iobuf = iobuf;
    +    this->iobuffer_index = iobuffer_index;
    +  }
    +  void
    +  free()
    +  {
    +    if (this->size > 0) {
    +      // free the block
    +      ioBufAllocator[iobuffer_index].free_void((void *)(iobuf));
    +    }
    +  }
    +  uint64_t key;      // Key of this item
    +  unsigned int size; // how much space does this guy get
    +  int64_t iobuffer_index;
    +  char *iobuf;
    +};
    +
    +// Template to the particular class, so we can caste the item ptr correctly
    +template <class C> class RefCountCacheItem : public RefCountCacheItemBase
    +{
    +public:
    +  using RefCountCacheItemBase::RefCountCacheItemBase;
    +  C *item();
    +};
    +
    +template <class C>
    +C *
    +RefCountCacheItem<C>::item()
    +{
    +  return (C *)this->iobuf;
    +}
    +
    +// Layer of inderection for the hashmap-- since it needs lots of things 
inside of it
    +class RefCountCacheHashingValue
    +{
    +public:
    +  Ptr<RefCountCacheItemBase> item;
    +  LINK(RefCountCacheHashingValue, item_link);
    +  RefCountCacheHashingValue(Ptr<RefCountCacheItemBase> i) : item(i){};
    +};
    +
    +// TODO: template?
    +struct RefCountCacheHashing {
    +  typedef uint64_t ID;
    +  typedef uint64_t const Key;
    +  typedef RefCountCacheHashingValue Value;
    +  typedef DList(RefCountCacheHashingValue, item_link) ListHead;
    +
    +  static ID
    +  hash(Key key)
    +  {
    +    return key;
    +  }
    +  static Key
    +  key(Value const *value)
    +  {
    +    return value->item->key;
    +  }
    +  static bool
    +  equal(Key lhs, Key rhs)
    +  {
    +    return lhs == rhs;
    +  }
    +};
    +
    +// The RefCountCachePartition is simply a map of key -> 
Ptr<RefCountCacheItem<YourClass>>
    +// TODO: add metrics
    +// TODO: store partition number for debugging
    +template <class C> class RefCountCachePartition
    +{
    +public:
    +  RefCountCachePartition(int maxSize, int maxItems);
    +  RefCountCacheItem<C> *alloc(uint64_t key, int size = 0);
    +  Ptr<RefCountCacheItem<C>> get(uint64_t key);
    +  void del(uint64_t key);
    +  void clear();
    +
    +  int numItems();
    +  void copy(std::vector<Ptr<RefCountCacheItemBase>> *items);
    +
    +  typedef typename TSHashTable<RefCountCacheHashing>::iterator 
iteratortype;
    +  typedef typename TSHashTable<RefCountCacheHashing>::self hashtype;
    +  typedef typename TSHashTable<RefCountCacheHashing>::Location 
locationtype;
    +  TSHashTable<RefCountCacheHashing> *getMap();
    +
    +  PtrMutex lock; // Lock
    +
    +private:
    +  int maxSize;
    +  int maxItems;
    +
    +  int size;
    +  int items;
    +
    +  hashtype itemMap;
    +};
    +
    +template <class C> RefCountCachePartition<C>::RefCountCachePartition(int 
maxSize, int maxItems)
    +{
    +  this->maxSize = maxSize;
    +  this->maxItems = maxItems;
    +  this->size = 0;
    +  this->items = 0;
    +
    +  // Initialize lock
    +  this->lock = new_ProxyMutex();
    +}
    +
    +// TODO: error somehow if you asked for something too big!
    +// Allocate space for an item `C` as well as `size` extra space.
    +template <class C>
    +RefCountCacheItem<C> *
    +RefCountCachePartition<C>::alloc(uint64_t key, int size)
    +{
    +  size += sizeof(C);
    +  // Remove any colliding entries
    +  this->del(key);
    +
    +  // Now allocat a sufficiently large iobuf to store our data
    +  // as well as the intermediate layers we have
    +  int allocSize = size + sizeof(RefCountCacheItem<C>) + 
sizeof(RefCountCacheHashingValue);
    +  // allocate a sufficiently large block from the ioBufAllocator
    +  int iobuffer_index = iobuffer_size_to_index(allocSize);
    +  ink_release_assert(iobuffer_index >= 0);
    +  char *iobuf = (char *)ioBufAllocator[iobuffer_index].alloc_void();
    +
    +  RefCountCacheItem<C> *item = new (iobuf + size) 
RefCountCacheItem<C>(key, size, iobuf, iobuffer_index);
    +
    +  // TODO: either put the RefCountCacheItem on the same iobuf, or move this
    +  // to a class allocator
    +
    +  // if we are past our limits, then lets not store a reference to the 
return in our map
    +  if ((this->maxItems > 0 && this->items > this->maxItems) || 
(this->maxSize > 0 && this->size > this->maxSize)) {
    +    return item;
    +  }
    +
    +  // Add our entry to the map
    +  RefCountCacheHashingValue *val =
    +    new ((char *)item + sizeof(RefCountCacheItem<C>)) 
RefCountCacheHashingValue(make_ptr((RefCountCacheItemBase *)item));
    +  this->itemMap.insert(val);
    +
    +  return item;
    +}
    +
    +template <class C>
    +Ptr<RefCountCacheItem<C>>
    +RefCountCachePartition<C>::get(uint64_t key)
    +{
    +  locationtype l = this->itemMap.find(key);
    +  if (l.isValid()) {
    +    // found
    +    return make_ptr((RefCountCacheItem<C> *)l.m_value->item.get());
    +  } else {
    +    return Ptr<RefCountCacheItem<C>>(NULL);
    +  }
    +}
    +
    +template <class C>
    +void
    +RefCountCachePartition<C>::del(uint64_t key)
    +{
    +  this->itemMap.remove(key);
    +}
    +
    +template <class C>
    +void
    +RefCountCachePartition<C>::clear()
    +{
    +  // Clear the in memory hashmap
    +  // TODO: delete all items (not sure if clear calls delete on all items)
    +  this->itemMap.clear();
    +  this->items = 0;
    +  this->size = 0;
    +}
    +
    +template <class C>
    +int
    +RefCountCachePartition<C>::numItems()
    +{
    +  return this->itemMap.count();
    +}
    +
    +template <class C>
    +void
    +RefCountCachePartition<C>::copy(std::vector<Ptr<RefCountCacheItemBase>> 
*items)
    +{
    +  for (RefCountCachePartition<C>::iteratortype i = this->itemMap.begin(); 
i != this->itemMap.end(); ++i) {
    +    items->push_back(make_ptr((RefCountCacheItemBase *)i->item.get()));
    +  }
    +}
    +
    +// TODO: pass an iterator or something? Seems not good to pass a pointer 
to a member
    +template <class C>
    +TSHashTable<RefCountCacheHashing> *
    +RefCountCachePartition<C>::getMap()
    +{
    +  return &this->itemMap;
    +}
    +
    +// The implementation of this class must be in here, as this is used by 
template
    +// classes, and c++ rocks that way
    +// The header for the cache, this is used to check if the serialized cache 
is compatible
    +class RefCountCacheHeader
    +{
    +public:
    +  unsigned int magic;
    +  VersionNumber version;
    +
    +  int numPartitions;
    +  int partitionMaxSize;
    +
    +  RefCountCacheHeader(int numParts, int partSize)
    +    : magic(REFCOUNTCACHE_MAGIC_NUMBER), numPartitions(numParts), 
partitionMaxSize(partSize)
    +  {
    +    this->version.ink_major = REFCOUNTCACHE_MAJOR_VERSION;
    +    this->version.ink_minor = REFCOUNTCACHE_MINOR_VERSION;
    +  };
    +
    +  bool
    +  operator==(const RefCountCacheHeader other) const
    +  {
    +    return (this->magic == other.magic && this->version.ink_major == 
other.version.ink_major &&
    +            this->version.ink_minor == other.version.ink_minor && 
this->numPartitions == other.numPartitions &&
    +            this->partitionMaxSize == other.partitionMaxSize);
    +  }
    +
    +  bool
    +  compatible(RefCountCacheHeader *other)
    +  {
    +    //      printf("magic %d %d\n", this->magic, other->magic);
    +    //      printf("version.ink_major %d %d\n", this->version.ink_major, 
other->version.ink_major);
    +    //      printf("numPartitions %d %d\n", this->numPartitions, 
other->numPartitions);
    +    //      printf("partitionMaxSize %d %d\n", this->partitionMaxSize, 
other->partitionMaxSize);
    +
    +    return (this->magic == other->magic && this->version.ink_major == 
other->version.ink_major &&
    +            this->numPartitions == other->numPartitions && 
this->partitionMaxSize == other->partitionMaxSize);
    +  };
    +};
    +
    +// Base for syncing purposes
    +// Since the actual RefCountCache is templated, we don't want to require
    +// importing the template type for all uses (such as syncing)
    +class RefCountCacheBase
    +{
    +public:
    +  void sync_partitions(Continuation *cont);
    +  virtual RefCountCacheHeader *
    +  get_header()
    +  {
    +    return NULL;
    +  };
    +  virtual ProxyMutex *
    +  lock_for_partition(int)
    +  {
    +    return NULL;
    +  };
    +  virtual int
    +  partition_count()
    +  {
    +    return 0;
    +  };
    +  virtual int
    +  partition_itemcount(int part)
    +  {
    +    return 0;
    +  };
    +  virtual void copy_partition(int part, 
std::vector<Ptr<RefCountCacheItemBase>> *items){};
    +  virtual std::string
    +  get_filepath()
    +  {
    +    return NULL;
    +  };
    +};
    +
    +// RefCountCache is a ref-counted key->value map to store "atomic" blocks 
of memory (from the freelist)
    +// which includes at least an instance of `C`. The primary use case is 
that the struct (`C`)
    +// has a dynamically sized attribute (array, string, etc.). To accommodate 
this use-case
    +// RefCountCache allows you to call alloc() with a key and an optional 
`size` which will be the
    +// additional amount of space required to store your additional 
information.
    +// Ptrs to these "blocks" of memory can be retrieved again using the get() 
method with the key provided
    +// during the alloc()
    +template <class C> class RefCountCache : public RefCountCacheBase
    +{
    +public:
    +  // Constructor
    +  RefCountCache(int numPartitions, int size = -1, int items = -1);
    +  // Destructor
    +  ~RefCountCache();
    +  RefCountCacheItem<C> *alloc(uint64_t key, int size = 0);
    +  Ptr<RefCountCacheItem<C>> get(uint64_t key);
    +  int partitionForKey(uint64_t key);
    +  void del(uint64_t key);
    +  void clear();
    +
    +  RefCountCacheHeader *get_header();
    +  ProxyMutex *lock_for_key(uint64_t key);
    +  virtual ProxyMutex *lock_for_partition(int pnum);
    +  int partition_count();
    +  int partition_itemcount(int part);
    +  TSHashTable<RefCountCacheHashing> *partition_getMap(int part);
    +  void copy_partition(int part, std::vector<Ptr<RefCountCacheItemBase>> 
*items);
    +  std::string get_filepath();
    +
    +  // TODO: better names
    +  int numItems();
    +
    +  int load_file(std::string filepath); // Load the file from disk (if 
exists)
    +  int totalSize();
    +
    +  // Sync methods
    +  int sync_all();
    +
    +private:
    +  int maxSize;  // Total size
    +  int maxItems; // Total number of items allowed
    +
    +  int numPartitions;
    +  int partitionMaxSize;
    +  int partitionMaxItems;
    +
    +  std::string filepath;
    +
    +  std::vector<RefCountCachePartition<C>> partitions;
    +
    +  // Header
    +  RefCountCacheHeader header; // Our header
    +};
    +
    +// TODO: add `calloc`
    +template <class C>
    +RefCountCache<C>::RefCountCache(int numPartitions, int size, int items)
    +  : partitions(numPartitions, RefCountCachePartition<C>(size / 
numPartitions, items / numPartitions)),
    +    header(RefCountCacheHeader(numPartitions, size))
    +{
    +  this->maxSize = size;
    +  this->maxItems = items;
    +  this->numPartitions = numPartitions;
    +
    +  // TODO: fix if either is not divisible by numPartitions
    +  this->partitionMaxSize = this->maxSize / this->numPartitions;
    +  this->partitionMaxItems = this->maxItems / this->numPartitions;
    +
    +  // TODO: fix this, bad to initialize it wrong
    +  this->header.partitionMaxSize = this->partitionMaxSize;
    +}
    +
    +// Deconstruct the class
    +template <class C> RefCountCache<C>::~RefCountCache()
    +{
    +  delete this->partitions;
    +}
    +
    +template <class C>
    +RefCountCacheItem<C> *
    +RefCountCache<C>::alloc(uint64_t key, int size)
    +{
    +  return this->partitions[this->partitionForKey(key)].alloc(key, size);
    +}
    +
    +template <class C>
    +Ptr<RefCountCacheItem<C>>
    +RefCountCache<C>::get(uint64_t key)
    +{
    +  return this->partitions[this->partitionForKey(key)].get(key);
    +}
    +
    +// Pick a partition for a given item
    +template <class C>
    +int
    +RefCountCache<C>::partitionForKey(uint64_t key)
    +{
    +  return key % this->numPartitions;
    +}
    +
    +// TODO: return a copy???
    +template <class C>
    +RefCountCacheHeader *
    +RefCountCache<C>::get_header()
    +{
    +  return &this->header;
    +}
    +
    +template <class C>
    +ProxyMutex *
    +RefCountCache<C>::lock_for_key(uint64_t key)
    +{
    +  return this->partitions[this->partitionForKey(key)].lock.get();
    +}
    +
    +template <class C>
    +ProxyMutex *
    +RefCountCache<C>::lock_for_partition(int pnum)
    +{
    +  return this->partitions[pnum].lock.get();
    +}
    +
    +template <class C>
    +int
    +RefCountCache<C>::partition_count()
    +{
    +  return this->numPartitions;
    +}
    +
    +template <class C>
    +int
    +RefCountCache<C>::partition_itemcount(int pnum)
    +{
    +  return this->partitions[pnum].numItems();
    +}
    +
    +template <class C>
    +TSHashTable<RefCountCacheHashing> *
    +RefCountCache<C>::partition_getMap(int pnum)
    +{
    +  return this->partitions[pnum].getMap();
    +}
    +
    +template <class C>
    +void
    +RefCountCache<C>::copy_partition(int pnum, 
std::vector<Ptr<RefCountCacheItemBase>> *items)
    +{
    +  this->partitions[pnum].copy(items);
    +}
    +
    +template <class C>
    +std::string
    +RefCountCache<C>::get_filepath()
    +{
    +  return this->filepath;
    +}
    +
    +template <class C>
    +void
    +RefCountCache<C>::del(uint64_t key)
    +{
    +  this->partitions[this->partitionForKey(key)].del(key);
    +}
    +
    +template <class C>
    +void
    +RefCountCache<C>::clear()
    +{
    +  for (int i = 0; i < this->numPartitions; i++) {
    +    this->partitions[i].clear();
    +  }
    +}
    +
    +// Return the number of items in all partitions of this cache
    +template <class C>
    +int
    +RefCountCache<C>::numItems()
    +{
    +  int ret = 0;
    +  for (int i = 0; i < this->numPartitions; i++) {
    +    ret += this->partitions[i].numItems();
    +  }
    +  return ret;
    +}
    +
    +// Attempt to open a file at `filepath`. If we can open the file, we'll 
attempt to
    +// load items from it (assuming the header is compatible). Afterwards 
we'll keep
    +// the FD open and use this as the backing store for our cache
    +// The return is the number of items loaded from disk. Errors are -1
    +// TODO: error if you already have a filepath
    +template <class C>
    +int
    +RefCountCache<C>::load_file(std::string filepath)
    +{
    +  this->filepath = filepath;
    +  std::ifstream infile;
    +  infile.open(this->filepath.c_str(), std::ios::in | std::ios::binary);
    +
    +  // read in the header
    +  RefCountCacheHeader tmpHeader = RefCountCacheHeader(0, 0);
    +  // TODO: check if we read it in
    +  infile.read((char *)&tmpHeader, sizeof(RefCountCacheHeader));
    +  if (!this->header.compatible(&tmpHeader)) {
    +    Warning("Incompatible cache at %s, not loading.", filepath.c_str());
    +    return -1; // TODO: specific code for incompatible
    +  }
    +
    +  RefCountCacheItemBase tmpItem = RefCountCacheItemBase();
    +  while (infile.read((char *)&tmpItem, sizeof(tmpItem))) {
    +    C *newItem = (C *)this->alloc(tmpItem.key, tmpItem.size - 
sizeof(C))->iobuf;
    +    infile.read((char *)newItem, tmpItem.size);
    +  }
    +  // TODO: remove this? this is a hack to not have the destructor try to 
deallocate the iobuff
    +  tmpItem.size = 0;
    +  infile.close();
    +
    +  // TODO: catch error conditions!
    +  return 0;
    +}
    +
    +class RefCountCacheSync;
    +typedef int (RefCountCacheSync::*MCacheSyncHandler)(int, void *);
    +// This continuation is responsible for persisting RefCountCache to disk
    +// To avoid locking the partitions for a long time we'll do the following 
per-partition:
    +//    - lock
    +//    - copy ptrs
    +//    - unlock
    +//    - persist
    +// This way we only have to hold the lock on the partition for the time it 
takes to get Ptrs to all items in the partition
    +class RefCountCacheSync : public Continuation
    +{
    +public:
    +  int partition;         // Current partition
    +  RefCountCacheBase *cc; // Pointer to the entire cache
    +  Continuation *cont;
    +
    +  std::vector<Ptr<RefCountCacheItemBase>> partitionItems;
    +
    +  int copyPartition(int event, Event *e);
    +  int writePartition(int event, Event *e);
    +  int pauseEvent(int event, Event *e);
    +
    +  // Create the tmp file on disk we'll be writing to
    +  int initializeStorage(int event, Event *e);
    +  // do the final mv and close of file handle
    +  int finalizeSync();
    +
    +  RefCountCacheSync(Continuation *acont, RefCountCacheBase *cc);
    +
    +private:
    +  std::ofstream outfile;
    --- End diff --
    
    We should avoid iostream in the iocore layer.


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at infrastruct...@apache.org or file a JIRA ticket
with INFRA.
---

Reply via email to