[
https://issues.apache.org/jira/browse/TS-4331?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15349875#comment-15349875
]
ASF GitHub Bot commented on TS-4331:
------------------------------------
Github user jpeach commented on a diff in the pull request:
https://github.com/apache/trafficserver/pull/653#discussion_r68495724
--- Diff: iocore/hostdb/P_RefCountCache.h ---
@@ -0,0 +1,896 @@
+/** @file
+
+ A cache (with map-esque interface) for RefCountObjs
+
+ @section license License
+
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+ */
+#ifndef _P_RefCountCache_h_
+#define _P_RefCountCache_h_
+
+#include "I_EventSystem.h"
+#include "P_EventSystem.h" // TODO: less? just need ET_TASK
+
+#include <ts/Map.h>
+#include "ts/PriorityQueue.h"
+
+#include <ts/List.h>
+#include <ts/ink_hrtime.h>
+
+#include "ts/Vec.h"
+#include "ts/I_Version.h"
+#include <unistd.h>
+
+#define REFCOUNT_CACHE_EVENT_SYNC REFCOUNT_CACHE_EVENT_EVENTS_START
+
+#define REFCOUNTCACHE_MAGIC_NUMBER 0x0BAD2D9
+#define REFCOUNTCACHE_MAJOR_VERSION 1
+#define REFCOUNTCACHE_MINOR_VERSION 0
+
+// Stats
+enum RefCountCache_Stats {
+ refcountcache_current_items_stat, // current number of items
+ refcountcache_current_size_stat, // current size of cache
+ refcountcache_total_inserts_stat, // total items inserted
+ refcountcache_total_failed_inserts_stat, // total items unable to insert
+ refcountcache_total_lookups_stat, // total get() calls
+ refcountcache_total_hits_stat, // total hits
+
+ // Persistence metrics
+ refcountcache_last_sync_time, // seconds since epoch of last successful sync
+ refcountcache_last_total_items, // number of items at last sync
+ refcountcache_last_total_size, // total size at last sync
+
+ RefCountCache_Stat_Count
+};
+
+struct RefCountCacheItemMeta {
+ uint64_t key;
+ unsigned int size;
+ ink_time_t expiry_time; // expire time as seconds since epoch
+ RefCountCacheItemMeta(uint64_t key, unsigned int size, int expire_time = -1) : key(key), size(size), expiry_time(expire_time) {}
+};
+
+// Layer of indirection for the hashmap-- since it needs lots of things inside of it
+// We'll also use this as the item header, for persisting objects to disk
+class RefCountCacheHashEntry
+{
+public:
+ Ptr<RefCountObj> item;
+ LINK(RefCountCacheHashEntry, item_link);
+
+ PriorityQueueEntry<RefCountCacheHashEntry *> *expiry_entry;
+
+ RefCountCacheItemMeta meta;
+ void
+ set(RefCountObj *i, uint64_t key, unsigned int size, int expire_time)
+ {
+ this->item = make_ptr(i);
+ this->meta = RefCountCacheItemMeta(key, size, expire_time);
+ };
+ // Need a no-argument constructor to use the classAllocator
+ RefCountCacheHashEntry() : expiry_entry(NULL), meta(0, 0){};
+
+ // make these values comparable -- so we can sort them
+ bool
+ operator<(const RefCountCacheHashEntry &v2) const
+ {
+ return this->meta.expiry_time < v2.meta.expiry_time;
+ };
+};
+// Since the hashing values are all fixed size, we can simply use a classAllocator to avoid mallocs
+extern ClassAllocator<RefCountCacheHashEntry> refCountCacheHashingValueAllocator;
+extern ClassAllocator<PriorityQueueEntry<RefCountCacheHashEntry *>> expiryQueueEntry;
+
+struct RefCountCacheHashing {
+ typedef uint64_t ID;
+ typedef uint64_t const Key;
+ typedef RefCountCacheHashEntry Value;
+ typedef DList(RefCountCacheHashEntry, item_link) ListHead;
+
+ static ID
+ hash(Key key)
+ {
+ return key;
+ }
+ static Key
+ key(Value const *value)
+ {
+ return value->meta.key;
+ }
+ static bool
+ equal(Key lhs, Key rhs)
+ {
+ return lhs == rhs;
+ }
+};
+
+// The RefCountCachePartition is simply a map of key -> Ptr<YourClass>
+// We partition the cache to reduce lock contention
+template <class C> class RefCountCachePartition
+{
+public:
+ RefCountCachePartition(unsigned int part_num, uint64_t max_size, unsigned int max_items, RecRawStatBlock *rsb = NULL);
+ Ptr<C> get(uint64_t key);
+ void put(uint64_t key, C *item, int size = 0, int expire_time = 0);
+ void erase(uint64_t key, int expiry_time = -1);
+
+ void clear();
+ bool is_full();
+ bool reserve(unsigned int);
+
+ const size_t count();
+ void copy(Vec<RefCountCacheHashEntry *> &items);
+
+ typedef typename TSHashTable<RefCountCacheHashing>::iterator iterator_type;
+ typedef typename TSHashTable<RefCountCacheHashing>::self hash_type;
+ typedef typename TSHashTable<RefCountCacheHashing>::Location location_type;
+ TSHashTable<RefCountCacheHashing> *get_map();
+
+ Ptr<ProxyMutex> lock; // Lock
+
+private:
+ void metric_inc(RefCountCache_Stats metric_enum, int64_t data);
+
+ unsigned int part_num;
+
+ uint64_t max_size;
+ unsigned int max_items;
+
+ uint64_t size;
+ unsigned int items;
+
+ hash_type item_map;
+
+ PriorityQueue<RefCountCacheHashEntry *> expiry_queue;
+ RecRawStatBlock *rsb;
+};
+
+template <class C>
+RefCountCachePartition<C>::RefCountCachePartition(unsigned int part_num, uint64_t max_size, unsigned int max_items,
+ RecRawStatBlock *rsb)
+{
+ this->part_num = part_num;
+ this->max_size = max_size;
+ this->max_items = max_items;
+ this->size = 0;
+ this->items = 0;
+ this->rsb = rsb;
+
+ // Initialize lock
+ this->lock = new_ProxyMutex();
+}
+
+template <class C>
+Ptr<C>
+RefCountCachePartition<C>::get(uint64_t key)
+{
+ this->metric_inc(refcountcache_total_lookups_stat, 1);
+ location_type l = this->item_map.find(key);
+ if (l.isValid()) {
+ // found
+ this->metric_inc(refcountcache_total_hits_stat, 1);
+ return make_ptr((C *)l.m_value->item.get());
+ } else {
+ return Ptr<C>();
+ }
+}
+
+template <class C>
+void
+RefCountCachePartition<C>::put(uint64_t key, C *item, int size, int expire_time)
+{
+ this->metric_inc(refcountcache_total_inserts_stat, 1);
+ size += sizeof(C);
+ // Remove any colliding entries
+ this->erase(key);
+
+ // if we are full, and can't make space-- then don't store the item
+ if (this->is_full() && !this->reserve(size)) {
+ Debug("refcountcache", "partition %d is full-- not storing item
key=%ld", this->part_num, key);
+ this->metric_inc(refcountcache_total_failed_inserts_stat, 1);
+ return;
+ }
+
+ // Create our value-- which has a ref to the `item`
+ RefCountCacheHashEntry *val = refCountCacheHashingValueAllocator.alloc();
+ val->set(item, key, size, expire_time);
+
+ // add expiry_entry to expiry queue, if the expire time is positive (otherwise it means don't expire)
+ if (expire_time >= 0) {
+ Debug("refcountcache", "partition %d adding entry with
expire_time=%d\n", this->part_num, expire_time);
+ PriorityQueueEntry<RefCountCacheHashEntry *> *expiry_entry =
expiryQueueEntry.alloc();
+ new ((void *)expiry_entry) PriorityQueueEntry<RefCountCacheHashEntry
*>(val);
+ expiry_queue.push(expiry_entry);
+ val->expiry_entry = expiry_entry;
+ }
+
+ // add the item to the map
+ this->item_map.insert(val);
+ this->size += val->meta.size;
+ this->items++;
+ this->metric_inc(refcountcache_current_size_stat, (int64_t)val->meta.size);
+ this->metric_inc(refcountcache_current_items_stat, 1);
+}
+
+template <class C>
+void
+RefCountCachePartition<C>::erase(uint64_t key, int expiry_time)
+{
+ location_type l = this->item_map.find(key);
+ if (l.isValid()) {
+ if (expiry_time >= 0 && l.m_value->meta.expiry_time != expiry_time) {
+ return;
+ }
+ // found
+ this->item_map.remove(l);
+
+ // decrement usage counters
+ this->size -= l.m_value->meta.size;
+ this->items--;
+
+ this->metric_inc(refcountcache_current_size_stat, -((int64_t)l.m_value->meta.size));
+ this->metric_inc(refcountcache_current_items_stat, -1);
+
+ // remove from expiry queue
+ if (l.m_value->expiry_entry != NULL) {
+ Debug("refcountcache", "partition %d deleting item from expiry_queue
idx=%d\n", this->part_num,
+ l.m_value->expiry_entry->index);
+ this->expiry_queue.erase(l.m_value->expiry_entry);
+ expiryQueueEntry.free(l.m_value->expiry_entry);
+ l.m_value->expiry_entry = NULL; // To avoid the destruction of `l` calling the destructor again-- and causing issues
+ }
+ // Since the Value is actually RefCountObj-- when this gets deleted normally it calls the wrong
+ // `free` method, this forces the delete/decr to happen with the right type
+ Ptr<C> *tmp = (Ptr<C> *)&l.m_value->item;
+ tmp->clear();
+ refCountCacheHashingValueAllocator.free(l.m_value);
+ }
+}
+
+template <class C>
+void
+RefCountCachePartition<C>::clear()
+{
+ this->metric_inc(refcountcache_current_size_stat, -this->size);
+ this->metric_inc(refcountcache_current_items_stat, -this->items);
+ // Clear the in memory hashmap
+ // TODO: delete all items (not sure if clear calls delete on all items)
+ this->item_map.clear();
+ // clear the queue
+ PriorityQueueEntry<RefCountCacheHashEntry *> *tmp_entry;
+ while (!this->expiry_queue.empty()) {
+ tmp_entry = this->expiry_queue.top();
+ this->expiry_queue.pop();
+ expiryQueueEntry.free(tmp_entry);
+ }
+
+ this->items = 0;
+ this->size = 0;
+}
+
+// Are we full?
+template <class C>
+bool
+RefCountCachePartition<C>::is_full()
+{
+ Debug("refcountcache", "partition %d is full? items %d/%d size
%ld/%ld\n\n", this->part_num, this->items, this->max_items,
+ this->size, this->max_size);
+ return (this->max_items > 0 && this->items >= this->max_items) ||
(this->max_size > 0 && this->size >= this->max_size);
+}
+
+// Attempt to make space for item of `size`
+template <class C>
+bool
+RefCountCachePartition<C>::reserve(unsigned int size)
--- End diff --
This method seems inconsistent to me. You can size by item count or byte
count or both, but you can only reserve by byte count? This code won't work
correctly if you are not sizing by bytes.
In STL containers, ``reserve`` means to preallocate space for items, which is
not what this is really doing. I'd be happier if this were a private method
called ``evict_expired_entries``, which seems to more accurately describe what
it does.
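For illustration, a minimal sketch of what such a private helper might look like, reusing the ``erase()`` and ``expiry_queue`` members from the header above. The name ``evict_expired_entries``, the ``now`` parameter, and the exact eviction policy are assumptions on my part, since the body of ``reserve()`` is not part of this hunk:

```cpp
// Hypothetical sketch only: reserve()'s body is not shown here, so the policy
// below is a guess at what the rename would cover. The idea is to walk the
// expiry queue and drop entries whose expiry_time has already passed, rather
// than pretending to "reserve" space.
template <class C>
void
RefCountCachePartition<C>::evict_expired_entries(ink_time_t now)
{
  while (!this->expiry_queue.empty()) {
    RefCountCacheHashEntry *top = this->expiry_queue.top()->node;
    if (top->meta.expiry_time >= now) {
      break; // earliest-expiring entry is still live; nothing more to evict
    }
    // erase() already removes the entry from both the map and the expiry queue
    this->erase(top->meta.key, top->meta.expiry_time);
  }
}
```

``put()`` (or a separate byte-based space check) could then call this helper explicitly, which keeps the sizing-by-bytes question separate from expiry.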
> Hostdb consistency problems due to MultiCache
> ---------------------------------------------
>
> Key: TS-4331
> URL: https://issues.apache.org/jira/browse/TS-4331
> Project: Traffic Server
> Issue Type: Bug
> Components: HostDB
> Reporter: Thomas Jackson
> Assignee: Thomas Jackson
> Fix For: 7.0.0
>
>
> This ticket is for the correct long term fix to TS-4207
> pulled from a comment, which wraps up the issue
> {quote}
> Leif Hedstrom I have spent a decent amount of time on this while I was OOO on
> vacation the last couple of weeks. It seems that the root cause of this issue
> has always existed, and that with the addition of always storing hostnames
> (https://github.com/apache/trafficserver/commit/0e703e1e) we are just causing
> the issue to happen all the time.
> To understand the issue I'll give a little background in how hostdb is
> currently working. Basically hostdb is just a wrapper around this templated
> struct called MultiCache. MultiCache is "multi" not because it is templated,
> but because it has two types of storage (static-- blocks and dynamic--
> alloc). The static side of the cache can hold N HostDBInfo structs (the
> results of DNS queries). The dynamic side is used to store the round robin
> records and various strings associated with the record. The size of this
> dynamic space is defined as (N x estimated_heap_bytes_per_entry). The basic
> problem we are running into is that we are putting too much pressure on the
> dynamic heap-- such that the heap is getting re-used while people still have
> references to items in that space.
> So, I've actually been working on re-writing MultiCache to allocate the
> entire required block at once (so we don't have this problem where the parent
> exists but not the children), but I'm not certain if we want such a change to
> go into the 6.x branch (I'm willing to discuss if we want). If we aren't
> comfortable with such a large change I suggest just accounting for the
> hostname size in the estimated_heap_bytes_per_entry as a stopgap solution.
> The maximum allowable size is 253 (so 254 with null terminator), but we could
> pick a smaller number (~120 or so seems to be more reasonable). Alternatively
> you can increase the number of records in hostdb (and the size accordingly)
> to increase the dynamic heap size.
> TLDR; almost done with the long term solution, but I'm not sure if we want to
> merge that into 6.x-- alternatively we can do a simple workaround in 6.x
> (https://github.com/apache/trafficserver/pull/553)
> {quote}
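A rough worked example of the sizing arithmetic described in the quote above (the record count and per-entry estimate are made-up illustration numbers, not values taken from this ticket):

```cpp
#include <cstdint>
#include <cstdio>

int
main()
{
  // Illustration-only numbers; neither value comes from this ticket.
  const uint64_t num_entries                    = 120000; // N HostDBInfo slots
  const uint64_t estimated_heap_bytes_per_entry = 512;    // assumed current per-entry estimate
  const uint64_t hostname_budget                = 254;    // 253-byte max hostname + null terminator

  // Dynamic heap today: N x estimated_heap_bytes_per_entry
  const uint64_t heap_now = num_entries * estimated_heap_bytes_per_entry;
  // Stopgap: also budget for a stored hostname on every entry
  const uint64_t heap_with_hostnames = num_entries * (estimated_heap_bytes_per_entry + hostname_budget);

  printf("dynamic heap without hostname budget: %llu bytes\n", (unsigned long long)heap_now);
  printf("dynamic heap with hostname budget:    %llu bytes\n", (unsigned long long)heap_with_hostnames);
  return 0;
}
```

With these made-up numbers, budgeting the full 254 bytes per entry grows the dynamic heap by roughly 50%, which is why the comment floats a smaller per-entry allowance (~120 bytes) as the cheaper stopgap.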
--
This message was sent by Atlassian JIRA
(v6.3.4#6332)