[ 
https://issues.apache.org/jira/browse/TS-4331?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15304279#comment-15304279
 ] 

ASF GitHub Bot commented on TS-4331:
------------------------------------

Github user jpeach commented on a diff in the pull request:

    https://github.com/apache/trafficserver/pull/653#discussion_r64931485
  
    --- Diff: iocore/hostdb/RefCountCache.cc ---
    @@ -0,0 +1,145 @@
    +// The goal here is to create a proof-of-concept cache struct which will 
cache an item
    +// for at least N seconds (configurable) and alloc a given amount of space 
for the given
    +// struct-- this way all of the dynamic pieces (strings, arrays, etc.) can 
be allocated
    +// within the same block.
    +
    +#include "ts/ink_platform.h"
    +#include "ts/I_Layout.h"
    +#include "P_HostDB.h"
    +#include "P_RefCountCache.h"
    +#include "P_EventSystem.h" // FIXME: need to have this in I_* header files.
    +#include "ts/ink_file.h"
    +
    +#include <stdio.h>
    +#include <unistd.h>
    +#include <string.h>
    +#include <unordered_map>
    +#include <sys/stat.h>
    +#include <fcntl.h>
    +#include <sys/mman.h>
    +#include <cmath>
    +
    +// TODO: remove? Config?
    +// static const int MC_SYNC_MIN_PAUSE_TIME = HRTIME_MSECONDS(200); // 
Pause for at least 200ms
    +
    +void
    +RefCountCacheBase::sync_partitions(Continuation *cont)
    +{
    +  eventProcessor.schedule_imm(new RefCountCacheSync(cont, this), ET_CALL);
    +}
    +
    +int
    +RefCountCacheSync::copyPartition(int event, Event *e)
    +{
    +  (void)event;
    +  if (partition >= cc->partition_count()) {
    +    // TODO: check return
    +    this->finalizeSync();
    +    cont->handleEvent(REFCOUNT_CACHE_EVENT_SYNC, 0);
    +    Debug("multicache", "RefCountCacheSync done");
    +    delete this;
    +    return EVENT_DONE;
    +  }
    +  Debug("hostdb", "sync partition=%d/%d", partition, 
cc->partition_count());
    +  // copy the partition into our buffer, then we'll let `pauseEvent` write 
it out
    +  this->partitionItems.reserve(cc->partition_itemcount(partition));
    +  cc->copy_partition(partition, &this->partitionItems);
    +  partition++;
    +  SET_HANDLER((MCacheSyncHandler)&RefCountCacheSync::writePartition);
    +  mutex = e->ethread->mutex;
    +  e->schedule_imm();
    +
    +  return EVENT_CONT;
    +}
    +
    +int
    +RefCountCacheSync::writePartition(int event, Event *e)
    +{
    +  // write the partition to disk
    +  // for item in this->partitionItems
    +  // write to disk with headers per item
    +  for (std::vector<Ptr<RefCountCacheItemBase>>::iterator it = 
this->partitionItems.begin(); it != this->partitionItems.end();
    +       ++it) {
    +    // TODO: figure out a cleaner way, dereferencing and taking the ptr 
seems terrible!
    +    this->outfile.write((char *)&*it->get(), sizeof(*it->get()));
    +    // write the contents
    +    this->outfile.write(it->get()->iobuf, it->get()->size);
    +  }
    +
    +  // Clear partition-- for the next user
    +  this->partitionItems.clear();
    +
    +  SET_HANDLER((MCacheSyncHandler)&RefCountCacheSync::pauseEvent);
    +
    +  // TODO: no sleep between partitions?
    +  // TODO: keep track of how long it took and sleep if we were faster than 
expected
    +  // TODO: figure out how to get access to hostdb_sync_frequency (probaby 
should be passed in instead of this global magic)
    +  // e->schedule_in(MAX(MC_SYNC_MIN_PAUSE_TIME, 
HRTIME_SECONDS(hostdb_sync_frequency - 5) / MULTI_CACHE_PARTITIONS));
    +  // e->schedule_in(MC_SYNC_MIN_PAUSE_TIME);
    +  e->schedule_imm();
    +  return EVENT_CONT;
    +}
    +
    +int
    +RefCountCacheSync::pauseEvent(int event, Event *e)
    +{
    +  (void)event;
    +  (void)e;
    +
    +  // Schedule up the next partition
    +  if (partition < cc->partition_count())
    +    mutex = cc->lock_for_partition(partition);
    +  else
    +    mutex = cont->mutex;
    +  SET_HANDLER((MCacheSyncHandler)&RefCountCacheSync::copyPartition);
    +  e->schedule_imm();
    +  return EVENT_CONT;
    +}
    +
    +// Open the tmp file, etc.
    +int
    +RefCountCacheSync::initializeStorage(int event, Event *e)
    +{
    +  this->filename = this->cc->get_filepath();
    +  this->tmp_filename = this->filename + ".syncing";
    +
    +  this->outfile.open(this->tmp_filename.c_str(), std::ios::out | 
std::ios::binary);
    +
    +  if (!this->outfile.is_open()) {
    +    Warning("Unable to create temporyary file %s, unable to persist 
hostdb\n", this->tmp_filename.c_str());
    --- End diff --
    
    * temporary


> Hostdb consistency problems due to MultiCache
> ---------------------------------------------
>
>                 Key: TS-4331
>                 URL: https://issues.apache.org/jira/browse/TS-4331
>             Project: Traffic Server
>          Issue Type: Bug
>          Components: HostDB
>            Reporter: Thomas Jackson
>            Assignee: Thomas Jackson
>             Fix For: 7.0.0
>
>
> This ticket is for the correct long term fix to TS-4207
> pulled from a comment, which wraps up the issue
> {quote}
> Leif Hedstrom I have spent a decent amount of time on this while I was OOO on 
> vacation the last couple of weeks. It seems that the root cause of this issue 
> has always existed, and that the addition of always doing hostname storing 
> (https://github.com/apache/trafficserver/commit/0e703e1e) we are just causing 
> the issue to happen all the time.
> To understand the issue I'll give a little background in how hostdb is 
> currently working. Basically hostdb is just a wrapper around this templated 
> struct called MultiCache. MultiCache is "multi" not because it is templated, 
> but because it has two types of storage (static-- blocks and dynamic-- 
> alloc). The static side of the cache can hold N HostDBInfo structs (the 
> results of DNS queries). The dynamic side is used to store the round robin 
> records and various strings associated with the record. The size of this 
> dynamic space is defined as (N x [estimated_heap_bytes_per_entry. The basic 
> problem we are running into is that we are putting too much preassure on the 
> dynamic heap-- such that the heap is getting re-used while people still have 
> references to items in that space.
> So, I've actually been working on re-writing MultiCache to allocate the 
> entire required block at once (so we don't have this problem where the parent 
> exists but not the children), but I'm not certain if we want such a change to 
> go into the 6.x branch (I'm willing to discuss if we want). If we aren't 
> comfortable with such a large change I suggest just accounting for the 
> hostname size in the estimated_heap_bytes_per_entry as a stopgap solution. 
> The maximum allowable size is 253 (so 254 with null terminator), but we could 
> pick a smaller number (~120 or so seems to be more reasonable). Alternatively 
> you can increase the number of records in hostdb (and the size accordingly) 
> to increase the dynamic heap size.
> TLDR; almost done with the long term solution, but I'm not sure if we want to 
> merge that into 6.x-- alternatively we can do a simple workaround in 6.x 
> (https://github.com/apache/trafficserver/pull/553)
> {quote}



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)

Reply via email to