I had a little think about the pathological case of most searches being for one of a few elements.

I'm sure my idea is not new, but it seems to me that keeping a 'hit count' for each element solves this. The decision of whether to swap then becomes:

++ Frequency[I]
if I >= Threshold1 then
choose an index J that is [either I/2 or rand(0...I-1), depending on preference]
  if (Frequency[I] - Frequency[J]) >= Threshold2 then
    swap Item[I] and Item[J]
    swap Frequency[I] and Frequency[J]
    I = J
  endif
endif
return I

I wrote a little test in c++ (sorry guys, old habits die hard):

template<class T>
struct mrucache
{
    using vector_type  = std::vector<T>;
    using iterator = typename vector_type::iterator;
    using const_iterator = typename vector_type::const_iterator;

    void add(T item) {
        _items.push_back(std::move(item));
        _frequency.push_back(0);
    }

    template<class Pred>
    iterator find_if(Pred&& pred)
    {
        using std::begin;
        using std::end;
        auto iter = std::find_if(begin(_items),
                                 end(_items),
                                 std::forward<Pred>(pred));
        if (iter != end(_items))
        {
            auto i = std::distance(_items.begin(), iter);
            ++ _frequency[i];
            i = maybe_swap(i);
            iter = std::next(begin(_items), i);
        }
        return iter;
    }

    std::size_t maybe_swap(std::size_t i)
    {
        if (i >= closeness_threshold)
        {
            auto candidate_i = i / 2;
if ((_frequency[i] - _frequency[candidate_i]) >= difference_threshold) {
                swap(_items[i], _items[candidate_i]);
                swap(_frequency[i], _frequency[candidate_i]);
                i = candidate_i;
            }
        }
        return i;
    }

    auto begin() const { return _items.begin(); }
    auto end() const { return _items.end(); }

    static const size_t closeness_threshold = 4;
    static const int difference_threshold = 1;

    std::vector<T> _items;
    std::vector<int> _frequency;
};

Reply via email to