[GitHub] nickva commented on a change in pull request #610: Optimize ddoc cache

git Mon, 10 Jul 2017 11:36:24 -0700

nickva commented on a change in pull request #610: Optimize ddoc cache
URL: https://github.com/apache/couchdb/pull/610#discussion_r126502586


 ##########
 File path: src/ddoc_cache/src/ddoc_cache_entry.erl
 ##########
 @@ -0,0 +1,332 @@
+% Licensed under the Apache License, Version 2.0 (the "License"); you may not
+% use this file except in compliance with the License. You may obtain a copy of
+% the License at
+%
+%   http://www.apache.org/licenses/LICENSE-2.0
+%
+% Unless required by applicable law or agreed to in writing, software
+% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+% License for the specific language governing permissions and limitations under
+% the License.
+
+-module(ddoc_cache_entry).
+-behaviour(gen_server).
+
+
+-export([
+    dbname/1,
+    ddocid/1,
+    recover/1,
+    insert/2,
+
+    start_link/2,
+    shutdown/1,
+    open/2,
+    accessed/1,
+    refresh/1
+]).
+
+-export([
+    init/1,
+    terminate/2,
+    handle_call/3,
+    handle_cast/2,
+    handle_info/2,
+    code_change/3
+]).
+
+-export([
+    do_open/1
+]).
+
+
+-include("ddoc_cache.hrl").
+
+
+-record(st, {
+    key,
+    val,
+    opener,
+    waiters,
+    ts,
+    accessed
+}).
+
+
+% Return the database name for a cache key. A key is a {Mod, Arg} pair
+% and the lookup is delegated to the key's callback module.
+dbname({Mod, Arg}) ->
+    Mod:dbname(Arg).
+
+
+% Return the design document id for a cache key by delegating to the
+% callback module of the {Mod, Arg} key.
+ddocid({Mod, Arg}) ->
+    Mod:ddocid(Arg).
+
+
+% Run the recovery function of the key's callback module for a
+% {Mod, Arg} key and return whatever Mod:recover/1 returns.
+recover({Mod, Arg}) ->
+    Mod:recover(Arg).
+
+
+% Hand Value to the insert function of the key's callback module for a
+% {Mod, Arg} key and return whatever Mod:insert/2 returns.
+insert({Mod, Arg}, Value) ->
+    Mod:insert(Arg, Value).
+
+
+% Spawn a linked cache entry process for Key and return {ok, Pid}.
+%
+% The process is started with proc_lib and runs init/1 directly; init/1
+% enters the gen_server loop itself via gen_server:enter_loop/3.
+% Default is the initial value, or undefined when there is none
+% (init/1 has a separate clause for each case).
+start_link(Key, Default) ->
+    Pid = proc_lib:spawn_link(?MODULE, init, [{Key, Default}]),
+    {ok, Pid}.
+
+
+% Synchronously ask the entry process Pid to stop. The match asserts
+% that the process replied ok; any other reply crashes the caller
+% with badmatch.
+shutdown(Pid) ->
+    ok = gen_server:call(Pid, shutdown).
+
+
+% Fetch the value held by the entry process Pid.
+%
+% Returns Val when the process replies {open_ok, Val}. When it replies
+% {open_error, {T, R, S}} the recorded exception is re-raised in the
+% caller with its original class, reason and stack trace.
+%
+% Only the gen_server:call/2 itself is protected: if the call exits we
+% fall back to recover(Key) in the calling process. The reply is
+% handled in the `of` section so that a re-raised exception of class
+% exit is not mistaken for a failed call and silently replaced by a
+% second recovery.
+open(Pid, Key) ->
+    try gen_server:call(Pid, open) of
+        {open_ok, Val} ->
+            Val;
+        {open_error, {T, R, S}} ->
+            erlang:raise(T, R, S)
+    catch exit:_ ->
+        % Its possible that this process was evicted just
+        % before we tried talking to it. Just fallback
+        % to a standard recovery
+        recover(Key)
+    end.
+
+
+% Notify the entry process Pid that it was accessed. This is a cast,
+% so the caller does not wait for the message to be handled.
+accessed(Pid) ->
+    gen_server:cast(Pid, accessed).
+
+
+% Send a force_refresh cast to the entry process Pid. This is
+% asynchronous; the caller does not wait for it to be handled.
+refresh(Pid) ->
+    gen_server:cast(Pid, force_refresh).
+
+
+% Entry point of the process spawned by start_link/2. Both clauses
+% build the initial #st{} and then enter the gen_server loop with
+% gen_server:enter_loop/3 instead of returning.
+%
+% No default value: record our pid on the existing ?CACHE row for Key
+% (the match on true requires that the row already exists), start an
+% opener via spawn_opener/1 and begin with an empty waiters list. val
+% and ts are left undefined.
+init({Key, undefined}) ->
+    true = ets:update_element(?CACHE, Key, {#entry.pid, self()}),
+    St = #st{
+        key = Key,
+        opener = spawn_opener(Key),
+        waiters = [],
+        accessed = 1
+    },
+    ?EVENT(started, Key),
+    gen_server:enter_loop(?MODULE, [], St);
+
+% Default value supplied: store the value and our pid on the existing
+% ?CACHE row and add a {Ts, Key, Pid} row to ?LRU. The state starts
+% with val already set to an open_ok reply and waiters set to
+% undefined; the opener field holds the result of start_timer/0
+% (defined elsewhere, presumably a timer reference - confirm).
+init({Key, Default}) ->
+    Updates = [
+        {#entry.val, Default},
+        {#entry.pid, self()}
+    ],
+    NewTs = os:timestamp(),
+    true = ets:update_element(?CACHE, Key, Updates),
+    % The LRU row is a 1-tuple, so the whole {Ts, Key, Pid} triple is
+    % the ets key.
+    true = ets:insert(?LRU, {{NewTs, Key, self()}}),
+    St = #st{
+        key = Key,
+        val = {open_ok, {ok, Default}},
+        opener = start_timer(),
+        waiters = undefined,
+        ts = NewTs,
+        accessed = 1
+    },
+    ?EVENT(default_started, Key),
+    gen_server:enter_loop(?MODULE, [], St).
+
+
+% gen_server terminate callback. Removes the rows owned by this
+% process from ?CACHE and ?LRU and kills the opener if it is a pid.
+% Always returns ok.
+terminate(_Reason, St) ->
+    #st{
+        key = Key,
+        opener = Pid,
+        ts = Ts
+    } = St,
+    % We may have already deleted our cache entry
+    % during shutdown
+    % The pattern only matches a row for Key that still carries our
+    % own pid; the "< 2" match asserts at most one row was deleted.
+    Pattern = #entry{key = Key, pid = self(), _ = '_'},
+    CacheMSpec = [{Pattern, [], [true]}],
+    true = ets:select_delete(?CACHE, CacheMSpec) < 2,
+    % We may have already deleted our LRU entry
+    % during shutdown
+    % Ts is undefined when no LRU row was ever recorded in the state,
+    % in which case there is nothing to delete.
+    if Ts == undefined -> ok; true ->
+        LruMSpec = [{{{Ts, Key, self()}}, [], [true]}],
+        true = ets:select_delete(?LRU, LruMSpec) < 2
+    end,
+    % Blow away any current opener if it exists
+    % The opener field is not always a pid (init/1 can store the
+    % result of start_timer/0 there), hence the is_pid/1 check.
+    if not is_pid(Pid) -> ok; true ->
+        catch exit(Pid, kill)
+    end,
+    ok.
+
+
+% gen_server call handler.
+%
+% open, no value yet: do not reply now; remember the caller in the
+% waiters list (presumably answered later once a value is available -
+% that code is not visible here).
+handle_call(open, From, #st{val = undefined} = St) ->
+    NewSt = St#st{
+        waiters = [From | St#st.waiters]
+    },
+    {noreply, NewSt};
+
+% open, value present: reply immediately with the stored val term,
+% which open/2 expects to be {open_ok, _} or {open_error, _}.
+handle_call(open, _From, St) ->
+    {reply, St#st.val, St};
+
+% shutdown: call remove_from_cache/1 (defined elsewhere), reply ok and
+% stop with reason normal.
+handle_call(shutdown, _From, St) ->
+    remove_from_cache(St),
+    {stop, normal, ok, St};
+
+% Any other request is a protocol error: reply {bad_call, Msg} and
+% stop the process with the same term as the exit reason.
+handle_call(Msg, _From, St) ->
+    {stop, {bad_call, Msg}, {bad_call, Msg}, St}.
+
+
+handle_cast(accessed, St) ->
 
 Review comment:
   Yep, noticed that. It's drain(), update_lru(), wait_for_message (in
handle_cast), drain(), update_lru(), wait_for_message, and so on. The test
driver was basically always updating, so the message queue sizes were in a
steady state and their lengths were basically determined by how many messages
the test driver could send while an update was being processed.
   
   I was mainly thinking of the ?LRU ets table as the bottleneck for updates
(since it's a table with O(log n) updates that is updated centrally), and
suggested limiting the maximum rate of updates from each entry, so that no
matter how fast `accessed` messages are cast, each entry gets at most 1 update
per second. I'm thinking this cache operates more on a 1 second to 1 minute
time scale, rather than a millisecond or microsecond one, so there is no point
in updating it that often.
   
   Not sure how expensive message_queue_len is, but that might be an
interesting approach. I wonder if it would lead to starvation, though. The
queue length doesn't tell us which messages are in the queue. The sender might
decide not to send an `accessed` message while the queue is actually filled
with other messages, so the entry process never gets an accessed message.
 
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
[email protected]


With regards,
Apache Git Services

[GitHub] nickva commented on a change in pull request #610: Optimize ddoc cache

Reply via email to