Updated Branches:
  refs/heads/1.3.x 19c82720d -> 9e8782fa1

(COUCHDB-1305) isolate db process crashes

Merge of 1305-fix-isolate-db-crashes

[@davisp: I'll keep doing this nonsense until we allow merge commits!]

Note that there was previously a small error where the couch_sys_dbs ETS
table was being searched for a #db record rather than the db name.

That's fixed here because I saw it. Sorry for not making it a
separate commit.

Squashed commit of the following:

commit f9e4e8a6e426d1569fac4cd707bbd393b102147d
Author: Randall Leeds <rand...@apache.org>
Date:   Sat Jan 26 06:28:20 2013 -0800

    don't handle case clause when death happens anyway

    Presumably the reason for the process exit is already logged by
    the exiting process (unlike an unexpected message of another sort
    which may not have been logged already).

commit 10a052a3eddbd7e89b553966895ee38a9ce439d4
Author: Randall Leeds <rand...@apache.org>
Date:   Sat Jan 26 06:25:23 2013 -0800

    consolidate two similar handle_info clauses

commit 6f3feb09e347dfe0c6812fd71e3f40d15d8d1ced
Author: Randall Leeds <rand...@apache.org>
Date:   Sat Jan 26 04:35:35 2013 -0800

    isolate db process crashes in couch_server

    closes COUCHDB-1305


Project: http://git-wip-us.apache.org/repos/asf/couchdb/repo
Commit: http://git-wip-us.apache.org/repos/asf/couchdb/commit/9e8782fa
Tree: http://git-wip-us.apache.org/repos/asf/couchdb/tree/9e8782fa
Diff: http://git-wip-us.apache.org/repos/asf/couchdb/diff/9e8782fa

Branch: refs/heads/1.3.x
Commit: 9e8782fa1d5a0074272ea2d319ee658828e5da90
Parents: 19c8272
Author: Randall Leeds <rand...@apache.org>
Authored: Sat Jan 26 12:39:58 2013 -0800
Committer: Randall Leeds <rand...@apache.org>
Committed: Sat Jan 26 12:51:57 2013 -0800

----------------------------------------------------------------------
 src/couchdb/couch_server.erl |   51 ++++++++++++++++++++++++++-----------
 1 files changed, 36 insertions(+), 15 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/couchdb/blob/9e8782fa/src/couchdb/couch_server.erl
----------------------------------------------------------------------
diff --git a/src/couchdb/couch_server.erl b/src/couchdb/couch_server.erl
index 6e25235..8189761 100644
--- a/src/couchdb/couch_server.erl
+++ b/src/couchdb/couch_server.erl
@@ -426,27 +426,48 @@ code_change(_OldVsn, State, _Extra) ->
     
 handle_info({'EXIT', _Pid, config_change}, Server) ->
     {noreply, shutdown, Server};
-handle_info({'EXIT', Pid, snappy_nif_not_loaded}, Server) ->
+handle_info({'EXIT', Pid, Reason}, Server) ->
     Server2 = case ets:lookup(couch_dbs_by_pid, Pid) of
     [{Pid, Db}] ->
-        [{Db, {opening, Pid, Froms}}] = ets:lookup(couch_dbs_by_name, Db),
-        Msg = io_lib:format("To open the database `~s`, Apache CouchDB "
-            "must be built with Erlang OTP R13B04 or higher.", [Db]),
-        ?LOG_ERROR(Msg, []),
-        lists:foreach(
-            fun(F) -> gen_server:reply(F, {bad_otp_release, Msg}) end,
-            Froms),
-        true = ets:delete(couch_dbs_by_name, Db),
-        true = ets:delete(couch_dbs_by_pid, Pid),
-        case ets:lookup(couch_sys_dbs, Db) of
+        DbName = Db#db.name,
+
+        % If the Pid is known, the name should be as well.
+        % If not, that's an error, which is why there is no [] clause.
+        case ets:lookup(couch_dbs_by_name, DbName) of
+        [{_, {opening, Pid, Froms}}] ->
+            Msg = case Reason of
+            snappy_nif_not_loaded ->
+                io_lib:format(
+                    "To open the database `~s`, Apache CouchDB "
+                    "must be built with Erlang OTP R13B04 or higher.",
+                    [Db]
+                );
+            true ->
+                io_lib:format("Error opening database ~p: ~p", [DbName, Reason])
+            end,
+            ?LOG_ERROR(Msg, []),
+            lists:foreach(
+              fun(F) -> gen_server:reply(F, {bad_otp_release, Msg}) end,
+              Froms
+            );
+        [{_, {opened, Pid, LruTime}}] ->
+            ?LOG_ERROR(
+                "Unexpected exit of database process ~p [~p]: ~p",
+                [Pid, DbName, Reason]
+            ),
+            true = ets:delete(couch_dbs_by_lru, LruTime)
+        end,
+
+        true = ets:delete(couch_dbs_by_pid, DbName),
+        true = ets:delete(couch_dbs_by_name, DbName),
+
+        case ets:lookup(couch_sys_dbs, DbName) of
         [{Db, _}] ->
-            true = ets:delete(couch_sys_dbs, Db),
+            true = ets:delete(couch_sys_dbs, DbName),
             Server;
         [] ->
             Server#server{dbs_open = Server#server.dbs_open - 1}
-        end;
-    _ ->
-        Server
+        end
     end,
     {noreply, Server2};
 handle_info(Error, _Server) ->

Reply via email to